├── Debug ├── makefile ├── nsightbuilddata ├── objects.mk ├── sources.mk └── src │ ├── darkSrc │ └── subdir.mk │ ├── feature │ └── subdir.mk │ ├── matching │ └── subdir.mk │ ├── subdir.mk │ └── thirdPart │ ├── munkres │ ├── adapters │ │ └── subdir.mk │ └── subdir.mk │ └── subdir.mk ├── README.md ├── RUNNINGDATA └── tensor_networks │ └── 111.meta └── src ├── VideoTracker.cpp ├── VideoTracker.h ├── darkSrc ├── activation_kernels.cu ├── activation_layer.c ├── activation_layer.h ├── activations.c ├── activations.h ├── avgpool_layer.c ├── avgpool_layer.h ├── avgpool_layer_kernels.cu ├── batchnorm_layer.c ├── batchnorm_layer.h ├── blas.c ├── blas.h ├── blas_kernels.cu ├── box.c ├── box.h ├── col2im.c ├── col2im.h ├── col2im_kernels.cu ├── connected_layer.c ├── connected_layer.h ├── convolutional_kernels.cu ├── convolutional_layer.c ├── convolutional_layer.h ├── cost_layer.c ├── cost_layer.h ├── crnn_layer.c ├── crnn_layer.h ├── crop_layer.c ├── crop_layer.h ├── crop_layer_kernels.cu ├── cuda.c ├── cuda.h ├── data.c ├── data.h ├── deconvolutional_kernels.cu ├── deconvolutional_layer.c ├── deconvolutional_layer.h ├── demo.c ├── demo.h ├── detection_layer.c ├── detection_layer.h ├── dropout_layer.c ├── dropout_layer.h ├── dropout_layer_kernels.cu ├── gemm.c ├── gemm.h ├── gettimeofday.c ├── gettimeofday.h ├── gru_layer.c ├── gru_layer.h ├── im2col.c ├── im2col.h ├── im2col_kernels.cu ├── image.c ├── image.h ├── layer.c ├── layer.h ├── list.c ├── list.h ├── local_layer.c ├── local_layer.h ├── matrix.c ├── matrix.h ├── maxpool_layer.c ├── maxpool_layer.h ├── maxpool_layer_kernels.cu ├── network.c ├── network.h ├── network_kernels.cu ├── normalization_layer.c ├── normalization_layer.h ├── option_list.c ├── option_list.h ├── parser.c ├── parser.h ├── region_layer.c ├── region_layer.h ├── reorg_layer.c ├── reorg_layer.h ├── rnn_layer.c ├── rnn_layer.h ├── route_layer.c ├── route_layer.h ├── shortcut_layer.c ├── shortcut_layer.h ├── softmax_layer.c ├── softmax_layer.h ├── 
stb_image.h ├── stb_image_write.h ├── tree.c ├── tree.h ├── utils.c └── utils.h ├── errmsg.cpp ├── errmsg.h ├── feature ├── FeatureTensor.cpp ├── FeatureTensor.h ├── dataType.h ├── model.cpp └── model.h ├── main.cpp ├── matching ├── kalmanfilter.cpp ├── kalmanfilter.h ├── linear_assignment.cpp ├── linear_assignment.h ├── nn_matching.cpp ├── nn_matching.h ├── track.cpp ├── track.h ├── tracker.cpp └── tracker.h └── thirdPart ├── hungarianoper.cpp ├── hungarianoper.h └── munkres ├── adapters ├── adapter.cpp ├── adapter.h ├── boostmatrixadapter.cpp └── boostmatrixadapter.h ├── matrix.h ├── munkres.cpp └── munkres.h /Debug/makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | -include ../makefile.init 6 | 7 | RM := rm -rf 8 | 9 | # All of the sources participating in the build are defined here 10 | -include sources.mk 11 | -include subdir.mk 12 | -include src/thirdPart/munkres/subdir.mk 13 | -include src/thirdPart/munkres/adapters/subdir.mk 14 | -include src/thirdPart/subdir.mk 15 | -include src/matching/subdir.mk 16 | -include src/subdir.mk 17 | -include src/feature/subdir.mk 18 | -include src/darkSrc/subdir.mk 19 | 20 | OS_SUFFIX := $(subst Linux,linux,$(subst Darwin/x86_64,darwin,$(shell uname -s)/$(shell uname -m))) 21 | 22 | -include objects.mk 23 | 24 | ifneq ($(MAKECMDGOALS),clean) 25 | ifneq ($(strip $(CC_DEPS)),) 26 | -include $(CC_DEPS) 27 | endif 28 | ifneq ($(strip $(C++_DEPS)),) 29 | -include $(C++_DEPS) 30 | endif 31 | ifneq ($(strip $(C_UPPER_DEPS)),) 32 | -include $(C_UPPER_DEPS) 33 | endif 34 | ifneq ($(strip $(CXX_DEPS)),) 35 | -include $(CXX_DEPS) 36 | endif 37 | ifneq ($(strip $(CU_DEPS)),) 38 | -include $(CU_DEPS) 39 | endif 40 | ifneq ($(strip $(CPP_DEPS)),) 41 | -include $(CPP_DEPS) 42 | 
endif 43 | ifneq ($(strip $(C_DEPS)),) 44 | -include $(C_DEPS) 45 | endif 46 | endif 47 | 48 | -include ../makefile.defs 49 | 50 | # OpenGL+GLUT OS-specific define 51 | ifeq ($(shell uname -s),Darwin) 52 | GLUT_LIBS := -Xlinker -framework -Xlinker GLUT -Xlinker -framework -Xlinker OpenGL 53 | else 54 | GLUT_LIBS := -lGL -lGLU -lglut 55 | endif 56 | 57 | 58 | # Add inputs and outputs from these tool invocations to the build variables 59 | 60 | # All Target 61 | all: VideoTracking 62 | 63 | # Tool invocations 64 | VideoTracking: $(OBJS) $(USER_OBJS) 65 | @echo 'Building target: $@' 66 | @echo 'Invoking: NVCC Linker' 67 | /usr/local/cuda-8.0/bin/nvcc --cudart static -L/usr/local/opencv3/lib -L/home/zy/Downloads/tensorflow-1.4.0/bazel-bin/tensorflow --relocatable-device-code=false -gencode arch=compute_61,code=compute_61 -gencode arch=compute_61,code=sm_61 -link -o "VideoTracking" $(OBJS) $(USER_OBJS) $(LIBS) 68 | @echo 'Finished building target: $@' 69 | @echo ' ' 70 | 71 | # Other Targets 72 | clean: 73 | -$(RM) $(CC_DEPS)$(C++_DEPS)$(EXECUTABLES)$(C_UPPER_DEPS)$(CXX_DEPS)$(OBJS)$(CU_DEPS)$(CPP_DEPS)$(C_DEPS) VideoTracking 74 | -@echo ' ' 75 | 76 | .PHONY: all clean dependents 77 | .SECONDARY: 78 | 79 | -include ../makefile.targets 80 | -------------------------------------------------------------------------------- /Debug/nsightbuilddata: -------------------------------------------------------------------------------- 1 | Local@Native 2 | Local@Native 3 | Local@Native 4 | Local@Native 5 | Local@Native 6 | Local@Native 7 | Local@Native 8 | Local@Native 9 | Local@Native 10 | Local@Native 11 | Local@Native 12 | Local@Native 13 | Local@Native 14 | Local@Native 15 | Local@Native 16 | Local@Native 17 | Local@Native 18 | Local@Native 19 | Local@Native 20 | Local@Native 21 | Local@Native 22 | Local@Native 23 | Local@Native 24 | Local@Native 25 | Local@Native 26 | Local@Native 27 | Local@Native 28 | Local@Native 29 | Local@Native 30 | Local@Native 31 | Local@Native 32 | 
Local@Native 33 | Local@Native 34 | -------------------------------------------------------------------------------- /Debug/objects.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | USER_OBJS := 6 | 7 | LIBS := -lopencv_core -lopencv_highgui -lopencv_imgcodecs -lopencv_videoio -lopencv_imgproc -ltensorflow_cc -lcuda -lcudart -lcublas -lcurand -lcudnn 8 | 9 | -------------------------------------------------------------------------------- /Debug/sources.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | CU_SRCS := 6 | C_UPPER_SRCS := 7 | CXX_SRCS := 8 | C++_SRCS := 9 | OBJ_SRCS := 10 | CC_SRCS := 11 | ASM_SRCS := 12 | CPP_SRCS := 13 | C_SRCS := 14 | O_SRCS := 15 | S_UPPER_SRCS := 16 | CC_DEPS := 17 | C++_DEPS := 18 | EXECUTABLES := 19 | C_UPPER_DEPS := 20 | CXX_DEPS := 21 | OBJS := 22 | CU_DEPS := 23 | CPP_DEPS := 24 | C_DEPS := 25 | 26 | # Every subdirectory with source files must be described here 27 | SUBDIRS := \ 28 | src/thirdPart/munkres \ 29 | src/thirdPart/munkres/adapters \ 30 | src/thirdPart \ 31 | src/matching \ 32 | src \ 33 | src/feature \ 34 | src/darkSrc \ 35 | 36 | -------------------------------------------------------------------------------- /Debug/src/feature/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 
3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/feature/FeatureTensor.cpp \ 8 | ../src/feature/model.cpp 9 | 10 | OBJS += \ 11 | ./src/feature/FeatureTensor.o \ 12 | ./src/feature/model.o 13 | 14 | CPP_DEPS += \ 15 | ./src/feature/FeatureTensor.d \ 16 | ./src/feature/model.d 17 | 18 | 19 | # Each subdirectory must supply rules for building sources it contributes 20 | src/feature/%.o: ../src/feature/%.cpp 21 | @echo 'Building file: $<' 22 | @echo 'Invoking: NVCC Compiler' 23 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/feature" -M -o "$(@:%.o=%.d)" "$<" 24 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 25 | @echo 'Finished building: $<' 26 | @echo ' ' 27 | 28 | 29 | -------------------------------------------------------------------------------- 
/Debug/src/matching/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/matching/kalmanfilter.cpp \ 8 | ../src/matching/linear_assignment.cpp \ 9 | ../src/matching/nn_matching.cpp \ 10 | ../src/matching/track.cpp \ 11 | ../src/matching/tracker.cpp 12 | 13 | OBJS += \ 14 | ./src/matching/kalmanfilter.o \ 15 | ./src/matching/linear_assignment.o \ 16 | ./src/matching/nn_matching.o \ 17 | ./src/matching/track.o \ 18 | ./src/matching/tracker.o 19 | 20 | CPP_DEPS += \ 21 | ./src/matching/kalmanfilter.d \ 22 | ./src/matching/linear_assignment.d \ 23 | ./src/matching/nn_matching.d \ 24 | ./src/matching/track.d \ 25 | ./src/matching/tracker.d 26 | 27 | 28 | # Each subdirectory must supply rules for building sources it contributes 29 | src/matching/%.o: ../src/matching/%.cpp 30 | @echo 'Building file: $<' 31 | @echo 'Invoking: NVCC Compiler' 32 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/matching" -M -o "$(@:%.o=%.d)" "$<" 33 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv 
-I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 34 | @echo 'Finished building: $<' 35 | @echo ' ' 36 | 37 | 38 | -------------------------------------------------------------------------------- /Debug/src/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/VideoTracker.cpp \ 8 | ../src/errmsg.cpp \ 9 | ../src/main.cpp 10 | 11 | OBJS += \ 12 | ./src/VideoTracker.o \ 13 | ./src/errmsg.o \ 14 | ./src/main.o 15 | 16 | CPP_DEPS += \ 17 | ./src/VideoTracker.d \ 18 | ./src/errmsg.d \ 19 | ./src/main.d 20 | 21 | 22 | # Each subdirectory must supply rules for building sources it contributes 23 | src/%.o: ../src/%.cpp 24 | @echo 'Building file: $<' 25 | @echo 'Invoking: NVCC Compiler' 26 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 
-gencode arch=compute_61,code=sm_61 -odir "src" -M -o "$(@:%.o=%.d)" "$<" 27 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 28 | @echo 'Finished building: $<' 29 | @echo ' ' 30 | 31 | 32 | -------------------------------------------------------------------------------- /Debug/src/thirdPart/munkres/adapters/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 
3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/thirdPart/munkres/adapters/adapter.cpp \ 8 | ../src/thirdPart/munkres/adapters/boostmatrixadapter.cpp 9 | 10 | OBJS += \ 11 | ./src/thirdPart/munkres/adapters/adapter.o \ 12 | ./src/thirdPart/munkres/adapters/boostmatrixadapter.o 13 | 14 | CPP_DEPS += \ 15 | ./src/thirdPart/munkres/adapters/adapter.d \ 16 | ./src/thirdPart/munkres/adapters/boostmatrixadapter.d 17 | 18 | 19 | # Each subdirectory must supply rules for building sources it contributes 20 | src/thirdPart/munkres/adapters/%.o: ../src/thirdPart/munkres/adapters/%.cpp 21 | @echo 'Building file: $<' 22 | @echo 'Invoking: NVCC Compiler' 23 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/thirdPart/munkres/adapters" -M -o "$(@:%.o=%.d)" "$<" 24 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 
--compile -x c++ -o "$@" "$<" 25 | @echo 'Finished building: $<' 26 | @echo ' ' 27 | 28 | 29 | -------------------------------------------------------------------------------- /Debug/src/thirdPart/munkres/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/thirdPart/munkres/munkres.cpp 8 | 9 | OBJS += \ 10 | ./src/thirdPart/munkres/munkres.o 11 | 12 | CPP_DEPS += \ 13 | ./src/thirdPart/munkres/munkres.d 14 | 15 | 16 | # Each subdirectory must supply rules for building sources it contributes 17 | src/thirdPart/munkres/%.o: ../src/thirdPart/munkres/%.cpp 18 | @echo 'Building file: $<' 19 | @echo 'Invoking: NVCC Compiler' 20 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/thirdPart/munkres" -M -o "$(@:%.o=%.d)" "$<" 21 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive 
-I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 22 | @echo 'Finished building: $<' 23 | @echo ' ' 24 | 25 | 26 | -------------------------------------------------------------------------------- /Debug/src/thirdPart/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/thirdPart/hungarianoper.cpp 8 | 9 | OBJS += \ 10 | ./src/thirdPart/hungarianoper.o 11 | 12 | CPP_DEPS += \ 13 | ./src/thirdPart/hungarianoper.d 14 | 15 | 16 | # Each subdirectory must supply rules for building sources it contributes 17 | src/thirdPart/%.o: ../src/thirdPart/%.cpp 18 | @echo 'Building file: $<' 19 | @echo 'Invoking: NVCC Compiler' 20 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/thirdPart" -M -o "$(@:%.o=%.d)" "$<" 21 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include 
-I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 22 | @echo 'Finished building: $<' 23 | @echo ' ' 24 | 25 | 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepSort 2 | The c++ version of https://github.com/nwojke/deep_sort. 3 | 4 | # dependency 5 | - opencv3 6 | - tensorflow 1.4 7 | - cuda 8.0 8 | - cudnn 6.0 9 | 10 | develop tool: Nsight Eclipse Edition Version:9.1 11 | 12 | # PREPARE DATA & RUN 13 | Before you run, you need: 14 | 1. download YOLOV2's related model files https://pjreddie.com. Copy the **cfg**, **data** directory and **yolo.weights** to the **RUNNINGDATA/darknet_data**. 15 | 2. download the CNN checkpoint file **resources/networks/mars-small128.ckpt-68577** and put to **RUNNINGDATA/tensor_networks**. 16 | ``` 17 | cd Debug 18 | make 19 | cd .. #into the project root directory 20 | ./Debug/VideoTracking 21 | ``` 22 | 23 | ## OPTION in main.cpp 24 | There are three options in main.cpp which you cannot define simultaneously. 25 | They are `RUNGT` `RUNMOTTENSOR` and `RUNLOCALVIDEO`. 26 | - `RUNGT`: you need download MOT dataset and define the specific directory by `MOTDIR` in the main.cpp. FUNCTION: tracking mot dataset using mot groundtruth. 27 | - `RUNMOTTENSOR`: like `RUNGT`. FUNCTION: tracking mot dataset using mot detection groundtruth but generate feature by tensorflow. 28 | - `RUNLOCALVIDEO`: you need define the local video's path by `VIDEO` in the main.cpp. FUNCTION: tracking local video without any known data.
29 | 30 | 31 | -------------------------------------------------------------------------------- /RUNNINGDATA/tensor_networks/111.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bitzy/DeepSort/db8b64e594c97493a0c289a7ce22fa69530b9075/RUNNINGDATA/tensor_networks/111.meta -------------------------------------------------------------------------------- /src/VideoTracker.h: -------------------------------------------------------------------------------- 1 | /* 2 | * VideoTracker.h 3 | * 4 | * Created on: Dec 15, 2017 5 | * Author: zy 6 | */ 7 | 8 | #ifndef VIDEOTRACKER_H_ 9 | #define VIDEOTRACKER_H_ 10 | #include 11 | 12 | /** 13 | * VideoTracker 14 | * 15 | * run: 16 | * -vpath: the video's path. 17 | * -showSwitch: whether show the tracking result. 18 | * 19 | * run_sequenceWithGT: 20 | * -motDir: the path of MOT directory. 21 | * -showSwitch: show or not. 22 | * 23 | * run_sequence: 24 | * -motDir: the path of MOT directory. 25 | * -showSwitch: show or not. 
26 | */ 27 | class VideoTracker { 28 | public: 29 | bool run(const char* vpath, bool showSwitch); 30 | bool run_sequenceWithGT(const char* motDir, bool showSwitch); 31 | bool run_sequence(const char* motDir, bool showSwitch); 32 | std::string showErrMsg(); 33 | 34 | private: 35 | bool vShowFlag; 36 | std::string errorMsg; 37 | 38 | void videoLoad(std::string sequence_dir,int &min_frame_idx, int &max_frame_idx); 39 | std::string loadFromBUFFERFILE(); 40 | }; 41 | 42 | 43 | #endif /* VIDEOTRACKER_H_ */ 44 | 45 | -------------------------------------------------------------------------------- /src/darkSrc/activation_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "activations.h" 7 | #include "cuda.h" 8 | } 9 | 10 | 11 | __device__ float lhtan_activate_kernel(float x) 12 | { 13 | if(x < 0) return .001*x; 14 | if(x > 1) return .001*(x-1) + 1; 15 | return x; 16 | } 17 | __device__ float lhtan_gradient_kernel(float x) 18 | { 19 | if(x > 0 && x < 1) return 1; 20 | return .001; 21 | } 22 | 23 | __device__ float hardtan_activate_kernel(float x) 24 | { 25 | if (x < -1) return -1; 26 | if (x > 1) return 1; 27 | return x; 28 | } 29 | __device__ float linear_activate_kernel(float x){return x;} 30 | __device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));} 31 | __device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;} 32 | __device__ float relu_activate_kernel(float x){return x*(x>0);} 33 | __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} 34 | __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01*x;} 35 | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;} 36 | __device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1*x;} 37 | __device__ float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);} 38 | __device__ float plse_activate_kernel(float x) 39 | { 40 | if(x < -4) return .01 * (x + 4); 41 | if(x > 4) return .01 * (x - 4) + 1; 42 | return .125*x + .5; 43 | } 44 | __device__ float stair_activate_kernel(float x) 45 | { 46 | int n = floor(x); 47 | if (n%2 == 0) return floor(x/2.); 48 | else return (x - n) + floor(x/2.); 49 | } 50 | 51 | 52 | __device__ float hardtan_gradient_kernel(float x) 53 | { 54 | if (x > -1 && x < 1) return 1; 55 | return 0; 56 | } 57 | __device__ float linear_gradient_kernel(float x){return 1;} 58 | __device__ float logistic_gradient_kernel(float x){return (1-x)*x;} 59 | __device__ float loggy_gradient_kernel(float x) 60 | { 61 | float y = (x+1.)/2.; 62 | return 2*(1-y)*y; 63 | } 64 | __device__ float relu_gradient_kernel(float x){return (x>0);} 65 | __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} 66 | __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;} 67 | __device__ float ramp_gradient_kernel(float x){return (x>0)+.1;} 68 | __device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;} 69 | __device__ float tanh_gradient_kernel(float x){return 1-x*x;} 70 | __device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01 : .125;} 71 | __device__ float stair_gradient_kernel(float x) 72 | { 73 | if (floor(x) == x) return 0; 74 | return 1; 75 | } 76 | 77 | __device__ float activate_kernel(float x, ACTIVATION a) 78 | { 79 | switch(a){ 80 | case LINEAR: 81 | return linear_activate_kernel(x); 82 | case LOGISTIC: 83 | return logistic_activate_kernel(x); 84 | case LOGGY: 85 | return loggy_activate_kernel(x); 86 | case RELU: 87 | return relu_activate_kernel(x); 88 | case ELU: 89 | return elu_activate_kernel(x); 90 | case RELIE: 91 | return relie_activate_kernel(x); 92 | case RAMP: 93 | return ramp_activate_kernel(x); 94 | case LEAKY: 95 | return leaky_activate_kernel(x); 96 | case TANH: 97 | return tanh_activate_kernel(x); 98 | case PLSE: 99 | return plse_activate_kernel(x); 100 | case STAIR: 101 | return stair_activate_kernel(x); 102 | case HARDTAN: 103 | return hardtan_activate_kernel(x); 104 | case LHTAN: 105 | return lhtan_activate_kernel(x); 106 | } 107 | return 0; 108 | } 109 | 110 | __device__ float gradient_kernel(float x, ACTIVATION a) 111 | { 112 | switch(a){ 113 | case LINEAR: 114 | return linear_gradient_kernel(x); 115 | case LOGISTIC: 116 | return logistic_gradient_kernel(x); 117 | case LOGGY: 118 | return loggy_gradient_kernel(x); 119 | case RELU: 120 | return relu_gradient_kernel(x); 121 | case ELU: 122 | return elu_gradient_kernel(x); 123 | case RELIE: 124 | return relie_gradient_kernel(x); 125 | case RAMP: 126 | return ramp_gradient_kernel(x); 127 | case LEAKY: 128 | return leaky_gradient_kernel(x); 129 | case TANH: 130 | return tanh_gradient_kernel(x); 131 | case PLSE: 132 | return plse_gradient_kernel(x); 133 | case STAIR: 134 | return stair_gradient_kernel(x); 135 | case HARDTAN: 136 | return hardtan_gradient_kernel(x); 137 | case LHTAN: 138 | return lhtan_gradient_kernel(x); 139 | } 140 | return 0; 141 | } 142 | 143 | __global__ void activate_array_kernel(float *x, int n, ACTIVATION a) 144 | { 145 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + 
threadIdx.x; 146 | if(i < n) x[i] = activate_kernel(x[i], a); 147 | } 148 | 149 | __global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) 150 | { 151 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 152 | if(i < n) delta[i] *= gradient_kernel(x[i], a); 153 | } 154 | 155 | extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a) 156 | { 157 | activate_array_kernel<<>>(x, n, a); 158 | check_error(cudaPeekAtLastError()); 159 | } 160 | 161 | extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta) 162 | { 163 | gradient_array_kernel<<>>(x, n, a, delta); 164 | check_error(cudaPeekAtLastError()); 165 | } 166 | -------------------------------------------------------------------------------- /src/darkSrc/activation_layer.c: -------------------------------------------------------------------------------- 1 | #include "activation_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include "blas.h" 5 | #include "gemm.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation) 13 | { 14 | layer l = {0}; 15 | l.type = ACTIVE; 16 | 17 | l.inputs = inputs; 18 | l.outputs = inputs; 19 | l.batch=batch; 20 | 21 | l.output = calloc(batch*inputs, sizeof(float*)); 22 | l.delta = calloc(batch*inputs, sizeof(float*)); 23 | 24 | l.forward = forward_activation_layer; 25 | l.backward = backward_activation_layer; 26 | #ifdef GPU 27 | l.forward_gpu = forward_activation_layer_gpu; 28 | l.backward_gpu = backward_activation_layer_gpu; 29 | 30 | l.output_gpu = cuda_make_array(l.output, inputs*batch); 31 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 32 | #endif 33 | l.activation = activation; 34 | fprintf(stderr, "Activation Layer: %d inputs\n", inputs); 35 | return l; 36 | } 37 | 38 | void forward_activation_layer(layer l, network_state state) 39 | { 40 | copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 
1); 41 | activate_array(l.output, l.outputs*l.batch, l.activation); 42 | } 43 | 44 | void backward_activation_layer(layer l, network_state state) 45 | { 46 | gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); 47 | copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1); 48 | } 49 | 50 | #ifdef GPU 51 | 52 | void forward_activation_layer_gpu(layer l, network_state state) 53 | { 54 | copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); 55 | activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); 56 | } 57 | 58 | void backward_activation_layer_gpu(layer l, network_state state) 59 | { 60 | gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); 61 | copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); 62 | } 63 | #endif 64 | -------------------------------------------------------------------------------- /src/darkSrc/activation_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATION_LAYER_H 2 | #define ACTIVATION_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation); 9 | 10 | void forward_activation_layer(layer l, network_state state); 11 | void backward_activation_layer(layer l, network_state state); 12 | 13 | #ifdef GPU 14 | void forward_activation_layer_gpu(layer l, network_state state); 15 | void backward_activation_layer_gpu(layer l, network_state state); 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /src/darkSrc/activations.c: -------------------------------------------------------------------------------- 1 | #include "activations.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | char *get_activation_string(ACTIVATION a) 9 | { 10 | switch(a){ 11 | case LOGISTIC: 12 | return "logistic"; 13 | case LOGGY: 14 | return "loggy"; 15 
| case RELU: 16 | return "relu"; 17 | case ELU: 18 | return "elu"; 19 | case RELIE: 20 | return "relie"; 21 | case RAMP: 22 | return "ramp"; 23 | case LINEAR: 24 | return "linear"; 25 | case TANH: 26 | return "tanh"; 27 | case PLSE: 28 | return "plse"; 29 | case LEAKY: 30 | return "leaky"; 31 | case STAIR: 32 | return "stair"; 33 | case HARDTAN: 34 | return "hardtan"; 35 | case LHTAN: 36 | return "lhtan"; 37 | default: 38 | break; 39 | } 40 | return "relu"; 41 | } 42 | 43 | ACTIVATION get_activation(char *s) 44 | { 45 | if (strcmp(s, "logistic")==0) return LOGISTIC; 46 | if (strcmp(s, "loggy")==0) return LOGGY; 47 | if (strcmp(s, "relu")==0) return RELU; 48 | if (strcmp(s, "elu")==0) return ELU; 49 | if (strcmp(s, "relie")==0) return RELIE; 50 | if (strcmp(s, "plse")==0) return PLSE; 51 | if (strcmp(s, "hardtan")==0) return HARDTAN; 52 | if (strcmp(s, "lhtan")==0) return LHTAN; 53 | if (strcmp(s, "linear")==0) return LINEAR; 54 | if (strcmp(s, "ramp")==0) return RAMP; 55 | if (strcmp(s, "leaky")==0) return LEAKY; 56 | if (strcmp(s, "tanh")==0) return TANH; 57 | if (strcmp(s, "stair")==0) return STAIR; 58 | fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); 59 | return RELU; 60 | } 61 | 62 | float activate(float x, ACTIVATION a) 63 | { 64 | switch(a){ 65 | case LINEAR: 66 | return linear_activate(x); 67 | case LOGISTIC: 68 | return logistic_activate(x); 69 | case LOGGY: 70 | return loggy_activate(x); 71 | case RELU: 72 | return relu_activate(x); 73 | case ELU: 74 | return elu_activate(x); 75 | case RELIE: 76 | return relie_activate(x); 77 | case RAMP: 78 | return ramp_activate(x); 79 | case LEAKY: 80 | return leaky_activate(x); 81 | case TANH: 82 | return tanh_activate(x); 83 | case PLSE: 84 | return plse_activate(x); 85 | case STAIR: 86 | return stair_activate(x); 87 | case HARDTAN: 88 | return hardtan_activate(x); 89 | case LHTAN: 90 | return lhtan_activate(x); 91 | } 92 | return 0; 93 | } 94 | 95 | void activate_array(float *x, const 
int n, const ACTIVATION a) 96 | { 97 | int i; 98 | for(i = 0; i < n; ++i){ 99 | x[i] = activate(x[i], a); 100 | } 101 | } 102 | 103 | float gradient(float x, ACTIVATION a) 104 | { 105 | switch(a){ 106 | case LINEAR: 107 | return linear_gradient(x); 108 | case LOGISTIC: 109 | return logistic_gradient(x); 110 | case LOGGY: 111 | return loggy_gradient(x); 112 | case RELU: 113 | return relu_gradient(x); 114 | case ELU: 115 | return elu_gradient(x); 116 | case RELIE: 117 | return relie_gradient(x); 118 | case RAMP: 119 | return ramp_gradient(x); 120 | case LEAKY: 121 | return leaky_gradient(x); 122 | case TANH: 123 | return tanh_gradient(x); 124 | case PLSE: 125 | return plse_gradient(x); 126 | case STAIR: 127 | return stair_gradient(x); 128 | case HARDTAN: 129 | return hardtan_gradient(x); 130 | case LHTAN: 131 | return lhtan_gradient(x); 132 | } 133 | return 0; 134 | } 135 | 136 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) 137 | { 138 | int i; 139 | for(i = 0; i < n; ++i){ 140 | delta[i] *= gradient(x[i], a); 141 | } 142 | } 143 | 144 | -------------------------------------------------------------------------------- /src/darkSrc/activations.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATIONS_H 2 | #define ACTIVATIONS_H 3 | #include "cuda.h" 4 | #include "math.h" 5 | 6 | typedef enum{ 7 | LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN 8 | }ACTIVATION; 9 | 10 | ACTIVATION get_activation(char *s); 11 | 12 | char *get_activation_string(ACTIVATION a); 13 | float activate(float x, ACTIVATION a); 14 | float gradient(float x, ACTIVATION a); 15 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); 16 | void activate_array(float *x, const int n, const ACTIVATION a); 17 | #ifdef GPU 18 | void activate_array_ongpu(float *x, int n, ACTIVATION a); 19 | void gradient_array_ongpu(float *x, int n, ACTIVATION a, float 
*delta);
#endif

/* ---- Inline activation functions and their derivatives. ----
 * NOTE: the *_gradient helpers below are called with the layer's
 * *activated output* (gradient_array receives l.output), so e.g.
 * logistic_gradient(y) computes y*(1-y) from the sigmoid output y. */

/* Staircase: steps of height 1 every two units, linear in between. */
static inline float stair_activate(float x)
{
    int n = floor(x);
    if (n%2 == 0) return floor(x/2.);
    else return (x - n) + floor(x/2.);
}
/* Hard tanh: clamp x to [-1, 1]. */
static inline float hardtan_activate(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}
/* Identity. */
static inline float linear_activate(float x){return x;}
/* Sigmoid: 1/(1+e^-x), range (0, 1). */
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
/* Rescaled sigmoid, range (-1, 1). */
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
/* max(0, x), written branch-free. */
static inline float relu_activate(float x){return x*(x>0);}
/* Exponential linear unit: x for x>=0, e^x - 1 below. */
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
/* Leaky ReLU with fixed 0.01 negative slope. */
static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
/* ReLU plus an extra 0.1*x linear term (slope 1.1 above 0, 0.1 below). */
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
/* Leaky ReLU with fixed 0.1 negative slope. */
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
/* tanh computed via exp: (e^2x - 1)/(e^2x + 1). */
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
/* Piecewise-linear sigmoid approximation: slope .125 on [-4, 4],
 * slope .01 in the saturated tails. */
static inline float plse_activate(float x)
{
    if(x < -4) return .01 * (x + 4);
    if(x > 4) return .01 * (x - 4) + 1;
    return .125*x + .5;
}

/* "Leaky hard tanh": identity on [0, 1], slope .001 outside. */
static inline float lhtan_activate(float x)
{
    if(x < 0) return .001*x;
    if(x > 1) return .001*(x-1) + 1;
    return x;
}
/* Derivative of lhtan w.r.t. its input, evaluated at the output x. */
static inline float lhtan_gradient(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001;
}

/* Derivative of hardtan: 1 inside (-1, 1), 0 in the clamped region. */
static inline float hardtan_gradient(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}
static inline float linear_gradient(float x){return 1;}
/* x is the sigmoid output y, so this is y*(1-y). */
static inline float logistic_gradient(float x){return (1-x)*x;}
/* x is the loggy output in (-1, 1); map back to (0, 1) then use the
 * sigmoid derivative scaled by 2. */
static inline float loggy_gradient(float x)
{
    float y = (x+1.)/2.;
    return 2*(1-y)*y;
}
/* Slope 0 exactly on the flat integer steps, 1 on the ramps. */
static inline float stair_gradient(float x)
{
    if (floor(x) == x) return 0;
    return 1;
}
static inline float relu_gradient(float x){return (x>0);}
static inline float 
elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} 81 | static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} 82 | static inline float ramp_gradient(float x){return (x>0)+.1;} 83 | static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} 84 | static inline float tanh_gradient(float x){return 1-x*x;} 85 | static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;} 86 | 87 | #endif 88 | 89 | -------------------------------------------------------------------------------- /src/darkSrc/avgpool_layer.c: -------------------------------------------------------------------------------- 1 | #include "avgpool_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) 6 | { 7 | fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); 8 | avgpool_layer l = {0}; 9 | l.type = AVGPOOL; 10 | l.batch = batch; 11 | l.h = h; 12 | l.w = w; 13 | l.c = c; 14 | l.out_w = 1; 15 | l.out_h = 1; 16 | l.out_c = c; 17 | l.outputs = l.out_c; 18 | l.inputs = h*w*c; 19 | int output_size = l.outputs * batch; 20 | l.output = calloc(output_size, sizeof(float)); 21 | l.delta = calloc(output_size, sizeof(float)); 22 | l.forward = forward_avgpool_layer; 23 | l.backward = backward_avgpool_layer; 24 | #ifdef GPU 25 | l.forward_gpu = forward_avgpool_layer_gpu; 26 | l.backward_gpu = backward_avgpool_layer_gpu; 27 | l.output_gpu = cuda_make_array(l.output, output_size); 28 | l.delta_gpu = cuda_make_array(l.delta, output_size); 29 | #endif 30 | return l; 31 | } 32 | 33 | void resize_avgpool_layer(avgpool_layer *l, int w, int h) 34 | { 35 | l->w = w; 36 | l->h = h; 37 | l->inputs = h*w*l->c; 38 | } 39 | 40 | void forward_avgpool_layer(const avgpool_layer l, network_state state) 41 | { 42 | int b,i,k; 43 | 44 | for(b = 0; b < l.batch; ++b){ 45 | for(k = 0; k < l.c; ++k){ 46 | int out_index = k + b*l.c; 47 | l.output[out_index] = 0; 48 | for(i = 0; i < l.h*l.w; ++i){ 49 | int in_index = i + 
l.h*l.w*(k + b*l.c); 50 | l.output[out_index] += state.input[in_index]; 51 | } 52 | l.output[out_index] /= l.h*l.w; 53 | } 54 | } 55 | } 56 | 57 | void backward_avgpool_layer(const avgpool_layer l, network_state state) 58 | { 59 | int b,i,k; 60 | 61 | for(b = 0; b < l.batch; ++b){ 62 | for(k = 0; k < l.c; ++k){ 63 | int out_index = k + b*l.c; 64 | for(i = 0; i < l.h*l.w; ++i){ 65 | int in_index = i + l.h*l.w*(k + b*l.c); 66 | state.delta[in_index] += l.delta[out_index] / (l.h*l.w); 67 | } 68 | } 69 | } 70 | } 71 | 72 | -------------------------------------------------------------------------------- /src/darkSrc/avgpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef AVGPOOL_LAYER_H 2 | #define AVGPOOL_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | typedef layer avgpool_layer; 10 | 11 | image get_avgpool_image(avgpool_layer l); 12 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); 13 | void resize_avgpool_layer(avgpool_layer *l, int w, int h); 14 | void forward_avgpool_layer(const avgpool_layer l, network_state state); 15 | void backward_avgpool_layer(const avgpool_layer l, network_state state); 16 | 17 | #ifdef GPU 18 | void forward_avgpool_layer_gpu(avgpool_layer l, network_state state); 19 | void backward_avgpool_layer_gpu(avgpool_layer l, network_state state); 20 | #endif 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /src/darkSrc/avgpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "avgpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) 11 | { 12 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * 
blockDim.x + threadIdx.x; 13 | if(id >= n) return; 14 | 15 | int k = id % c; 16 | id /= c; 17 | int b = id; 18 | 19 | int i; 20 | int out_index = (k + c*b); 21 | output[out_index] = 0; 22 | for(i = 0; i < w*h; ++i){ 23 | int in_index = i + h*w*(k + b*c); 24 | output[out_index] += input[in_index]; 25 | } 26 | output[out_index] /= w*h; 27 | } 28 | 29 | __global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) 30 | { 31 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 32 | if(id >= n) return; 33 | 34 | int k = id % c; 35 | id /= c; 36 | int b = id; 37 | 38 | int i; 39 | int out_index = (k + c*b); 40 | for(i = 0; i < w*h; ++i){ 41 | int in_index = i + h*w*(k + b*c); 42 | in_delta[in_index] += out_delta[out_index] / (w*h); 43 | } 44 | } 45 | 46 | extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network_state state) 47 | { 48 | size_t n = layer.c*layer.batch; 49 | 50 | forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, state.input, layer.output_gpu); 51 | check_error(cudaPeekAtLastError()); 52 | } 53 | 54 | extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state state) 55 | { 56 | size_t n = layer.c*layer.batch; 57 | 58 | backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, state.delta, layer.delta_gpu); 59 | check_error(cudaPeekAtLastError()); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /src/darkSrc/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCHNORM_LAYER_H 2 | #define BATCHNORM_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_batchnorm_layer(int batch, int w, int h, int c); 9 | void forward_batchnorm_layer(layer l, network_state state); 10 | void backward_batchnorm_layer(layer l, network_state state); 11 | 12 | #ifdef GPU 13 | void 
forward_batchnorm_layer_gpu(layer l, network_state state); 14 | void backward_batchnorm_layer_gpu(layer l, network_state state); 15 | void pull_batchnorm_layer(layer l); 16 | void push_batchnorm_layer(layer l); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/darkSrc/blas.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | #include "math.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) 9 | { 10 | int b,i,j,k; 11 | int out_c = c/(stride*stride); 12 | 13 | for(b = 0; b < batch; ++b){ 14 | for(k = 0; k < c; ++k){ 15 | for(j = 0; j < h; ++j){ 16 | for(i = 0; i < w; ++i){ 17 | int in_index = i + w*(j + h*(k + c*b)); 18 | int c2 = k % out_c; 19 | int offset = k / out_c; 20 | int w2 = i*stride + offset % stride; 21 | int h2 = j*stride + offset / stride; 22 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 23 | if(forward) out[out_index] = x[in_index]; 24 | else out[in_index] = x[out_index]; 25 | } 26 | } 27 | } 28 | } 29 | } 30 | 31 | void flatten(float *x, int size, int layers, int batch, int forward) 32 | { 33 | float *swap = calloc(size*layers*batch, sizeof(float)); 34 | int i,c,b; 35 | for(b = 0; b < batch; ++b){ 36 | for(c = 0; c < layers; ++c){ 37 | for(i = 0; i < size; ++i){ 38 | int i1 = b*layers*size + c*size + i; 39 | int i2 = b*layers*size + i*layers + c; 40 | if (forward) swap[i2] = x[i1]; 41 | else swap[i1] = x[i2]; 42 | } 43 | } 44 | } 45 | memcpy(x, swap, size*layers*batch*sizeof(float)); 46 | free(swap); 47 | } 48 | 49 | void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) 50 | { 51 | int i; 52 | for(i = 0; i < n; ++i){ 53 | c[i] = s[i]*a[i] + (1-s[i])*(b ? 
b[i] : 0); 54 | } 55 | } 56 | 57 | void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) 58 | { 59 | int stride = w1/w2; 60 | int sample = w2/w1; 61 | assert(stride == h1/h2); 62 | assert(sample == h2/h1); 63 | if(stride < 1) stride = 1; 64 | if(sample < 1) sample = 1; 65 | int minw = (w1 < w2) ? w1 : w2; 66 | int minh = (h1 < h2) ? h1 : h2; 67 | int minc = (c1 < c2) ? c1 : c2; 68 | 69 | int i,j,k,b; 70 | for(b = 0; b < batch; ++b){ 71 | for(k = 0; k < minc; ++k){ 72 | for(j = 0; j < minh; ++j){ 73 | for(i = 0; i < minw; ++i){ 74 | int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); 75 | int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); 76 | out[out_index] += add[add_index]; 77 | } 78 | } 79 | } 80 | } 81 | } 82 | 83 | void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) 84 | { 85 | float scale = 1./(batch * spatial); 86 | int i,j,k; 87 | for(i = 0; i < filters; ++i){ 88 | mean[i] = 0; 89 | for(j = 0; j < batch; ++j){ 90 | for(k = 0; k < spatial; ++k){ 91 | int index = j*filters*spatial + i*spatial + k; 92 | mean[i] += x[index]; 93 | } 94 | } 95 | mean[i] *= scale; 96 | } 97 | } 98 | 99 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) 100 | { 101 | float scale = 1./(batch * spatial - 1); 102 | int i,j,k; 103 | for(i = 0; i < filters; ++i){ 104 | variance[i] = 0; 105 | for(j = 0; j < batch; ++j){ 106 | for(k = 0; k < spatial; ++k){ 107 | int index = j*filters*spatial + i*spatial + k; 108 | variance[i] += pow((x[index] - mean[i]), 2); 109 | } 110 | } 111 | variance[i] *= scale; 112 | } 113 | } 114 | 115 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) 116 | { 117 | int b, f, i; 118 | for(b = 0; b < batch; ++b){ 119 | for(f = 0; f < filters; ++f){ 120 | for(i = 0; i < spatial; ++i){ 121 | int index = b*filters*spatial + f*spatial + i; 122 | x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + 
.000001f); 123 | } 124 | } 125 | } 126 | } 127 | 128 | void const_cpu(int N, float ALPHA, float *X, int INCX) 129 | { 130 | int i; 131 | for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; 132 | } 133 | 134 | void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) 135 | { 136 | int i; 137 | for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; 138 | } 139 | 140 | void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) 141 | { 142 | int i; 143 | for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); 144 | } 145 | 146 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) 147 | { 148 | int i; 149 | for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; 150 | } 151 | 152 | void scal_cpu(int N, float ALPHA, float *X, int INCX) 153 | { 154 | int i; 155 | for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; 156 | } 157 | 158 | void fill_cpu(int N, float ALPHA, float *X, int INCX) 159 | { 160 | int i; 161 | for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; 162 | } 163 | 164 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) 165 | { 166 | int i; 167 | for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; 168 | } 169 | 170 | void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) 171 | { 172 | int i; 173 | for(i = 0; i < n; ++i){ 174 | float diff = truth[i] - pred[i]; 175 | float abs_val = fabs(diff); 176 | if(abs_val < 1) { 177 | error[i] = diff * diff; 178 | delta[i] = diff; 179 | } 180 | else { 181 | error[i] = 2*abs_val - 1; 182 | delta[i] = (diff < 0) ? 
-1 : 1; 183 | } 184 | } 185 | } 186 | 187 | void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) 188 | { 189 | int i; 190 | for(i = 0; i < n; ++i){ 191 | float diff = truth[i] - pred[i]; 192 | error[i] = diff * diff; 193 | delta[i] = diff; 194 | } 195 | } 196 | 197 | float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) 198 | { 199 | int i; 200 | float dot = 0; 201 | for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; 202 | return dot; 203 | } 204 | 205 | void softmax(float *input, int n, float temp, float *output) 206 | { 207 | int i; 208 | float sum = 0; 209 | float largest = -FLT_MAX; 210 | for(i = 0; i < n; ++i){ 211 | if(input[i] > largest) largest = input[i]; 212 | } 213 | for(i = 0; i < n; ++i){ 214 | float e = exp(input[i]/temp - largest/temp); 215 | sum += e; 216 | output[i] = e; 217 | } 218 | for(i = 0; i < n; ++i){ 219 | output[i] /= sum; 220 | } 221 | } 222 | 223 | -------------------------------------------------------------------------------- /src/darkSrc/blas.h: -------------------------------------------------------------------------------- 1 | #ifndef BLAS_H 2 | #define BLAS_H 3 | void flatten(float *x, int size, int layers, int batch, int forward); 4 | void pm(int M, int N, float *A); 5 | float *random_matrix(int rows, int cols); 6 | void time_random_matrix(int TA, int TB, int m, int k, int n); 7 | void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); 8 | 9 | void test_blas(); 10 | 11 | void const_cpu(int N, float ALPHA, float *X, int INCX); 12 | void constrain_ongpu(int N, float ALPHA, float * X, int INCX); 13 | void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 14 | void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); 15 | 16 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 17 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); 18 | void scal_cpu(int N, float ALPHA, float *X, int INCX); 19 | void 
fill_cpu(int N, float ALPHA, float * X, int INCX); 20 | float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); 21 | void test_gpu_blas(); 22 | void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); 23 | 24 | void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); 25 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); 26 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); 27 | 28 | void scale_bias(float *output, float *scales, int batch, int n, int size); 29 | void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); 30 | void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); 31 | void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); 32 | void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); 33 | 34 | void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); 35 | void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); 36 | void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); 37 | 38 | void softmax(float *input, int n, float temp, float *output); 39 | 40 | #ifdef GPU 41 | #include "cuda.h" 42 | 43 | void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); 44 | void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); 45 | void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY); 46 | void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); 47 | void scal_ongpu(int N, float ALPHA, float * X, int INCX); 48 | void supp_ongpu(int N, float ALPHA, float 
* X, int INCX); 49 | void mask_ongpu(int N, float * X, float mask_num, float * mask); 50 | void const_ongpu(int N, float ALPHA, float *X, int INCX); 51 | void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 52 | void mul_ongpu(int N, float *X, int INCX, float *Y, int INCY); 53 | void fill_ongpu(int N, float ALPHA, float * X, int INCX); 54 | 55 | void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); 56 | void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); 57 | void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); 58 | 59 | void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); 60 | 61 | void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); 62 | void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); 63 | 64 | void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); 65 | void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); 66 | void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); 67 | void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); 68 | void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); 69 | void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); 70 | void add_bias_gpu(float *output, float *biases, int batch, int n, int size); 71 | void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); 72 | 73 | void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); 74 | void l2_gpu(int n, float *pred, float *truth, float 
*delta, float *error); 75 | void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); 76 | void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); 77 | void mult_add_into_gpu(int num, float *a, float *b, float *c); 78 | 79 | void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); 80 | 81 | void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output); 82 | void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); 83 | 84 | void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out); 85 | 86 | #endif 87 | #endif 88 | -------------------------------------------------------------------------------- /src/darkSrc/box.h: -------------------------------------------------------------------------------- 1 | #ifndef BOX_H 2 | #define BOX_H 3 | 4 | typedef struct{ 5 | float x, y, w, h; 6 | } box; 7 | 8 | typedef struct{ 9 | float dx, dy, dw, dh; 10 | } dbox; 11 | 12 | box float_to_box(float *f); 13 | float box_iou(box a, box b); 14 | float box_rmse(box a, box b); 15 | dbox diou(box a, box b); 16 | void do_nms(box *boxes, float **probs, int total, int classes, float thresh); 17 | void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh); 18 | box decode_box(box b, box anchor); 19 | box encode_box(box b, box anchor); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/darkSrc/col2im.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | void col2im_add_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad, float val) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || col < 0 || 10 | row >= height || col >= width) return; 11 | im[col + width*(row + height*channel)] += val; 12 | } 13 | 
//This one might be too, can't remember. 14 | void col2im_cpu(float* data_col, 15 | int channels, int height, int width, 16 | int ksize, int stride, int pad, float* data_im) 17 | { 18 | int c,h,w; 19 | int height_col = (height + 2*pad - ksize) / stride + 1; 20 | int width_col = (width + 2*pad - ksize) / stride + 1; 21 | 22 | int channels_col = channels * ksize * ksize; 23 | for (c = 0; c < channels_col; ++c) { 24 | int w_offset = c % ksize; 25 | int h_offset = (c / ksize) % ksize; 26 | int c_im = c / ksize / ksize; 27 | for (h = 0; h < height_col; ++h) { 28 | for (w = 0; w < width_col; ++w) { 29 | int im_row = h_offset + h * stride; 30 | int im_col = w_offset + w * stride; 31 | int col_index = (c * height_col + h) * width_col + w; 32 | double val = data_col[col_index]; 33 | col2im_add_pixel(data_im, height, width, channels, 34 | im_row, im_col, c_im, pad, val); 35 | } 36 | } 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /src/darkSrc/col2im.h: -------------------------------------------------------------------------------- 1 | #ifndef COL2IM_H 2 | #define COL2IM_H 3 | 4 | void col2im_cpu(float* data_col, 5 | int channels, int height, int width, 6 | int ksize, int stride, int pad, float* data_im); 7 | 8 | #ifdef GPU 9 | void col2im_ongpu(float *data_col, 10 | int channels, int height, int width, 11 | int ksize, int stride, int pad, float *data_im); 12 | #endif 13 | #endif 14 | -------------------------------------------------------------------------------- /src/darkSrc/col2im_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "col2im.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void 
col2im_gpu_kernel(const int n, const float* data_col, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_im) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | float val = 0; 22 | int w = index % width + pad; 23 | int h = (index / width) % height + pad; 24 | int c = index / (width * height); 25 | // compute the start and end of the output 26 | int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; 27 | int w_col_end = min(w / stride + 1, width_col); 28 | int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; 29 | int h_col_end = min(h / stride + 1, height_col); 30 | // equivalent implementation 31 | int offset = 32 | (c * ksize * ksize + h * ksize + w) * height_col * width_col; 33 | int coeff_h_col = (1 - stride * ksize * height_col) * width_col; 34 | int coeff_w_col = (1 - stride * height_col * width_col); 35 | for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { 36 | for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { 37 | val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; 38 | } 39 | } 40 | data_im[index] += val; 41 | } 42 | } 43 | 44 | void col2im_ongpu(float *data_col, 45 | int channels, int height, int width, 46 | int ksize, int stride, int pad, float *data_im){ 47 | // We are going to launch channels * height_col * width_col kernels, each 48 | // kernel responsible for copying a single-channel grid. 
49 | int height_col = (height + 2 * pad - ksize) / stride + 1; 50 | int width_col = (width + 2 * pad - ksize) / stride + 1; 51 | int num_kernels = channels * height * width; 52 | col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 53 | BLOCK>>>( 54 | num_kernels, data_col, height, width, ksize, pad, 55 | stride, height_col, 56 | width_col, data_im); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /src/darkSrc/connected_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONNECTED_LAYER_H 2 | #define CONNECTED_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | typedef layer connected_layer; 9 | 10 | connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize); 11 | 12 | void forward_connected_layer(connected_layer layer, network_state state); 13 | void backward_connected_layer(connected_layer layer, network_state state); 14 | void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay); 15 | void denormalize_connected_layer(layer l); 16 | void statistics_connected_layer(layer l); 17 | 18 | #ifdef GPU 19 | void forward_connected_layer_gpu(connected_layer layer, network_state state); 20 | void backward_connected_layer_gpu(connected_layer layer, network_state state); 21 | void update_connected_layer_gpu(connected_layer layer, int batch, float learning_rate, float momentum, float decay); 22 | void push_connected_layer(connected_layer layer); 23 | void pull_connected_layer(connected_layer layer); 24 | #endif 25 | 26 | #endif 27 | 28 | -------------------------------------------------------------------------------- /src/darkSrc/convolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONVOLUTIONAL_LAYER_H 2 | #define CONVOLUTIONAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | 
#include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer convolutional_layer; 11 | 12 | #ifdef GPU 13 | void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state); 14 | void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state); 15 | void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay); 16 | 17 | void push_convolutional_layer(convolutional_layer layer); 18 | void pull_convolutional_layer(convolutional_layer layer); 19 | 20 | void add_bias_gpu(float *output, float *biases, int batch, int n, int size); 21 | void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); 22 | #ifdef CUDNN 23 | void cudnn_convolutional_setup(layer *l); 24 | #endif 25 | #endif 26 | 27 | convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); 28 | void denormalize_convolutional_layer(convolutional_layer l); 29 | void resize_convolutional_layer(convolutional_layer *layer, int w, int h); 30 | void forward_convolutional_layer(const convolutional_layer layer, network_state state); 31 | void update_convolutional_layer(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay); 32 | image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); 33 | void binarize_weights(float *weights, int n, int size, float *binary); 34 | void swap_binary(convolutional_layer *l); 35 | void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); 36 | 37 | void backward_convolutional_layer(convolutional_layer layer, network_state state); 38 | 39 | void add_bias(float *output, float *biases, int batch, int n, int size); 40 | void backward_bias(float *bias_updates, float *delta, 
int batch, int n, int size); 41 | 42 | image get_convolutional_image(convolutional_layer layer); 43 | image get_convolutional_delta(convolutional_layer layer); 44 | image get_convolutional_weight(convolutional_layer layer, int i); 45 | 46 | int convolutional_out_height(convolutional_layer layer); 47 | int convolutional_out_width(convolutional_layer layer); 48 | void rescale_weights(convolutional_layer l, float scale, float trans); 49 | void rgbgr_weights(convolutional_layer l); 50 | 51 | #endif 52 | 53 | -------------------------------------------------------------------------------- /src/darkSrc/cost_layer.c: -------------------------------------------------------------------------------- 1 | #include "cost_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include "blas.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | COST_TYPE get_cost_type(char *s) 11 | { 12 | if (strcmp(s, "sse")==0) return SSE; 13 | if (strcmp(s, "masked")==0) return MASKED; 14 | if (strcmp(s, "smooth")==0) return SMOOTH; 15 | fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); 16 | return SSE; 17 | } 18 | 19 | char *get_cost_string(COST_TYPE a) 20 | { 21 | switch(a){ 22 | case SSE: 23 | return "sse"; 24 | case MASKED: 25 | return "masked"; 26 | case SMOOTH: 27 | return "smooth"; 28 | } 29 | return "sse"; 30 | } 31 | 32 | cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) 33 | { 34 | fprintf(stderr, "cost %4d\n", inputs); 35 | cost_layer l = {0}; 36 | l.type = COST; 37 | 38 | l.scale = scale; 39 | l.batch = batch; 40 | l.inputs = inputs; 41 | l.outputs = inputs; 42 | l.cost_type = cost_type; 43 | l.delta = calloc(inputs*batch, sizeof(float)); 44 | l.output = calloc(inputs*batch, sizeof(float)); 45 | l.cost = calloc(1, sizeof(float)); 46 | 47 | l.forward = forward_cost_layer; 48 | l.backward = backward_cost_layer; 49 | #ifdef GPU 50 | l.forward_gpu = forward_cost_layer_gpu; 51 | l.backward_gpu = backward_cost_layer_gpu; 52 
| 53 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); /* fix: was cuda_make_array(l.output, ...) — delta_gpu must mirror the host delta buffer, as resize_cost_layer below does */ 54 | l.output_gpu = cuda_make_array(l.output, inputs*batch); /* fix: was cuda_make_array(l.delta, ...) — output_gpu must mirror the host output buffer */ 55 | #endif 56 | return l; 57 | } 58 | 59 | void resize_cost_layer(cost_layer *l, int inputs) 60 | { 61 | l->inputs = inputs; 62 | l->outputs = inputs; 63 | l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); 64 | l->output = realloc(l->output, inputs*l->batch*sizeof(float)); 65 | #ifdef GPU 66 | cuda_free(l->delta_gpu); 67 | cuda_free(l->output_gpu); 68 | l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); 69 | l->output_gpu = cuda_make_array(l->output, inputs*l->batch); 70 | #endif 71 | } 72 | 73 | void forward_cost_layer(cost_layer l, network_state state) 74 | { 75 | if (!state.truth) return; 76 | if(l.cost_type == MASKED){ 77 | int i; 78 | for(i = 0; i < l.batch*l.inputs; ++i){ 79 | if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM; 80 | } 81 | } 82 | if(l.cost_type == SMOOTH){ 83 | smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); 84 | } else { 85 | l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); 86 | } 87 | l.cost[0] = sum_array(l.output, l.batch*l.inputs); 88 | } 89 | 90 | void backward_cost_layer(const cost_layer l, network_state state) 91 | { 92 | axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1); 93 | } 94 | 95 | #ifdef GPU 96 | 97 | void pull_cost_layer(cost_layer l) 98 | { 99 | cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); 100 | } 101 | 102 | void push_cost_layer(cost_layer l) 103 | { 104 | cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); 105 | } 106 | 107 | int float_abs_compare (const void * a, const void * b) 108 | { 109 | float fa = *(const float*) a; 110 | if(fa < 0) fa = -fa; 111 | float fb = *(const float*) b; 112 | if(fb < 0) fb = -fb; 113 | return (fa > fb) - (fa < fb); 114 | } 115 | 116 | void forward_cost_layer_gpu(cost_layer l, network_state state) 117 | { 118 | if (!state.truth) return; 119 | if (l.cost_type == 
MASKED) { 120 | mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth); 121 | } 122 | 123 | if(l.cost_type == SMOOTH){ 124 | smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); 125 | } else { 126 | l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); 127 | } 128 | 129 | if(l.ratio){ 130 | cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); 131 | qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); 132 | int n = (1-l.ratio) * l.batch*l.inputs; 133 | float thresh = l.delta[n]; 134 | thresh = 0; 135 | printf("%f\n", thresh); 136 | supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); 137 | } 138 | 139 | cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); 140 | l.cost[0] = sum_array(l.output, l.batch*l.inputs); 141 | } 142 | 143 | void backward_cost_layer_gpu(const cost_layer l, network_state state) 144 | { 145 | axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1); 146 | } 147 | #endif 148 | 149 | -------------------------------------------------------------------------------- /src/darkSrc/cost_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef COST_LAYER_H 2 | #define COST_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | typedef layer cost_layer; 7 | 8 | COST_TYPE get_cost_type(char *s); 9 | char *get_cost_string(COST_TYPE a); 10 | cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); 11 | void forward_cost_layer(const cost_layer l, network_state state); 12 | void backward_cost_layer(const cost_layer l, network_state state); 13 | void resize_cost_layer(cost_layer *l, int inputs); 14 | 15 | #ifdef GPU 16 | void forward_cost_layer_gpu(cost_layer l, network_state state); 17 | void backward_cost_layer_gpu(const cost_layer l, network_state state); 18 | #endif 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- 
/src/darkSrc/crnn_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CRNN_LAYER_H 3 | #define CRNN_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); 10 | 11 | void forward_crnn_layer(layer l, network_state state); 12 | void backward_crnn_layer(layer l, network_state state); 13 | void update_crnn_layer(layer l, int batch, float learning_rate, float momentum, float decay); 14 | 15 | #ifdef GPU 16 | void forward_crnn_layer_gpu(layer l, network_state state); 17 | void backward_crnn_layer_gpu(layer l, network_state state); 18 | void update_crnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); 19 | void push_crnn_layer(layer l); 20 | void pull_crnn_layer(layer l); 21 | #endif 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src/darkSrc/crop_layer.c: -------------------------------------------------------------------------------- 1 | #include "crop_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | image get_crop_image(crop_layer l) 6 | { 7 | int h = l.out_h; 8 | int w = l.out_w; 9 | int c = l.out_c; 10 | return float_to_image(w,h,c,l.output); 11 | } 12 | 13 | void backward_crop_layer(const crop_layer l, network_state state){} 14 | void backward_crop_layer_gpu(const crop_layer l, network_state state){} 15 | 16 | crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) 17 | { 18 | fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); 19 | crop_layer l = {0}; 20 | l.type = CROP; 21 | l.batch = batch; 22 | l.h = h; 23 | l.w = w; 24 | l.c = c; 25 | l.scale = (float)crop_height / h; 26 | l.flip = flip; 27 | 
l.angle = angle; 28 | l.saturation = saturation; 29 | l.exposure = exposure; 30 | l.out_w = crop_width; 31 | l.out_h = crop_height; 32 | l.out_c = c; 33 | l.inputs = l.w * l.h * l.c; 34 | l.outputs = l.out_w * l.out_h * l.out_c; 35 | l.output = calloc(l.outputs*batch, sizeof(float)); 36 | l.forward = forward_crop_layer; 37 | l.backward = backward_crop_layer; 38 | 39 | #ifdef GPU 40 | l.forward_gpu = forward_crop_layer_gpu; 41 | l.backward_gpu = backward_crop_layer_gpu; 42 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 43 | l.rand_gpu = cuda_make_array(0, l.batch*8); 44 | #endif 45 | return l; 46 | } 47 | 48 | void resize_crop_layer(layer *l, int w, int h) 49 | { 50 | l->w = w; 51 | l->h = h; 52 | 53 | l->out_w = l->scale*w; 54 | l->out_h = l->scale*h; 55 | 56 | l->inputs = l->w * l->h * l->c; 57 | l->outputs = l->out_h * l->out_w * l->out_c; 58 | 59 | l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); 60 | #ifdef GPU 61 | cuda_free(l->output_gpu); 62 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 63 | #endif 64 | } 65 | 66 | 67 | void forward_crop_layer(const crop_layer l, network_state state) 68 | { 69 | int i,j,c,b,row,col; 70 | int index; 71 | int count = 0; 72 | int flip = (l.flip && rand()%2); 73 | int dh = rand()%(l.h - l.out_h + 1); 74 | int dw = rand()%(l.w - l.out_w + 1); 75 | float scale = 2; 76 | float trans = -1; 77 | if(l.noadjust){ 78 | scale = 1; 79 | trans = 0; 80 | } 81 | if(!state.train){ 82 | flip = 0; 83 | dh = (l.h - l.out_h)/2; 84 | dw = (l.w - l.out_w)/2; 85 | } 86 | for(b = 0; b < l.batch; ++b){ 87 | for(c = 0; c < l.c; ++c){ 88 | for(i = 0; i < l.out_h; ++i){ 89 | for(j = 0; j < l.out_w; ++j){ 90 | if(flip){ 91 | col = l.w - dw - j - 1; 92 | }else{ 93 | col = j + dw; 94 | } 95 | row = i + dh; 96 | index = col+l.w*(row+l.h*(c + l.c*b)); 97 | l.output[count++] = state.input[index]*scale + trans; 98 | } 99 | } 100 | } 101 | } 102 | } 103 | 104 | 
-------------------------------------------------------------------------------- /src/darkSrc/crop_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CROP_LAYER_H 2 | #define CROP_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | typedef layer crop_layer; 9 | 10 | image get_crop_image(crop_layer l); 11 | crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); 12 | void forward_crop_layer(const crop_layer l, network_state state); 13 | void resize_crop_layer(layer *l, int w, int h); 14 | 15 | #ifdef GPU 16 | void forward_crop_layer_gpu(crop_layer l, network_state state); 17 | #endif 18 | 19 | #endif 20 | 21 | -------------------------------------------------------------------------------- /src/darkSrc/cuda.c: -------------------------------------------------------------------------------- 1 | int gpu_index = 0; 2 | 3 | #ifdef GPU 4 | 5 | #include "cuda.h" 6 | #include "utils.h" 7 | #include "blas.h" 8 | #include "assert.h" 9 | #include 10 | #include 11 | 12 | void cuda_set_device(int n) 13 | { 14 | gpu_index = n; 15 | cudaError_t status = cudaSetDevice(n); 16 | check_error(status); 17 | } 18 | 19 | int cuda_get_device() 20 | { 21 | int n = 0; 22 | cudaError_t status = cudaGetDevice(&n); 23 | check_error(status); 24 | return n; 25 | } 26 | 27 | void check_error(cudaError_t status) 28 | { 29 | //cudaDeviceSynchronize(); 30 | cudaError_t status2 = cudaGetLastError(); 31 | if (status != cudaSuccess) 32 | { 33 | const char *s = cudaGetErrorString(status); 34 | char buffer[256]; 35 | printf("CUDA Error: %s\n", s); 36 | assert(0); 37 | snprintf(buffer, 256, "CUDA Error: %s", s); 38 | error(buffer); 39 | } 40 | if (status2 != cudaSuccess) 41 | { 42 | const char *s = cudaGetErrorString(status); 43 | char buffer[256]; 44 | printf("CUDA Error Prev: %s\n", s); 45 | assert(0); 46 | 
snprintf(buffer, 256, "CUDA Error Prev: %s", s); 47 | error(buffer); 48 | } 49 | } 50 | 51 | dim3 cuda_gridsize(size_t n){ 52 | size_t k = (n-1) / BLOCK + 1; 53 | size_t x = k; 54 | size_t y = 1; 55 | if(x > 65535){ 56 | x = ceil(sqrt(k)); 57 | y = (n-1)/(x*BLOCK) + 1; 58 | } 59 | dim3 d = {x, y, 1}; 60 | //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 61 | return d; 62 | } 63 | 64 | #ifdef CUDNN 65 | cudnnHandle_t cudnn_handle() 66 | { 67 | static int init[16] = {0}; 68 | static cudnnHandle_t handle[16]; 69 | int i = cuda_get_device(); 70 | if(!init[i]) { 71 | cudnnCreate(&handle[i]); 72 | init[i] = 1; 73 | } 74 | return handle[i]; 75 | } 76 | #endif 77 | 78 | cublasHandle_t blas_handle() 79 | { 80 | static int init[16] = {0}; 81 | static cublasHandle_t handle[16]; 82 | int i = cuda_get_device(); 83 | if(!init[i]) { 84 | cublasCreate(&handle[i]); 85 | init[i] = 1; 86 | } 87 | return handle[i]; 88 | } 89 | 90 | float *cuda_make_array(float *x, size_t n) 91 | { 92 | float *x_gpu; 93 | size_t size = sizeof(float)*n; 94 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 95 | check_error(status); 96 | if(x){ 97 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 98 | check_error(status); 99 | } 100 | if(!x_gpu) error("Cuda malloc failed\n"); 101 | return x_gpu; 102 | } 103 | 104 | void cuda_random(float *x_gpu, size_t n) 105 | { 106 | static curandGenerator_t gen[16]; 107 | static int init[16] = {0}; 108 | int i = cuda_get_device(); 109 | if(!init[i]){ 110 | curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); 111 | curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); 112 | init[i] = 1; 113 | } 114 | curandGenerateUniform(gen[i], x_gpu, n); 115 | check_error(cudaPeekAtLastError()); 116 | } 117 | 118 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s) 119 | { 120 | float *tmp = calloc(n, sizeof(float)); 121 | cuda_pull_array(x_gpu, tmp, n); 122 | //int i; 123 | //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); 124 | 
axpy_cpu(n, -1, x, 1, tmp, 1); 125 | float err = dot_cpu(n, tmp, 1, tmp, 1); 126 | printf("Error %s: %f\n", s, sqrt(err/n)); 127 | free(tmp); 128 | return err; 129 | } 130 | 131 | int *cuda_make_int_array(size_t n) 132 | { 133 | int *x_gpu; 134 | size_t size = sizeof(int)*n; 135 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 136 | check_error(status); 137 | return x_gpu; 138 | } 139 | 140 | void cuda_free(float *x_gpu) 141 | { 142 | cudaError_t status = cudaFree(x_gpu); 143 | check_error(status); 144 | } 145 | 146 | void cuda_push_array(float *x_gpu, float *x, size_t n) 147 | { 148 | size_t size = sizeof(float)*n; 149 | cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 150 | check_error(status); 151 | } 152 | 153 | void cuda_pull_array(float *x_gpu, float *x, size_t n) 154 | { 155 | size_t size = sizeof(float)*n; 156 | cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); 157 | check_error(status); 158 | } 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- /src/darkSrc/cuda.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CUDA_H 3 | #define CUDA_H 4 | 5 | #if defined(_MSC_VER) && _MSC_VER < 1900 6 | #define inline __inline 7 | #endif 8 | 9 | extern int gpu_index; 10 | 11 | #ifdef GPU 12 | 13 | #define BLOCK 512 14 | 15 | #include "cuda_runtime.h" 16 | #include "curand.h" 17 | #include "cublas_v2.h" 18 | 19 | #ifdef CUDNN 20 | #include "cudnn.h" 21 | #endif 22 | 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | void check_error(cudaError_t status); 28 | cublasHandle_t blas_handle(); 29 | float *cuda_make_array(float *x, size_t n); 30 | int *cuda_make_int_array(size_t n); 31 | void cuda_push_array(float *x_gpu, float *x, size_t n); 32 | void cuda_pull_array(float *x_gpu, float *x, size_t n); 33 | void cuda_set_device(int n); 34 | void cuda_free(float *x_gpu); 35 | void cuda_random(float *x_gpu, 
size_t n); 36 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s); 37 | dim3 cuda_gridsize(size_t n); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | 43 | #ifdef CUDNN 44 | cudnnHandle_t cudnn_handle(); 45 | #endif 46 | 47 | #endif 48 | #endif 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/darkSrc/data.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_H 2 | #define DATA_H 3 | #include 4 | 5 | #if defined(_MSC_VER) && _MSC_VER < 1900 6 | #define inline __inline 7 | #endif 8 | 9 | #include "matrix.h" 10 | #include "list.h" 11 | #include "image.h" 12 | #include "tree.h" 13 | 14 | static inline float distance_from_edge(int x, int max) 15 | { 16 | int dx = (max/2) - x; 17 | if (dx < 0) dx = -dx; 18 | dx = (max/2) + 1 - dx; 19 | dx *= 2; 20 | float dist = (float)dx/max; 21 | if (dist > 1) dist = 1; 22 | return dist; 23 | } 24 | 25 | typedef struct{ 26 | int w, h; 27 | matrix X; 28 | matrix y; 29 | int shallow; 30 | int *num_boxes; 31 | box **boxes; 32 | } data; 33 | 34 | typedef enum { 35 | CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA 36 | } data_type; 37 | 38 | typedef struct load_args{ 39 | int threads; 40 | char **paths; 41 | char *path; 42 | int n; 43 | int m; 44 | char **labels; 45 | int h; 46 | int w; 47 | int out_w; 48 | int out_h; 49 | int nh; 50 | int nw; 51 | int num_boxes; 52 | int min, max, size; 53 | int classes; 54 | int background; 55 | int scale; 56 | float jitter; 57 | float angle; 58 | float aspect; 59 | float saturation; 60 | float exposure; 61 | float hue; 62 | data *d; 63 | image *im; 64 | image *resized; 65 | data_type type; 66 | tree *hierarchy; 67 | } load_args; 68 | 69 | typedef struct{ 70 | int id; 71 | float x,y,w,h; 72 | float left, right, top, bottom; 73 | } box_label; 74 | 75 | void 
free_data(data d); 76 | 77 | pthread_t load_data(load_args args); 78 | 79 | pthread_t load_data_in_thread(load_args args); 80 | 81 | void print_letters(float *pred, int n); 82 | data load_data_captcha(char **paths, int n, int m, int k, int w, int h); 83 | data load_data_captcha_encode(char **paths, int n, int m, int w, int h); 84 | data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); 85 | data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure); 86 | data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); 87 | matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); 88 | data load_data_super(char **paths, int n, int m, int w, int h, int scale); 89 | data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); 90 | data load_go(char *filename); 91 | 92 | box_label *read_boxes(char *filename, int *n); 93 | data load_cifar10_data(char *filename); 94 | data load_all_cifar10(); 95 | 96 | data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); 97 | 98 | list *get_paths(char *filename); 99 | char **get_labels(char *filename); 100 | void get_random_batch(data d, int n, float *X, float *y); 101 | data get_data_part(data d, int part, int total); 102 | data get_random_data(data d, int num); 103 | void get_next_batch(data d, int n, int offset, float *X, float *y); 104 | data load_categorical_data_csv(char *filename, int target, int k); 105 | void normalize_data_rows(data d); 106 | void scale_data_rows(data d, float s); 107 | void translate_data_rows(data d, float s); 108 | void randomize_data(data d); 
109 | data *split_data(data d, int part, int total); 110 | data concat_data(data d1, data d2); 111 | data concat_datas(data *d, int n); 112 | void fill_truth(char *path, char **labels, int k, float *truth); 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /src/darkSrc/deconvolutional_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "convolutional_layer.h" 7 | #include "deconvolutional_layer.h" 8 | #include "gemm.h" 9 | #include "blas.h" 10 | #include "im2col.h" 11 | #include "col2im.h" 12 | #include "utils.h" 13 | #include "cuda.h" 14 | } 15 | 16 | extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) 17 | { 18 | int i; 19 | int out_h = deconvolutional_out_height(layer); 20 | int out_w = deconvolutional_out_width(layer); 21 | int size = out_h*out_w; 22 | 23 | int m = layer.size*layer.size*layer.n; 24 | int n = layer.h*layer.w; 25 | int k = layer.c; 26 | 27 | fill_ongpu(layer.outputs*layer.batch, 0, layer.output_gpu, 1); 28 | 29 | for(i = 0; i < layer.batch; ++i){ 30 | float *a = layer.weights_gpu; 31 | float *b = state.input + i*layer.c*layer.h*layer.w; 32 | float *c = layer.col_image_gpu; 33 | 34 | gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n); 35 | 36 | col2im_ongpu(c, layer.n, out_h, out_w, layer.size, layer.stride, 0, layer.output_gpu+i*layer.n*size); 37 | } 38 | add_bias_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, size); 39 | activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation); 40 | } 41 | 42 | extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) 43 | { 44 | float alpha = 1./layer.batch; 45 | int out_h = deconvolutional_out_height(layer); 46 | int out_w = deconvolutional_out_width(layer); 47 | int size = out_h*out_w; 48 | 
int i; 49 | 50 | gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu); 51 | backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size); 52 | 53 | if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); 54 | 55 | for(i = 0; i < layer.batch; ++i){ 56 | int m = layer.c; 57 | int n = layer.size*layer.size*layer.n; 58 | int k = layer.h*layer.w; 59 | 60 | float *a = state.input + i*m*n; 61 | float *b = layer.col_image_gpu; 62 | float *c = layer.weight_updates_gpu; 63 | 64 | im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w, 65 | layer.size, layer.stride, 0, b); 66 | gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n); 67 | 68 | if(state.delta){ 69 | int m = layer.c; 70 | int n = layer.h*layer.w; 71 | int k = layer.size*layer.size*layer.n; 72 | 73 | float *a = layer.weights_gpu; 74 | float *b = layer.col_image_gpu; 75 | float *c = state.delta + i*n*m; 76 | 77 | gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); 78 | } 79 | } 80 | } 81 | 82 | extern "C" void pull_deconvolutional_layer(deconvolutional_layer layer) 83 | { 84 | cuda_pull_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size); 85 | cuda_pull_array(layer.biases_gpu, layer.biases, layer.n); 86 | cuda_pull_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size); 87 | cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); 88 | } 89 | 90 | extern "C" void push_deconvolutional_layer(deconvolutional_layer layer) 91 | { 92 | cuda_push_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size); 93 | cuda_push_array(layer.biases_gpu, layer.biases, layer.n); 94 | cuda_push_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size); 95 | cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); 96 | } 97 | 98 | extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float 
learning_rate, float momentum, float decay) 99 | { 100 | int size = layer.size*layer.size*layer.c*layer.n; 101 | 102 | axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); 103 | scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1); 104 | 105 | axpy_ongpu(size, -decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); 106 | axpy_ongpu(size, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); 107 | scal_ongpu(size, momentum, layer.weight_updates_gpu, 1); 108 | } 109 | 110 | -------------------------------------------------------------------------------- /src/darkSrc/deconvolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DECONVOLUTIONAL_LAYER_H 2 | #define DECONVOLUTIONAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer deconvolutional_layer; 11 | 12 | #ifdef GPU 13 | void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); 14 | void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); 15 | void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay); 16 | void push_deconvolutional_layer(deconvolutional_layer layer); 17 | void pull_deconvolutional_layer(deconvolutional_layer layer); 18 | #endif 19 | 20 | deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation); 21 | void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w); 22 | void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state); 23 | void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay); 24 | void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state); 25 | 26 | 
image get_deconvolutional_image(deconvolutional_layer layer); 27 | image get_deconvolutional_delta(deconvolutional_layer layer); 28 | image get_deconvolutional_filter(deconvolutional_layer layer, int i); 29 | 30 | int deconvolutional_out_height(deconvolutional_layer layer); 31 | int deconvolutional_out_width(deconvolutional_layer layer); 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /src/darkSrc/demo.h: -------------------------------------------------------------------------------- 1 | #ifndef DEMO 2 | #define DEMO 3 | 4 | #include "image.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, char *out_filename); 11 | void demo1(); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/darkSrc/detection_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DETECTION_LAYER_H 2 | #define DETECTION_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer detection_layer; 8 | 9 | detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); 10 | void forward_detection_layer(const detection_layer l, network_state state); 11 | void backward_detection_layer(const detection_layer l, network_state state); 12 | void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); 13 | 14 | #ifdef GPU 15 | void forward_detection_layer_gpu(const detection_layer l, network_state state); 16 | void backward_detection_layer_gpu(detection_layer l, network_state state); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- 
/src/darkSrc/dropout_layer.c: -------------------------------------------------------------------------------- 1 | #include "dropout_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include <stdlib.h> 5 | #include <stdio.h> 6 | 7 | dropout_layer make_dropout_layer(int batch, int inputs, float probability) 8 | { 9 | dropout_layer l = {0}; 10 | l.type = DROPOUT; 11 | l.probability = probability; 12 | l.inputs = inputs; 13 | l.outputs = inputs; 14 | l.batch = batch; 15 | l.rand = calloc(inputs*batch, sizeof(float)); 16 | l.scale = 1./(1.-probability); 17 | l.forward = forward_dropout_layer; 18 | l.backward = backward_dropout_layer; 19 | #ifdef GPU 20 | l.forward_gpu = forward_dropout_layer_gpu; 21 | l.backward_gpu = backward_dropout_layer_gpu; 22 | l.rand_gpu = cuda_make_array(l.rand, inputs*batch); 23 | #endif 24 | fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); 25 | return l; 26 | } 27 | 28 | void resize_dropout_layer(dropout_layer *l, int inputs) 29 | { 30 | l->rand = realloc(l->rand, inputs*l->batch*sizeof(float)); /* fix: was l->inputs (stale, pre-resize size) — use the new inputs, matching the GPU path below */ 31 | #ifdef GPU 32 | cuda_free(l->rand_gpu); 33 | 34 | l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); 35 | #endif 36 | } 37 | 38 | void forward_dropout_layer(dropout_layer l, network_state state) 39 | { 40 | int i; 41 | if (!state.train) return; 42 | for(i = 0; i < l.batch * l.inputs; ++i){ 43 | float r = rand_uniform(0, 1); 44 | l.rand[i] = r; 45 | if(r < l.probability) state.input[i] = 0; 46 | else state.input[i] *= l.scale; 47 | } 48 | } 49 | 50 | void backward_dropout_layer(dropout_layer l, network_state state) 51 | { 52 | int i; 53 | if(!state.delta) return; 54 | for(i = 0; i < l.batch * l.inputs; ++i){ 55 | float r = l.rand[i]; 56 | if(r < l.probability) state.delta[i] = 0; 57 | else state.delta[i] *= l.scale; 58 | } 59 | } 60 | 61 | -------------------------------------------------------------------------------- /src/darkSrc/dropout_layer.h: 
-------------------------------------------------------------------------------- 1 | #ifndef DROPOUT_LAYER_H 2 | #define DROPOUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer dropout_layer; 8 | 9 | dropout_layer make_dropout_layer(int batch, int inputs, float probability); 10 | 11 | void forward_dropout_layer(dropout_layer l, network_state state); 12 | void backward_dropout_layer(dropout_layer l, network_state state); 13 | void resize_dropout_layer(dropout_layer *l, int inputs); 14 | 15 | #ifdef GPU 16 | void forward_dropout_layer_gpu(dropout_layer l, network_state state); 17 | void backward_dropout_layer_gpu(dropout_layer l, network_state state); 18 | 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /src/darkSrc/dropout_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "dropout_layer.h" 7 | #include "cuda.h" 8 | #include "utils.h" 9 | } 10 | 11 | __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) 12 | { 13 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 14 | if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; 15 | } 16 | 17 | void forward_dropout_layer_gpu(dropout_layer layer, network_state state) 18 | { 19 | if (!state.train) return; 20 | int size = layer.inputs*layer.batch; 21 | cuda_random(layer.rand_gpu, size); 22 | /* 23 | int i; 24 | for(i = 0; i < size; ++i){ 25 | layer.rand[i] = rand_uniform(); 26 | } 27 | cuda_push_array(layer.rand_gpu, layer.rand, size); 28 | */ 29 | 30 | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.rand_gpu, layer.probability, layer.scale); 31 | check_error(cudaPeekAtLastError()); 32 | } 33 | 34 | void backward_dropout_layer_gpu(dropout_layer layer, network_state state) 35 | { 36 | if(!state.delta) return; 37 | int size = layer.inputs*layer.batch; 38 | 39 | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.delta, size, layer.rand_gpu, layer.probability, layer.scale); 40 | check_error(cudaPeekAtLastError()); 41 | } 42 | -------------------------------------------------------------------------------- /src/darkSrc/gemm.h: -------------------------------------------------------------------------------- 1 | #ifndef GEMM_H 2 | #define GEMM_H 3 | 4 | void gemm_bin(int M, int N, int K, float ALPHA, 5 | char *A, int lda, 6 | float *B, int ldb, 7 | float *C, int ldc); 8 | 9 | void gemm(int TA, int TB, int M, int N, int K, float ALPHA, 10 | float *A, int lda, 11 | float *B, int ldb, 12 | float BETA, 13 | float *C, int ldc); 14 | 15 | void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, 16 | float *A, int lda, 17 | float *B, int ldb, 18 | float BETA, 19 | float *C, int ldc); 20 | 21 | #ifdef GPU 22 | void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA, 23 | float *A_gpu, int lda, 24 | float *B_gpu, int ldb, 25 | float BETA, 26 | float *C_gpu, int ldc); 27 | 28 | void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, 29 | float *A, int lda, 30 | float *B, int ldb, 31 | float BETA, 32 | float *C, int ldc); 33 | #endif 34 | #endif 35 | 
-------------------------------------------------------------------------------- /src/darkSrc/gettimeofday.c: -------------------------------------------------------------------------------- 1 | #include "gettimeofday.h" 2 | #ifdef WIN32 3 | 4 | int gettimeofday(struct timeval *tv, struct timezone *tz) 5 | { 6 | FILETIME ft; 7 | unsigned __int64 tmpres = 0; 8 | static int tzflag; 9 | 10 | if (NULL != tv) 11 | { 12 | GetSystemTimeAsFileTime(&ft); 13 | 14 | tmpres |= ft.dwHighDateTime; 15 | tmpres <<= 32; 16 | tmpres |= ft.dwLowDateTime; 17 | 18 | /*converting file time to unix epoch*/ 19 | tmpres -= DELTA_EPOCH_IN_MICROSECS; 20 | tmpres /= 10; /*convert into microseconds*/ 21 | tv->tv_sec = (long)(tmpres / 1000000UL); 22 | tv->tv_usec = (long)(tmpres % 1000000UL); 23 | } 24 | 25 | if (NULL != tz) 26 | { 27 | if (!tzflag) 28 | { 29 | _tzset(); 30 | tzflag++; 31 | } 32 | tz->tz_minuteswest = _timezone / 60; 33 | tz->tz_dsttime = _daylight; 34 | } 35 | 36 | return 0; 37 | } 38 | 39 | /* never worry about timersub type activies again -- from GLIBC and upcased. */ 40 | int timersub(struct timeval *a, struct timeval *b, struct timeval *result) 41 | { 42 | (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; 43 | (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; 44 | if ((result)->tv_usec < 0) { 45 | --(result)->tv_sec; 46 | (result)->tv_usec += 1000000; 47 | } 48 | 49 | return 0; 50 | } 51 | #endif 52 | -------------------------------------------------------------------------------- /src/darkSrc/gettimeofday.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef WIN32 4 | #include <time.h> 5 | #include <windows.h> /* NOTE(review): header name was lost in this dump; windows.h supplies FILETIME and GetSystemTimeAsFileTime used in gettimeofday.c -- struct timeval may additionally require <winsock2.h>; confirm against upstream. */
6 | #if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) 7 | #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 8 | #else 9 | #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL 10 | #endif 11 | 12 | struct timezone 13 | { 14 | int tz_minuteswest; /* minutes W of Greenwich */ 15 | int tz_dsttime; /* type of dst correction */ 16 | }; 17 | 18 | int gettimeofday(struct timeval *tv, struct timezone *tz); 19 | 20 | /* never worry about timersub type activies again -- from GLIBC and upcased. */ 21 | int timersub(struct timeval *a, struct timeval *b, struct timeval *result); 22 | #endif 23 | -------------------------------------------------------------------------------- /src/darkSrc/gru_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef GRU_LAYER_H 3 | #define GRU_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize); 10 | 11 | void forward_gru_layer(layer l, network_state state); 12 | void backward_gru_layer(layer l, network_state state); 13 | void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay); 14 | 15 | #ifdef GPU 16 | void forward_gru_layer_gpu(layer l, network_state state); 17 | void backward_gru_layer_gpu(layer l, network_state state); 18 | void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); 19 | void push_gru_layer(layer l); 20 | void pull_gru_layer(layer l); 21 | #endif 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src/darkSrc/im2col.c: -------------------------------------------------------------------------------- 1 | #include "im2col.h" 2 | #include 3 | float im2col_get_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || 
col < 0 || 10 | row >= height || col >= width) return 0; 11 | return im[col + width*(row + height*channel)]; 12 | } 13 | 14 | //From Berkeley Vision's Caffe! 15 | //https://github.com/BVLC/caffe/blob/master/LICENSE 16 | void im2col_cpu(float* data_im, 17 | int channels, int height, int width, 18 | int ksize, int stride, int pad, float* data_col) 19 | { 20 | int c,h,w; 21 | int height_col = (height + 2*pad - ksize) / stride + 1; 22 | int width_col = (width + 2*pad - ksize) / stride + 1; 23 | 24 | int channels_col = channels * ksize * ksize; 25 | for (c = 0; c < channels_col; ++c) { 26 | int w_offset = c % ksize; 27 | int h_offset = (c / ksize) % ksize; 28 | int c_im = c / ksize / ksize; 29 | for (h = 0; h < height_col; ++h) { 30 | for (w = 0; w < width_col; ++w) { 31 | int im_row = h_offset + h * stride; 32 | int im_col = w_offset + w * stride; 33 | int col_index = (c * height_col + h) * width_col + w; 34 | data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, 35 | im_row, im_col, c_im, pad); 36 | } 37 | } 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/darkSrc/im2col.h: -------------------------------------------------------------------------------- 1 | #ifndef IM2COL_H 2 | #define IM2COL_H 3 | 4 | void im2col_cpu(float* data_im, 5 | int channels, int height, int width, 6 | int ksize, int stride, int pad, float* data_col); 7 | 8 | #ifdef GPU 9 | 10 | void im2col_ongpu(float *im, 11 | int channels, int height, int width, 12 | int ksize, int stride, int pad,float *data_col); 13 | 14 | #endif 15 | #endif 16 | -------------------------------------------------------------------------------- /src/darkSrc/im2col_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "im2col.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: 
https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void im2col_gpu_kernel(const int n, const float* data_im, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_col) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | int w_out = index % width_col; 22 | int h_index = index / width_col; 23 | int h_out = h_index % height_col; 24 | int channel_in = h_index / height_col; 25 | int channel_out = channel_in * ksize * ksize; 26 | int h_in = h_out * stride - pad; 27 | int w_in = w_out * stride - pad; 28 | float* data_col_ptr = data_col; 29 | data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; 30 | const float* data_im_ptr = data_im; 31 | data_im_ptr += (channel_in * height + h_in) * width + w_in; 32 | for (int i = 0; i < ksize; ++i) { 33 | for (int j = 0; j < ksize; ++j) { 34 | int h = h_in + i; 35 | int w = w_in + j; 36 | 37 | *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? 38 | data_im_ptr[i * width + j] : 0; 39 | 40 | //*data_col_ptr = data_im_ptr[ii * width + jj]; 41 | 42 | data_col_ptr += height_col * width_col; 43 | } 44 | } 45 | } 46 | } 47 | 48 | void im2col_ongpu(float *im, 49 | int channels, int height, int width, 50 | int ksize, int stride, int pad, float *data_col){ 51 | // We are going to launch channels * height_col * width_col kernels, each 52 | // kernel responsible for copying a single-channel grid. 
53 | int height_col = (height + 2 * pad - ksize) / stride + 1; 54 | int width_col = (width + 2 * pad - ksize) / stride + 1; 55 | int num_kernels = channels * height_col * width_col; 56 | im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 57 | BLOCK>>>( 58 | num_kernels, im, height, width, ksize, pad, 59 | stride, height_col, 60 | width_col, data_col); 61 | } 62 | -------------------------------------------------------------------------------- /src/darkSrc/image.h: -------------------------------------------------------------------------------- 1 | #ifndef IMAGE_H 2 | #define IMAGE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "box.h" 10 | 11 | typedef struct { 12 | int h; 13 | int w; 14 | int c; 15 | float *data; 16 | } image; 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | float get_color(int c, int x, int max); 23 | void flip_image(image a); 24 | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); 25 | void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); 26 | void draw_bbox(image a, box bbox, int w, float r, float g, float b); 27 | void draw_label(image a, int r, int c, image label, const float *rgb); 28 | void write_label(image a, int r, int c, image *characters, char *string, float *rgb); 29 | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **labels, int classes); 30 | image image_distance(image a, image b); 31 | void scale_image(image m, float s); 32 | image crop_image(image im, int dx, int dy, int w, int h); 33 | image random_crop_image(image im, int w, int h); 34 | image random_augment_image(image im, float angle, float aspect, int low, int high, int size); 35 | void random_distort_image(image im, float hue, float saturation, float exposure); 36 | image resize_image(image im, int w, int h); 37 | void fill_image(image m, float s); 38 | void letterbox_image_into(image im, int w, 
int h, image boxed); 39 | image letterbox_image(image im, int w, int h); 40 | image resize_min(image im, int min); 41 | image resize_max(image im, int max); 42 | void translate_image(image m, float s); 43 | void normalize_image(image p); 44 | image rotate_image(image m, float rad); 45 | void rotate_image_cw(image im, int times); 46 | void embed_image(image source, image dest, int dx, int dy); 47 | void saturate_image(image im, float sat); 48 | void exposure_image(image im, float sat); 49 | void distort_image(image im, float hue, float sat, float val); 50 | void saturate_exposure_image(image im, float sat, float exposure); 51 | void hsv_to_rgb(image im); 52 | void rgbgr_image(image im); 53 | void constrain_image(image im); 54 | void composite_3d(char *f1, char *f2, char *out, int delta); 55 | int best_3d_shift_r(image a, image b, int min, int max); 56 | 57 | image grayscale_image(image im); 58 | image threshold_image(image im, float thresh); 59 | 60 | image collapse_image_layers(image source, int border); 61 | image collapse_images_horz(image *ims, int n); 62 | image collapse_images_vert(image *ims, int n); 63 | 64 | void show_image(image p, const char *name); 65 | void show_image_normalized(image im, const char *name); 66 | void save_image_png(image im, const char *name); 67 | void save_image(image p, const char *name); 68 | void show_images(image *ims, int n, char *window); 69 | void show_image_layers(image p, char *name); 70 | void show_image_collapsed(image p, char *name); 71 | 72 | void print_image(image m); 73 | 74 | image make_image(int w, int h, int c); 75 | image make_random_image(int w, int h, int c); 76 | image make_empty_image(int w, int h, int c); 77 | image float_to_image(int w, int h, int c, float *data); 78 | image copy_image(image p); 79 | image load_image(char *filename, int w, int h, int c); 80 | image load_image_color(char *filename, int w, int h); 81 | image **load_alphabet(); 82 | 83 | float get_pixel(image m, int x, int y, int c); 84 | float 
get_pixel_extend(image m, int x, int y, int c); 85 | void set_pixel(image m, int x, int y, int c, float val); 86 | void add_pixel(image m, int x, int y, int c, float val); 87 | float bilinear_interpolate(image im, float x, float y, int c); 88 | 89 | image get_image_layer(image m, int l); 90 | 91 | void free_image(image m); 92 | void test_resize(char *filename); 93 | 94 | //#ifdef OPENCV 95 | //#include "opencv2/imgproc/imgproc_c.h" 96 | //image ipl_to_image(IplImage* src); 97 | //void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes); 98 | //void show_image_cv_ipl(IplImage *disp, const char *name, const char *out_filename); 99 | //#endif 100 | #ifdef __cplusplus 101 | } 102 | #endif 103 | 104 | #endif 105 | 106 | -------------------------------------------------------------------------------- /src/darkSrc/layer.c: -------------------------------------------------------------------------------- 1 | #include "layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | void free_layer(layer l) 6 | { 7 | if (l.type == DROPOUT) { 8 | if (l.rand) free(l.rand); 9 | #ifdef GPU 10 | if (l.rand_gpu) cuda_free(l.rand_gpu); 11 | #endif 12 | return; 13 | } 14 | if (l.cweights) free(l.cweights); 15 | if (l.indexes) free(l.indexes); 16 | if (l.input_layers) free(l.input_layers); 17 | if (l.input_sizes) free(l.input_sizes); 18 | if (l.map) free(l.map); 19 | if (l.rand) free(l.rand); 20 | if (l.cost) free(l.cost); 21 | if (l.state) free(l.state); 22 | if (l.prev_state) free(l.prev_state); 23 | if (l.forgot_state) free(l.forgot_state); 24 | if (l.forgot_delta) free(l.forgot_delta); 25 | if (l.state_delta) free(l.state_delta); 26 | if (l.concat) free(l.concat); 27 | if (l.concat_delta) free(l.concat_delta); 28 | if (l.binary_weights) free(l.binary_weights); 29 | if (l.biases) free(l.biases); 30 | if (l.bias_updates) free(l.bias_updates); 31 | if (l.scales) free(l.scales); 32 | if (l.scale_updates) 
free(l.scale_updates); 33 | if (l.weights) free(l.weights); 34 | if (l.weight_updates) free(l.weight_updates); 35 | if (l.delta) free(l.delta); 36 | if (l.output) free(l.output); 37 | if (l.squared) free(l.squared); 38 | if (l.norms) free(l.norms); 39 | if (l.spatial_mean) free(l.spatial_mean); 40 | if (l.mean) free(l.mean); 41 | if (l.variance) free(l.variance); 42 | if (l.mean_delta) free(l.mean_delta); 43 | if (l.variance_delta) free(l.variance_delta); 44 | if (l.rolling_mean) free(l.rolling_mean); 45 | if (l.rolling_variance) free(l.rolling_variance); 46 | if (l.x) free(l.x); 47 | if (l.x_norm) free(l.x_norm); 48 | if (l.m) free(l.m); 49 | if (l.v) free(l.v); 50 | if (l.z_cpu) free(l.z_cpu); 51 | if (l.r_cpu) free(l.r_cpu); 52 | if (l.h_cpu) free(l.h_cpu); 53 | if (l.binary_input) free(l.binary_input); 54 | 55 | #ifdef GPU 56 | if (l.indexes_gpu) cuda_free((float *)l.indexes_gpu); 57 | 58 | if (l.z_gpu) cuda_free(l.z_gpu); 59 | if (l.r_gpu) cuda_free(l.r_gpu); 60 | if (l.h_gpu) cuda_free(l.h_gpu); 61 | if (l.m_gpu) cuda_free(l.m_gpu); 62 | if (l.v_gpu) cuda_free(l.v_gpu); 63 | if (l.prev_state_gpu) cuda_free(l.prev_state_gpu); 64 | if (l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); 65 | if (l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); 66 | if (l.state_gpu) cuda_free(l.state_gpu); 67 | if (l.state_delta_gpu) cuda_free(l.state_delta_gpu); 68 | if (l.gate_gpu) cuda_free(l.gate_gpu); 69 | if (l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); 70 | if (l.save_gpu) cuda_free(l.save_gpu); 71 | if (l.save_delta_gpu) cuda_free(l.save_delta_gpu); 72 | if (l.concat_gpu) cuda_free(l.concat_gpu); 73 | if (l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); 74 | if (l.binary_input_gpu) cuda_free(l.binary_input_gpu); 75 | if (l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); 76 | if (l.mean_gpu) cuda_free(l.mean_gpu); 77 | if (l.variance_gpu) cuda_free(l.variance_gpu); 78 | if (l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); 79 | if (l.rolling_variance_gpu) 
cuda_free(l.rolling_variance_gpu); 80 | if (l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); 81 | if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); 82 | if (l.x_gpu) cuda_free(l.x_gpu); 83 | if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); 84 | if (l.weights_gpu) cuda_free(l.weights_gpu); 85 | if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); 86 | if (l.biases_gpu) cuda_free(l.biases_gpu); 87 | if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); 88 | if (l.scales_gpu) cuda_free(l.scales_gpu); 89 | if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); 90 | if (l.output_gpu) cuda_free(l.output_gpu); 91 | if (l.delta_gpu) cuda_free(l.delta_gpu); 92 | if (l.rand_gpu) cuda_free(l.rand_gpu); 93 | if (l.squared_gpu) cuda_free(l.squared_gpu); 94 | if (l.norms_gpu) cuda_free(l.norms_gpu); 95 | #endif 96 | } 97 | -------------------------------------------------------------------------------- /src/darkSrc/layer.h: -------------------------------------------------------------------------------- 1 | #ifndef BASE_LAYER_H 2 | #define BASE_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "stddef.h" 6 | #include "tree.h" 7 | 8 | struct network_state; 9 | 10 | struct layer; 11 | typedef struct layer layer; 12 | 13 | typedef enum { 14 | CONVOLUTIONAL, 15 | DECONVOLUTIONAL, 16 | CONNECTED, 17 | MAXPOOL, 18 | SOFTMAX, 19 | DETECTION, 20 | DROPOUT, 21 | CROP, 22 | ROUTE, 23 | COST, 24 | NORMALIZATION, 25 | AVGPOOL, 26 | LOCAL, 27 | SHORTCUT, 28 | ACTIVE, 29 | RNN, 30 | GRU, 31 | CRNN, 32 | BATCHNORM, 33 | NETWORK, 34 | XNOR, 35 | REGION, 36 | REORG, 37 | BLANK 38 | } LAYER_TYPE; 39 | 40 | typedef enum{ 41 | SSE, MASKED, SMOOTH 42 | } COST_TYPE; 43 | 44 | struct layer{ 45 | LAYER_TYPE type; 46 | ACTIVATION activation; 47 | COST_TYPE cost_type; 48 | void (*forward) (struct layer, struct network_state); 49 | void (*backward) (struct layer, struct network_state); 50 | void (*update) (struct layer, int, float, float, float); 51 | void (*forward_gpu) (struct layer, 
struct network_state); 52 | void (*backward_gpu) (struct layer, struct network_state); 53 | void (*update_gpu) (struct layer, int, float, float, float); 54 | int batch_normalize; 55 | int shortcut; 56 | int batch; 57 | int forced; 58 | int flipped; 59 | int inputs; 60 | int outputs; 61 | int truths; 62 | int h,w,c; 63 | int out_h, out_w, out_c; 64 | int n; 65 | int max_boxes; 66 | int groups; 67 | int size; 68 | int side; 69 | int stride; 70 | int reverse; 71 | int pad; 72 | int sqrt; 73 | int flip; 74 | int index; 75 | int binary; 76 | int xnor; 77 | int steps; 78 | int hidden; 79 | float dot; 80 | float angle; 81 | float jitter; 82 | float saturation; 83 | float exposure; 84 | float shift; 85 | float ratio; 86 | int softmax; 87 | int classes; 88 | int coords; 89 | int background; 90 | int rescore; 91 | int objectness; 92 | int does_cost; 93 | int joint; 94 | int noadjust; 95 | int reorg; 96 | int log; 97 | 98 | int adam; 99 | float B1; 100 | float B2; 101 | float eps; 102 | float *m_gpu; 103 | float *v_gpu; 104 | int t; 105 | float *m; 106 | float *v; 107 | 108 | tree *softmax_tree; 109 | int *map; 110 | 111 | float alpha; 112 | float beta; 113 | float kappa; 114 | 115 | float coord_scale; 116 | float object_scale; 117 | float noobject_scale; 118 | float class_scale; 119 | int bias_match; 120 | int random; 121 | float thresh; 122 | int classfix; 123 | int absolute; 124 | 125 | int dontload; 126 | int dontloadscales; 127 | 128 | float temperature; 129 | float probability; 130 | float scale; 131 | 132 | int *indexes; 133 | float *rand; 134 | float *cost; 135 | char *cweights; 136 | float *state; 137 | float *prev_state; 138 | float *forgot_state; 139 | float *forgot_delta; 140 | float *state_delta; 141 | 142 | float *concat; 143 | float *concat_delta; 144 | 145 | float *binary_weights; 146 | 147 | float *biases; 148 | float *bias_updates; 149 | 150 | float *scales; 151 | float *scale_updates; 152 | 153 | float *weights; 154 | float *weight_updates; 155 | 156 | 
float *col_image; 157 | int * input_layers; 158 | int * input_sizes; 159 | float * delta; 160 | float * output; 161 | float * squared; 162 | float * norms; 163 | 164 | float * spatial_mean; 165 | float * mean; 166 | float * variance; 167 | 168 | float * mean_delta; 169 | float * variance_delta; 170 | 171 | float * rolling_mean; 172 | float * rolling_variance; 173 | 174 | float * x; 175 | float * x_norm; 176 | 177 | struct layer *input_layer; 178 | struct layer *self_layer; 179 | struct layer *output_layer; 180 | 181 | struct layer *input_gate_layer; 182 | struct layer *state_gate_layer; 183 | struct layer *input_save_layer; 184 | struct layer *state_save_layer; 185 | struct layer *input_state_layer; 186 | struct layer *state_state_layer; 187 | 188 | struct layer *input_z_layer; 189 | struct layer *state_z_layer; 190 | 191 | struct layer *input_r_layer; 192 | struct layer *state_r_layer; 193 | 194 | struct layer *input_h_layer; 195 | struct layer *state_h_layer; 196 | 197 | float *z_cpu; 198 | float *r_cpu; 199 | float *h_cpu; 200 | 201 | float *binary_input; 202 | 203 | size_t workspace_size; 204 | 205 | #ifdef GPU 206 | float *z_gpu; 207 | float *r_gpu; 208 | float *h_gpu; 209 | 210 | int *indexes_gpu; 211 | float * prev_state_gpu; 212 | float * forgot_state_gpu; 213 | float * forgot_delta_gpu; 214 | float * state_gpu; 215 | float * state_delta_gpu; 216 | float * gate_gpu; 217 | float * gate_delta_gpu; 218 | float * save_gpu; 219 | float * save_delta_gpu; 220 | float * concat_gpu; 221 | float * concat_delta_gpu; 222 | 223 | float *binary_input_gpu; 224 | float *binary_weights_gpu; 225 | 226 | float * mean_gpu; 227 | float * variance_gpu; 228 | 229 | float * rolling_mean_gpu; 230 | float * rolling_variance_gpu; 231 | 232 | float * variance_delta_gpu; 233 | float * mean_delta_gpu; 234 | 235 | float * col_image_gpu; 236 | 237 | float * x_gpu; 238 | float * x_norm_gpu; 239 | float * weights_gpu; 240 | float * weight_updates_gpu; 241 | 242 | float * biases_gpu; 243 | 
float * bias_updates_gpu; 244 | 245 | float * scales_gpu; 246 | float * scale_updates_gpu; 247 | 248 | float * output_gpu; 249 | float * delta_gpu; 250 | float * rand_gpu; 251 | float * squared_gpu; 252 | float * norms_gpu; 253 | #ifdef CUDNN 254 | cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; 255 | cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; 256 | cudnnFilterDescriptor_t weightDesc; 257 | cudnnFilterDescriptor_t dweightDesc; 258 | cudnnConvolutionDescriptor_t convDesc; 259 | cudnnConvolutionFwdAlgo_t fw_algo; 260 | cudnnConvolutionBwdDataAlgo_t bd_algo; 261 | cudnnConvolutionBwdFilterAlgo_t bf_algo; 262 | #endif 263 | #endif 264 | }; 265 | 266 | void free_layer(layer); 267 | 268 | #endif 269 | -------------------------------------------------------------------------------- /src/darkSrc/list.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "list.h" 4 | 5 | list *make_list() 6 | { 7 | list *l = malloc(sizeof(list)); 8 | l->size = 0; 9 | l->front = 0; 10 | l->back = 0; 11 | return l; 12 | } 13 | 14 | /* 15 | void transfer_node(list *s, list *d, node *n) 16 | { 17 | node *prev, *next; 18 | prev = n->prev; 19 | next = n->next; 20 | if(prev) prev->next = next; 21 | if(next) next->prev = prev; 22 | --s->size; 23 | if(s->front == n) s->front = next; 24 | if(s->back == n) s->back = prev; 25 | } 26 | */ 27 | 28 | void *list_pop(list *l){ 29 | if(!l->back) return 0; 30 | node *b = l->back; 31 | void *val = b->val; 32 | l->back = b->prev; 33 | if(l->back) l->back->next = 0; 34 | free(b); 35 | --l->size; 36 | 37 | return val; 38 | } 39 | 40 | void list_insert(list *l, void *val) 41 | { 42 | node *new = malloc(sizeof(node)); 43 | new->val = val; 44 | new->next = 0; 45 | 46 | if(!l->back){ 47 | l->front = new; 48 | new->prev = 0; 49 | }else{ 50 | l->back->next = new; 51 | new->prev = l->back; 52 | } 53 | l->back = new; 54 | ++l->size; 55 | } 56 | 57 | void free_node(node *n) 58 | { 59 | 
node *next; 60 | while(n) { 61 | next = n->next; 62 | free(n); 63 | n = next; 64 | } 65 | } 66 | 67 | void free_list(list *l) 68 | { 69 | free_node(l->front); 70 | free(l); 71 | } 72 | 73 | void free_list_contents(list *l) 74 | { 75 | node *n = l->front; 76 | while(n){ 77 | free(n->val); 78 | n = n->next; 79 | } 80 | } 81 | 82 | void **list_to_array(list *l) 83 | { 84 | void **a = calloc(l->size, sizeof(void*)); 85 | int count = 0; 86 | node *n = l->front; 87 | while(n){ 88 | a[count++] = n->val; 89 | n = n->next; 90 | } 91 | return a; 92 | } 93 | -------------------------------------------------------------------------------- /src/darkSrc/list.h: -------------------------------------------------------------------------------- 1 | #ifndef LIST_H 2 | #define LIST_H 3 | 4 | typedef struct node{ 5 | void *val; 6 | struct node *next; 7 | struct node *prev; 8 | } node; 9 | 10 | typedef struct list{ 11 | int size; 12 | node *front; 13 | node *back; 14 | } list; 15 | 16 | list *make_list(); 17 | int list_find(list *l, void *val); 18 | 19 | void list_insert(list *, void *); 20 | 21 | void **list_to_array(list *l); 22 | 23 | void free_list(list *l); 24 | void free_list_contents(list *l); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/darkSrc/local_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCAL_LAYER_H 2 | #define LOCAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer local_layer; 11 | 12 | #ifdef GPU 13 | void forward_local_layer_gpu(local_layer layer, network_state state); 14 | void backward_local_layer_gpu(local_layer layer, network_state state); 15 | void update_local_layer_gpu(local_layer layer, int batch, float learning_rate, float momentum, float decay); 16 | 17 | void push_local_layer(local_layer layer); 18 | void 
pull_local_layer(local_layer layer); 19 | #endif 20 | 21 | local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); 22 | 23 | void forward_local_layer(const local_layer layer, network_state state); 24 | void backward_local_layer(local_layer layer, network_state state); 25 | void update_local_layer(local_layer layer, int batch, float learning_rate, float momentum, float decay); 26 | 27 | void bias_output(float *output, float *biases, int batch, int n, int size); 28 | void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /src/darkSrc/matrix.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include "utils.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | void free_matrix(matrix m) 10 | { 11 | int i; 12 | for(i = 0; i < m.rows; ++i) free(m.vals[i]); 13 | free(m.vals); 14 | } 15 | 16 | float matrix_topk_accuracy(matrix truth, matrix guess, int k) 17 | { 18 | int *indexes = calloc(k, sizeof(int)); 19 | int n = truth.cols; 20 | int i,j; 21 | int correct = 0; 22 | for(i = 0; i < truth.rows; ++i){ 23 | top_k(guess.vals[i], n, k, indexes); 24 | for(j = 0; j < k; ++j){ 25 | int class = indexes[j]; 26 | if(truth.vals[i][class]){ 27 | ++correct; 28 | break; 29 | } 30 | } 31 | } 32 | free(indexes); 33 | return (float)correct/truth.rows; 34 | } 35 | 36 | void scale_matrix(matrix m, float scale) 37 | { 38 | int i,j; 39 | for(i = 0; i < m.rows; ++i){ 40 | for(j = 0; j < m.cols; ++j){ 41 | m.vals[i][j] *= scale; 42 | } 43 | } 44 | } 45 | 46 | matrix resize_matrix(matrix m, int size) 47 | { 48 | int i; 49 | if (m.rows == size) return m; 50 | if (m.rows < size) { 51 | m.vals = realloc(m.vals, size*sizeof(float*)); 52 | for (i = m.rows; i < size; ++i) { 53 | m.vals[i] = calloc(m.cols, 
sizeof(float)); 54 | } 55 | } else if (m.rows > size) { 56 | for (i = size; i < m.rows; ++i) { 57 | free(m.vals[i]); 58 | } 59 | m.vals = realloc(m.vals, size*sizeof(float*)); 60 | } 61 | m.rows = size; 62 | return m; 63 | } 64 | 65 | void matrix_add_matrix(matrix from, matrix to) 66 | { 67 | assert(from.rows == to.rows && from.cols == to.cols); 68 | int i,j; 69 | for(i = 0; i < from.rows; ++i){ 70 | for(j = 0; j < from.cols; ++j){ 71 | to.vals[i][j] += from.vals[i][j]; 72 | } 73 | } 74 | } 75 | 76 | matrix make_matrix(int rows, int cols) 77 | { 78 | int i; 79 | matrix m; 80 | m.rows = rows; 81 | m.cols = cols; 82 | m.vals = calloc(m.rows, sizeof(float *)); 83 | for(i = 0; i < m.rows; ++i){ 84 | m.vals[i] = calloc(m.cols, sizeof(float)); 85 | } 86 | return m; 87 | } 88 | 89 | matrix hold_out_matrix(matrix *m, int n) 90 | { 91 | int i; 92 | matrix h; 93 | h.rows = n; 94 | h.cols = m->cols; 95 | h.vals = calloc(h.rows, sizeof(float *)); 96 | for(i = 0; i < n; ++i){ 97 | int index = rand()%m->rows; 98 | h.vals[i] = m->vals[index]; 99 | m->vals[index] = m->vals[--(m->rows)]; 100 | } 101 | return h; 102 | } 103 | 104 | float *pop_column(matrix *m, int c) 105 | { 106 | float *col = calloc(m->rows, sizeof(float)); 107 | int i, j; 108 | for(i = 0; i < m->rows; ++i){ 109 | col[i] = m->vals[i][c]; 110 | for(j = c; j < m->cols-1; ++j){ 111 | m->vals[i][j] = m->vals[i][j+1]; 112 | } 113 | } 114 | --m->cols; 115 | return col; 116 | } 117 | 118 | matrix csv_to_matrix(char *filename) 119 | { 120 | FILE *fp = fopen(filename, "r"); 121 | if(!fp) file_error(filename); 122 | 123 | matrix m; 124 | m.cols = -1; 125 | 126 | char *line; 127 | 128 | int n = 0; 129 | int size = 1024; 130 | m.vals = calloc(size, sizeof(float*)); 131 | while((line = fgetl(fp))){ 132 | if(m.cols == -1) m.cols = count_fields(line); 133 | if(n == size){ 134 | size *= 2; 135 | m.vals = realloc(m.vals, size*sizeof(float*)); 136 | } 137 | m.vals[n] = parse_fields(line, m.cols); 138 | free(line); 139 | ++n; 140 | } 
141 | m.vals = realloc(m.vals, n*sizeof(float*)); 142 | m.rows = n; 143 | return m; 144 | } 145 | 146 | void matrix_to_csv(matrix m) 147 | { 148 | int i, j; 149 | 150 | for(i = 0; i < m.rows; ++i){ 151 | for(j = 0; j < m.cols; ++j){ 152 | if(j > 0) printf(","); 153 | printf("%.17g", m.vals[i][j]); 154 | } 155 | printf("\n"); 156 | } 157 | } 158 | 159 | void print_matrix(matrix m) 160 | { 161 | int i, j; 162 | printf("%d X %d Matrix:\n",m.rows, m.cols); 163 | printf(" __"); 164 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 165 | printf("__ \n"); 166 | 167 | printf("| "); 168 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 169 | printf(" |\n"); 170 | 171 | for(i = 0; i < m.rows; ++i){ 172 | printf("| "); 173 | for(j = 0; j < m.cols; ++j){ 174 | printf("%15.7f ", m.vals[i][j]); 175 | } 176 | printf(" |\n"); 177 | } 178 | printf("|__"); 179 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 180 | printf("__|\n"); 181 | } 182 | -------------------------------------------------------------------------------- /src/darkSrc/matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H 2 | #define MATRIX_H 3 | typedef struct matrix{ 4 | int rows, cols; 5 | float **vals; 6 | } matrix; 7 | 8 | matrix make_matrix(int rows, int cols); 9 | void free_matrix(matrix m); 10 | void print_matrix(matrix m); 11 | 12 | matrix csv_to_matrix(char *filename); 13 | void matrix_to_csv(matrix m); 14 | matrix hold_out_matrix(matrix *m, int n); 15 | float matrix_topk_accuracy(matrix truth, matrix guess, int k); 16 | void matrix_add_matrix(matrix from, matrix to); 17 | void scale_matrix(matrix m, float scale); 18 | matrix resize_matrix(matrix m, int size); 19 | 20 | float *pop_column(matrix *m, int c); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/darkSrc/maxpool_layer.c: -------------------------------------------------------------------------------- 1 | #include "maxpool_layer.h" 2 | 
#include "cuda.h"
/* Bug fix: the dump contained a bare "#include" here — the angle-bracketed
 * header name was stripped. fprintf() in make_maxpool_layer needs <stdio.h>. */
#include <stdio.h>

/* View the layer's output buffer as a w x h x c image (no copy). */
image get_maxpool_image(maxpool_layer l)
{
    int h = l.out_h;
    int w = l.out_w;
    int c = l.c;
    return float_to_image(w,h,c,l.output);
}

/* View the layer's gradient (delta) buffer as a w x h x c image (no copy). */
image get_maxpool_delta(maxpool_layer l)
{
    int h = l.out_h;
    int w = l.out_w;
    int c = l.c;
    return float_to_image(w,h,c,l.delta);
}

/* Build a max-pooling layer.
 * batch:   number of images per minibatch
 * h, w, c: input height, width, channels
 * size:    pooling window side length
 * stride:  window step
 * padding: zero-padding added on each side
 * Output spatial size is (dim + 2*padding)/stride; `indexes` records the
 * argmax input offset for each output element so backward can route gradient. */
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
{
    maxpool_layer l = {0};
    l.type = MAXPOOL;
    l.batch = batch;
    l.h = h;
    l.w = w;
    l.c = c;
    l.pad = padding;
    l.out_w = (w + 2*padding)/stride;
    l.out_h = (h + 2*padding)/stride;
    l.out_c = c;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = h*w*c;
    l.size = size;
    l.stride = stride;
    int output_size = l.out_h * l.out_w * l.out_c * batch;
    l.indexes = calloc(output_size, sizeof(int));
    l.output =  calloc(output_size, sizeof(float));
    l.delta =   calloc(output_size, sizeof(float));
    l.forward = forward_maxpool_layer;
    l.backward = backward_maxpool_layer;
    #ifdef GPU
    l.forward_gpu = forward_maxpool_layer_gpu;
    l.backward_gpu = backward_maxpool_layer_gpu;
    l.indexes_gpu = cuda_make_int_array(output_size);
    l.output_gpu  = cuda_make_array(l.output, output_size);
    l.delta_gpu   = cuda_make_array(l.delta, output_size);
    #endif
    fprintf(stderr, "max          %d x %d / %d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
    return l;
}

/* Resize the layer for a new input width/height, reallocating the CPU buffers
 * (and rebuilding the GPU mirrors) to the new output size. */
void resize_maxpool_layer(maxpool_layer *l, int w, int h)
{
    l->h = h;
    l->w = w;
    l->inputs = h*w*l->c;

    l->out_w = (w + 2*l->pad)/l->stride;
    l->out_h = (h + 2*l->pad)/l->stride;
    l->outputs = l->out_w * l->out_h * l->c;
    int output_size = l->outputs * l->batch;

    l->indexes = realloc(l->indexes, output_size * sizeof(int));
    l->output = realloc(l->output,
output_size * sizeof(float)); 67 | l->delta = realloc(l->delta, output_size * sizeof(float)); 68 | 69 | #ifdef GPU 70 | cuda_free((float *)l->indexes_gpu); 71 | cuda_free(l->output_gpu); 72 | cuda_free(l->delta_gpu); 73 | l->indexes_gpu = cuda_make_int_array(output_size); 74 | l->output_gpu = cuda_make_array(l->output, output_size); 75 | l->delta_gpu = cuda_make_array(l->delta, output_size); 76 | #endif 77 | } 78 | 79 | void forward_maxpool_layer(const maxpool_layer l, network_state state) 80 | { 81 | int b,i,j,k,m,n; 82 | int w_offset = -l.pad; 83 | int h_offset = -l.pad; 84 | 85 | int h = l.out_h; 86 | int w = l.out_w; 87 | int c = l.c; 88 | 89 | for(b = 0; b < l.batch; ++b){ 90 | for(k = 0; k < c; ++k){ 91 | for(i = 0; i < h; ++i){ 92 | for(j = 0; j < w; ++j){ 93 | int out_index = j + w*(i + h*(k + c*b)); 94 | float max = -FLT_MAX; 95 | int max_i = -1; 96 | for(n = 0; n < l.size; ++n){ 97 | for(m = 0; m < l.size; ++m){ 98 | int cur_h = h_offset + i*l.stride + n; 99 | int cur_w = w_offset + j*l.stride + m; 100 | int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); 101 | int valid = (cur_h >= 0 && cur_h < l.h && 102 | cur_w >= 0 && cur_w < l.w); 103 | float val = (valid != 0) ? state.input[index] : -FLT_MAX; 104 | max_i = (val > max) ? index : max_i; 105 | max = (val > max) ? 
val : max; 106 | } 107 | } 108 | l.output[out_index] = max; 109 | l.indexes[out_index] = max_i; 110 | } 111 | } 112 | } 113 | } 114 | } 115 | 116 | void backward_maxpool_layer(const maxpool_layer l, network_state state) 117 | { 118 | int i; 119 | int h = l.out_h; 120 | int w = l.out_w; 121 | int c = l.c; 122 | for(i = 0; i < h*w*c*l.batch; ++i){ 123 | int index = l.indexes[i]; 124 | state.delta[index] += l.delta[i]; 125 | } 126 | } 127 | 128 | -------------------------------------------------------------------------------- /src/darkSrc/maxpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef MAXPOOL_LAYER_H 2 | #define MAXPOOL_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | typedef layer maxpool_layer; 10 | 11 | image get_maxpool_image(maxpool_layer l); 12 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); 13 | void resize_maxpool_layer(maxpool_layer *l, int w, int h); 14 | void forward_maxpool_layer(const maxpool_layer l, network_state state); 15 | void backward_maxpool_layer(const maxpool_layer l, network_state state); 16 | 17 | #ifdef GPU 18 | void forward_maxpool_layer_gpu(maxpool_layer l, network_state state); 19 | void backward_maxpool_layer_gpu(maxpool_layer l, network_state state); 20 | #endif 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /src/darkSrc/maxpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "maxpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) 11 | { 12 | int h = (in_h + 2*pad)/stride; 13 | int w 
= (in_w + 2*pad)/stride; 14 | int c = in_c; 15 | 16 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 17 | if(id >= n) return; 18 | 19 | int j = id % w; 20 | id /= w; 21 | int i = id % h; 22 | id /= h; 23 | int k = id % c; 24 | id /= c; 25 | int b = id; 26 | 27 | int w_offset = -pad; 28 | int h_offset = -pad; 29 | 30 | int out_index = j + w*(i + h*(k + c*b)); 31 | float max = -INFINITY; 32 | int max_i = -1; 33 | int l, m; 34 | for(l = 0; l < size; ++l){ 35 | for(m = 0; m < size; ++m){ 36 | int cur_h = h_offset + i*stride + l; 37 | int cur_w = w_offset + j*stride + m; 38 | int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); 39 | int valid = (cur_h >= 0 && cur_h < in_h && 40 | cur_w >= 0 && cur_w < in_w); 41 | float val = (valid != 0) ? input[index] : -INFINITY; 42 | max_i = (val > max) ? index : max_i; 43 | max = (val > max) ? val : max; 44 | } 45 | } 46 | output[out_index] = max; 47 | indexes[out_index] = max_i; 48 | } 49 | 50 | __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) 51 | { 52 | int h = (in_h + 2*pad)/stride; 53 | int w = (in_w + 2*pad)/stride; 54 | int c = in_c; 55 | int area = (size-1)/stride; 56 | 57 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 58 | if(id >= n) return; 59 | 60 | int index = id; 61 | int j = id % in_w; 62 | id /= in_w; 63 | int i = id % in_h; 64 | id /= in_h; 65 | int k = id % in_c; 66 | id /= in_c; 67 | int b = id; 68 | 69 | int w_offset = -pad; 70 | int h_offset = -pad; 71 | 72 | float d = 0; 73 | int l, m; 74 | for(l = -area; l < area+1; ++l){ 75 | for(m = -area; m < area+1; ++m){ 76 | int out_w = (j-w_offset)/stride + m; 77 | int out_h = (i-h_offset)/stride + l; 78 | int out_index = out_w + w*(out_h + h*(k + c*b)); 79 | int valid = (out_w >= 0 && out_w < w && 80 | out_h >= 0 && out_h < h); 81 | d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0;
        }
    }
    prev_delta[index] += d;
}

/* Host-side wrapper: launches one thread per OUTPUT element of the layer. */
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
{
    int h = layer.out_h;
    int w = layer.out_w;
    int c = layer.c;

    size_t n = h*w*c*layer.batch;

    /* Bug fix: the launch configuration had been stripped to "<<>>" in this
     * dump (angle-bracketed arguments lost); restored as
     * <<<cuda_gridsize(n), BLOCK>>> — NOTE(review): confirm cuda_gridsize()
     * and BLOCK against cuda.h, which is outside this view. */
    forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
    check_error(cudaPeekAtLastError());
}

/* Host-side wrapper: one thread per INPUT element; each thread gathers the
 * gradient of every output window whose recorded argmax points at it. */
extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
{
    size_t n = layer.h*layer.w*layer.c*layer.batch;

    /* Same restoration of the stripped launch configuration as above. */
    backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
    check_error(cudaPeekAtLastError());
}

--------------------------------------------------------------------------------
/src/darkSrc/network.h:
--------------------------------------------------------------------------------
// Oh boy, why am I about to do this....
2 | #ifndef NETWORK_H 3 | #define NETWORK_H 4 | 5 | #include 6 | #include "layer.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | #include "image.h" 13 | #include "data.h" 14 | #include "tree.h" 15 | 16 | typedef enum { 17 | CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM 18 | } learning_rate_policy; 19 | 20 | typedef struct network{ 21 | float *workspace; 22 | int n; 23 | int batch; 24 | int *seen; 25 | float epoch; 26 | int subdivisions; 27 | float momentum; 28 | float decay; 29 | layer *layers; 30 | int outputs; 31 | float *output; 32 | learning_rate_policy policy; 33 | 34 | float learning_rate; 35 | float gamma; 36 | float scale; 37 | float power; 38 | int time_steps; 39 | int step; 40 | int max_batches; 41 | float *scales; 42 | int *steps; 43 | int num_steps; 44 | int burn_in; 45 | 46 | int adam; 47 | float B1; 48 | float B2; 49 | float eps; 50 | 51 | int inputs; 52 | int h, w, c; 53 | int max_crop; 54 | int min_crop; 55 | float angle; 56 | float aspect; 57 | float exposure; 58 | float saturation; 59 | float hue; 60 | 61 | int gpu_index; 62 | tree *hierarchy; 63 | 64 | #ifdef GPU 65 | float **input_gpu; 66 | float **truth_gpu; 67 | #endif 68 | } network; 69 | 70 | typedef struct network_state { 71 | float *truth; 72 | float *input; 73 | float *delta; 74 | float *workspace; 75 | int train; 76 | int index; 77 | network net; 78 | } network_state; 79 | 80 | #ifdef GPU 81 | float train_networks(network *nets, int n, data d, int interval); 82 | void sync_nets(network *nets, int n, int interval); 83 | float train_network_datum_gpu(network net, float *x, float *y); 84 | float *network_predict_gpu(network net, float *input); 85 | float * get_network_output_gpu_layer(network net, int i); 86 | float * get_network_delta_gpu_layer(network net, int i); 87 | float *get_network_output_gpu(network net); 88 | void forward_network_gpu(network net, network_state state); 89 | void backward_network_gpu(network net, network_state state); 90 | void 
update_network_gpu(network net); 91 | #endif 92 | 93 | float get_current_rate(network net); 94 | int get_current_batch(network net); 95 | void free_network(network net); 96 | void compare_networks(network n1, network n2, data d); 97 | char *get_layer_string(LAYER_TYPE a); 98 | 99 | network make_network(int n); 100 | void forward_network(network net, network_state state); 101 | void backward_network(network net, network_state state); 102 | void update_network(network net); 103 | 104 | float train_network(network net, data d); 105 | float train_network_batch(network net, data d, int n); 106 | float train_network_sgd(network net, data d, int n); 107 | float train_network_datum(network net, float *x, float *y); 108 | 109 | matrix network_predict_data(network net, data test); 110 | float *network_predict(network net, float *input); 111 | float network_accuracy(network net, data d); 112 | float *network_accuracies(network net, data d, int n); 113 | float network_accuracy_multi(network net, data d, int n); 114 | void top_predictions(network net, int n, int *index); 115 | float *get_network_output(network net); 116 | float *get_network_output_layer(network net, int i); 117 | float *get_network_delta_layer(network net, int i); 118 | float *get_network_delta(network net); 119 | int get_network_output_size_layer(network net, int i); 120 | int get_network_output_size(network net); 121 | image get_network_image(network net); 122 | image get_network_image_layer(network net, int i); 123 | int get_predicted_class_network(network net); 124 | void print_network(network net); 125 | void visualize_network(network net); 126 | int resize_network(network *net, int w, int h); 127 | void set_batch_network(network *net, int b); 128 | int get_network_input_size(network net); 129 | float get_network_cost(network net); 130 | 131 | int get_network_nuisance(network net); 132 | int get_network_background(network net); 133 | 134 | #ifdef __cplusplus 135 | } 136 | #endif 137 | 138 | #endif 139 | 
140 | -------------------------------------------------------------------------------- /src/darkSrc/normalization_layer.c: -------------------------------------------------------------------------------- 1 | #include "normalization_layer.h" 2 | #include "blas.h" 3 | #include 4 | 5 | layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) 6 | { 7 | fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); 8 | layer layer = {0}; 9 | layer.type = NORMALIZATION; 10 | layer.batch = batch; 11 | layer.h = layer.out_h = h; 12 | layer.w = layer.out_w = w; 13 | layer.c = layer.out_c = c; 14 | layer.kappa = kappa; 15 | layer.size = size; 16 | layer.alpha = alpha; 17 | layer.beta = beta; 18 | layer.output = calloc(h * w * c * batch, sizeof(float)); 19 | layer.delta = calloc(h * w * c * batch, sizeof(float)); 20 | layer.squared = calloc(h * w * c * batch, sizeof(float)); 21 | layer.norms = calloc(h * w * c * batch, sizeof(float)); 22 | layer.inputs = w*h*c; 23 | layer.outputs = layer.inputs; 24 | 25 | layer.forward = forward_normalization_layer; 26 | layer.backward = backward_normalization_layer; 27 | #ifdef GPU 28 | layer.forward_gpu = forward_normalization_layer_gpu; 29 | layer.backward_gpu = backward_normalization_layer_gpu; 30 | 31 | layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); 32 | layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); 33 | layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); 34 | layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); 35 | #endif 36 | return layer; 37 | } 38 | 39 | void resize_normalization_layer(layer *layer, int w, int h) 40 | { 41 | int c = layer->c; 42 | int batch = layer->batch; 43 | layer->h = h; 44 | layer->w = w; 45 | layer->out_h = h; 46 | layer->out_w = w; 47 | layer->inputs = w*h*c; 48 | layer->outputs = layer->inputs; 49 | layer->output = realloc(layer->output, h * w * 
c * batch * sizeof(float)); 50 | layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); 51 | layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); 52 | layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); 53 | #ifdef GPU 54 | cuda_free(layer->output_gpu); 55 | cuda_free(layer->delta_gpu); 56 | cuda_free(layer->squared_gpu); 57 | cuda_free(layer->norms_gpu); 58 | layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); 59 | layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); 60 | layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); 61 | layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); 62 | #endif 63 | } 64 | 65 | void forward_normalization_layer(const layer layer, network_state state) 66 | { 67 | int k,b; 68 | int w = layer.w; 69 | int h = layer.h; 70 | int c = layer.c; 71 | scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1); 72 | 73 | for(b = 0; b < layer.batch; ++b){ 74 | float *squared = layer.squared + w*h*c*b; 75 | float *norms = layer.norms + w*h*c*b; 76 | float *input = state.input + w*h*c*b; 77 | pow_cpu(w*h*c, 2, input, 1, squared, 1); 78 | 79 | const_cpu(w*h, layer.kappa, norms, 1); 80 | for(k = 0; k < layer.size/2; ++k){ 81 | axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); 82 | } 83 | 84 | for(k = 1; k < layer.c; ++k){ 85 | copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); 86 | int prev = k - ((layer.size-1)/2) - 1; 87 | int next = k + (layer.size/2); 88 | if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); 89 | if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); 90 | } 91 | } 92 | pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); 93 | mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1); 94 | } 95 | 96 | void backward_normalization_layer(const layer layer, network_state state) 97 | { 98 | // TODO This is 
approximate ;-) 99 | // Also this should add in to delta instead of overwritting. 100 | 101 | int w = layer.w; 102 | int h = layer.h; 103 | int c = layer.c; 104 | pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1); 105 | mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1); 106 | } 107 | 108 | #ifdef GPU 109 | void forward_normalization_layer_gpu(const layer layer, network_state state) 110 | { 111 | int k,b; 112 | int w = layer.w; 113 | int h = layer.h; 114 | int c = layer.c; 115 | scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1); 116 | 117 | for(b = 0; b < layer.batch; ++b){ 118 | float *squared = layer.squared_gpu + w*h*c*b; 119 | float *norms = layer.norms_gpu + w*h*c*b; 120 | float *input = state.input + w*h*c*b; 121 | pow_ongpu(w*h*c, 2, input, 1, squared, 1); 122 | 123 | const_ongpu(w*h, layer.kappa, norms, 1); 124 | for(k = 0; k < layer.size/2; ++k){ 125 | axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); 126 | } 127 | 128 | for(k = 1; k < layer.c; ++k){ 129 | copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); 130 | int prev = k - ((layer.size-1)/2) - 1; 131 | int next = k + (layer.size/2); 132 | if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); 133 | if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); 134 | } 135 | } 136 | pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1); 137 | mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1); 138 | } 139 | 140 | void backward_normalization_layer_gpu(const layer layer, network_state state) 141 | { 142 | // TODO This is approximate ;-) 143 | 144 | int w = layer.w; 145 | int h = layer.h; 146 | int c = layer.c; 147 | pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1); 148 | mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1); 149 | } 150 | #endif 151 | 
-------------------------------------------------------------------------------- /src/darkSrc/normalization_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef NORMALIZATION_LAYER_H 2 | #define NORMALIZATION_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); 9 | void resize_normalization_layer(layer *layer, int h, int w); 10 | void forward_normalization_layer(const layer layer, network_state state); 11 | void backward_normalization_layer(const layer layer, network_state state); 12 | void visualize_normalization_layer(layer layer, char *window); 13 | 14 | #ifdef GPU 15 | void forward_normalization_layer_gpu(const layer layer, network_state state); 16 | void backward_normalization_layer_gpu(const layer layer, network_state state); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/darkSrc/option_list.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "option_list.h" 5 | #include "utils.h" 6 | 7 | list *read_data_cfg(char *filename) 8 | { 9 | FILE *file = fopen(filename, "r"); 10 | if(file == 0) file_error(filename); 11 | char *line; 12 | int nu = 0; 13 | list *options = make_list(); 14 | while((line=fgetl(file)) != 0){ 15 | ++ nu; 16 | strip(line); 17 | switch(line[0]){ 18 | case '\0': 19 | case '#': 20 | case ';': 21 | free(line); 22 | break; 23 | default: 24 | if(!read_option(line, options)){ 25 | fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); 26 | free(line); 27 | } 28 | break; 29 | } 30 | } 31 | fclose(file); 32 | return options; 33 | } 34 | 35 | int read_option(char *s, list *options) 36 | { 37 | size_t i; 38 | size_t len = strlen(s); 39 | char *val = 0; 40 | for(i = 0; i < len; ++i){ 
41 | if(s[i] == '='){ 42 | s[i] = '\0'; 43 | val = s+i+1; 44 | break; 45 | } 46 | } 47 | if(i == len-1) return 0; 48 | char *key = s; 49 | option_insert(options, key, val); 50 | return 1; 51 | } 52 | 53 | void option_insert(list *l, char *key, char *val) 54 | { 55 | kvp *p = malloc(sizeof(kvp)); 56 | p->key = key; 57 | p->val = val; 58 | p->used = 0; 59 | list_insert(l, p); 60 | } 61 | 62 | void option_unused(list *l) 63 | { 64 | node *n = l->front; 65 | while(n){ 66 | kvp *p = (kvp *)n->val; 67 | if(!p->used){ 68 | fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); 69 | } 70 | n = n->next; 71 | } 72 | } 73 | 74 | char *option_find(list *l, char *key) 75 | { 76 | node *n = l->front; 77 | while(n){ 78 | kvp *p = (kvp *)n->val; 79 | if(strcmp(p->key, key) == 0){ 80 | p->used = 1; 81 | return p->val; 82 | } 83 | n = n->next; 84 | } 85 | return 0; 86 | } 87 | char *option_find_str(list *l, char *key, char *def) 88 | { 89 | char *v = option_find(l, key); 90 | if(v) return v; 91 | if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); 92 | return def; 93 | } 94 | 95 | int option_find_int(list *l, char *key, int def) 96 | { 97 | char *v = option_find(l, key); 98 | if(v) return atoi(v); 99 | fprintf(stderr, "%s: Using default '%d'\n", key, def); 100 | return def; 101 | } 102 | 103 | int option_find_int_quiet(list *l, char *key, int def) 104 | { 105 | char *v = option_find(l, key); 106 | if(v) return atoi(v); 107 | return def; 108 | } 109 | 110 | float option_find_float_quiet(list *l, char *key, float def) 111 | { 112 | char *v = option_find(l, key); 113 | if(v) return atof(v); 114 | return def; 115 | } 116 | 117 | float option_find_float(list *l, char *key, float def) 118 | { 119 | char *v = option_find(l, key); 120 | if(v) return atof(v); 121 | fprintf(stderr, "%s: Using default '%lf'\n", key, def); 122 | return def; 123 | } 124 | -------------------------------------------------------------------------------- /src/darkSrc/option_list.h: 
-------------------------------------------------------------------------------- 1 | #ifndef OPTION_LIST_H 2 | #define OPTION_LIST_H 3 | #include "list.h" 4 | 5 | typedef struct{ 6 | char *key; 7 | char *val; 8 | int used; 9 | } kvp; 10 | 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | list *read_data_cfg(char *filename); 17 | int read_option(char *s, list *options); 18 | void option_insert(list *l, char *key, char *val); 19 | char *option_find(list *l, char *key); 20 | char *option_find_str(list *l, char *key, char *def); 21 | int option_find_int(list *l, char *key, int def); 22 | int option_find_int_quiet(list *l, char *key, int def); 23 | float option_find_float(list *l, char *key, float def); 24 | float option_find_float_quiet(list *l, char *key, float def); 25 | void option_unused(list *l); 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/darkSrc/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | #include "network.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | 9 | network parse_network_cfg(char *filename); 10 | network parse_network_cfg_custom(char *filename, int batch); 11 | void save_network(network net, char *filename); 12 | void save_weights(network net, char *filename); 13 | void save_weights_upto(network net, char *filename, int cutoff); 14 | void save_weights_double(network net, char *filename); 15 | void load_weights(network *net, char *filename); 16 | void load_weights_upto(network *net, char *filename, int cutoff); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/darkSrc/region_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REGION_LAYER_H 2 | #define REGION_LAYER_H 3 
| 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer region_layer; 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords); 14 | void forward_region_layer(const region_layer l, network_state state); 15 | void backward_region_layer(const region_layer l, network_state state); 16 | void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map); 17 | void resize_region_layer(layer *l, int w, int h); 18 | 19 | #ifdef GPU 20 | void forward_region_layer_gpu(const region_layer l, network_state state); 21 | void backward_region_layer_gpu(region_layer l, network_state state); 22 | #endif 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/darkSrc/reorg_layer.c: -------------------------------------------------------------------------------- 1 | #include "reorg_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | #include 5 | 6 | 7 | layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse) 8 | { 9 | layer l = {0}; 10 | l.type = REORG; 11 | l.batch = batch; 12 | l.stride = stride; 13 | l.h = h; 14 | l.w = w; 15 | l.c = c; 16 | if(reverse){ 17 | l.out_w = w*stride; 18 | l.out_h = h*stride; 19 | l.out_c = c/(stride*stride); 20 | }else{ 21 | l.out_w = w/stride; 22 | l.out_h = h/stride; 23 | l.out_c = c*(stride*stride); 24 | } 25 | l.reverse = reverse; 26 | fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); 27 | l.outputs = l.out_h * l.out_w * l.out_c; 28 | l.inputs = h*w*c; 29 | int output_size = l.out_h * l.out_w * l.out_c * batch; 30 | l.output = calloc(output_size, sizeof(float)); 31 | l.delta = calloc(output_size, sizeof(float)); 32 | 33 | l.forward = forward_reorg_layer; 34 | l.backward = backward_reorg_layer; 35 | #ifdef 
GPU 36 | l.forward_gpu = forward_reorg_layer_gpu; 37 | l.backward_gpu = backward_reorg_layer_gpu; 38 | 39 | l.output_gpu = cuda_make_array(l.output, output_size); 40 | l.delta_gpu = cuda_make_array(l.delta, output_size); 41 | #endif 42 | return l; 43 | } 44 | 45 | void resize_reorg_layer(layer *l, int w, int h) 46 | { 47 | int stride = l->stride; 48 | int c = l->c; 49 | 50 | l->h = h; 51 | l->w = w; 52 | 53 | if(l->reverse){ 54 | l->out_w = w*stride; 55 | l->out_h = h*stride; 56 | l->out_c = c/(stride*stride); 57 | }else{ 58 | l->out_w = w/stride; 59 | l->out_h = h/stride; 60 | l->out_c = c*(stride*stride); 61 | } 62 | 63 | l->outputs = l->out_h * l->out_w * l->out_c; 64 | l->inputs = l->outputs; 65 | int output_size = l->outputs * l->batch; 66 | 67 | l->output = realloc(l->output, output_size * sizeof(float)); 68 | l->delta = realloc(l->delta, output_size * sizeof(float)); 69 | 70 | #ifdef GPU 71 | cuda_free(l->output_gpu); 72 | cuda_free(l->delta_gpu); 73 | l->output_gpu = cuda_make_array(l->output, output_size); 74 | l->delta_gpu = cuda_make_array(l->delta, output_size); 75 | #endif 76 | } 77 | 78 | void forward_reorg_layer(const layer l, network_state state) 79 | { 80 | if(l.reverse){ 81 | reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); 82 | }else { 83 | reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); 84 | } 85 | } 86 | 87 | void backward_reorg_layer(const layer l, network_state state) 88 | { 89 | if(l.reverse){ 90 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); 91 | }else{ 92 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta); 93 | } 94 | } 95 | 96 | #ifdef GPU 97 | void forward_reorg_layer_gpu(layer l, network_state state) 98 | { 99 | if(l.reverse){ 100 | reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); 101 | }else { 102 | reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); 103 | } 104 | } 105 | 106 | void 
backward_reorg_layer_gpu(layer l, network_state state) 107 | { 108 | if(l.reverse){ 109 | reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); 110 | }else{ 111 | reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta); 112 | } 113 | } 114 | #endif 115 | -------------------------------------------------------------------------------- /src/darkSrc/reorg_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REORG_LAYER_H 2 | #define REORG_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse); 10 | void resize_reorg_layer(layer *l, int w, int h); 11 | void forward_reorg_layer(const layer l, network_state state); 12 | void backward_reorg_layer(const layer l, network_state state); 13 | 14 | #ifdef GPU 15 | void forward_reorg_layer_gpu(layer l, network_state state); 16 | void backward_reorg_layer_gpu(layer l, network_state state); 17 | #endif 18 | 19 | #endif 20 | 21 | -------------------------------------------------------------------------------- /src/darkSrc/rnn_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef RNN_LAYER_H 3 | #define RNN_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | #define USET 9 | 10 | layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log); 11 | 12 | void forward_rnn_layer(layer l, network_state state); 13 | void backward_rnn_layer(layer l, network_state state); 14 | void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay); 15 | 16 | #ifdef GPU 17 | void forward_rnn_layer_gpu(layer l, network_state state); 18 | void backward_rnn_layer_gpu(layer l, network_state state); 19 | void update_rnn_layer_gpu(layer l, int 
batch, float learning_rate, float momentum, float decay); 20 | void push_rnn_layer(layer l); 21 | void pull_rnn_layer(layer l); 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/darkSrc/route_layer.c: -------------------------------------------------------------------------------- 1 | #include "route_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | #include 5 | 6 | route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes) 7 | { 8 | fprintf(stderr,"route "); 9 | route_layer l = {0}; 10 | l.type = ROUTE; 11 | l.batch = batch; 12 | l.n = n; 13 | l.input_layers = input_layers; 14 | l.input_sizes = input_sizes; 15 | int i; 16 | int outputs = 0; 17 | for(i = 0; i < n; ++i){ 18 | fprintf(stderr," %d", input_layers[i]); 19 | outputs += input_sizes[i]; 20 | } 21 | fprintf(stderr, "\n"); 22 | l.outputs = outputs; 23 | l.inputs = outputs; 24 | l.delta = calloc(outputs*batch, sizeof(float)); 25 | l.output = calloc(outputs*batch, sizeof(float));; 26 | 27 | l.forward = forward_route_layer; 28 | l.backward = backward_route_layer; 29 | #ifdef GPU 30 | l.forward_gpu = forward_route_layer_gpu; 31 | l.backward_gpu = backward_route_layer_gpu; 32 | 33 | l.delta_gpu = cuda_make_array(l.delta, outputs*batch); 34 | l.output_gpu = cuda_make_array(l.output, outputs*batch); 35 | #endif 36 | return l; 37 | } 38 | 39 | void resize_route_layer(route_layer *l, network *net) 40 | { 41 | int i; 42 | layer first = net->layers[l->input_layers[0]]; 43 | l->out_w = first.out_w; 44 | l->out_h = first.out_h; 45 | l->out_c = first.out_c; 46 | l->outputs = first.outputs; 47 | l->input_sizes[0] = first.outputs; 48 | for(i = 1; i < l->n; ++i){ 49 | int index = l->input_layers[i]; 50 | layer next = net->layers[index]; 51 | l->outputs += next.outputs; 52 | l->input_sizes[i] = next.outputs; 53 | if(next.out_w == first.out_w && next.out_h == first.out_h){ 54 | l->out_c += next.out_c; 55 | }else{ 56 | 
printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h); 57 | l->out_h = l->out_w = l->out_c = 0; 58 | } 59 | } 60 | l->inputs = l->outputs; 61 | l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); 62 | l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); 63 | 64 | #ifdef GPU 65 | cuda_free(l->output_gpu); 66 | cuda_free(l->delta_gpu); 67 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 68 | l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); 69 | #endif 70 | 71 | } 72 | 73 | void forward_route_layer(const route_layer l, network_state state) 74 | { 75 | int i, j; 76 | int offset = 0; 77 | for(i = 0; i < l.n; ++i){ 78 | int index = l.input_layers[i]; 79 | float *input = state.net.layers[index].output; 80 | int input_size = l.input_sizes[i]; 81 | for(j = 0; j < l.batch; ++j){ 82 | copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); 83 | } 84 | offset += input_size; 85 | } 86 | } 87 | 88 | void backward_route_layer(const route_layer l, network_state state) 89 | { 90 | int i, j; 91 | int offset = 0; 92 | for(i = 0; i < l.n; ++i){ 93 | int index = l.input_layers[i]; 94 | float *delta = state.net.layers[index].delta; 95 | int input_size = l.input_sizes[i]; 96 | for(j = 0; j < l.batch; ++j){ 97 | axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); 98 | } 99 | offset += input_size; 100 | } 101 | } 102 | 103 | #ifdef GPU 104 | void forward_route_layer_gpu(const route_layer l, network_state state) 105 | { 106 | int i, j; 107 | int offset = 0; 108 | for(i = 0; i < l.n; ++i){ 109 | int index = l.input_layers[i]; 110 | float *input = state.net.layers[index].output_gpu; 111 | int input_size = l.input_sizes[i]; 112 | for(j = 0; j < l.batch; ++j){ 113 | copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); 114 | } 115 | offset += input_size; 116 | } 117 | } 118 | 119 | void backward_route_layer_gpu(const 
route_layer l, network_state state) 120 | { 121 | int i, j; 122 | int offset = 0; 123 | for(i = 0; i < l.n; ++i){ 124 | int index = l.input_layers[i]; 125 | float *delta = state.net.layers[index].delta_gpu; 126 | int input_size = l.input_sizes[i]; 127 | for(j = 0; j < l.batch; ++j){ 128 | axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); 129 | } 130 | offset += input_size; 131 | } 132 | } 133 | #endif 134 | -------------------------------------------------------------------------------- /src/darkSrc/route_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ROUTE_LAYER_H 2 | #define ROUTE_LAYER_H 3 | #include "network.h" 4 | #include "layer.h" 5 | 6 | typedef layer route_layer; 7 | 8 | route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); 9 | void forward_route_layer(const route_layer l, network_state state); 10 | void backward_route_layer(const route_layer l, network_state state); 11 | void resize_route_layer(route_layer *l, network *net); 12 | 13 | #ifdef GPU 14 | void forward_route_layer_gpu(const route_layer l, network_state state); 15 | void backward_route_layer_gpu(const route_layer l, network_state state); 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/darkSrc/shortcut_layer.c: -------------------------------------------------------------------------------- 1 | #include "shortcut_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | #include 5 | #include 6 | 7 | layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) 8 | { 9 | fprintf(stderr,"Shortcut Layer: %d\n", index); 10 | layer l = {0}; 11 | l.type = SHORTCUT; 12 | l.batch = batch; 13 | l.w = w2; 14 | l.h = h2; 15 | l.c = c2; 16 | l.out_w = w; 17 | l.out_h = h; 18 | l.out_c = c; 19 | l.outputs = w*h*c; 20 | l.inputs = l.outputs; 21 | 22 | l.index = index; 23 | 24 | l.delta 
= calloc(l.outputs*batch, sizeof(float)); 25 | l.output = calloc(l.outputs*batch, sizeof(float));; 26 | 27 | l.forward = forward_shortcut_layer; 28 | l.backward = backward_shortcut_layer; 29 | #ifdef GPU 30 | l.forward_gpu = forward_shortcut_layer_gpu; 31 | l.backward_gpu = backward_shortcut_layer_gpu; 32 | 33 | l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); 34 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 35 | #endif 36 | return l; 37 | } 38 | 39 | void forward_shortcut_layer(const layer l, network_state state) 40 | { 41 | copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); 42 | shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output); 43 | activate_array(l.output, l.outputs*l.batch, l.activation); 44 | } 45 | 46 | void backward_shortcut_layer(const layer l, network_state state) 47 | { 48 | gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); 49 | axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1); 50 | shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta); 51 | } 52 | 53 | #ifdef GPU 54 | void forward_shortcut_layer_gpu(const layer l, network_state state) 55 | { 56 | copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); 57 | shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); 58 | activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); 59 | } 60 | 61 | void backward_shortcut_layer_gpu(const layer l, network_state state) 62 | { 63 | gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); 64 | axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1); 65 | shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu); 66 | } 67 | #endif 68 | -------------------------------------------------------------------------------- 
/src/darkSrc/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef SHORTCUT_LAYER_H 2 | #define SHORTCUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); 8 | void forward_shortcut_layer(const layer l, network_state state); 9 | void backward_shortcut_layer(const layer l, network_state state); 10 | 11 | #ifdef GPU 12 | void forward_shortcut_layer_gpu(const layer l, network_state state); 13 | void backward_shortcut_layer_gpu(const layer l, network_state state); 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/darkSrc/softmax_layer.c: -------------------------------------------------------------------------------- 1 | #include "softmax_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | softmax_layer make_softmax_layer(int batch, int inputs, int groups) 11 | { 12 | assert(inputs%groups == 0); 13 | fprintf(stderr, "softmax %4d\n", inputs); 14 | softmax_layer l = {0}; 15 | l.type = SOFTMAX; 16 | l.batch = batch; 17 | l.groups = groups; 18 | l.inputs = inputs; 19 | l.outputs = inputs; 20 | l.output = calloc(inputs*batch, sizeof(float)); 21 | l.delta = calloc(inputs*batch, sizeof(float)); 22 | 23 | l.forward = forward_softmax_layer; 24 | l.backward = backward_softmax_layer; 25 | #ifdef GPU 26 | l.forward_gpu = forward_softmax_layer_gpu; 27 | l.backward_gpu = backward_softmax_layer_gpu; 28 | 29 | l.output_gpu = cuda_make_array(l.output, inputs*batch); 30 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 31 | #endif 32 | return l; 33 | } 34 | 35 | void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output) 36 | { 37 | int b; 38 | for(b = 0; b < batch; ++b){ 39 | int i; 40 | int count = 0; 41 | for(i = 0; i < 
hierarchy->groups; ++i){ 42 | int group_size = hierarchy->group_size[i]; 43 | softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count); 44 | count += group_size; 45 | } 46 | } 47 | } 48 | 49 | void forward_softmax_layer(const softmax_layer l, network_state state) 50 | { 51 | int b; 52 | int inputs = l.inputs / l.groups; 53 | int batch = l.batch * l.groups; 54 | if(l.softmax_tree){ 55 | softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output); 56 | } else { 57 | for(b = 0; b < batch; ++b){ 58 | softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs); 59 | } 60 | } 61 | } 62 | 63 | void backward_softmax_layer(const softmax_layer l, network_state state) 64 | { 65 | int i; 66 | for(i = 0; i < l.inputs*l.batch; ++i){ 67 | state.delta[i] += l.delta[i]; 68 | } 69 | } 70 | 71 | #ifdef GPU 72 | 73 | void pull_softmax_layer_output(const softmax_layer layer) 74 | { 75 | cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); 76 | } 77 | 78 | void forward_softmax_layer_gpu(const softmax_layer l, network_state state) 79 | { 80 | int inputs = l.inputs / l.groups; 81 | int batch = l.batch * l.groups; 82 | if(l.softmax_tree){ 83 | int i; 84 | int count = 0; 85 | for (i = 0; i < l.softmax_tree->groups; ++i) { 86 | int group_size = l.softmax_tree->group_size[i]; 87 | softmax_gpu(state.input+count, group_size, inputs, batch, l.temperature, l.output_gpu + count); 88 | count += group_size; 89 | } 90 | } else { 91 | softmax_gpu(state.input, inputs, inputs, batch, l.temperature, l.output_gpu); 92 | } 93 | } 94 | 95 | void backward_softmax_layer_gpu(const softmax_layer layer, network_state state) 96 | { 97 | axpy_ongpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, state.delta, 1); 98 | } 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /src/darkSrc/softmax_layer.h: -------------------------------------------------------------------------------- 1 | 
#ifndef SOFTMAX_LAYER_H 2 | #define SOFTMAX_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | typedef layer softmax_layer; 7 | 8 | void softmax_array(float *input, int n, float temp, float *output); 9 | softmax_layer make_softmax_layer(int batch, int inputs, int groups); 10 | void forward_softmax_layer(const softmax_layer l, network_state state); 11 | void backward_softmax_layer(const softmax_layer l, network_state state); 12 | 13 | #ifdef GPU 14 | void pull_softmax_layer_output(const softmax_layer l); 15 | void forward_softmax_layer_gpu(const softmax_layer l, network_state state); 16 | void backward_softmax_layer_gpu(const softmax_layer l, network_state state); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/darkSrc/tree.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tree.h" 4 | #include "utils.h" 5 | #include "data.h" 6 | 7 | void change_leaves(tree *t, char *leaf_list) 8 | { 9 | list *llist = get_paths(leaf_list); 10 | char **leaves = (char **)list_to_array(llist); 11 | int n = llist->size; 12 | int i,j; 13 | int found = 0; 14 | for(i = 0; i < t->n; ++i){ 15 | t->leaf[i] = 0; 16 | for(j = 0; j < n; ++j){ 17 | if (0==strcmp(t->name[i], leaves[j])){ 18 | t->leaf[i] = 1; 19 | ++found; 20 | break; 21 | } 22 | } 23 | } 24 | fprintf(stderr, "Found %d leaves.\n", found); 25 | } 26 | 27 | float get_hierarchy_probability(float *x, tree *hier, int c) 28 | { 29 | float p = 1; 30 | while(c >= 0){ 31 | p = p * x[c]; 32 | c = hier->parent[c]; 33 | } 34 | return p; 35 | } 36 | 37 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves) 38 | { 39 | int j; 40 | for(j = 0; j < n; ++j){ 41 | int parent = hier->parent[j]; 42 | if(parent >= 0){ 43 | predictions[j] *= predictions[parent]; 44 | } 45 | } 46 | if(only_leaves){ 47 | for(j = 0; j < n; ++j){ 48 | if(!hier->leaf[j]) predictions[j] = 0; 
49 | } 50 | } 51 | } 52 | 53 | tree *read_tree(char *filename) 54 | { 55 | tree t = {0}; 56 | FILE *fp = fopen(filename, "r"); 57 | 58 | char *line; 59 | int last_parent = -1; 60 | int group_size = 0; 61 | int groups = 0; 62 | int n = 0; 63 | while((line=fgetl(fp)) != 0){ 64 | char *id = calloc(256, sizeof(char)); 65 | int parent = -1; 66 | sscanf(line, "%s %d", id, &parent); 67 | t.parent = realloc(t.parent, (n+1)*sizeof(int)); 68 | t.parent[n] = parent; 69 | 70 | t.name = realloc(t.name, (n+1)*sizeof(char *)); 71 | t.name[n] = id; 72 | if(parent != last_parent){ 73 | ++groups; 74 | t.group_offset = realloc(t.group_offset, groups * sizeof(int)); 75 | t.group_offset[groups - 1] = n - group_size; 76 | t.group_size = realloc(t.group_size, groups * sizeof(int)); 77 | t.group_size[groups - 1] = group_size; 78 | group_size = 0; 79 | last_parent = parent; 80 | } 81 | t.group = realloc(t.group, (n+1)*sizeof(int)); 82 | t.group[n] = groups; 83 | ++n; 84 | ++group_size; 85 | } 86 | ++groups; 87 | t.group_offset = realloc(t.group_offset, groups * sizeof(int)); 88 | t.group_offset[groups - 1] = n - group_size; 89 | t.group_size = realloc(t.group_size, groups * sizeof(int)); 90 | t.group_size[groups - 1] = group_size; 91 | t.n = n; 92 | t.groups = groups; 93 | t.leaf = calloc(n, sizeof(int)); 94 | int i; 95 | for(i = 0; i < n; ++i) t.leaf[i] = 1; 96 | for(i = 0; i < n; ++i) if(t.parent[i] >= 0) t.leaf[t.parent[i]] = 0; 97 | 98 | fclose(fp); 99 | tree *tree_ptr = calloc(1, sizeof(tree)); 100 | *tree_ptr = t; 101 | //error(0); 102 | return tree_ptr; 103 | } 104 | -------------------------------------------------------------------------------- /src/darkSrc/tree.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_H 2 | #define TREE_H 3 | 4 | typedef struct{ 5 | int *leaf; 6 | int n; 7 | int *parent; 8 | int *group; 9 | char **name; 10 | 11 | int groups; 12 | int *group_size; 13 | int *group_offset; 14 | } tree; 15 | 16 | tree 
*read_tree(char *filename); 17 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves); 18 | void change_leaves(tree *t, char *leaf_list); 19 | float get_hierarchy_probability(float *x, tree *hier, int c); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/darkSrc/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | #include 4 | #include 5 | #include "list.h" 6 | 7 | #if defined(_MSC_VER) && _MSC_VER < 1900 8 | #define snprintf(buf,len, format,...) _snprintf_s(buf, len,len, format, __VA_ARGS__) 9 | #endif 10 | 11 | #define SECRET_NUM -1234 12 | #define TWO_PI 6.2831853071795864769252866 13 | 14 | #ifdef __cplusplus 15 | extern "C" { 16 | #endif 17 | 18 | int *read_map(char *filename); 19 | void shuffle(void *arr, size_t n, size_t size); 20 | void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); 21 | void free_ptrs(void **ptrs, int n); 22 | char *basecfg(char *cfgfile); 23 | int alphanum_to_int(char c); 24 | char int_to_alphanum(int i); 25 | int read_int(int fd); 26 | void write_int(int fd, int n); 27 | void read_all(int fd, char *buffer, size_t bytes); 28 | void write_all(int fd, char *buffer, size_t bytes); 29 | int read_all_fail(int fd, char *buffer, size_t bytes); 30 | int write_all_fail(int fd, char *buffer, size_t bytes); 31 | void find_replace(char *str, char *orig, char *rep, char *output); 32 | void error(const char *s); 33 | void malloc_error(); 34 | void file_error(char *s); 35 | void strip(char *s); 36 | void strip_char(char *s, char bad); 37 | void top_k(float *a, int n, int k, int *index); 38 | list *split_str(char *s, char delim); 39 | char *fgetl(FILE *fp); 40 | list *parse_csv_line(char *line); 41 | char *copy_string(char *s); 42 | int count_fields(char *line); 43 | float *parse_fields(char *line, int n); 44 | void normalize_array(float *a, int n); 45 | void 
scale_array(float *a, int n, float s); 46 | void translate_array(float *a, int n, float s); 47 | int max_index(float *a, int n); 48 | float constrain(float min, float max, float a); 49 | int constrain_int(int a, int min, int max); 50 | float mse_array(float *a, int n); 51 | float rand_normal(); 52 | size_t rand_size_t(); 53 | float rand_uniform(float min, float max); 54 | float rand_scale(float s); 55 | int rand_int(int min, int max); 56 | float sum_array(float *a, int n); 57 | float mean_array(float *a, int n); 58 | void mean_arrays(float **a, int n, int els, float *avg); 59 | float variance_array(float *a, int n); 60 | float mag_array(float *a, int n); 61 | float dist_array(float *a, float *b, int n, int sub); 62 | float **one_hot_encode(float *a, int n, int k); 63 | float sec(clock_t clocks); 64 | int find_int_arg(int argc, char **argv, char *arg, int def); 65 | float find_float_arg(int argc, char **argv, char *arg, float def); 66 | int find_arg(int argc, char* argv[], char *arg); 67 | char *find_char_arg(int argc, char **argv, char *arg, char *def); 68 | int sample_array(float *a, int n); 69 | void print_statistics(float *a, int n); 70 | unsigned int random_gen(); 71 | float random_float(); 72 | float rand_uniform_strong(float min, float max); 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif 77 | 78 | #endif 79 | 80 | -------------------------------------------------------------------------------- /src/errmsg.cpp: -------------------------------------------------------------------------------- 1 | #include "errmsg.h" 2 | #include 3 | 4 | errMsg* errMsg::instance = NULL; 5 | errMsg *errMsg::getInstance() 6 | { 7 | if(instance == NULL) instance = new errMsg(); 8 | return instance; 9 | } 10 | 11 | errMsg::errMsg() 12 | { 13 | } 14 | 15 | void errMsg::out( 16 | std::string file, 17 | std::string func, 18 | std::string msg, bool pause) 19 | { 20 | std::cout << "IN file<" << file << "> " 21 | << func << " : " << msg << std::endl; 22 | if(pause) exit(0); 23 | } 24 | 
-------------------------------------------------------------------------------- /src/errmsg.h: -------------------------------------------------------------------------------- 1 | #ifndef ERRMSG_H 2 | #define ERRMSG_H 3 | #include 4 | #include 5 | 6 | class errMsg 7 | { 8 | public: 9 | static errMsg* getInstance(); 10 | void out(std::string file, 11 | std::string func, 12 | std::string msd, 13 | bool pause = true); 14 | private: 15 | errMsg(); 16 | errMsg(const errMsg&); 17 | errMsg& operator=(const errMsg&); 18 | 19 | static errMsg* instance; 20 | }; 21 | 22 | #endif // ERRMSG_H 23 | -------------------------------------------------------------------------------- /src/feature/FeatureTensor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * FeatureTensor.cpp 3 | * 4 | * Created on: Dec 15, 2017 5 | * Author: zy 6 | */ 7 | 8 | #include "FeatureTensor.h" 9 | using namespace tensorflow; 10 | 11 | #define TENSORFLOW_MODEL_META "./RUNNINGDATA/tensor_networks/111.meta" 12 | #define TENSORFLOW_MODEL "./RUNNINGDATA/tensor_networks/mars-small128.ckpt-68577" 13 | 14 | FeatureTensor *FeatureTensor::instance = NULL; 15 | 16 | FeatureTensor *FeatureTensor::getInstance() { 17 | if(instance == NULL) { 18 | instance = new FeatureTensor(); 19 | } 20 | return instance; 21 | } 22 | 23 | FeatureTensor::FeatureTensor() { 24 | //prepare model: 25 | bool status = init(); 26 | if(status == false) exit(1); 27 | } 28 | 29 | FeatureTensor::~FeatureTensor() { 30 | session->Close(); 31 | delete session; 32 | output_tensors.clear(); 33 | outnames.clear(); 34 | } 35 | 36 | bool FeatureTensor::init() { 37 | tensorflow::SessionOptions sessOptions; 38 | sessOptions.config.mutable_gpu_options()->set_allow_growth(true); 39 | session = NewSession(sessOptions); 40 | if(session == nullptr) return false; 41 | 42 | const tensorflow::string pathToGraph = TENSORFLOW_MODEL_META; 43 | Status status; 44 | MetaGraphDef graph_def; 45 | status = 
ReadBinaryProto(tensorflow::Env::Default(), pathToGraph, &graph_def); 46 | if(status.ok() == false) return false; 47 | 48 | status = session->Create(graph_def.graph_def()); 49 | if(status.ok() == false) return false; 50 | 51 | const tensorflow::string checkpointPath = TENSORFLOW_MODEL; 52 | Tensor checkpointTensor(DT_STRING, TensorShape()); 53 | checkpointTensor.scalar()() = checkpointPath; 54 | status = session->Run( 55 | { {graph_def.saver_def().filename_tensor_name(), checkpointTensor}, }, 56 | {}, {graph_def.saver_def().restore_op_name()}, nullptr ); 57 | if(status.ok() == false) return false; 58 | 59 | input_layer = "Placeholder:0"; 60 | outnames.push_back("truediv:0"); 61 | feature_dim = 128; 62 | return true; 63 | } 64 | 65 | bool FeatureTensor::getRectsFeature(const cv::Mat& img, DETECTIONS& d) { 66 | std::vector mats; 67 | for(DETECTION_ROW& dbox : d) { 68 | cv::Rect rc = cv::Rect(int(dbox.tlwh(0)), int(dbox.tlwh(1)), 69 | int(dbox.tlwh(2)), int(dbox.tlwh(3))); 70 | rc.x -= (rc.height * 0.5 - rc.width) * 0.5; 71 | rc.width = rc.height * 0.5; 72 | rc.x = (rc.x >= 0 ? rc.x : 0); 73 | rc.y = (rc.y >= 0 ? rc.y : 0); 74 | rc.width = (rc.x + rc.width <= img.cols? rc.width: (img.cols-rc.x)); 75 | rc.height = (rc.y + rc.height <= img.rows? 
rc.height:(img.rows - rc.y)); 76 | 77 | cv::Mat mattmp = img(rc).clone(); 78 | cv::resize(mattmp, mattmp, cv::Size(64, 128)); 79 | mats.push_back(mattmp); 80 | } 81 | int count = mats.size(); 82 | 83 | Tensor input_tensor(DT_UINT8, TensorShape({count, 128, 64, 3})); 84 | tobuffer(mats, input_tensor.flat().data()); 85 | std::vector> feed_dict = { 86 | {input_layer, input_tensor}, 87 | }; 88 | Status status = session->Run(feed_dict, outnames, {}, &output_tensors); 89 | if(status.ok() == false) return false; 90 | float* tensor_buffer = output_tensors[0].flat().data(); 91 | int i = 0; 92 | for(DETECTION_ROW& dbox : d) { 93 | for(int j = 0; j < feature_dim; j++) 94 | dbox.feature[j] = tensor_buffer[i*feature_dim+j]; 95 | i++; 96 | } 97 | return true; 98 | } 99 | 100 | void FeatureTensor::tobuffer(const std::vector &imgs, uint8 *buf) { 101 | int pos = 0; 102 | for(const cv::Mat& img : imgs) { 103 | int Lenth = img.rows * img.cols * 3; 104 | int nr = img.rows; 105 | int nc = img.cols; 106 | if(img.isContinuous()) { 107 | nr = 1; 108 | nc = Lenth; 109 | } 110 | for(int i = 0; i < nr; i++) { 111 | const uchar* inData = img.ptr(i); 112 | for(int j = 0; j < nc; j++) { 113 | buf[pos] = *inData++; 114 | pos++; 115 | } 116 | }//end for 117 | }//end imgs; 118 | } 119 | -------------------------------------------------------------------------------- /src/feature/FeatureTensor.h: -------------------------------------------------------------------------------- 1 | #include "opencv2/opencv.hpp" 2 | #include "opencv2/core/core.hpp" 3 | #include "opencv2/highgui/highgui.hpp" 4 | #include "tensorflow/core/public/session.h" 5 | #include "tensorflow/core/protobuf/meta_graph.pb.h" 6 | 7 | #include "model.h" 8 | 9 | typedef unsigned char uint8; 10 | 11 | class FeatureTensor 12 | { 13 | public: 14 | static FeatureTensor* getInstance(); 15 | bool getRectsFeature(const cv::Mat& img, DETECTIONS& d); 16 | 17 | private: 18 | FeatureTensor(); 19 | FeatureTensor(const FeatureTensor&); 20 | 
FeatureTensor& operator = (const FeatureTensor&); 21 | static FeatureTensor* instance; 22 | bool init(); 23 | ~FeatureTensor(); 24 | 25 | void tobuffer(const std::vector &imgs, uint8 *buf); 26 | 27 | int feature_dim; 28 | tensorflow::Session* session; 29 | std::vector output_tensors; 30 | std::vector outnames; 31 | tensorflow::string input_layer; 32 | }; 33 | -------------------------------------------------------------------------------- /src/feature/dataType.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DATATYPE_H 3 | #define DATATYPEH 4 | 5 | #include 6 | #include 7 | //#include 8 | #include 9 | 10 | typedef Eigen::Matrix DETECTBOX; 11 | typedef Eigen::Matrix DETECTBOXSS; 12 | typedef Eigen::Matrix FEATURE; 13 | typedef Eigen::Matrix FEATURESS; 14 | //typedef std::vector FEATURESS; 15 | 16 | //Kalmanfilter 17 | //typedef Eigen::Matrix KAL_FILTER; 18 | typedef Eigen::Matrix KAL_MEAN; 19 | typedef Eigen::Matrix KAL_COVA; 20 | typedef Eigen::Matrix KAL_HMEAN; 21 | typedef Eigen::Matrix KAL_HCOVA; 22 | using KAL_DATA = std::pair; 23 | using KAL_HDATA = std::pair; 24 | 25 | //main 26 | using RESULT_DATA = std::pair; 27 | 28 | //tracker: 29 | using TRACKER_DATA = std::pair; 30 | using MATCH_DATA = std::pair; 31 | typedef struct t{ 32 | std::vector matches; 33 | std::vector unmatched_tracks; 34 | std::vector unmatched_detections; 35 | }TRACHER_MATCHD; 36 | 37 | //linear_assignment: 38 | typedef Eigen::Matrix DYNAMICM; 39 | 40 | 41 | #endif // DATATYPE_H 42 | -------------------------------------------------------------------------------- /src/feature/model.h: -------------------------------------------------------------------------------- 1 | #ifndef MODEL_H 2 | #define MODEL_H 3 | #include "dataType.h" 4 | #include 5 | #include "opencv2/opencv.hpp" 6 | #include "../darkSrc/network.h" 7 | 8 | /** 9 | * Each rect's data structure. 
10 | * tlwh: topleft point & (w,h) 11 | * confidence: detection confidence. 12 | * feature: the rect's 128d feature. 13 | */ 14 | class DETECTION_ROW { 15 | public: 16 | DETECTBOX tlwh; //np.float 17 | float confidence; //float 18 | FEATURE feature; //np.float32 19 | DETECTBOX to_xyah() const; 20 | DETECTBOX to_tlbr() const; 21 | }; 22 | 23 | typedef std::vector DETECTIONS; 24 | 25 | /** 26 | * Get each image's rects & corresponding features. 27 | * Method of filter conf. 28 | * Method of preprocessing. 29 | */ 30 | class ModelDetection 31 | { 32 | 33 | public: 34 | static ModelDetection* getInstance(); 35 | bool loadDataFromFile(const char* motDir, bool withFeature); 36 | bool getFrameDetections(int frame_idx, DETECTIONS& res); 37 | bool getFrameDetections(cv::Mat& frame, DETECTIONS& res); 38 | void dataMoreConf(float min_confidence, DETECTIONS& d); 39 | void dataPreprocessing(float max_bbox_overlap, DETECTIONS& d); 40 | 41 | private: 42 | ModelDetection(); 43 | ModelDetection(const ModelDetection&); 44 | ModelDetection& operator =(const ModelDetection&); 45 | static ModelDetection* instance; 46 | 47 | using AREAPAIR = std::pair; 48 | struct cmp { 49 | bool operator()(const AREAPAIR a, const AREAPAIR b) { 50 | return a.second < b.second; 51 | } 52 | }; 53 | std::map data; 54 | void _Qsort(DETECTIONS d, std::vector& a, int low, int high); 55 | bool loadFromFile; 56 | 57 | //darknet: 58 | char *input; 59 | network net; 60 | clock_t time; 61 | float thresh; 62 | float nms; 63 | char **names; 64 | //image **alphabet; 65 | 66 | image ipl_to_image(IplImage* src); 67 | }; 68 | 69 | #endif // MODEL_H 70 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "VideoTracker.h" 3 | using namespace std; 4 | 5 | #define MOTDIR "/home/zy/git-project/MOT_tracking/deep_sort/MOT16/test/MOT16-06/" 6 | #define VIDEO 
"./RUNNINGDATA/test.avi" 7 | //#define RUNGT 8 | //#define RUNMOTTENSOR 9 | #define RUNLOCALVIDEO 10 | 11 | int main() 12 | { 13 | VideoTracker* t = new VideoTracker; 14 | #ifdef RUNGT 15 | if(t->run_sequenceWithGT(MOTDIR, true) == false) { 16 | cout << t->showErrMsg() << endl; 17 | } 18 | #endif 19 | 20 | #ifdef RUNMOTTENSOR 21 | if(t->run_sequence(MOTDIR, true) == false) { 22 | cout << t->showErrMsg() << endl; 23 | } 24 | #endif 25 | 26 | #ifdef RUNLOCALVIDEO 27 | if(t->run(VIDEO, true) == false) { 28 | cout << t->showErrMsg() << endl; 29 | } 30 | #endif 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /src/matching/kalmanfilter.cpp: -------------------------------------------------------------------------------- 1 | #include "kalmanfilter.h" 2 | #include 3 | 4 | const double KalmanFilter::chi2inv95[10] = { 5 | 0, 6 | 3.8415, 7 | 5.9915, 8 | 7.8147, 9 | 9.4877, 10 | 11.070, 11 | 12.592, 12 | 14.067, 13 | 15.507, 14 | 16.919 15 | }; 16 | KalmanFilter::KalmanFilter() 17 | { 18 | int ndim = 4; 19 | double dt = 1.; 20 | 21 | _motion_mat = Eigen::MatrixXf::Identity(8, 8); 22 | for(int i = 0; i < ndim; i++) { 23 | _motion_mat(i, ndim+i) = dt; 24 | } 25 | _update_mat = Eigen::MatrixXf::Identity(4, 8); 26 | 27 | this->_std_weight_position = 1. / 20; 28 | this->_std_weight_velocity = 1. 
/ 160; 29 | } 30 | 31 | KAL_DATA KalmanFilter::initiate(const DETECTBOX &measurement) 32 | { 33 | DETECTBOX mean_pos = measurement; 34 | DETECTBOX mean_vel; 35 | for(int i = 0; i < 4; i++) mean_vel(i) = 0; 36 | 37 | KAL_MEAN mean; 38 | for(int i = 0; i < 8; i++){ 39 | if(i < 4) mean(i) = mean_pos(i); 40 | else mean(i) = mean_vel(i - 4); 41 | } 42 | 43 | KAL_MEAN std; 44 | std(0) = 2 * _std_weight_position * measurement[3]; 45 | std(1) = 2 * _std_weight_position * measurement[3]; 46 | std(2) = 1e-2; 47 | std(3) = 2 * _std_weight_position * measurement[3]; 48 | std(4) = 10 * _std_weight_velocity * measurement[3]; 49 | std(5) = 10 * _std_weight_velocity * measurement[3]; 50 | std(6) = 1e-5; 51 | std(7) = 10 * _std_weight_velocity * measurement[3]; 52 | 53 | KAL_MEAN tmp = std.array().square(); 54 | KAL_COVA var = tmp.asDiagonal(); 55 | return std::make_pair(mean, var); 56 | } 57 | 58 | void KalmanFilter::predict(KAL_MEAN &mean, KAL_COVA &covariance) 59 | { 60 | //revise the data; 61 | DETECTBOX std_pos; 62 | std_pos << _std_weight_position * mean(3), 63 | _std_weight_position * mean(3), 64 | 1e-2, 65 | _std_weight_position * mean(3); 66 | DETECTBOX std_vel; 67 | std_vel << _std_weight_velocity * mean(3), 68 | _std_weight_velocity * mean(3), 69 | 1e-5, 70 | _std_weight_velocity * mean(3); 71 | KAL_MEAN tmp; 72 | tmp.block<1,4>(0,0) = std_pos; 73 | tmp.block<1,4>(0,4) = std_vel; 74 | tmp = tmp.array().square(); 75 | KAL_COVA motion_cov = tmp.asDiagonal(); 76 | KAL_MEAN mean1 = this->_motion_mat * mean.transpose(); 77 | KAL_COVA covariance1 = this->_motion_mat * covariance *(_motion_mat.transpose()); 78 | covariance1 += motion_cov; 79 | 80 | mean = mean1; 81 | covariance = covariance1; 82 | } 83 | 84 | KAL_HDATA KalmanFilter::project(const KAL_MEAN &mean, const KAL_COVA &covariance) 85 | { 86 | DETECTBOX std; 87 | std << _std_weight_position * mean(3), _std_weight_position * mean(3), 88 | 1e-1, _std_weight_position * mean(3); 89 | KAL_HMEAN mean1 = _update_mat * 
mean.transpose(); 90 | KAL_HCOVA covariance1 = _update_mat * covariance * (_update_mat.transpose()); 91 | Eigen::Matrix diag = std.asDiagonal(); 92 | diag = diag.array().square().matrix(); 93 | covariance1 += diag; 94 | // covariance1.diagonal() << diag; 95 | return std::make_pair(mean1, covariance1); 96 | } 97 | 98 | KAL_DATA 99 | KalmanFilter::update( 100 | const KAL_MEAN &mean, 101 | const KAL_COVA &covariance, 102 | const DETECTBOX &measurement) 103 | { 104 | KAL_HDATA pa = project(mean, covariance); 105 | KAL_HMEAN projected_mean = pa.first; 106 | KAL_HCOVA projected_cov = pa.second; 107 | 108 | //chol_factor, lower = 109 | //scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) 110 | //kalmain_gain = 111 | //scipy.linalg.cho_solve((cho_factor, lower), 112 | //np.dot(covariance, self._upadte_mat.T).T, 113 | //check_finite=False).T 114 | Eigen::Matrix B = (covariance * (_update_mat.transpose())).transpose(); 115 | Eigen::Matrix kalman_gain = (projected_cov.llt().solve(B)).transpose(); // eg.8x4 116 | Eigen::Matrix innovation = measurement - projected_mean; //eg.1x4 117 | auto tmp = innovation*(kalman_gain.transpose()); 118 | KAL_MEAN new_mean = (mean.array() + tmp.array()).matrix(); 119 | KAL_COVA new_covariance = covariance - kalman_gain*projected_cov*(kalman_gain.transpose()); 120 | return std::make_pair(new_mean, new_covariance); 121 | } 122 | 123 | Eigen::Matrix 124 | KalmanFilter::gating_distance( 125 | const KAL_MEAN &mean, 126 | const KAL_COVA &covariance, 127 | const std::vector &measurements, 128 | bool only_position) 129 | { 130 | KAL_HDATA pa = this->project(mean, covariance); 131 | if(only_position) { 132 | printf("not implement!"); 133 | exit(0); 134 | } 135 | KAL_HMEAN mean1 = pa.first; 136 | KAL_HCOVA covariance1 = pa.second; 137 | 138 | // Eigen::Matrix d(size, 4); 139 | DETECTBOXSS d(measurements.size(), 4); 140 | int pos = 0; 141 | for(DETECTBOX box:measurements) { 142 | d.row(pos++) = box - mean1; 143 | } 144 | Eigen::Matrix 
factor = covariance1.llt().matrixL(); 145 | Eigen::Matrix z = factor.triangularView().solve(d).transpose(); 146 | auto zz = ((z.array())*(z.array())).matrix(); 147 | auto square_maha = zz.colwise().sum(); 148 | return square_maha; 149 | } 150 | 151 | -------------------------------------------------------------------------------- /src/matching/kalmanfilter.h: -------------------------------------------------------------------------------- 1 | #ifndef KALMANFILTER_H 2 | #define KALMANFILTER_H 3 | 4 | #include "../feature/dataType.h" 5 | 6 | class KalmanFilter 7 | { 8 | public: 9 | static const double chi2inv95[10]; 10 | KalmanFilter(); 11 | KAL_DATA initiate(const DETECTBOX& measurement); 12 | void predict(KAL_MEAN& mean, KAL_COVA& covariance); 13 | KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); 14 | KAL_DATA update(const KAL_MEAN& mean, 15 | const KAL_COVA& covariance, 16 | const DETECTBOX& measurement); 17 | 18 | Eigen::Matrix gating_distance( 19 | const KAL_MEAN& mean, 20 | const KAL_COVA& covariance, 21 | const std::vector& measurements, 22 | bool only_position = false); 23 | 24 | private: 25 | Eigen::Matrix _motion_mat; 26 | Eigen::Matrix _update_mat; 27 | float _std_weight_position; 28 | float _std_weight_velocity; 29 | }; 30 | 31 | #endif // KALMANFILTER_H 32 | -------------------------------------------------------------------------------- /src/matching/linear_assignment.h: -------------------------------------------------------------------------------- 1 | #ifndef LINEAR_ASSIGNMENT_H 2 | #define LINEAR_ASSIGNMENT_H 3 | #include "../feature/dataType.h" 4 | #include "tracker.h" 5 | 6 | #define INFTY_COST 1e5 7 | class tracker; 8 | //for matching; 9 | class linear_assignment 10 | { 11 | linear_assignment(); 12 | linear_assignment(const linear_assignment& ); 13 | linear_assignment& operator=(const linear_assignment&); 14 | static linear_assignment* instance; 15 | 16 | public: 17 | static linear_assignment* getInstance(); 18 | 
TRACHER_MATCHD matching_cascade(tracker* distance_metric, 19 | tracker::GATED_METRIC_FUNC distance_metric_func, 20 | float max_distance, 21 | int cascade_depth, 22 | std::vector& tracks, 23 | const DETECTIONS& detections, 24 | std::vector &track_indices, 25 | std::vector detection_indices = std::vector()); 26 | TRACHER_MATCHD min_cost_matching( 27 | tracker* distance_metric, 28 | tracker::GATED_METRIC_FUNC distance_metric_func, 29 | float max_distance, 30 | std::vector& tracks, 31 | const DETECTIONS& detections, 32 | std::vector& track_indices, 33 | std::vector& detection_indices); 34 | DYNAMICM gate_cost_matrix( 35 | KalmanFilter* kf, 36 | DYNAMICM& cost_matrix, 37 | std::vector& tracks, 38 | const DETECTIONS& detections, 39 | const std::vector& track_indices, 40 | const std::vector& detection_indices, 41 | float gated_cost = INFTY_COST, 42 | bool only_position = false); 43 | }; 44 | 45 | #endif // LINEAR_ASSIGNMENT_H 46 | -------------------------------------------------------------------------------- /src/matching/nn_matching.cpp: -------------------------------------------------------------------------------- 1 | #include "nn_matching.h" 2 | #include "../errmsg.h" 3 | 4 | using namespace Eigen; 5 | 6 | NearNeighborDisMetric::NearNeighborDisMetric( 7 | NearNeighborDisMetric::METRIC_TYPE metric, 8 | float matching_threshold, int budget) 9 | { 10 | if(metric == euclidean) { 11 | _metric = &NearNeighborDisMetric::_nneuclidean_distance; 12 | } else if (metric == cosine) { 13 | _metric = &NearNeighborDisMetric::_nncosine_distance; 14 | } else { 15 | errMsg::getInstance()->out( 16 | "nn_matching.cpp", 17 | "NearestNeighborDistanceMetric::NearestNeighborDistanceMetric", 18 | "Invalid metric; must be either 'euclidean' or 'cosine'", true); 19 | } 20 | this->mating_threshold = matching_threshold; 21 | this->budget = budget; 22 | this->samples.clear(); 23 | } 24 | 25 | /* 26 | void 27 | NearNeighborDisMetric::partial_fit( 28 | FEATURESS& features, 29 | std::vector 
targets, 30 | std::vector active_targets) 31 | { 32 | int size = targets.size(); 33 | for(int i = 0; i < size; i++) { 34 | FEATURE feature = features.row(i); 35 | int target = targets[i]; 36 | 37 | bool isActive = false; 38 | for(int k:active_targets) { 39 | if(k == target) { 40 | isActive = true; 41 | break; 42 | } 43 | } 44 | if(samples.find(target) != samples.end()) {//exist 45 | } else {//not exist 46 | // 47 | } 48 | }//each (feature,target) 49 | }*/ 50 | 51 | DYNAMICM 52 | NearNeighborDisMetric::distance( 53 | const FEATURESS &features, 54 | const std::vector& targets) 55 | { 56 | DYNAMICM cost_matrix = Eigen::MatrixXf::Zero(targets.size(), features.rows()); 57 | int idx = 0; 58 | for(int target:targets) { 59 | cost_matrix.row(idx) = (this->*_metric)(this->samples[target], features); 60 | idx++; 61 | } 62 | return cost_matrix; 63 | } 64 | 65 | void 66 | NearNeighborDisMetric::partial_fit( 67 | std::vector &tid_feats, 68 | std::vector &active_targets) 69 | { 70 | /*python code: 71 | * let feature(target_id) append to samples; 72 | * && delete not comfirmed target_id from samples. 
73 | * update samples; 74 | */ 75 | for(TRACKER_DATA& data:tid_feats) { 76 | int track_id = data.first; 77 | FEATURESS newFeatOne = data.second; 78 | 79 | if(samples.find(track_id) != samples.end()) {//append 80 | int oldSize = samples[track_id].rows(); 81 | int addSize = newFeatOne.rows(); 82 | int newSize = oldSize + addSize; 83 | 84 | if(newSize <= this->budget) { 85 | FEATURESS newSampleFeatures(newSize, 128); 86 | newSampleFeatures.block(0,0, oldSize, 128) = samples[track_id]; 87 | newSampleFeatures.block(oldSize, 0, addSize, 128) = newFeatOne; 88 | samples[track_id] = newSampleFeatures; 89 | } else { 90 | if(oldSize < this->budget) {//original space is not enough; 91 | FEATURESS newSampleFeatures(this->budget, 128); 92 | if(addSize >= this->budget) { 93 | newSampleFeatures = newFeatOne.block(0, 0, this->budget, 128); 94 | } else { 95 | newSampleFeatures.block(0, 0, this->budget-addSize, 128) = 96 | samples[track_id].block(addSize-1, 0, this->budget-addSize, 128).eval(); 97 | newSampleFeatures.block(this->budget-addSize, 0, addSize, 128) = newFeatOne; 98 | } 99 | samples[track_id] = newSampleFeatures; 100 | } else {//original space is ok; 101 | if(addSize >= this->budget) { 102 | samples[track_id] = newFeatOne.block(0,0, this->budget, 128); 103 | } else { 104 | samples[track_id].block(0, 0, this->budget-addSize, 128) = 105 | samples[track_id].block(addSize-1, 0, this->budget-addSize, 128).eval(); 106 | samples[track_id].block(this->budget-addSize, 0, addSize, 128) = newFeatOne; 107 | } 108 | } 109 | } 110 | } else {//not exit, create new one; 111 | samples[track_id] = newFeatOne; 112 | } 113 | }//add features; 114 | 115 | //erase the samples which not in active_targets; 116 | for(std::map::iterator i = samples.begin(); i != samples.end();) { 117 | bool flag = false; 118 | for(int j:active_targets) if(j == i->first) { flag=true; break; } 119 | if(flag == false) samples.erase(i++); 120 | else i++; 121 | } 122 | } 123 | 124 | Eigen::VectorXf 125 | 
NearNeighborDisMetric::_nncosine_distance( 126 | const FEATURESS &x, const FEATURESS &y) 127 | { 128 | MatrixXf distances = _cosine_distance(x,y); 129 | VectorXf res = distances.colwise().minCoeff().transpose(); 130 | return res; 131 | } 132 | 133 | Eigen::VectorXf 134 | NearNeighborDisMetric::_nneuclidean_distance( 135 | const FEATURESS &x, const FEATURESS &y) 136 | { 137 | MatrixXf distances = _pdist(x,y); 138 | VectorXf res = distances.colwise().maxCoeff().transpose(); 139 | res = res.array().max(VectorXf::Zero(res.rows()).array()); 140 | return res; 141 | } 142 | 143 | Eigen::MatrixXf 144 | NearNeighborDisMetric::_pdist(const FEATURESS &x, const FEATURESS &y) 145 | { 146 | int len1 = x.rows(), len2 = y.rows(); 147 | if(len1 == 0 || len2 == 0) { 148 | return Eigen::MatrixXf::Zero(len1, len2); 149 | } 150 | MatrixXf res = x * y.transpose()* -2; 151 | res = res.colwise() + x.rowwise().squaredNorm(); 152 | res = res.rowwise() + y.rowwise().squaredNorm().transpose(); 153 | res = res.array().max(MatrixXf::Zero(res.rows(), res.cols()).array()); 154 | return res; 155 | } 156 | 157 | Eigen::MatrixXf 158 | NearNeighborDisMetric::_cosine_distance( 159 | const FEATURESS & a, 160 | const FEATURESS& b, bool data_is_normalized) { 161 | if(data_is_normalized == true) { 162 | //undo: 163 | assert(false); 164 | } 165 | MatrixXf res = 1. 
- (a*b.transpose()).array(); 166 | return res; 167 | } 168 | -------------------------------------------------------------------------------- /src/matching/nn_matching.h: -------------------------------------------------------------------------------- 1 | #ifndef NN_MATCHING_H 2 | #define NN_MATCHING_H 3 | 4 | #include "../feature/dataType.h" 5 | 6 | #include 7 | 8 | //A tool to calculate distance; 9 | class NearNeighborDisMetric{ 10 | public: 11 | enum METRIC_TYPE{euclidean=1, cosine}; 12 | NearNeighborDisMetric(METRIC_TYPE metric, 13 | float matching_threshold, 14 | int budget); 15 | DYNAMICM distance(const FEATURESS& features, const std::vector &targets); 16 | // void partial_fit(FEATURESS& features, std::vector targets, std::vector active_targets); 17 | void partial_fit(std::vector& tid_feats, std::vector& active_targets); 18 | float mating_threshold; 19 | 20 | private: 21 | typedef Eigen::VectorXf (NearNeighborDisMetric::*PTRFUN)(const FEATURESS&, const FEATURESS&); 22 | Eigen::VectorXf _nncosine_distance(const FEATURESS& x, const FEATURESS& y); 23 | Eigen::VectorXf _nneuclidean_distance(const FEATURESS& x, const FEATURESS& y); 24 | 25 | Eigen::MatrixXf _pdist(const FEATURESS& x, const FEATURESS& y); 26 | Eigen::MatrixXf _cosine_distance(const FEATURESS & a, const FEATURESS& b, bool data_is_normalized = false); 27 | private: 28 | PTRFUN _metric; 29 | int budget; 30 | std::map samples; 31 | }; 32 | 33 | #endif // NN_MATCHING_H 34 | -------------------------------------------------------------------------------- /src/matching/track.cpp: -------------------------------------------------------------------------------- 1 | #include "track.h" 2 | 3 | Track::Track(KAL_MEAN& mean, KAL_COVA& covariance, int track_id, int n_init, int max_age, const FEATURE& feature) 4 | { 5 | this->mean = mean; 6 | this->covariance = covariance; 7 | this->track_id = track_id; 8 | this->hits = 1; 9 | this->age = 1; 10 | this->time_since_update = 0; 11 | this->state = 
TrackState::Tentative; 12 | features = FEATURESS(1, 128); 13 | features.row(0) = feature;//features.rows() must = 0; 14 | 15 | this->_n_init = n_init; 16 | this->_max_age = max_age; 17 | } 18 | 19 | void Track::predit(KalmanFilter *kf) 20 | { 21 | /*Propagate the state distribution to the current time step using a 22 | Kalman filter prediction step. 23 | 24 | Parameters 25 | ---------- 26 | kf : kalman_filter.KalmanFilter 27 | The Kalman filter. 28 | */ 29 | 30 | kf->predict(this->mean, this->covariance); 31 | this->age += 1; 32 | this->time_since_update += 1; 33 | } 34 | 35 | void Track::update(KalmanFilter * const kf, const DETECTION_ROW& detection) 36 | { 37 | KAL_DATA pa = kf->update(this->mean, this->covariance, detection.to_xyah()); 38 | this->mean = pa.first; 39 | this->covariance = pa.second; 40 | 41 | featuresAppendOne(detection.feature); 42 | // this->features.row(features.rows()) = detection.feature; 43 | this->hits += 1; 44 | this->time_since_update = 0; 45 | if(this->state == TrackState::Tentative && this->hits >= this->_n_init) { 46 | this->state = TrackState::Confirmed; 47 | } 48 | } 49 | 50 | void Track::mark_missed() 51 | { 52 | if(this->state == TrackState::Tentative) { 53 | this->state = TrackState::Deleted; 54 | } else if(this->time_since_update > this->_max_age) { 55 | this->state = TrackState::Deleted; 56 | } 57 | } 58 | 59 | bool Track::is_confirmed() 60 | { 61 | return this->state == TrackState::Confirmed; 62 | } 63 | 64 | bool Track::is_deleted() 65 | { 66 | return this->state == TrackState::Deleted; 67 | } 68 | 69 | bool Track::is_tentative() 70 | { 71 | return this->state == TrackState::Tentative; 72 | } 73 | 74 | DETECTBOX Track::to_tlwh() 75 | { 76 | DETECTBOX ret = mean.leftCols(4); 77 | ret(2) *= ret(3); 78 | ret.leftCols(2) -= (ret.rightCols(2)/2); 79 | return ret; 80 | } 81 | 82 | void Track::featuresAppendOne(const FEATURE &f) 83 | { 84 | int size = this->features.rows(); 85 | FEATURESS newfeatures = FEATURESS(size+1, 128); 86 | 
newfeatures.block(0, 0, size, 128) = this->features; 87 | newfeatures.row(size) = f; 88 | features = newfeatures; 89 | } 90 | -------------------------------------------------------------------------------- /src/matching/track.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACK_H 2 | #define TRACK_H 3 | 4 | #include "../feature/dataType.h" 5 | 6 | #include "kalmanfilter.h" 7 | #include "../feature/model.h" 8 | 9 | class Track 10 | { 11 | /*""" 12 | A single target track with state space `(x, y, a, h)` and associated 13 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 14 | aspect ratio and `h` is the height. 15 | 16 | Parameters 17 | ---------- 18 | mean : ndarray 19 | Mean vector of the initial state distribution. 20 | covariance : ndarray 21 | Covariance matrix of the initial state distribution. 22 | track_id : int 23 | A unique track identifier. 24 | n_init : int 25 | Number of consecutive detections before the track is confirmed. The 26 | track state is set to `Deleted` if a miss occurs within the first 27 | `n_init` frames. 28 | max_age : int 29 | The maximum number of consecutive misses before the track state is 30 | set to `Deleted`. 31 | feature : Optional[ndarray] 32 | Feature vector of the detection this track originates from. If not None, 33 | this feature is added to the `features` cache. 34 | 35 | Attributes 36 | ---------- 37 | mean : ndarray 38 | Mean vector of the initial state distribution. 39 | covariance : ndarray 40 | Covariance matrix of the initial state distribution. 41 | track_id : int 42 | A unique track identifier. 43 | hits : int 44 | Total number of measurement updates. 45 | age : int 46 | Total number of frames since first occurance. 47 | time_since_update : int 48 | Total number of frames since last measurement update. 49 | state : TrackState 50 | The current track state. 51 | features : List[ndarray] 52 | A cache of features. 
On each measurement update, the associated feature 53 | vector is added to this list. 54 | 55 | """*/ 56 | enum TrackState {Tentative = 1, Confirmed, Deleted}; 57 | 58 | public: 59 | Track(KAL_MEAN& mean, KAL_COVA& covariance, int track_id, 60 | int n_init, int max_age, const FEATURE& feature); 61 | void predit(KalmanFilter *kf); 62 | void update(KalmanFilter * const kf, const DETECTION_ROW &detection); 63 | void mark_missed(); 64 | bool is_confirmed(); 65 | bool is_deleted(); 66 | bool is_tentative(); 67 | DETECTBOX to_tlwh(); 68 | int time_since_update; 69 | int track_id; 70 | FEATURESS features; 71 | KAL_MEAN mean; 72 | KAL_COVA covariance; 73 | 74 | int hits; 75 | int age; 76 | int _n_init; 77 | int _max_age; 78 | TrackState state; 79 | private: 80 | void featuresAppendOne(const FEATURE& f); 81 | }; 82 | 83 | #endif // TRACK_H 84 | -------------------------------------------------------------------------------- /src/matching/tracker.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACKER_H 2 | #define TRACKER_H 3 | #include 4 | 5 | 6 | #include "kalmanfilter.h" 7 | #include "track.h" 8 | #include "../feature/model.h" 9 | 10 | class NearNeighborDisMetric; 11 | 12 | class tracker 13 | { 14 | public: 15 | NearNeighborDisMetric* metric; 16 | float max_iou_distance; 17 | int max_age; 18 | int n_init; 19 | 20 | KalmanFilter* kf; 21 | 22 | int _next_idx; 23 | public: 24 | std::vector tracks; 25 | tracker(/*NearNeighborDisMetric* metric,*/ 26 | float max_cosine_distance, int nn_budget, 27 | float max_iou_distance = 0.7, 28 | int max_age = 30, int n_init=3); 29 | void predict(); 30 | void update(const DETECTIONS& detections); 31 | typedef DYNAMICM (tracker::* GATED_METRIC_FUNC)( 32 | std::vector& tracks, 33 | const DETECTIONS& dets, 34 | const std::vector& track_indices, 35 | const std::vector& detection_indices); 36 | private: 37 | void _match(const DETECTIONS& detections, TRACHER_MATCHD& res); 38 | void 
_initiate_track(const DETECTION_ROW& detection); 39 | public: 40 | DYNAMICM gated_matric( 41 | std::vector& tracks, 42 | const DETECTIONS& dets, 43 | const std::vector& track_indices, 44 | const std::vector& detection_indices); 45 | DYNAMICM iou_cost( 46 | std::vector& tracks, 47 | const DETECTIONS& dets, 48 | const std::vector& track_indices, 49 | const std::vector& detection_indices); 50 | Eigen::VectorXf iou(DETECTBOX& bbox, 51 | DETECTBOXSS &candidates); 52 | }; 53 | 54 | #endif // TRACKER_H 55 | -------------------------------------------------------------------------------- /src/thirdPart/hungarianoper.cpp: -------------------------------------------------------------------------------- 1 | #include "hungarianoper.h" 2 | 3 | Eigen::Matrix HungarianOper::Solve(const DYNAMICM &cost_matrix) 4 | { 5 | int rows = cost_matrix.rows(); 6 | int cols = cost_matrix.cols(); 7 | Matrix matrix(rows, cols); 8 | for (int row = 0; row < rows; row++) { 9 | for (int col = 0; col < cols; col++) { 10 | matrix(row, col) = cost_matrix(row, col); 11 | } 12 | } 13 | //Munkres get matrix; 14 | Munkres m; 15 | m.solve(matrix); 16 | 17 | // 18 | std::vector> pairs; 19 | for (int row = 0; row < rows; row++) { 20 | for (int col = 0; col < cols; col++) { 21 | int tmp = (int)matrix(row, col); 22 | if (tmp == 0) pairs.push_back(std::make_pair(row, col)); 23 | } 24 | } 25 | // 26 | int count = pairs.size(); 27 | Eigen::Matrix re(count, 2); 28 | for (int i = 0; i < count; i++) { 29 | re(i, 0) = pairs[i].first; 30 | re(i, 1) = pairs[i].second; 31 | } 32 | return re; 33 | }//end Solve; 34 | -------------------------------------------------------------------------------- /src/thirdPart/hungarianoper.h: -------------------------------------------------------------------------------- 1 | #ifndef HUNGARIANOPER_H 2 | #define HUNGARIANOPER_H 3 | #include "munkres/munkres.h" 4 | #include "munkres/adapters/boostmatrixadapter.h" 5 | #include "../feature/dataType.h" 6 | 7 | class HungarianOper { 8 | 
public: 9 | static Eigen::Matrix Solve(const DYNAMICM &cost_matrix); 10 | }; 11 | 12 | #endif // HUNGARIANOPER_H 13 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/adapters/adapter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Miroslav Krajicek 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #include "adapter.h" 20 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/adapters/adapter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Miroslav Krajicek 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #ifndef _ADAPTER_H_ 20 | #define _ADAPTER_H_ 21 | 22 | #include "../matrix.h" 23 | #include "../munkres.h" 24 | 25 | template class Adapter 26 | { 27 | public: 28 | virtual Matrix convertToMatrix(const Container &con) const = 0; 29 | virtual void convertFromMatrix(Container &con, const Matrix &matrix) const = 0; 30 | virtual void solve(Container &con) 31 | { 32 | auto matrix = convertToMatrix(con); 33 | m_munkres.solve(matrix); 34 | convertFromMatrix(con, matrix); 35 | } 36 | protected: 37 | Munkres m_munkres; 38 | }; 39 | 40 | #endif /* _ADAPTER_H_ */ 41 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/adapters/boostmatrixadapter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Miroslav Krajicek 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #include "boostmatrixadapter.h" 20 | 21 | //template class BoostMatrixAdapter; 22 | //template class BoostMatrixAdapter; 23 | //template class BoostMatrixAdapter; 24 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/adapters/boostmatrixadapter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Miroslav Krajicek 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #ifndef _BOOSTMATRIXADAPTER_H_ 20 | #define _BOOSTMATRIXADAPTER_H_ 21 | 22 | #include "adapter.h" 23 | #ifndef WIN32 24 | #include 25 | #endif 26 | #include 27 | 28 | template class BoostMatrixAdapter : public Adapter > 29 | { 30 | public: 31 | virtual Matrix convertToMatrix(const boost::numeric::ublas::matrix &boost_matrix) const override 32 | { 33 | const auto rows = boost_matrix.size1 (); 34 | const auto columns = boost_matrix.size2 (); 35 | Matrix matrix (rows, columns); 36 | for (int i = 0; i < rows; ++i) { 37 | for (int j = 0; j < columns; ++j) { 38 | matrix (i, j) = boost_matrix (i, j); 39 | } 40 | } 41 | return matrix; 42 | } 43 | 44 | virtual void convertFromMatrix(boost::numeric::ublas::matrix &boost_matrix,const Matrix &matrix) const override 45 | { 46 | const auto rows = matrix.rows(); 47 | const auto columns = matrix.columns(); 48 | for (int i = 0; i < rows; ++i) { 49 | for (int j = 0; j < columns; ++j) { 50 | boost_matrix (i, j) = matrix (i, j); 51 | } 52 | } 53 | } 54 | }; 55 | 56 | #endif /* _BOOSTMATRIXADAPTER_H_ */ 57 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/munkres.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 John Weaver 3 | * Copyright (c) 2015 Miroslav Krajicek 4 | * 5 | * This program is free software; you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation; either version 2 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | */ 19 | 20 | #include "munkres.h" 21 | 22 | template class Munkres; 23 | template class Munkres; 24 | template class Munkres; 25 | 26 | --------------------------------------------------------------------------------