├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── cfg
│   ├── alexnet.cfg
│   ├── darknet.cfg
│   ├── extraction.cfg
│   ├── extraction.conv.cfg
│   ├── jnet-conv.cfg
│   ├── msr_152.cfg
│   ├── msr_34.cfg
│   ├── msr_50.cfg
│   ├── strided.cfg
│   ├── vgg-16.cfg
│   ├── vgg-conv.cfg
│   ├── writing.cfg
│   ├── yolo-coco.cfg
│   ├── yolo-small.cfg
│   ├── yolo-tiny.cfg
│   ├── yolo.cfg
│   ├── yolo_2class
│   └── yolo_2class_box11.cfg
├── data
│   ├── after_conversion.txt
│   ├── before_conversion.txt
│   ├── dog.jpg
│   ├── eagle.jpg
│   ├── horses.jpg
│   ├── inet.labels.list
│   ├── labels
│   │   ├── aeroplane.png
│   │   ├── airplane.png
│   │   ├── apple.png
│   │   ├── backpack.png
│   │   ├── banana.png
│   │   ├── baseball bat.png
│   │   ├── baseball glove.png
│   │   ├── bear.png
│   │   ├── bed.png
│   │   ├── bench.png
│   │   ├── bicycle.png
│   │   ├── bird.png
│   │   ├── boat.png
│   │   ├── book.png
│   │   ├── bottle.png
│   │   ├── bowl.png
│   │   ├── broccoli.png
│   │   ├── bus.png
│   │   ├── cake.png
│   │   ├── car.png
│   │   ├── carrot.png
│   │   ├── cat.png
│   │   ├── cell phone.png
│   │   ├── chair.png
│   │   ├── clock.png
│   │   ├── couch.png
│   │   ├── cow.png
│   │   ├── cup.png
│   │   ├── dining table.png
│   │   ├── diningtable.png
│   │   ├── dog.png
│   │   ├── donut.png
│   │   ├── elephant.png
│   │   ├── fire hydrant.png
│   │   ├── fork.png
│   │   ├── frisbee.png
│   │   ├── giraffe.png
│   │   ├── hair drier.png
│   │   ├── handbag.png
│   │   ├── horse.png
│   │   ├── hot dog.png
│   │   ├── keyboard.png
│   │   ├── kite.png
│   │   ├── knife.png
│   │   ├── laptop.png
│   │   ├── make_labels.py
│   │   ├── microwave.png
│   │   ├── motorbike.png
│   │   ├── motorcycle.png
│   │   ├── mouse.png
│   │   ├── orange.png
│   │   ├── oven.png
│   │   ├── parking meter.png
│   │   ├── person.png
│   │   ├── pizza.png
│   │   ├── potted plant.png
│   │   ├── pottedplant.png
│   │   ├── refrigerator.png
│   │   ├── remote.png
│   │   ├── sandwich.png
│   │   ├── scissors.png
│   │   ├── sheep.png
│   │   ├── sink.png
│   │   ├── skateboard.png
│   │   ├── skis.png
│   │   ├── snowboard.png
│   │   ├── sofa.png
│   │   ├── spoon.png
│   │   ├── sports ball.png
│   │   ├── stop sign.png
│   │   ├── suitcase.png
│   │   ├── surfboard.png
│   │   ├── teddy bear.png
│   │   ├── tennis racket.png
│   │   ├── tie.png
│   │   ├── toaster.png
│   │   ├── toilet.png
│   │   ├── toothbrush.png
│   │   ├── traffic light.png
│   │   ├── train.png
│   │   ├── truck.png
│   │   ├── tv.png
│   │   ├── tvmonitor.png
│   │   ├── umbrella.png
│   │   ├── vase.png
│   │   ├── wine glass.png
│   │   └── zebra.png
│   ├── person.jpg
│   ├── scream.jpg
│   ├── shortnames.txt
│   └── training_list.txt
├── scripts
│   ├── convert.py
│   ├── dice_label.sh
│   ├── imagenet_label.sh
│   └── voc_label.py
└── src
    ├── activation_kernels.cu
    ├── activations.c
    ├── activations.h
    ├── avgpool_layer.c
    ├── avgpool_layer.h
    ├── avgpool_layer_kernels.cu
    ├── blas.c
    ├── blas.h
    ├── blas_kernels.cu
    ├── box.c
    ├── box.h
    ├── captcha.c
    ├── classifier.c
    ├── coco.c
    ├── coco_kernels.cu
    ├── col2im.c
    ├── col2im.h
    ├── col2im_kernels.cu
    ├── compare.c
    ├── connected_layer.c
    ├── connected_layer.h
    ├── convolutional_kernels.cu
    ├── convolutional_layer.c
    ├── convolutional_layer.h
    ├── cost_layer.c
    ├── cost_layer.h
    ├── cpu_gemm.c
    ├── crop_layer.c
    ├── crop_layer.h
    ├── crop_layer_kernels.cu
    ├── cuda.c
    ├── cuda.h
    ├── darknet.c
    ├── data.c
    ├── data.h
    ├── deconvolutional_kernels.cu
    ├── deconvolutional_layer.c
    ├── deconvolutional_layer.h
    ├── detection_layer.c
    ├── detection_layer.h
    ├── dice.c
    ├── dropout_layer.c
    ├── dropout_layer.h
    ├── dropout_layer_kernels.cu
    ├── gemm.c
    ├── gemm.h
    ├── im2col.c
    ├── im2col.h
    ├── im2col_kernels.cu
    ├── image.c
    ├── image.h
    ├── imagenet.c
    ├── layer.c
    ├── layer.h
    ├── list.c
    ├── list.h
    ├── local_layer.c
    ├── local_layer.h
    ├── matrix.c
    ├── matrix.h
    ├── maxpool_layer.c
    ├── maxpool_layer.h
    ├── maxpool_layer_kernels.cu
    ├── network.c
    ├── network.h
    ├── network_kernels.cu
    ├── nightmare.c
    ├── normalization_layer.c
    ├── normalization_layer.h
    ├── option_list.c
    ├── option_list.h
    ├── params.h
    ├── parser.c
    ├── parser.h
    ├── route_layer.c
    ├── route_layer.h
    ├── server.c
    ├── server.h
    ├── shortcut_layer.c
    ├── shortcut_layer.h
    ├── softmax_layer.c
    ├── softmax_layer.h
    ├── softmax_layer_kernels.cu
    ├── stb_image.h
    ├── stb_image_write.h
    ├── swag.c
    ├── utils.c
    ├── utils.h
    ├── writing.c
    ├── yolo.c
    └── yolo_kernels.cu

/.gitignore:
--------------------------------------------------------------------------------
*.o
*.dSYM
*.csv
*.out
*.png
mnist/
data/
caffe/
grasp/
images/
opencv/
convnet/
decaf/
submission/
cfg/
darknet

# OS Generated #
.DS_Store*
ehthumbs.db
Icon?
Thumbs.db
*.swp

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
YOLO LICENSE
Version 1, July 10 2015

THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER
SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN
TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES
LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO
THE FOLLOWING CONDITIONS:

1. #yolo
2. #swag
3. #blazeit

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
GPU=0
OPENCV=0
DEBUG=0

ARCH= --gpu-architecture=compute_20 --gpu-code=compute_20

VPATH=./src/
EXEC=darknet
OBJDIR=./obj/

CC=gcc
NVCC=nvcc
OPTS=-Ofast
LDFLAGS= -lm -pthread -lstdc++
COMMON=
CFLAGS=-Wall -Wfatal-errors

ifeq ($(DEBUG), 1)
OPTS=-O0 -g
endif

CFLAGS+=$(OPTS)

ifeq ($(OPENCV), 1)
COMMON+= -DOPENCV
CFLAGS+= -DOPENCV
LDFLAGS+= `pkg-config --libs opencv`
COMMON+= `pkg-config --cflags opencv`
endif

ifeq ($(GPU), 1)
COMMON+= -DGPU -I/usr/local/cuda/include/
CFLAGS+= -DGPU
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
endif

OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o
ifeq ($(GPU), 1)
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o yolo_kernels.o coco_kernels.o
endif

OBJS = $(addprefix $(OBJDIR), $(OBJ))
DEPS = $(wildcard src/*.h) Makefile

all: obj results $(EXEC)

$(EXEC): $(OBJS)
	$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS)

$(OBJDIR)%.o: %.c $(DEPS)
	$(CC) $(COMMON) $(CFLAGS) -c $< -o $@

$(OBJDIR)%.o: %.cu $(DEPS)
	$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@

obj:
	mkdir -p obj
results:
	mkdir -p results

.PHONY: clean

clean:
	rm -rf $(OBJS) $(EXEC)

-------------------------------------------------------------------------------- /cfg/alexnet.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=256 5 | width=256 6 | channels=3 7 | learning_rate=0.01 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | [crop] 12 | crop_height=224 13 | crop_width=224 14 | flip=1 15 | angle=0 16 | saturation=1 17 | exposure=1 18 | 19 | [convolutional] 20 | filters=64 21 | size=11 22 | stride=4 23 | pad=0 24 | activation=ramp 25 | 26 | [maxpool] 27 | size=3 28 | stride=2 29 | 30 | [convolutional] 31 | filters=192 32 | size=5 33 | stride=1 34 | pad=1 35 | activation=ramp 36 | 37 | [maxpool] 38 | size=3 39 | stride=2 40 | 41 | [convolutional] 42 | filters=384 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=ramp 47 | 48 | [convolutional] 49 | filters=256 50 | size=3 51 | stride=1 52 | pad=1 53 | activation=ramp 54 | 55 | [convolutional] 56 | filters=256 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=ramp 61 | 62 | [maxpool] 63 | size=3 64 | stride=2 65 | 66 | [connected] 67 | output=4096 68 | activation=ramp 69 | 70 | [dropout] 71 | probability=.5 72 | 73 | [connected] 74 | output=4096 75 | activation=ramp 76 | 77 | [dropout] 78 | probability=.5 79 | 80 | [connected] 81 | output=1000 82 | activation=ramp 83 | 84 | [softmax] 85 | groups=1 86 | 87 | [cost] 88 | type=sse 89 | 90 | -------------------------------------------------------------------------------- /cfg/darknet.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.01 11 | policy=sigmoid 12 | gamma=.00002 13 | step=400000 14 | max_batches=800000 15 | 16 | [crop] 17 | crop_height=224 18 | crop_width=224 19 | flip=1 20 | angle=0 21 | saturation=1 22 | exposure=1 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [avgpool] 98 | 99 | [dropout] 100 | probability=.5 101 | 102 | [connected] 103 | output=1000 104 | activation=leaky 105 | 106 | [softmax] 107 | groups=1 108 | 109 | [cost] 110 | type=sse 111 | 112 | -------------------------------------------------------------------------------- /cfg/extraction.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.5 11 | policy=poly 12 | power=6 13 | max_batches=500000 14 | 15 | [crop] 16 | 
crop_height=224 17 | crop_width=224 18 | flip=1 19 | saturation=1 20 | exposure=1 21 | angle=0 22 | 23 | [convolutional] 24 | filters=64 25 | size=7 26 | stride=2 27 | pad=1 28 | activation=leaky 29 | 30 | [maxpool] 31 | size=2 32 | stride=2 33 | 34 | [convolutional] 35 | filters=192 36 | size=3 37 | stride=1 38 | pad=1 39 | activation=leaky 40 | 41 | [maxpool] 42 | size=2 43 | stride=2 44 | 45 | [convolutional] 46 | filters=128 47 | size=1 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | filters=256 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | filters=256 61 | size=1 62 | stride=1 63 | pad=1 64 | activation=leaky 65 | 66 | [convolutional] 67 | filters=512 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | filters=256 79 | size=1 80 | stride=1 81 | pad=1 82 | activation=leaky 83 | 84 | [convolutional] 85 | filters=512 86 | size=3 87 | stride=1 88 | pad=1 89 | activation=leaky 90 | 91 | [convolutional] 92 | filters=256 93 | size=1 94 | stride=1 95 | pad=1 96 | activation=leaky 97 | 98 | [convolutional] 99 | filters=512 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [convolutional] 106 | filters=256 107 | size=1 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | [convolutional] 113 | filters=512 114 | size=3 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | [convolutional] 120 | filters=256 121 | size=1 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [convolutional] 127 | filters=512 128 | size=3 129 | stride=1 130 | pad=1 131 | activation=leaky 132 | 133 | [convolutional] 134 | filters=512 135 | size=1 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [convolutional] 141 | filters=1024 142 | size=3 143 | stride=1 144 | pad=1 145 | activation=leaky 146 | 147 | [maxpool] 148 | size=2 149 | stride=2 150 | 151 | [convolutional] 152 | filters=512 153 | size=1 154 | stride=1 155 | pad=1 156 | activation=leaky 157 | 158 | [convolutional] 159 | filters=1024 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | filters=512 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | filters=1024 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [avgpool] 180 | 181 | [connected] 182 | output=1000 183 | activation=leaky 184 | 185 | [softmax] 186 | groups=1 187 | 188 | [cost] 189 | type=sse 190 | 191 | -------------------------------------------------------------------------------- /cfg/extraction.conv.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.5 11 | policy=poly 12 | power=6 13 | max_batches=500000 14 | 15 | [convolutional] 16 | filters=64 17 | size=7 18 | stride=2 19 | pad=1 20 | activation=leaky 21 | 22 | [maxpool] 23 | size=2 24 | stride=2 25 | 26 | [convolutional] 27 | filters=192 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | filters=128 39 | size=1 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [convolutional] 45 | filters=256 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | filters=256 53 | size=1 54 | stride=1 55 | pad=1 56 | 
activation=leaky 57 | 58 | [convolutional] 59 | filters=512 60 | size=3 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [maxpool] 66 | size=2 67 | stride=2 68 | 69 | [convolutional] 70 | filters=256 71 | size=1 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [convolutional] 77 | filters=512 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [convolutional] 84 | filters=256 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | filters=512 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=256 99 | size=1 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | filters=256 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | filters=512 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | filters=512 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | filters=1024 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [maxpool] 140 | size=2 141 | stride=2 142 | 143 | [convolutional] 144 | filters=512 145 | size=1 146 | stride=1 147 | pad=1 148 | activation=leaky 149 | 150 | [convolutional] 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | filters=512 159 | size=1 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [convolutional] 165 | filters=1024 166 | size=3 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [avgpool] 172 | 173 | [connected] 174 | output=1000 175 | activation=leaky 176 | 177 | [softmax] 178 | groups=1 179 | 180 | -------------------------------------------------------------------------------- /cfg/jnet-conv.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | height=10 5 | width=10 6 | channels=3 7 | learning_rate=0.01 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | [convolutional] 12 | filters=32 13 | size=3 14 | stride=1 15 | pad=1 16 | activation=leaky 17 | 18 | [convolutional] 19 | filters=32 20 | size=3 21 | stride=1 22 | pad=1 23 | activation=leaky 24 | 25 | [maxpool] 26 | stride=2 27 | size=2 28 | 29 | [convolutional] 30 | filters=64 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=leaky 35 | 36 | [convolutional] 37 | filters=64 38 | size=3 39 | stride=1 40 | pad=1 41 | activation=leaky 42 | 43 | [maxpool] 44 | stride=2 45 | size=2 46 | 47 | [convolutional] 48 | filters=128 49 | size=3 50 | stride=1 51 | pad=1 52 | activation=leaky 53 | 54 | [convolutional] 55 | filters=128 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=leaky 60 | 61 | [maxpool] 62 | stride=2 63 | size=2 64 | 65 | [convolutional] 66 | filters=256 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [convolutional] 73 | filters=256 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [maxpool] 80 | stride=2 81 | size=2 82 | 83 | [convolutional] 84 | filters=512 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | filters=512 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [maxpool] 98 | stride=2 99 | size=2 100 | 101 | [convolutional] 102 | filters=1024 103 | size=3 104 | stride=1 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 
109 | filters=1024 110 | size=3 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [maxpool] 116 | size=2 117 | stride=2 118 | 119 | -------------------------------------------------------------------------------- /cfg/msr_34.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.1 11 | policy=poly 12 | power=4 13 | max_batches=500000 14 | 15 | #policy=sigmoid 16 | #gamma=.00008 17 | #step=100000 18 | #max_batches=200000 19 | 20 | [crop] 21 | crop_height=224 22 | crop_width=224 23 | flip=1 24 | saturation=1 25 | exposure=1 26 | angle=0 27 | 28 | [convolutional] 29 | batch_normalize=1 30 | filters=64 31 | size=7 32 | stride=2 33 | pad=1 34 | activation=leaky 35 | 36 | [maxpool] 37 | size=3 38 | stride=2 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=64 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [shortcut] 57 | from = -3 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=3 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=64 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [shortcut] 76 | from = -3 77 | 78 | [convolutional] 79 | batch_normalize=1 80 | filters=64 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=64 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [shortcut] 95 | from = -3 96 | 97 | 98 | 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=128 103 | size=3 104 | stride=2 105 | pad=1 106 | activation=leaky 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=128 111 | size=3 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [shortcut] 117 | from = -3 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=128 122 | size=3 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=128 130 | size=3 131 | stride=1 132 | pad=1 133 | activation=leaky 134 | 135 | [shortcut] 136 | from = -3 137 | 138 | [convolutional] 139 | batch_normalize=1 140 | filters=128 141 | size=3 142 | stride=1 143 | pad=1 144 | activation=leaky 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=3 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [shortcut] 155 | from = -3 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=128 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [shortcut] 174 | from = -3 175 | 176 | 177 | 178 | 179 | 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=256 184 | size=3 185 | stride=2 186 | pad=1 187 | activation=leaky 188 | 189 | [convolutional] 190 | batch_normalize=1 191 | filters=256 192 | size=3 193 | stride=1 194 | pad=1 195 | activation=leaky 196 | 197 | [shortcut] 198 | from = -3 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | filters=256 203 | size=3 204 | stride=1 205 | pad=1 206 | activation=leaky 207 | 208 | [convolutional] 209 | batch_normalize=1 210 | filters=256 211 | size=3 212 | stride=1 
213 | pad=1 214 | activation=leaky 215 | 216 | [shortcut] 217 | from = -3 218 | 219 | [convolutional] 220 | batch_normalize=1 221 | filters=256 222 | size=3 223 | stride=1 224 | pad=1 225 | activation=leaky 226 | 227 | [convolutional] 228 | batch_normalize=1 229 | filters=256 230 | size=3 231 | stride=1 232 | pad=1 233 | activation=leaky 234 | 235 | [shortcut] 236 | from = -3 237 | 238 | [convolutional] 239 | batch_normalize=1 240 | filters=256 241 | size=3 242 | stride=1 243 | pad=1 244 | activation=leaky 245 | 246 | [convolutional] 247 | batch_normalize=1 248 | filters=256 249 | size=3 250 | stride=1 251 | pad=1 252 | activation=leaky 253 | 254 | [shortcut] 255 | from = -3 256 | 257 | [convolutional] 258 | batch_normalize=1 259 | filters=256 260 | size=3 261 | stride=1 262 | pad=1 263 | activation=leaky 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=256 268 | size=3 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [shortcut] 274 | from = -3 275 | 276 | [convolutional] 277 | batch_normalize=1 278 | filters=256 279 | size=3 280 | stride=1 281 | pad=1 282 | activation=leaky 283 | 284 | [convolutional] 285 | batch_normalize=1 286 | filters=256 287 | size=3 288 | stride=1 289 | pad=1 290 | activation=leaky 291 | 292 | [shortcut] 293 | from = -3 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | [convolutional] 304 | batch_normalize=1 305 | filters=512 306 | size=3 307 | stride=2 308 | pad=1 309 | activation=leaky 310 | 311 | [convolutional] 312 | batch_normalize=1 313 | filters=512 314 | size=3 315 | stride=1 316 | pad=1 317 | activation=leaky 318 | 319 | [shortcut] 320 | from = -3 321 | 322 | [convolutional] 323 | batch_normalize=1 324 | filters=512 325 | size=3 326 | stride=1 327 | pad=1 328 | activation=leaky 329 | 330 | [convolutional] 331 | batch_normalize=1 332 | filters=512 333 | size=3 334 | stride=1 335 | pad=1 336 | activation=leaky 337 | 338 | [shortcut] 339 | from = -3 340 | 341 | [convolutional] 342 | batch_normalize=1 343 | filters=512 344 | size=3 345 | stride=1 346 | pad=1 347 | activation=leaky 348 | 349 | [convolutional] 350 | batch_normalize=1 351 | filters=512 352 | size=3 353 | stride=1 354 | pad=1 355 | activation=leaky 356 | 357 | [shortcut] 358 | from = -3 359 | 360 | [avgpool] 361 | 362 | [connected] 363 | output=1000 364 | activation=leaky 365 | 366 | [softmax] 367 | groups=1 368 | 369 | [cost] 370 | type=sse 371 | 372 | -------------------------------------------------------------------------------- /cfg/strided.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=4 4 | height=256 5 | width=256 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.01 11 | policy=steps 12 | scales=.1,.1,.1 13 | steps=200000,300000,400000 14 | max_batches=800000 15 | 16 | 17 | [crop] 18 | crop_height=224 19 | crop_width=224 20 | flip=1 21 | angle=0 22 | saturation=1 23 | exposure=1 24 | shift=.2 25 | 26 | [convolutional] 27 | filters=64 28 | size=7 29 | stride=2 30 | pad=1 31 | activation=ramp 32 | 33 | [convolutional] 34 | filters=192 35 | size=3 36 | stride=2 37 | pad=1 38 | activation=ramp 39 | 40 | [convolutional] 41 | filters=128 42 | size=1 43 | stride=1 44 | pad=1 45 | activation=ramp 46 | 47 | [convolutional] 48 | filters=256 49 | size=3 50 | stride=2 51 | pad=1 52 | activation=ramp 53 | 54 | [convolutional] 55 | filters=128 56 | size=1 57 | stride=1 58 | pad=1 59 | activation=ramp 60 | 61 | [convolutional] 62 | filters=256 63 | size=3 64 | 
stride=1 65 | pad=1 66 | activation=ramp 67 | 68 | [convolutional] 69 | filters=128 70 | size=1 71 | stride=1 72 | pad=1 73 | activation=ramp 74 | 75 | [convolutional] 76 | filters=512 77 | size=3 78 | stride=2 79 | pad=1 80 | activation=ramp 81 | 82 | [convolutional] 83 | filters=256 84 | size=1 85 | stride=1 86 | pad=1 87 | activation=ramp 88 | 89 | [convolutional] 90 | filters=512 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=ramp 95 | 96 | [convolutional] 97 | filters=256 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=ramp 102 | 103 | [convolutional] 104 | filters=512 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=ramp 109 | 110 | [convolutional] 111 | filters=256 112 | size=1 113 | stride=1 114 | pad=1 115 | activation=ramp 116 | 117 | [convolutional] 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=ramp 123 | 124 | [convolutional] 125 | filters=256 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=ramp 130 | 131 | [convolutional] 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=ramp 137 | 138 | [convolutional] 139 | filters=256 140 | size=1 141 | stride=1 142 | pad=1 143 | activation=ramp 144 | 145 | [convolutional] 146 | filters=1024 147 | size=3 148 | stride=2 149 | pad=1 150 | activation=ramp 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=ramp 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=ramp 165 | 166 | [maxpool] 167 | size=3 168 | stride=2 169 | 170 | [connected] 171 | output=4096 172 | activation=ramp 173 | 174 | [dropout] 175 | probability=0.5 176 | 177 | [connected] 178 | output=1000 179 | activation=ramp 180 | 181 | [softmax] 182 | 183 | [cost] 184 | type=sse 185 | 186 | -------------------------------------------------------------------------------- /cfg/vgg-16.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=4 4 | height=256 5 | width=256 6 | channels=3 7 | learning_rate=0.00001 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | [crop] 12 | crop_height=224 13 | crop_width=224 14 | flip=1 15 | exposure=1 16 | saturation=1 17 | angle=0 18 | 19 | [convolutional] 20 | filters=64 21 | size=3 22 | stride=1 23 | pad=1 24 | activation=relu 25 | 26 | [convolutional] 27 | filters=64 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=relu 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | filters=128 39 | size=3 40 | stride=1 41 | pad=1 42 | activation=relu 43 | 44 | [convolutional] 45 | filters=128 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=relu 50 | 51 | [maxpool] 52 | size=2 53 | stride=2 54 | 55 | [convolutional] 56 | filters=256 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=relu 61 | 62 | [convolutional] 63 | filters=256 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=relu 68 | 69 | [convolutional] 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=relu 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | filters=512 82 | size=3 83 | stride=1 84 | pad=1 85 | activation=relu 86 | 87 | [convolutional] 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=relu 93 | 94 | [convolutional] 95 | filters=512 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=relu 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | filters=512 107 | size=3 108 | stride=1 109 | pad=1 110 | 
activation=relu 111 | 112 | [convolutional] 113 | filters=512 114 | size=3 115 | stride=1 116 | pad=1 117 | activation=relu 118 | 119 | [convolutional] 120 | filters=512 121 | size=3 122 | stride=1 123 | pad=1 124 | activation=relu 125 | 126 | [maxpool] 127 | size=2 128 | stride=2 129 | 130 | [connected] 131 | output=4096 132 | activation=relu 133 | 134 | [dropout] 135 | probability=.5 136 | 137 | [connected] 138 | output=4096 139 | activation=relu 140 | 141 | [dropout] 142 | probability=.5 143 | 144 | [connected] 145 | output=1000 146 | activation=linear 147 | 148 | [softmax] 149 | groups=1 150 | 151 | [cost] 152 | type=sse 153 | 154 | -------------------------------------------------------------------------------- /cfg/vgg-conv.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=1 3 | subdivisions=1 4 | width=224 5 | height=224 6 | channels=3 7 | learning_rate=0.00001 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | [convolutional] 12 | filters=64 13 | size=3 14 | stride=1 15 | pad=1 16 | activation=relu 17 | 18 | [convolutional] 19 | filters=64 20 | size=3 21 | stride=1 22 | pad=1 23 | activation=relu 24 | 25 | [maxpool] 26 | size=2 27 | stride=2 28 | 29 | [convolutional] 30 | filters=128 31 | size=3 32 | stride=1 33 | pad=1 34 | activation=relu 35 | 36 | [convolutional] 37 | filters=128 38 | size=3 39 | stride=1 40 | pad=1 41 | activation=relu 42 | 43 | [maxpool] 44 | size=2 45 | stride=2 46 | 47 | [convolutional] 48 | filters=256 49 | size=3 50 | stride=1 51 | pad=1 52 | activation=relu 53 | 54 | [convolutional] 55 | filters=256 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=relu 60 | 61 | [convolutional] 62 | filters=256 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=relu 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | filters=512 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=relu 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=relu 85 | 86 | [convolutional] 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=relu 92 | 93 | [maxpool] 94 | size=2 95 | stride=2 96 | 97 | [convolutional] 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=relu 103 | 104 | [convolutional] 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=relu 110 | 111 | [convolutional] 112 | filters=512 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=relu 117 | 118 | [maxpool] 119 | size=2 120 | stride=2 121 | 122 | -------------------------------------------------------------------------------- /cfg/writing.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=2 4 | height=256 5 | width=256 6 | channels=3 7 | learning_rate=0.00000001 8 | momentum=0.9 9 | decay=0.0005 10 | seen=0 11 | 12 | [convolutional] 13 | filters=32 14 | size=3 15 | stride=1 16 | pad=1 17 | activation=leaky 18 | 19 | [convolutional] 20 | filters=32 21 | size=3 22 | stride=1 23 | pad=1 24 | activation=leaky 25 | 26 | [convolutional] 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [convolutional] 34 | filters=1 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=logistic 39 | 40 | [cost] 41 | 42 | -------------------------------------------------------------------------------- /cfg/yolo-coco.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=4 4 | 
height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=100,200,300,100000,150000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 300000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | 181 | ################################# 182 | 183 | 184 | [convolutional] 185 | size=3 186 | stride=1 187 | pad=1 188 | filters=1024 189 | activation=leaky 190 | 191 | [convolutional] 192 | size=3 193 | stride=2 194 | pad=1 195 | filters=1024 196 | activation=leaky 197 | 198 | [convolutional] 199 | size=3 200 | stride=1 201 | pad=1 202 | filters=1024 203 | activation=leaky 204 | 205 | [convolutional] 206 | size=3 207 | stride=1 208 | pad=1 209 | filters=1024 210 | activation=leaky 211 | 212 | [local] 213 | size=3 214 | stride=1 215 | pad=1 216 | filters=192 217 | activation=leaky 218 | 219 | [dropout] 220 | probability=.5 221 | 222 | [connected] 223 | output= 4410 224 | activation=linear 225 | 226 | [detection] 227 | classes=80 228 | coords=4 229 | rescore=1 230 | side=7 231 | num=2 232 | softmax=0 233 | sqrt=1 234 | jitter=.2 235 | 236 | object_scale=1 237 | noobject_scale=.5 238 | 
class_scale=1 239 | coord_scale=5 240 | 241 | -------------------------------------------------------------------------------- /cfg/yolo-small.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=512 212 | activation=leaky 213 | 214 | [connected] 215 | output=4096 216 | activation=leaky 217 | 218 | [dropout] 219 | probability=.5 220 | 221 
| [connected] 222 | output= 1470 223 | activation=linear 224 | 225 | [detection] 226 | classes=20 227 | coords=4 228 | rescore=1 229 | side=7 230 | num=2 231 | softmax=0 232 | sqrt=1 233 | jitter=.2 234 | 235 | object_scale=1 236 | noobject_scale=.5 237 | class_scale=1 238 | coord_scale=5 239 | 240 | -------------------------------------------------------------------------------- /cfg/yolo-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0001 11 | policy=steps 12 | steps=20,40,60,80,20000,30000 13 | scales=5,5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=16 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=32 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=64 48 | size=3 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [maxpool] 54 | size=2 55 | stride=2 56 | 57 | [convolutional] 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | filters=256 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [maxpool] 76 | size=2 77 | stride=2 78 | 79 | [convolutional] 80 | filters=512 81 | size=3 82 | stride=1 83 | pad=1 84 | activation=leaky 85 | 86 | [maxpool] 87 | size=2 88 | stride=2 89 | 90 | [convolutional] 91 | filters=1024 92 | size=3 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | [convolutional] 105 | filters=1024 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [connected] 112 | output=256 113 | activation=linear 114 | 115 | [connected] 116 | output=4096 117 | activation=leaky 118 | 119 | [dropout] 120 | probability=.5 121 | 122 | [connected] 123 | output= 1470 124 | activation=linear 125 | 126 | [detection] 127 | classes=20 128 | coords=4 129 | rescore=1 130 | side=7 131 | num=2 132 | softmax=0 133 | sqrt=1 134 | jitter=.2 135 | 136 | object_scale=1 137 | noobject_scale=.5 138 | class_scale=1 139 | coord_scale=5 140 | 141 | -------------------------------------------------------------------------------- /cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=64 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | 
size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=4096 212 | activation=leaky 213 | 214 | [dropout] 215 | probability=.5 216 | 217 | [connected] 218 | output= 1470 219 | activation=linear 220 | 221 | [detection] 222 | classes=20 223 | coords=4 224 | rescore=1 225 | side=7 226 | num=2 227 | softmax=0 228 | sqrt=1 229 | jitter=.2 230 | 231 | object_scale=1 232 | noobject_scale=.5 233 | class_scale=1 234 | coord_scale=5 235 | 236 | -------------------------------------------------------------------------------- /cfg/yolo_2class: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.001 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 
| [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=4096 212 | activation=leaky 213 | 214 | [dropout] 215 | probability=.5 216 | 217 | [connected] 218 | output= 588 219 | activation=linear 220 | 221 | [detection] 222 | classes=2 223 | coords=4 224 | rescore=1 225 | side=7 226 | num=2 227 | softmax=0 228 | sqrt=1 229 | jitter=.2 230 | 231 | object_scale=1 232 | noobject_scale=.5 233 | class_scale=1 234 | coord_scale=5 235 | -------------------------------------------------------------------------------- /cfg/yolo_2class_box11.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=2 4 | height=448 5 | width=448 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | 10 | learning_rate=0.0005 11 | policy=steps 12 | steps=200,400,600,20000,30000 13 | scales=2.5,2,2,.1,.1 14 | max_batches = 40000 15 | 16 | [crop] 17 | crop_width=448 18 | crop_height=448 19 | flip=0 20 | angle=0 21 | saturation = 1.5 22 | exposure = 1.5 23 | 24 | [convolutional] 25 | filters=64 26 | size=7 27 | 
stride=2 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | filters=192 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | filters=128 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | filters=256 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | filters=256 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | pad=1 72 | activation=leaky 73 | 74 | [maxpool] 75 | size=2 76 | stride=2 77 | 78 | [convolutional] 79 | filters=256 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [convolutional] 93 | filters=256 94 | size=1 95 | stride=1 96 | pad=1 97 | activation=leaky 98 | 99 | [convolutional] 100 | filters=512 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | [convolutional] 107 | filters=256 108 | size=1 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | filters=512 115 | size=3 116 | stride=1 117 | pad=1 118 | activation=leaky 119 | 120 | [convolutional] 121 | filters=256 122 | size=1 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | filters=512 129 | size=3 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | filters=512 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | filters=1024 143 | size=3 144 | stride=1 145 | pad=1 146 | activation=leaky 147 | 148 | [maxpool] 149 | size=2 150 | stride=2 151 | 152 | [convolutional] 153 | filters=512 154 | size=1 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [convolutional] 160 | filters=1024 161 | size=3 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | [convolutional] 167 | filters=512 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | filters=1024 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | ####### 181 | 182 | [convolutional] 183 | size=3 184 | stride=1 185 | pad=1 186 | filters=1024 187 | activation=leaky 188 | 189 | [convolutional] 190 | size=3 191 | stride=2 192 | pad=1 193 | filters=1024 194 | activation=leaky 195 | 196 | [convolutional] 197 | size=3 198 | stride=1 199 | pad=1 200 | filters=1024 201 | activation=leaky 202 | 203 | [convolutional] 204 | size=3 205 | stride=1 206 | pad=1 207 | filters=1024 208 | activation=leaky 209 | 210 | [connected] 211 | output=4096 212 | activation=leaky 213 | 214 | [dropout] 215 | probability=.5 216 | 217 | [connected] 218 | output= 1452 219 | activation=linear 220 | 221 | [detection] 222 | classes=2 223 | coords=4 224 | rescore=1 225 | side=11 226 | num=2 227 | softmax=0 228 | sqrt=1 229 | jitter=.2 230 | 231 | object_scale=1 232 | noobject_scale=.5 233 | class_scale=1 234 | coord_scale=5 235 | 236 | -------------------------------------------------------------------------------- /data/after_conversion.txt: -------------------------------------------------------------------------------- 1 | 0 0.123552123552 0.559278350515 0.0926640926641 0.10824742268 2 | 0 0.743243243243 0.585051546392 0.0579150579151 0.0773195876289 3 | -------------------------------------------------------------------------------- 
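The output= value on the final [connected] layer in each detection config above follows directly from the [detection] parameters that come after it: every cell of the side x side grid predicts one probability per class plus num boxes, each carrying coords coordinates and one confidence. A small sanity-check sketch of that arithmetic in Python (the helper name is ours, not something in the repo):

def detection_output_size(side, classes, num, coords=4):
    # Per grid cell: `classes` class probabilities plus `num` boxes,
    # each with `coords` coordinates and one confidence value.
    return side * side * (classes + num * (coords + 1))

# Matches the connected-layer sizes used in the configs above:
assert detection_output_size(side=7, classes=20, num=2) == 1470   # yolo.cfg, yolo-small.cfg, yolo-tiny.cfg
assert detection_output_size(side=7, classes=80, num=2) == 4410   # yolo-coco.cfg
assert detection_output_size(side=7, classes=2, num=2) == 588     # yolo_2class
assert detection_output_size(side=11, classes=2, num=2) == 1452   # yolo_2class_box11.cfg
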
/data/before_conversion.txt: -------------------------------------------------------------------------------- 1 | 2 2 | 61 90 72 103 3 | 198 5 243 54 4 | -------------------------------------------------------------------------------- /data/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/dog.jpg -------------------------------------------------------------------------------- /data/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/eagle.jpg -------------------------------------------------------------------------------- /data/horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/horses.jpg -------------------------------------------------------------------------------- /data/labels/aeroplane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/aeroplane.png -------------------------------------------------------------------------------- /data/labels/airplane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/airplane.png -------------------------------------------------------------------------------- /data/labels/apple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/apple.png -------------------------------------------------------------------------------- /data/labels/backpack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/backpack.png -------------------------------------------------------------------------------- /data/labels/banana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/banana.png -------------------------------------------------------------------------------- /data/labels/baseball bat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/baseball bat.png -------------------------------------------------------------------------------- /data/labels/baseball glove.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/baseball glove.png -------------------------------------------------------------------------------- /data/labels/bear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/bear.png -------------------------------------------------------------------------------- 
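data/before_conversion.txt above holds a box count followed by pixel-coordinate boxes, while data/after_conversion.txt holds the same annotations as a class index plus center/size coordinates normalized to [0,1], the format Darknet's detection training expects. scripts/convert.py presumably performs that conversion; below is a minimal Python sketch of the usual normalization, mirroring the convert() helper typically found in scripts/voc_label.py. The column order of before_conversion.txt and the image size in the usage comment are assumptions, not values taken from the repo.

def convert(size, box):
    # size = (image_width, image_height) in pixels
    # box  = (xmin, xmax, ymin, ymax) in pixels (assumed column order)
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0   # box center
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]           # box width and height
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)

# Hypothetical usage: prepend the class index to get one line in the
# after_conversion.txt format, e.g.
#   "0 " + " ".join(str(v) for v in convert((640, 480), (61, 90, 72, 103)))
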
/data/labels/bed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/bed.png -------------------------------------------------------------------------------- /data/labels/bench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/bench.png -------------------------------------------------------------------------------- /data/labels/bicycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/bicycle.png -------------------------------------------------------------------------------- /data/labels/bird.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/bird.png -------------------------------------------------------------------------------- /data/labels/boat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/boat.png -------------------------------------------------------------------------------- /data/labels/book.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/book.png -------------------------------------------------------------------------------- /data/labels/bottle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/bottle.png -------------------------------------------------------------------------------- /data/labels/bowl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/bowl.png -------------------------------------------------------------------------------- /data/labels/broccoli.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/broccoli.png -------------------------------------------------------------------------------- /data/labels/bus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/bus.png -------------------------------------------------------------------------------- /data/labels/cake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/cake.png -------------------------------------------------------------------------------- /data/labels/car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/car.png 
-------------------------------------------------------------------------------- /data/labels/carrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/carrot.png -------------------------------------------------------------------------------- /data/labels/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/cat.png -------------------------------------------------------------------------------- /data/labels/cell phone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/cell phone.png -------------------------------------------------------------------------------- /data/labels/chair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/chair.png -------------------------------------------------------------------------------- /data/labels/clock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/clock.png -------------------------------------------------------------------------------- /data/labels/couch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/couch.png -------------------------------------------------------------------------------- /data/labels/cow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/cow.png -------------------------------------------------------------------------------- /data/labels/cup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/cup.png -------------------------------------------------------------------------------- /data/labels/dining table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/dining table.png -------------------------------------------------------------------------------- /data/labels/diningtable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/diningtable.png -------------------------------------------------------------------------------- /data/labels/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/dog.png -------------------------------------------------------------------------------- /data/labels/donut.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/donut.png -------------------------------------------------------------------------------- /data/labels/elephant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/elephant.png -------------------------------------------------------------------------------- /data/labels/fire hydrant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/fire hydrant.png -------------------------------------------------------------------------------- /data/labels/fork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/fork.png -------------------------------------------------------------------------------- /data/labels/frisbee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/frisbee.png -------------------------------------------------------------------------------- /data/labels/giraffe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/giraffe.png -------------------------------------------------------------------------------- /data/labels/hair drier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/hair drier.png -------------------------------------------------------------------------------- /data/labels/handbag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/handbag.png -------------------------------------------------------------------------------- /data/labels/horse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/horse.png -------------------------------------------------------------------------------- /data/labels/hot dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/hot dog.png -------------------------------------------------------------------------------- /data/labels/keyboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/keyboard.png -------------------------------------------------------------------------------- /data/labels/kite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/kite.png -------------------------------------------------------------------------------- 
/data/labels/knife.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/knife.png -------------------------------------------------------------------------------- /data/labels/laptop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/laptop.png -------------------------------------------------------------------------------- /data/labels/make_labels.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | l = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 4 | 5 | for word in l: 6 | os.system("convert -fill black -background white -bordercolor white -border 4 -font futura-normal -pointsize 18 label:\"%s\" \"%s.png\""%(word, word)) 7 | -------------------------------------------------------------------------------- /data/labels/microwave.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/microwave.png -------------------------------------------------------------------------------- /data/labels/motorbike.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/motorbike.png -------------------------------------------------------------------------------- /data/labels/motorcycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/motorcycle.png -------------------------------------------------------------------------------- /data/labels/mouse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/mouse.png -------------------------------------------------------------------------------- /data/labels/orange.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/orange.png -------------------------------------------------------------------------------- /data/labels/oven.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/oven.png -------------------------------------------------------------------------------- /data/labels/parking meter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/parking meter.png -------------------------------------------------------------------------------- /data/labels/person.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/person.png -------------------------------------------------------------------------------- /data/labels/pizza.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/pizza.png -------------------------------------------------------------------------------- /data/labels/potted plant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/potted plant.png -------------------------------------------------------------------------------- /data/labels/pottedplant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/pottedplant.png -------------------------------------------------------------------------------- /data/labels/refrigerator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/refrigerator.png -------------------------------------------------------------------------------- /data/labels/remote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/remote.png -------------------------------------------------------------------------------- /data/labels/sandwich.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/sandwich.png -------------------------------------------------------------------------------- /data/labels/scissors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/scissors.png -------------------------------------------------------------------------------- /data/labels/sheep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/sheep.png -------------------------------------------------------------------------------- /data/labels/sink.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/sink.png -------------------------------------------------------------------------------- /data/labels/skateboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/skateboard.png -------------------------------------------------------------------------------- /data/labels/skis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/skis.png -------------------------------------------------------------------------------- /data/labels/snowboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/snowboard.png -------------------------------------------------------------------------------- /data/labels/sofa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/sofa.png -------------------------------------------------------------------------------- /data/labels/spoon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/spoon.png -------------------------------------------------------------------------------- /data/labels/sports ball.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/sports ball.png -------------------------------------------------------------------------------- /data/labels/stop sign.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/stop sign.png -------------------------------------------------------------------------------- /data/labels/suitcase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/suitcase.png -------------------------------------------------------------------------------- /data/labels/surfboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/surfboard.png -------------------------------------------------------------------------------- /data/labels/teddy bear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/teddy bear.png -------------------------------------------------------------------------------- /data/labels/tennis racket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/tennis racket.png 
-------------------------------------------------------------------------------- /data/labels/tie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/tie.png -------------------------------------------------------------------------------- /data/labels/toaster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/toaster.png -------------------------------------------------------------------------------- /data/labels/toilet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/toilet.png -------------------------------------------------------------------------------- /data/labels/toothbrush.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/toothbrush.png -------------------------------------------------------------------------------- /data/labels/traffic light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/traffic light.png -------------------------------------------------------------------------------- /data/labels/train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/train.png -------------------------------------------------------------------------------- /data/labels/truck.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/truck.png -------------------------------------------------------------------------------- /data/labels/tv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/tv.png -------------------------------------------------------------------------------- /data/labels/tvmonitor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/tvmonitor.png -------------------------------------------------------------------------------- /data/labels/umbrella.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/umbrella.png -------------------------------------------------------------------------------- /data/labels/vase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/vase.png -------------------------------------------------------------------------------- /data/labels/wine glass.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/wine glass.png -------------------------------------------------------------------------------- /data/labels/zebra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/labels/zebra.png -------------------------------------------------------------------------------- /data/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/person.jpg -------------------------------------------------------------------------------- /data/scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Guanghan/darknet/cd0a3ef2de8826471b526e2ceb9ae3329de01fea/data/scream.jpg -------------------------------------------------------------------------------- /scripts/convert.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Dec 9 14:55:43 2015 4 | 5 | This script is to convert the txt annotation files to appropriate format needed by YOLO 6 | 7 | @author: Guanghan Ning 8 | Email: gnxr9@mail.missouri.edu 9 | """ 10 | 11 | import os 12 | from os import walk, getcwd 13 | from PIL import Image 14 | 15 | classes = ["stopsign"] 16 | 17 | def convert(size, box): 18 | dw = 1./size[0] 19 | dh = 1./size[1] 20 | x = (box[0] + box[1])/2.0 21 | y = (box[2] + box[3])/2.0 22 | w = box[1] - box[0] 23 | h = box[3] - box[2] 24 | x = x*dw 25 | w = w*dw 26 | y = y*dh 27 | h = h*dh 28 | return (x,y,w,h) 29 | 30 | 31 | """-------------------------------------------------------------------""" 32 | 33 | """ Configure Paths""" 34 | mypath = "labels/stopsign_original/" 35 | outpath = "labels/stopsign/" 36 | 37 | cls = "stopsign" 38 | if cls not in classes: 39 | exit(0) 40 | cls_id = classes.index(cls) 41 | 42 | wd = getcwd() 43 | list_file = open('%s/%s_list.txt'%(wd, cls), 'w') 44 | 45 | """ Get input text file list """ 46 | txt_name_list = [] 47 | for (dirpath, dirnames, filenames) in walk(mypath): 48 | txt_name_list.extend(filenames) 49 | break 50 | print(txt_name_list) 51 | 52 | """ Process """ 53 | for txt_name in txt_name_list: 54 | # txt_file = open("Labels/stop_sign/001.txt", "r") 55 | 56 | """ Open input text files """ 57 | txt_path = mypath + txt_name 58 | print("Input:" + txt_path) 59 | txt_file = open(txt_path, "r") 60 | lines = txt_file.read().split('\r\n') #for ubuntu, use "\r\n" instead of "\n" 61 | 62 | """ Open output text files """ 63 | txt_outpath = outpath + txt_name 64 | print("Output:" + txt_outpath) 65 | txt_outfile = open(txt_outpath, "w") 66 | 67 | 68 | """ Convert the data to YOLO format """ 69 | ct = 0 70 | for line in lines: 71 | #print('lenth of line is: ') 72 | #print(len(line)) 73 | #print('\n') 74 | if(len(line) >= 2): 75 | ct = ct + 1 76 | print(line + "\n") 77 | elems = line.split(' ') 78 | print(elems) 79 | xmin = elems[0] 80 | xmax = elems[2] 81 | ymin = elems[1] 82 | ymax = elems[3] 83 | # 84 | img_path = str('%s/images/%s/%s.JPEG'%(wd, cls, os.path.splitext(txt_name)[0])) 85 | #t = magic.from_file(img_path) 86 | #wh= re.search('(\d+) x (\d+)', t).groups() 87 | im=Image.open(img_path) 88 | w= int(im.size[0]) 89 | h= int(im.size[1]) 90 | #w = int(xmax) - int(xmin) 91 | #h = int(ymax) - 
int(ymin) 92 | # print(xmin) 93 | print(w, h) 94 | b = (float(xmin), float(xmax), float(ymin), float(ymax)) 95 | bb = convert((w,h), b) 96 | print(bb) 97 | txt_outfile.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 98 | 99 | """ Save those images with bb into list""" 100 | if(ct != 0): 101 | list_file.write('%s/images/%s/%s.JPEG\n'%(wd, cls, os.path.splitext(txt_name)[0])) 102 | 103 | list_file.close() 104 | -------------------------------------------------------------------------------- /scripts/dice_label.sh: -------------------------------------------------------------------------------- 1 | mkdir -p images 2 | mkdir -p images/orig 3 | mkdir -p images/train 4 | mkdir -p images/val 5 | 6 | ffmpeg -i Face1.mp4 images/orig/face1_%6d.jpg 7 | ffmpeg -i Face2.mp4 images/orig/face2_%6d.jpg 8 | ffmpeg -i Face3.mp4 images/orig/face3_%6d.jpg 9 | ffmpeg -i Face4.mp4 images/orig/face4_%6d.jpg 10 | ffmpeg -i Face5.mp4 images/orig/face5_%6d.jpg 11 | ffmpeg -i Face6.mp4 images/orig/face6_%6d.jpg 12 | 13 | mogrify -resize 100x100^ -gravity center -crop 100x100+0+0 +repage images/orig/* 14 | 15 | ls images/orig/* | shuf | head -n 1000 | xargs mv -t images/val 16 | mv images/orig/* images/train 17 | 18 | find `pwd`/images/train -name \*.jpg > dice.train.list 19 | find `pwd`/images/val -name \*.jpg > dice.val.list 20 | 21 | -------------------------------------------------------------------------------- /scripts/imagenet_label.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p labelled 4 | wd=`pwd` 5 | 6 | for f in val/*.xml; 7 | do 8 | label=`grep -m1 "<name>" $f | grep -oP '<name>\K[^<]*'` 9 | im=`echo $f | sed 's/val/imgs/; s/xml/JPEG/'` 10 | out=`echo $im | sed 's/JPEG/'${label}'.JPEG/; s/imgs/labelled/'` 11 | ln -s ${wd}/$im ${wd}/$out 12 | done 13 | 14 | find ${wd}/labelled -name \*.JPEG > inet.val.list 15 | 16 | -------------------------------------------------------------------------------- /scripts/voc_label.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | import pickle 3 | import os 4 | from os import listdir, getcwd 5 | from os.path import join 6 | 7 | sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')] 8 | 9 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 10 | 11 | 12 | def convert(size, box): 13 | dw = 1./size[0] 14 | dh = 1./size[1] 15 | x = (box[0] + box[1])/2.0 16 | y = (box[2] + box[3])/2.0 17 | w = box[1] - box[0] 18 | h = box[3] - box[2] 19 | x = x*dw 20 | w = w*dw 21 | y = y*dh 22 | h = h*dh 23 | return (x,y,w,h) 24 | 25 | def convert_annotation(year, image_id): 26 | in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) 27 | out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') 28 | tree=ET.parse(in_file) 29 | root = tree.getroot() 30 | size = root.find('size') 31 | w = int(size.find('width').text) 32 | h = int(size.find('height').text) 33 | 34 | for obj in root.iter('object'): 35 | difficult = obj.find('difficult').text 36 | cls = obj.find('name').text 37 | if cls not in classes or int(difficult) == 1: 38 | continue 39 | cls_id = classes.index(cls) 40 | xmlbox = obj.find('bndbox') 41 | b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
float(xmlbox.find('ymax').text)) 42 | bb = convert((w,h), b) 43 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 44 | 45 | wd = getcwd() 46 | 47 | for year, image_set in sets: 48 | if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): 49 | os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) 50 | image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() 51 | list_file = open('%s_%s.txt'%(year, image_set), 'w') 52 | for image_id in image_ids: 53 | list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) 54 | convert_annotation(year, image_id) 55 | list_file.close() 56 | 57 | -------------------------------------------------------------------------------- /src/activation_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "activations.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __device__ float linear_activate_kernel(float x){return x;} 11 | __device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));} 12 | __device__ float relu_activate_kernel(float x){return x*(x>0);} 13 | __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} 14 | __device__ float relie_activate_kernel(float x){return x*(x>0);} 15 | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;} 16 | __device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;} 17 | __device__ float tanh_activate_kernel(float x){return (exp(2*x)-1)/(exp(2*x)+1);} 18 | __device__ float plse_activate_kernel(float x) 19 | { 20 | if(x < -4) return .01 * (x + 4); 21 | if(x > 4) return .01 * (x - 4) + 1; 22 | return .125*x + .5; 23 | } 24 | 25 | __device__ float linear_gradient_kernel(float x){return 1;} 26 | __device__ float logistic_gradient_kernel(float x){return (1-x)*x;} 27 | __device__ float relu_gradient_kernel(float x){return (x>0);} 28 | __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} 29 | __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;} 30 | __device__ float ramp_gradient_kernel(float x){return (x>0)+.1;} 31 | __device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;} 32 | __device__ float tanh_gradient_kernel(float x){return 1-x*x;} 33 | __device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01 : .125;} 34 | 35 | __device__ float activate_kernel(float x, ACTIVATION a) 36 | { 37 | switch(a){ 38 | case LINEAR: 39 | return linear_activate_kernel(x); 40 | case LOGISTIC: 41 | return logistic_activate_kernel(x); 42 | case RELU: 43 | return relu_activate_kernel(x); 44 | case ELU: 45 | return elu_activate_kernel(x); 46 | case RELIE: 47 | return relie_activate_kernel(x); 48 | case RAMP: 49 | return ramp_activate_kernel(x); 50 | case LEAKY: 51 | return leaky_activate_kernel(x); 52 | case TANH: 53 | return tanh_activate_kernel(x); 54 | case PLSE: 55 | return plse_activate_kernel(x); 56 | } 57 | return 0; 58 | } 59 | 60 | __device__ float gradient_kernel(float x, ACTIVATION a) 61 | { 62 | switch(a){ 63 | case LINEAR: 64 | return linear_gradient_kernel(x); 65 | case LOGISTIC: 66 | return logistic_gradient_kernel(x); 67 | case RELU: 68 | return relu_gradient_kernel(x); 69 | case ELU: 70 | return elu_gradient_kernel(x); 71 | case RELIE: 72 | return relie_gradient_kernel(x); 73 | case RAMP: 74 | return ramp_gradient_kernel(x); 75 | case LEAKY: 76 | return leaky_gradient_kernel(x); 77 | case TANH: 78 | return tanh_gradient_kernel(x); 79 | case PLSE: 80 | return plse_gradient_kernel(x); 81 | } 82 | return 0; 83 | } 84 | 85 | __global__ void activate_array_kernel(float *x, int n, ACTIVATION a) 86 | { 87 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 88 | if(i < n) x[i] = activate_kernel(x[i], a); 89 | } 90 | 91 | __global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) 92 | { 93 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 94 | if(i < n) delta[i] *= gradient_kernel(x[i], a); 95 | } 96 | 97 | extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a) 98 | { 99 | activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a); 100 | check_error(cudaPeekAtLastError()); 101 | } 102 | 103 | extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta) 104 | { 105 | gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a, delta); 106 | check_error(cudaPeekAtLastError()); 107 | } 108 | -------------------------------------------------------------------------------- /src/activations.c: -------------------------------------------------------------------------------- 1 | #include "activations.h" 2 | 3 | #include <math.h> 4 | #include <string.h> 5 | #include <stdio.h> 6 | #include <stdlib.h> 7 | 8 | char *get_activation_string(ACTIVATION a) 9 | { 10 | switch(a){ 11 | case LOGISTIC: 12 | return "logistic"; 13 | case RELU: 14 | return "relu"; 15 | case ELU: 16 | return "elu"; 17 | case RELIE: 18 | return "relie"; 19 | case RAMP: 20 | return "ramp"; 21 | case LINEAR: 22 | return "linear"; 23 | case TANH: 24 | return "tanh"; 25 | case PLSE: 26 | return "plse"; 27 | case LEAKY: 28 | return "leaky"; 29 | default: 30 | break; 31 | } 32 | return "relu"; 33 | } 34 | 35 | ACTIVATION get_activation(char *s) 36 | { 37 | if (strcmp(s, "logistic")==0) return LOGISTIC; 38 | if (strcmp(s, "relu")==0) return RELU; 39 | if (strcmp(s, "elu")==0) return ELU; 40 | if (strcmp(s, "relie")==0) return RELIE; 41 | if (strcmp(s, "plse")==0) return PLSE; 42 | if (strcmp(s, "linear")==0) return LINEAR; 43 | if (strcmp(s, "ramp")==0) return RAMP; 44 | if (strcmp(s, "leaky")==0) return LEAKY; 45 | if (strcmp(s, "tanh")==0) return TANH; 46 | fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); 47 | return RELU; 48 | } 49 | 50 | float activate(float x, ACTIVATION a) 51 | { 52 | switch(a){ 53 | case LINEAR: 54 | return linear_activate(x); 55 | case LOGISTIC: 56 | return logistic_activate(x); 57
| case RELU: 58 | return relu_activate(x); 59 | case ELU: 60 | return elu_activate(x); 61 | case RELIE: 62 | return relie_activate(x); 63 | case RAMP: 64 | return ramp_activate(x); 65 | case LEAKY: 66 | return leaky_activate(x); 67 | case TANH: 68 | return tanh_activate(x); 69 | case PLSE: 70 | return plse_activate(x); 71 | } 72 | return 0; 73 | } 74 | 75 | void activate_array(float *x, const int n, const ACTIVATION a) 76 | { 77 | int i; 78 | for(i = 0; i < n; ++i){ 79 | x[i] = activate(x[i], a); 80 | } 81 | } 82 | 83 | float gradient(float x, ACTIVATION a) 84 | { 85 | switch(a){ 86 | case LINEAR: 87 | return linear_gradient(x); 88 | case LOGISTIC: 89 | return logistic_gradient(x); 90 | case RELU: 91 | return relu_gradient(x); 92 | case ELU: 93 | return elu_gradient(x); 94 | case RELIE: 95 | return relie_gradient(x); 96 | case RAMP: 97 | return ramp_gradient(x); 98 | case LEAKY: 99 | return leaky_gradient(x); 100 | case TANH: 101 | return tanh_gradient(x); 102 | case PLSE: 103 | return plse_gradient(x); 104 | } 105 | return 0; 106 | } 107 | 108 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) 109 | { 110 | int i; 111 | for(i = 0; i < n; ++i){ 112 | delta[i] *= gradient(x[i], a); 113 | } 114 | } 115 | 116 | -------------------------------------------------------------------------------- /src/activations.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATIONS_H 2 | #define ACTIVATIONS_H 3 | #include "cuda.h" 4 | #include "math.h" 5 | 6 | typedef enum{ 7 | LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU 8 | }ACTIVATION; 9 | 10 | ACTIVATION get_activation(char *s); 11 | 12 | char *get_activation_string(ACTIVATION a); 13 | float activate(float x, ACTIVATION a); 14 | float gradient(float x, ACTIVATION a); 15 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); 16 | void activate_array(float *x, const int n, const ACTIVATION a); 17 | #ifdef GPU 18 | void activate_array_ongpu(float *x, int n, ACTIVATION a); 19 | void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta); 20 | #endif 21 | 22 | static inline float linear_activate(float x){return x;} 23 | static inline float logistic_activate(float x){return 1./(1. + exp(-x));} 24 | static inline float relu_activate(float x){return x*(x>0);} 25 | static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} 26 | static inline float relie_activate(float x){return x*(x>0);} 27 | static inline float ramp_activate(float x){return x*(x>0)+.1*x;} 28 | static inline float leaky_activate(float x){return (x>0) ? x : .1*x;} 29 | static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} 30 | static inline float plse_activate(float x) 31 | { 32 | if(x < -4) return .01 * (x + 4); 33 | if(x > 4) return .01 * (x - 4) + 1; 34 | return .125*x + .5; 35 | } 36 | 37 | static inline float linear_gradient(float x){return 1;} 38 | static inline float logistic_gradient(float x){return (1-x)*x;} 39 | static inline float relu_gradient(float x){return (x>0);} 40 | static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} 41 | static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} 42 | static inline float ramp_gradient(float x){return (x>0)+.1;} 43 | static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} 44 | static inline float tanh_gradient(float x){return 1-x*x;} 45 | static inline float plse_gradient(float x){return (x < 0 || x > 1) ? 
.01 : .125;} 46 | 47 | #endif 48 | 49 | -------------------------------------------------------------------------------- /src/avgpool_layer.c: -------------------------------------------------------------------------------- 1 | #include "avgpool_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) 6 | { 7 | fprintf(stderr, "Avgpool Layer: %d x %d x %d image\n", w,h,c); 8 | avgpool_layer l = {0}; 9 | l.type = AVGPOOL; 10 | l.batch = batch; 11 | l.h = h; 12 | l.w = w; 13 | l.c = c; 14 | l.out_w = 1; 15 | l.out_h = 1; 16 | l.out_c = c; 17 | l.outputs = l.out_c; 18 | l.inputs = h*w*c; 19 | int output_size = l.outputs * batch; 20 | l.output = calloc(output_size, sizeof(float)); 21 | l.delta = calloc(output_size, sizeof(float)); 22 | #ifdef GPU 23 | l.output_gpu = cuda_make_array(l.output, output_size); 24 | l.delta_gpu = cuda_make_array(l.delta, output_size); 25 | #endif 26 | return l; 27 | } 28 | 29 | void resize_avgpool_layer(avgpool_layer *l, int w, int h) 30 | { 31 | l->h = h; 32 | l->w = w; 33 | } 34 | 35 | void forward_avgpool_layer(const avgpool_layer l, network_state state) 36 | { 37 | int b,i,k; 38 | 39 | for(b = 0; b < l.batch; ++b){ 40 | for(k = 0; k < l.c; ++k){ 41 | int out_index = k + b*l.c; 42 | l.output[out_index] = 0; 43 | for(i = 0; i < l.h*l.w; ++i){ 44 | int in_index = i + l.h*l.w*(k + b*l.c); 45 | l.output[out_index] += state.input[in_index]; 46 | } 47 | l.output[out_index] /= l.h*l.w; 48 | } 49 | } 50 | } 51 | 52 | void backward_avgpool_layer(const avgpool_layer l, network_state state) 53 | { 54 | int b,i,k; 55 | 56 | for(b = 0; b < l.batch; ++b){ 57 | for(k = 0; k < l.c; ++k){ 58 | int out_index = k + b*l.c; 59 | for(i = 0; i < l.h*l.w; ++i){ 60 | int in_index = i + l.h*l.w*(k + b*l.c); 61 | state.delta[in_index] += l.delta[out_index] / (l.h*l.w); 62 | } 63 | } 64 | } 65 | } 66 | 67 | -------------------------------------------------------------------------------- /src/avgpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef AVGPOOL_LAYER_H 2 | #define AVGPOOL_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | typedef layer avgpool_layer; 10 | 11 | image get_avgpool_image(avgpool_layer l); 12 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); 13 | void resize_avgpool_layer(avgpool_layer *l, int w, int h); 14 | void forward_avgpool_layer(const avgpool_layer l, network_state state); 15 | void backward_avgpool_layer(const avgpool_layer l, network_state state); 16 | 17 | #ifdef GPU 18 | void forward_avgpool_layer_gpu(avgpool_layer l, network_state state); 19 | void backward_avgpool_layer_gpu(avgpool_layer l, network_state state); 20 | #endif 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /src/avgpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "avgpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) 11 | { 12 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 13 | if(id >= n) return; 14 | 15 | int k = id % c; 16 | id /= c; 17 | int b = id; 18 | 19 | int i; 20 | int out_index = (k + c*b); 21 | output[out_index] = 0; 22 | for(i = 0; i < 
w*h; ++i){ 23 | int in_index = i + h*w*(k + b*c); 24 | output[out_index] += input[in_index]; 25 | } 26 | output[out_index] /= w*h; 27 | } 28 | 29 | __global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) 30 | { 31 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 32 | if(id >= n) return; 33 | 34 | int k = id % c; 35 | id /= c; 36 | int b = id; 37 | 38 | int i; 39 | int out_index = (k + c*b); 40 | for(i = 0; i < w*h; ++i){ 41 | int in_index = i + h*w*(k + b*c); 42 | in_delta[in_index] += out_delta[out_index] / (w*h); 43 | } 44 | } 45 | 46 | extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network_state state) 47 | { 48 | size_t n = layer.c*layer.batch; 49 | 50 | forward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, state.input, layer.output_gpu); 51 | check_error(cudaPeekAtLastError()); 52 | } 53 | 54 | extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state state) 55 | { 56 | size_t n = layer.c*layer.batch; 57 | 58 | backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, state.delta, layer.delta_gpu); 59 | check_error(cudaPeekAtLastError()); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /src/blas.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | #include "math.h" 3 | 4 | void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2) 5 | { 6 | int i,j,k,b; 7 | for(b = 0; b < batch; ++b){ 8 | for(k = 0; k < c && k < c2; ++k){ 9 | for(j = 0; j < h/sample; ++j){ 10 | for(i = 0; i < w/sample; ++i){ 11 | int out_index = i*sample + w*(j*sample + h*(k + c*b)); 12 | int add_index = b*w*stride/sample*h*stride/sample*c2 + i*stride + w*stride/sample*(j*stride + h*stride/sample*k); 13 | out[out_index] += add[add_index]; 14 | } 15 | } 16 | } 17 | } 18 | } 19 | 20 | void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) 21 | { 22 | float scale = 1./(batch * spatial); 23 | int i,j,k; 24 | for(i = 0; i < filters; ++i){ 25 | mean[i] = 0; 26 | for(j = 0; j < batch; ++j){ 27 | for(k = 0; k < spatial; ++k){ 28 | int index = j*filters*spatial + i*spatial + k; 29 | mean[i] += x[index]; 30 | } 31 | } 32 | mean[i] *= scale; 33 | } 34 | } 35 | 36 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) 37 | { 38 | float scale = 1./(batch * spatial); 39 | int i,j,k; 40 | for(i = 0; i < filters; ++i){ 41 | variance[i] = 0; 42 | for(j = 0; j < batch; ++j){ 43 | for(k = 0; k < spatial; ++k){ 44 | int index = j*filters*spatial + i*spatial + k; 45 | variance[i] += pow((x[index] - mean[i]), 2); 46 | } 47 | } 48 | variance[i] *= scale; 49 | } 50 | } 51 | 52 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) 53 | { 54 | int b, f, i; 55 | for(b = 0; b < batch; ++b){ 56 | for(f = 0; f < filters; ++f){ 57 | for(i = 0; i < spatial; ++i){ 58 | int index = b*filters*spatial + f*spatial + i; 59 | x[index] = (x[index] - mean[f])/(sqrt(variance[f])); 60 | } 61 | } 62 | } 63 | } 64 | 65 | void const_cpu(int N, float ALPHA, float *X, int INCX) 66 | { 67 | int i; 68 | for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; 69 | } 70 | 71 | void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) 72 | { 73 | int i; 74 | for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; 75 | } 76 | 77 | void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) 78 | {
79 | int i; 80 | for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); 81 | } 82 | 83 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) 84 | { 85 | int i; 86 | for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; 87 | } 88 | 89 | void scal_cpu(int N, float ALPHA, float *X, int INCX) 90 | { 91 | int i; 92 | for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; 93 | } 94 | 95 | void fill_cpu(int N, float ALPHA, float *X, int INCX) 96 | { 97 | int i; 98 | for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; 99 | } 100 | 101 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) 102 | { 103 | int i; 104 | for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; 105 | } 106 | 107 | float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) 108 | { 109 | int i; 110 | float dot = 0; 111 | for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; 112 | return dot; 113 | } 114 | 115 | -------------------------------------------------------------------------------- /src/blas.h: -------------------------------------------------------------------------------- 1 | #ifndef BLAS_H 2 | #define BLAS_H 3 | void pm(int M, int N, float *A); 4 | float *random_matrix(int rows, int cols); 5 | void time_random_matrix(int TA, int TB, int m, int k, int n); 6 | 7 | void test_blas(); 8 | 9 | void const_cpu(int N, float ALPHA, float *X, int INCX); 10 | void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 11 | void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); 12 | 13 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 14 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); 15 | void scal_cpu(int N, float ALPHA, float *X, int INCX); 16 | void fill_cpu(int N, float ALPHA, float * X, int INCX); 17 | float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); 18 | void test_gpu_blas(); 19 | void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2); 20 | 21 | void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); 22 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); 23 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); 24 | 25 | #ifdef GPU 26 | void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); 27 | void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); 28 | void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY); 29 | void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); 30 | void scal_ongpu(int N, float ALPHA, float * X, int INCX); 31 | void mask_ongpu(int N, float * X, float mask_num, float * mask); 32 | void const_ongpu(int N, float ALPHA, float *X, int INCX); 33 | void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 34 | void mul_ongpu(int N, float *X, int INCX, float *Y, int INCY); 35 | void fill_ongpu(int N, float ALPHA, float * X, int INCX); 36 | 37 | void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); 38 | void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); 39 | void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); 40 | 41 | void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); 42 | 43 | void fast_mean_delta_gpu(float *delta, 
float *variance, int batch, int filters, int spatial, float *mean_delta); 44 | void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); 45 | 46 | void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); 47 | void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); 48 | void shortcut_gpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2); 49 | #endif 50 | #endif 51 | -------------------------------------------------------------------------------- /src/box.h: -------------------------------------------------------------------------------- 1 | #ifndef BOX_H 2 | #define BOX_H 3 | 4 | typedef struct{ 5 | float x, y, w, h; 6 | } box; 7 | 8 | typedef struct{ 9 | float dx, dy, dw, dh; 10 | } dbox; 11 | 12 | box float_to_box(float *f); 13 | float box_iou(box a, box b); 14 | float box_rmse(box a, box b); 15 | dbox diou(box a, box b); 16 | void do_nms(box *boxes, float **probs, int total, int classes, float thresh); 17 | void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh); 18 | box decode_box(box b, box anchor); 19 | box encode_box(box b, box anchor); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/coco_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "network.h" 7 | #include "detection_layer.h" 8 | #include "cost_layer.h" 9 | #include "utils.h" 10 | #include "parser.h" 11 | #include "box.h" 12 | #include "image.h" 13 | #include 14 | } 15 | 16 | #ifdef OPENCV 17 | #include "opencv2/highgui/highgui.hpp" 18 | #include "opencv2/imgproc/imgproc.hpp" 19 | extern "C" image ipl_to_image(IplImage* src); 20 | extern "C" void convert_coco_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); 21 | 22 | extern "C" char *coco_classes[]; 23 | extern "C" image coco_labels[]; 24 | 25 | static float **probs; 26 | static box *boxes; 27 | static network net; 28 | static image in ; 29 | static image in_s ; 30 | static image det ; 31 | static image det_s; 32 | static image disp ; 33 | static cv::VideoCapture cap; 34 | static float fps = 0; 35 | static float demo_thresh = 0; 36 | 37 | static const int frames = 3; 38 | static float *predictions[frames]; 39 | static int demo_index = 0; 40 | static image images[frames]; 41 | static float *avg; 42 | 43 | void *fetch_in_thread_coco(void *ptr) 44 | { 45 | cv::Mat frame_m; 46 | cap >> frame_m; 47 | IplImage frame = frame_m; 48 | in = ipl_to_image(&frame); 49 | rgbgr_image(in); 50 | in_s = resize_image(in, net.w, net.h); 51 | return 0; 52 | } 53 | 54 | void *detect_in_thread_coco(void *ptr) 55 | { 56 | float nms = .4; 57 | 58 | detection_layer l = net.layers[net.n-1]; 59 | float *X = det_s.data; 60 | float *prediction = network_predict(net, X); 61 | 62 | memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float)); 63 | mean_arrays(predictions, frames, l.outputs, avg); 64 | 65 | free_image(det_s); 66 | convert_coco_detections(avg, l.classes, l.n, l.sqrt, l.side, 1, 1, demo_thresh, probs, boxes, 0); 67 | if (nms > 0) do_nms(boxes, probs, l.side*l.side*l.n, l.classes, nms); 68 | printf("\033[2J"); 69 | printf("\033[1;1H"); 70 | 
printf("\nFPS:%.0f\n",fps); 71 | printf("Objects:\n\n"); 72 | 73 | images[demo_index] = det; 74 | det = images[(demo_index + frames/2 + 1)%frames]; 75 | demo_index = (demo_index + 1)%frames; 76 | 77 | draw_detections(det, l.side*l.side*l.n, demo_thresh, boxes, probs, coco_classes, coco_labels, 80); 78 | return 0; 79 | } 80 | 81 | extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename) 82 | { 83 | demo_thresh = thresh; 84 | printf("YOLO demo\n"); 85 | net = parse_network_cfg(cfgfile); 86 | if(weightfile){ 87 | load_weights(&net, weightfile); 88 | } 89 | set_batch_network(&net, 1); 90 | 91 | srand(2222222); 92 | 93 | if(filename){ 94 | cap.open(filename); 95 | }else{ 96 | cap.open(cam_index); 97 | } 98 | 99 | if(!cap.isOpened()) error("Couldn't connect to webcam.\n"); 100 | 101 | detection_layer l = net.layers[net.n-1]; 102 | int j; 103 | 104 | avg = (float *) calloc(l.outputs, sizeof(float)); 105 | for(j = 0; j < frames; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); 106 | for(j = 0; j < frames; ++j) images[j] = make_image(1,1,3); 107 | 108 | boxes = (box *)calloc(l.side*l.side*l.n, sizeof(box)); 109 | probs = (float **)calloc(l.side*l.side*l.n, sizeof(float *)); 110 | for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *)); 111 | 112 | pthread_t fetch_thread; 113 | pthread_t detect_thread; 114 | 115 | fetch_in_thread_coco(0); 116 | det = in; 117 | det_s = in_s; 118 | 119 | fetch_in_thread_coco(0); 120 | detect_in_thread_coco(0); 121 | disp = det; 122 | det = in; 123 | det_s = in_s; 124 | 125 | while(1){ 126 | struct timeval tval_before, tval_after, tval_result; 127 | gettimeofday(&tval_before, NULL); 128 | if(pthread_create(&fetch_thread, 0, fetch_in_thread_coco, 0)) error("Thread creation failed"); 129 | if(pthread_create(&detect_thread, 0, detect_in_thread_coco, 0)) error("Thread creation failed"); 130 | show_image(disp, "YOLO"); 131 | free_image(disp); 132 | cvWaitKey(1); 133 | pthread_join(fetch_thread, 0); 134 | pthread_join(detect_thread, 0); 135 | 136 | disp = det; 137 | det = in; 138 | det_s = in_s; 139 | 140 | gettimeofday(&tval_after, NULL); 141 | timersub(&tval_after, &tval_before, &tval_result); 142 | float curr = 1000000.f/((long int)tval_result.tv_usec); 143 | fps = .9*fps + .1*curr; 144 | } 145 | } 146 | #else 147 | extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh, int cam_index){ 148 | fprintf(stderr, "YOLO-COCO demo needs OpenCV for webcam images.\n"); 149 | } 150 | #endif 151 | 152 | -------------------------------------------------------------------------------- /src/col2im.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | void col2im_add_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad, float val) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || col < 0 || 10 | row >= height || col >= width) return; 11 | im[col + width*(row + height*channel)] += val; 12 | } 13 | //This one might be too, can't remember. 
14 | void col2im_cpu(float* data_col, 15 | int channels, int height, int width, 16 | int ksize, int stride, int pad, float* data_im) 17 | { 18 | int c,h,w; 19 | int height_col = (height - ksize) / stride + 1; 20 | int width_col = (width - ksize) / stride + 1; 21 | if (pad){ 22 | height_col = 1 + (height-1) / stride; 23 | width_col = 1 + (width-1) / stride; 24 | pad = ksize/2; 25 | } 26 | int channels_col = channels * ksize * ksize; 27 | for (c = 0; c < channels_col; ++c) { 28 | int w_offset = c % ksize; 29 | int h_offset = (c / ksize) % ksize; 30 | int c_im = c / ksize / ksize; 31 | for (h = 0; h < height_col; ++h) { 32 | for (w = 0; w < width_col; ++w) { 33 | int im_row = h_offset + h * stride; 34 | int im_col = w_offset + w * stride; 35 | int col_index = (c * height_col + h) * width_col + w; 36 | double val = data_col[col_index]; 37 | col2im_add_pixel(data_im, height, width, channels, 38 | im_row, im_col, c_im, pad, val); 39 | } 40 | } 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/col2im.h: -------------------------------------------------------------------------------- 1 | #ifndef COL2IM_H 2 | #define COL2IM_H 3 | 4 | void col2im_cpu(float* data_col, 5 | int channels, int height, int width, 6 | int ksize, int stride, int pad, float* data_im); 7 | 8 | #ifdef GPU 9 | void col2im_ongpu(float *data_col, 10 | int channels, int height, int width, 11 | int ksize, int stride, int pad, float *data_im); 12 | #endif 13 | #endif 14 | -------------------------------------------------------------------------------- /src/col2im_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "col2im.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void col2im_gpu_kernel(const int n, const float* data_col, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_im) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | float val = 0; 22 | int w = index % width + pad; 23 | int h = (index / width) % height + pad; 24 | int c = index / (width * height); 25 | // compute the start and end of the output 26 | int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; 27 | int w_col_end = min(w / stride + 1, width_col); 28 | int h_col_start = (h < ksize) ? 
0 : (h - ksize) / stride + 1; 29 | int h_col_end = min(h / stride + 1, height_col); 30 | // equivalent implementation 31 | int offset = 32 | (c * ksize * ksize + h * ksize + w) * height_col * width_col; 33 | int coeff_h_col = (1 - stride * ksize * height_col) * width_col; 34 | int coeff_w_col = (1 - stride * height_col * width_col); 35 | for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { 36 | for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { 37 | val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; 38 | } 39 | } 40 | data_im[index] += val; 41 | } 42 | } 43 | 44 | void col2im_ongpu(float *data_col, 45 | int channels, int height, int width, 46 | int ksize, int stride, int pad, float *data_im){ 47 | // We are going to launch channels * height_col * width_col kernels, each 48 | // kernel responsible for copying a single-channel grid. 49 | pad = pad ? ksize/2 : 0; 50 | int height_col = (height + 2 * pad - ksize) / stride + 1; 51 | int width_col = (width + 2 * pad - ksize) / stride + 1; 52 | int num_kernels = channels * height * width; 53 | col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 54 | BLOCK>>>( 55 | num_kernels, data_col, height, width, ksize, pad, 56 | stride, height_col, 57 | width_col, data_im); 58 | } 59 | 60 | -------------------------------------------------------------------------------- /src/connected_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONNECTED_LAYER_H 2 | #define CONNECTED_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | typedef layer connected_layer; 9 | 10 | connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation); 11 | 12 | void forward_connected_layer(connected_layer layer, network_state state); 13 | void backward_connected_layer(connected_layer layer, network_state state); 14 | void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay); 15 | 16 | #ifdef GPU 17 | void forward_connected_layer_gpu(connected_layer layer, network_state state); 18 | void backward_connected_layer_gpu(connected_layer layer, network_state state); 19 | void update_connected_layer_gpu(connected_layer layer, int batch, float learning_rate, float momentum, float decay); 20 | void push_connected_layer(connected_layer layer); 21 | void pull_connected_layer(connected_layer layer); 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/convolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONVOLUTIONAL_LAYER_H 2 | #define CONVOLUTIONAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "params.h" 6 | #include "image.h" 7 | #include "activations.h" 8 | #include "layer.h" 9 | #include "network.h" 10 | 11 | typedef layer convolutional_layer; 12 | 13 | #ifdef GPU 14 | void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state); 15 | void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state); 16 | void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay); 17 | 18 | void push_convolutional_layer(convolutional_layer layer); 19 | void pull_convolutional_layer(convolutional_layer layer); 20 | 21 | void add_bias_gpu(float *output, float *biases, int batch, int n, int size); 22 | void backward_bias_gpu(float *bias_updates, float *delta, 
int batch, int n, int size); 23 | #endif 24 | 25 | convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalization); 26 | void denormalize_convolutional_layer(convolutional_layer l); 27 | void resize_convolutional_layer(convolutional_layer *layer, int w, int h); 28 | void forward_convolutional_layer(const convolutional_layer layer, network_state state); 29 | void update_convolutional_layer(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay); 30 | image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_filters); 31 | 32 | void backward_convolutional_layer(convolutional_layer layer, network_state state); 33 | 34 | void add_bias(float *output, float *biases, int batch, int n, int size); 35 | void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); 36 | 37 | image get_convolutional_image(convolutional_layer layer); 38 | image get_convolutional_delta(convolutional_layer layer); 39 | image get_convolutional_filter(convolutional_layer layer, int i); 40 | 41 | int convolutional_out_height(convolutional_layer layer); 42 | int convolutional_out_width(convolutional_layer layer); 43 | void rescale_filters(convolutional_layer l, float scale, float trans); 44 | void rgbgr_filters(convolutional_layer l); 45 | 46 | #endif 47 | 48 | -------------------------------------------------------------------------------- /src/cost_layer.c: -------------------------------------------------------------------------------- 1 | #include "cost_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include "blas.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | COST_TYPE get_cost_type(char *s) 11 | { 12 | if (strcmp(s, "sse")==0) return SSE; 13 | if (strcmp(s, "masked")==0) return MASKED; 14 | fprintf(stderr, "Couldn't find activation function %s, going with SSE\n", s); 15 | return SSE; 16 | } 17 | 18 | char *get_cost_string(COST_TYPE a) 19 | { 20 | switch(a){ 21 | case SSE: 22 | return "sse"; 23 | case MASKED: 24 | return "masked"; 25 | } 26 | return "sse"; 27 | } 28 | 29 | cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) 30 | { 31 | fprintf(stderr, "Cost Layer: %d inputs\n", inputs); 32 | cost_layer l = {0}; 33 | l.type = COST; 34 | 35 | l.scale = scale; 36 | l.batch = batch; 37 | l.inputs = inputs; 38 | l.outputs = inputs; 39 | l.cost_type = cost_type; 40 | l.delta = calloc(inputs*batch, sizeof(float)); 41 | l.output = calloc(1, sizeof(float)); 42 | #ifdef GPU 43 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 44 | #endif 45 | return l; 46 | } 47 | 48 | void resize_cost_layer(cost_layer *l, int inputs) 49 | { 50 | l->inputs = inputs; 51 | l->outputs = inputs; 52 | l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); 53 | #ifdef GPU 54 | cuda_free(l->delta_gpu); 55 | l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); 56 | #endif 57 | } 58 | 59 | void forward_cost_layer(cost_layer l, network_state state) 60 | { 61 | if (!state.truth) return; 62 | if(l.cost_type == MASKED){ 63 | int i; 64 | for(i = 0; i < l.batch*l.inputs; ++i){ 65 | if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM; 66 | } 67 | } 68 | copy_cpu(l.batch*l.inputs, state.truth, 1, l.delta, 1); 69 | axpy_cpu(l.batch*l.inputs, -1, state.input, 1, l.delta, 1); 70 | *(l.output) = dot_cpu(l.batch*l.inputs, l.delta, 1, l.delta, 1); 71 | //printf("cost: %f\n", *l.output); 72 | } 73 | 74 | 
void backward_cost_layer(const cost_layer l, network_state state) 75 | { 76 | axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1); 77 | } 78 | 79 | #ifdef GPU 80 | 81 | void pull_cost_layer(cost_layer l) 82 | { 83 | cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); 84 | } 85 | 86 | void push_cost_layer(cost_layer l) 87 | { 88 | cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); 89 | } 90 | 91 | void forward_cost_layer_gpu(cost_layer l, network_state state) 92 | { 93 | if (!state.truth) return; 94 | if (l.cost_type == MASKED) { 95 | mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth); 96 | } 97 | 98 | copy_ongpu(l.batch*l.inputs, state.truth, 1, l.delta_gpu, 1); 99 | axpy_ongpu(l.batch*l.inputs, -1, state.input, 1, l.delta_gpu, 1); 100 | 101 | cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); 102 | *(l.output) = dot_cpu(l.batch*l.inputs, l.delta, 1, l.delta, 1); 103 | } 104 | 105 | void backward_cost_layer_gpu(const cost_layer l, network_state state) 106 | { 107 | axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1); 108 | } 109 | #endif 110 | 111 | -------------------------------------------------------------------------------- /src/cost_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef COST_LAYER_H 2 | #define COST_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | typedef layer cost_layer; 7 | 8 | COST_TYPE get_cost_type(char *s); 9 | char *get_cost_string(COST_TYPE a); 10 | cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); 11 | void forward_cost_layer(const cost_layer l, network_state state); 12 | void backward_cost_layer(const cost_layer l, network_state state); 13 | void resize_cost_layer(cost_layer *l, int inputs); 14 | 15 | #ifdef GPU 16 | void forward_cost_layer_gpu(cost_layer l, network_state state); 17 | void backward_cost_layer_gpu(const cost_layer l, network_state state); 18 | #endif 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/cpu_gemm.c: -------------------------------------------------------------------------------- 1 | #include "mini_blas.h" 2 | 3 | void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA, 4 | float *A, int lda, 5 | float *B, int ldb, 6 | float BETA, 7 | float *C, int ldc) 8 | { 9 | int i,j,k; 10 | for(i = 0; i < M; ++i){ 11 | for(k = 0; k < K; ++k){ 12 | register float A_PART = ALPHA*A[i*lda+k]; 13 | for(j = 0; j < N; ++j){ 14 | C[i*ldc+j] += A_PART*B[k*ldb+j]; 15 | } 16 | } 17 | } 18 | } 19 | 20 | void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA, 21 | float *A, int lda, 22 | float *B, int ldb, 23 | float BETA, 24 | float *C, int ldc) 25 | { 26 | int i,j,k; 27 | for(i = 0; i < M; ++i){ 28 | for(j = 0; j < N; ++j){ 29 | register float sum = 0; 30 | for(k = 0; k < K; ++k){ 31 | sum += ALPHA*A[i*lda+k]*B[k+j*ldb]; 32 | } 33 | C[i*ldc+j] += sum; 34 | } 35 | } 36 | } 37 | 38 | void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA, 39 | float *A, int lda, 40 | float *B, int ldb, 41 | float BETA, 42 | float *C, int ldc) 43 | { 44 | int i,j,k; 45 | for(i = 0; i < M; ++i){ 46 | for(k = 0; k < K; ++k){ 47 | register float A_PART = ALPHA*A[k*lda+i]; 48 | for(j = 0; j < N; ++j){ 49 | C[i*ldc+j] += A_PART*B[k*ldb+j]; 50 | } 51 | } 52 | } 53 | } 54 | void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA, 55 | float *A, int lda, 56 | float *B, int ldb, 57 | float BETA, 58 | float *C, int ldc) 59 | { 60 | 
int i,j,k; 61 | for(i = 0; i < M; ++i){ 62 | for(j = 0; j < N; ++j){ 63 | for(k = 0; k < K; ++k){ 64 | C[i*ldc+j] += ALPHA*A[i+k*lda]*B[k+j*ldb]; 65 | } 66 | } 67 | } 68 | } 69 | 70 | 71 | void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, 72 | float *A, int lda, 73 | float *B, int ldb, 74 | float BETA, 75 | float *C, int ldc) 76 | { 77 | int i, j; 78 | for(i = 0; i < M; ++i){ 79 | for(j = 0; j < N; ++j){ 80 | C[i*ldc + j] *= BETA; 81 | } 82 | } 83 | if(!TA && !TB) 84 | cpu_gemm_nn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); 85 | else if(TA && !TB) 86 | cpu_gemm_tn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); 87 | else if(!TA && TB) 88 | cpu_gemm_nt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); 89 | else 90 | cpu_gemm_tt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); 91 | } 92 | -------------------------------------------------------------------------------- /src/crop_layer.c: -------------------------------------------------------------------------------- 1 | #include "crop_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | image get_crop_image(crop_layer l) 6 | { 7 | int h = l.out_h; 8 | int w = l.out_w; 9 | int c = l.out_c; 10 | return float_to_image(w,h,c,l.output); 11 | } 12 | 13 | crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) 14 | { 15 | fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); 16 | crop_layer l = {0}; 17 | l.type = CROP; 18 | l.batch = batch; 19 | l.h = h; 20 | l.w = w; 21 | l.c = c; 22 | l.flip = flip; 23 | l.angle = angle; 24 | l.saturation = saturation; 25 | l.exposure = exposure; 26 | l.crop_width = crop_width; 27 | l.crop_height = crop_height; 28 | l.out_w = crop_width; 29 | l.out_h = crop_height; 30 | l.out_c = c; 31 | l.inputs = l.w * l.h * l.c; 32 | l.outputs = l.out_w * l.out_h * l.out_c; 33 | l.output = calloc(crop_width*crop_height * c*batch, sizeof(float)); 34 | #ifdef GPU 35 | l.output_gpu = cuda_make_array(l.output, crop_width*crop_height*c*batch); 36 | l.rand_gpu = cuda_make_array(0, l.batch*8); 37 | #endif 38 | return l; 39 | } 40 | 41 | void forward_crop_layer(const crop_layer l, network_state state) 42 | { 43 | int i,j,c,b,row,col; 44 | int index; 45 | int count = 0; 46 | int flip = (l.flip && rand()%2); 47 | int dh = rand()%(l.h - l.crop_height + 1); 48 | int dw = rand()%(l.w - l.crop_width + 1); 49 | float scale = 2; 50 | float trans = -1; 51 | if(l.noadjust){ 52 | scale = 1; 53 | trans = 0; 54 | } 55 | if(!state.train){ 56 | flip = 0; 57 | dh = (l.h - l.crop_height)/2; 58 | dw = (l.w - l.crop_width)/2; 59 | } 60 | for(b = 0; b < l.batch; ++b){ 61 | for(c = 0; c < l.c; ++c){ 62 | for(i = 0; i < l.crop_height; ++i){ 63 | for(j = 0; j < l.crop_width; ++j){ 64 | if(flip){ 65 | col = l.w - dw - j - 1; 66 | }else{ 67 | col = j + dw; 68 | } 69 | row = i + dh; 70 | index = col+l.w*(row+l.h*(c + l.c*b)); 71 | l.output[count++] = state.input[index]*scale + trans; 72 | } 73 | } 74 | } 75 | } 76 | } 77 | 78 | -------------------------------------------------------------------------------- /src/crop_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CROP_LAYER_H 2 | #define CROP_LAYER_H 3 | 4 | #include "image.h" 5 | #include "params.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | typedef layer crop_layer; 10 | 11 | image get_crop_image(crop_layer l); 12 | crop_layer make_crop_layer(int batch, int h, int w, int c, int 
crop_height, int crop_width, int flip, float angle, float saturation, float exposure); 13 | void forward_crop_layer(const crop_layer l, network_state state); 14 | 15 | #ifdef GPU 16 | void forward_crop_layer_gpu(crop_layer l, network_state state); 17 | #endif 18 | 19 | #endif 20 | 21 | -------------------------------------------------------------------------------- /src/cuda.c: -------------------------------------------------------------------------------- 1 | int gpu_index = 0; 2 | 3 | #ifdef GPU 4 | 5 | #include "cuda.h" 6 | #include "utils.h" 7 | #include "blas.h" 8 | #include "assert.h" 9 | #include 10 | #include 11 | 12 | 13 | void check_error(cudaError_t status) 14 | { 15 | cudaError_t status2 = cudaGetLastError(); 16 | if (status != cudaSuccess) 17 | { 18 | const char *s = cudaGetErrorString(status); 19 | char buffer[256]; 20 | printf("CUDA Error: %s\n", s); 21 | assert(0); 22 | snprintf(buffer, 256, "CUDA Error: %s", s); 23 | error(buffer); 24 | } 25 | if (status2 != cudaSuccess) 26 | { 27 | const char *s = cudaGetErrorString(status); 28 | char buffer[256]; 29 | printf("CUDA Error Prev: %s\n", s); 30 | assert(0); 31 | snprintf(buffer, 256, "CUDA Error Prev: %s", s); 32 | error(buffer); 33 | } 34 | } 35 | 36 | dim3 cuda_gridsize(size_t n){ 37 | size_t k = (n-1) / BLOCK + 1; 38 | size_t x = k; 39 | size_t y = 1; 40 | if(x > 65535){ 41 | x = ceil(sqrt(k)); 42 | y = (n-1)/(x*BLOCK) + 1; 43 | } 44 | dim3 d = {x, y, 1}; 45 | //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 46 | return d; 47 | } 48 | 49 | cublasHandle_t blas_handle() 50 | { 51 | static int init = 0; 52 | static cublasHandle_t handle; 53 | if(!init) { 54 | cublasCreate(&handle); 55 | init = 1; 56 | } 57 | return handle; 58 | } 59 | 60 | float *cuda_make_array(float *x, int n) 61 | { 62 | float *x_gpu; 63 | size_t size = sizeof(float)*n; 64 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 65 | check_error(status); 66 | if(x){ 67 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 68 | check_error(status); 69 | } 70 | if(!x_gpu) error("Cuda malloc failed\n"); 71 | return x_gpu; 72 | } 73 | 74 | void cuda_random(float *x_gpu, int n) 75 | { 76 | static curandGenerator_t gen; 77 | static int init = 0; 78 | if(!init){ 79 | curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); 80 | curandSetPseudoRandomGeneratorSeed(gen, time(0)); 81 | init = 1; 82 | } 83 | curandGenerateUniform(gen, x_gpu, n); 84 | check_error(cudaPeekAtLastError()); 85 | } 86 | 87 | float cuda_compare(float *x_gpu, float *x, int n, char *s) 88 | { 89 | float *tmp = calloc(n, sizeof(float)); 90 | cuda_pull_array(x_gpu, tmp, n); 91 | //int i; 92 | //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); 93 | axpy_cpu(n, -1, x, 1, tmp, 1); 94 | float err = dot_cpu(n, tmp, 1, tmp, 1); 95 | printf("Error %s: %f\n", s, sqrt(err/n)); 96 | free(tmp); 97 | return err; 98 | } 99 | 100 | int *cuda_make_int_array(int n) 101 | { 102 | int *x_gpu; 103 | size_t size = sizeof(int)*n; 104 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 105 | check_error(status); 106 | return x_gpu; 107 | } 108 | 109 | void cuda_free(float *x_gpu) 110 | { 111 | cudaError_t status = cudaFree(x_gpu); 112 | check_error(status); 113 | } 114 | 115 | void cuda_push_array(float *x_gpu, float *x, int n) 116 | { 117 | size_t size = sizeof(float)*n; 118 | cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 119 | check_error(status); 120 | } 121 | 122 | void cuda_pull_array(float *x_gpu, float *x, int n) 123 | { 124 | size_t size = sizeof(float)*n; 
125 | cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); 126 | check_error(status); 127 | } 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /src/cuda.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDA_H 2 | #define CUDA_H 3 | 4 | extern int gpu_index; 5 | 6 | #ifdef GPU 7 | 8 | #define BLOCK 512 9 | 10 | #include "cuda_runtime.h" 11 | #include "curand.h" 12 | #include "cublas_v2.h" 13 | 14 | void check_error(cudaError_t status); 15 | cublasHandle_t blas_handle(); 16 | float *cuda_make_array(float *x, int n); 17 | int *cuda_make_int_array(int n); 18 | void cuda_push_array(float *x_gpu, float *x, int n); 19 | void cuda_pull_array(float *x_gpu, float *x, int n); 20 | void cuda_free(float *x_gpu); 21 | void cuda_random(float *x_gpu, int n); 22 | float cuda_compare(float *x_gpu, float *x, int n, char *s); 23 | dim3 cuda_gridsize(size_t n); 24 | 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /src/data.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_H 2 | #define DATA_H 3 | #include 4 | 5 | #include "matrix.h" 6 | #include "list.h" 7 | #include "image.h" 8 | 9 | extern unsigned int data_seed; 10 | 11 | static inline float distance_from_edge(int x, int max) 12 | { 13 | int dx = (max/2) - x; 14 | if (dx < 0) dx = -dx; 15 | dx = (max/2) + 1 - dx; 16 | dx *= 2; 17 | float dist = (float)dx/max; 18 | if (dist > 1) dist = 1; 19 | return dist; 20 | } 21 | 22 | typedef struct{ 23 | int w, h; 24 | matrix X; 25 | matrix y; 26 | int shallow; 27 | } data; 28 | 29 | typedef enum { 30 | CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA 31 | } data_type; 32 | 33 | typedef struct load_args{ 34 | char **paths; 35 | char *path; 36 | int n; 37 | int m; 38 | char **labels; 39 | int h; 40 | int w; 41 | int out_w; 42 | int out_h; 43 | int nh; 44 | int nw; 45 | int num_boxes; 46 | int classes; 47 | int background; 48 | float jitter; 49 | data *d; 50 | image *im; 51 | image *resized; 52 | data_type type; 53 | } load_args; 54 | 55 | typedef struct{ 56 | int id; 57 | float x,y,w,h; 58 | float left, right, top, bottom; 59 | } box_label; 60 | 61 | void free_data(data d); 62 | 63 | pthread_t load_data_in_thread(load_args args); 64 | 65 | void print_letters(float *pred, int n); 66 | data load_data_captcha(char **paths, int n, int m, int k, int w, int h); 67 | data load_data_captcha_encode(char **paths, int n, int m, int w, int h); 68 | data load_data(char **paths, int n, int m, char **labels, int k, int w, int h); 69 | data load_data_detection(int n, char **paths, int m, int classes, int w, int h, int num_boxes, int background); 70 | 71 | box_label *read_boxes(char *filename, int *n); 72 | data load_cifar10_data(char *filename); 73 | data load_all_cifar10(); 74 | 75 | data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); 76 | 77 | list *get_paths(char *filename); 78 | char **get_labels(char *filename); 79 | void get_random_batch(data d, int n, float *X, float *y); 80 | void get_next_batch(data d, int n, int offset, float *X, float *y); 81 | data load_categorical_data_csv(char *filename, int target, int k); 82 | void normalize_data_rows(data d); 83 | void scale_data_rows(data d, float s); 84 | void translate_data_rows(data d, float s); 85 | void randomize_data(data d); 86 | data 
*split_data(data d, int part, int total); 87 | data concat_data(data d1, data d2); 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /src/deconvolutional_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "convolutional_layer.h" 7 | #include "deconvolutional_layer.h" 8 | #include "gemm.h" 9 | #include "blas.h" 10 | #include "im2col.h" 11 | #include "col2im.h" 12 | #include "utils.h" 13 | #include "cuda.h" 14 | } 15 | 16 | extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) 17 | { 18 | int i; 19 | int out_h = deconvolutional_out_height(layer); 20 | int out_w = deconvolutional_out_width(layer); 21 | int size = out_h*out_w; 22 | 23 | int m = layer.size*layer.size*layer.n; 24 | int n = layer.h*layer.w; 25 | int k = layer.c; 26 | 27 | fill_ongpu(layer.outputs*layer.batch, 0, layer.output_gpu, 1); 28 | 29 | for(i = 0; i < layer.batch; ++i){ 30 | float *a = layer.filters_gpu; 31 | float *b = state.input + i*layer.c*layer.h*layer.w; 32 | float *c = layer.col_image_gpu; 33 | 34 | gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n); 35 | 36 | col2im_ongpu(c, layer.n, out_h, out_w, layer.size, layer.stride, 0, layer.output_gpu+i*layer.n*size); 37 | } 38 | add_bias_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, size); 39 | activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation); 40 | } 41 | 42 | extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) 43 | { 44 | float alpha = 1./layer.batch; 45 | int out_h = deconvolutional_out_height(layer); 46 | int out_w = deconvolutional_out_width(layer); 47 | int size = out_h*out_w; 48 | int i; 49 | 50 | gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu); 51 | backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size); 52 | 53 | if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); 54 | 55 | for(i = 0; i < layer.batch; ++i){ 56 | int m = layer.c; 57 | int n = layer.size*layer.size*layer.n; 58 | int k = layer.h*layer.w; 59 | 60 | float *a = state.input + i*m*n; 61 | float *b = layer.col_image_gpu; 62 | float *c = layer.filter_updates_gpu; 63 | 64 | im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w, 65 | layer.size, layer.stride, 0, b); 66 | gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n); 67 | 68 | if(state.delta){ 69 | int m = layer.c; 70 | int n = layer.h*layer.w; 71 | int k = layer.size*layer.size*layer.n; 72 | 73 | float *a = layer.filters_gpu; 74 | float *b = layer.col_image_gpu; 75 | float *c = state.delta + i*n*m; 76 | 77 | gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); 78 | } 79 | } 80 | } 81 | 82 | extern "C" void pull_deconvolutional_layer(deconvolutional_layer layer) 83 | { 84 | cuda_pull_array(layer.filters_gpu, layer.filters, layer.c*layer.n*layer.size*layer.size); 85 | cuda_pull_array(layer.biases_gpu, layer.biases, layer.n); 86 | cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, layer.c*layer.n*layer.size*layer.size); 87 | cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); 88 | } 89 | 90 | extern "C" void push_deconvolutional_layer(deconvolutional_layer layer) 91 | { 92 | cuda_push_array(layer.filters_gpu, layer.filters, layer.c*layer.n*layer.size*layer.size); 93 | cuda_push_array(layer.biases_gpu, 
layer.biases, layer.n); 94 | cuda_push_array(layer.filter_updates_gpu, layer.filter_updates, layer.c*layer.n*layer.size*layer.size); 95 | cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); 96 | } 97 | 98 | extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay) 99 | { 100 | int size = layer.size*layer.size*layer.c*layer.n; 101 | 102 | axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); 103 | scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1); 104 | 105 | axpy_ongpu(size, -decay, layer.filters_gpu, 1, layer.filter_updates_gpu, 1); 106 | axpy_ongpu(size, learning_rate, layer.filter_updates_gpu, 1, layer.filters_gpu, 1); 107 | scal_ongpu(size, momentum, layer.filter_updates_gpu, 1); 108 | } 109 | 110 | -------------------------------------------------------------------------------- /src/deconvolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DECONVOLUTIONAL_LAYER_H 2 | #define DECONVOLUTIONAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "params.h" 6 | #include "image.h" 7 | #include "activations.h" 8 | #include "layer.h" 9 | #include "network.h" 10 | 11 | typedef layer deconvolutional_layer; 12 | 13 | #ifdef GPU 14 | void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); 15 | void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); 16 | void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay); 17 | void push_deconvolutional_layer(deconvolutional_layer layer); 18 | void pull_deconvolutional_layer(deconvolutional_layer layer); 19 | #endif 20 | 21 | deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation); 22 | void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w); 23 | void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state); 24 | void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay); 25 | void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state); 26 | 27 | image get_deconvolutional_image(deconvolutional_layer layer); 28 | image get_deconvolutional_delta(deconvolutional_layer layer); 29 | image get_deconvolutional_filter(deconvolutional_layer layer, int i); 30 | 31 | int deconvolutional_out_height(deconvolutional_layer layer); 32 | int deconvolutional_out_width(deconvolutional_layer layer); 33 | 34 | #endif 35 | 36 | -------------------------------------------------------------------------------- /src/detection_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REGION_LAYER_H 2 | #define REGION_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer detection_layer; 8 | 9 | detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); 10 | void forward_detection_layer(const detection_layer l, network_state state); 11 | void backward_detection_layer(const detection_layer l, network_state state); 12 | 13 | #ifdef GPU 14 | void forward_detection_layer_gpu(const detection_layer l, network_state state); 15 | void backward_detection_layer_gpu(detection_layer l, network_state state); 16 | #endif 17 | 18 | #endif 19 | 
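A minimal usage sketch (not part of the repository) showing how a detection head could be built from the make_detection_layer() declaration above. The grid side of 7, 2 boxes per cell, 20 classes and 4 coordinates are assumed YOLO-style placeholder values, and make_yolo_head is a hypothetical helper; only the function signature itself comes from detection_layer.h.

#include "detection_layer.h"

/* Hypothetical helper: builds a detection layer for a 7x7 grid with 2 boxes
 * per cell and 20 classes (assumed values, not taken from this repo). */
detection_layer make_yolo_head(int batch)
{
    int side = 7, n = 2, classes = 20, coords = 4, rescore = 1;
    /* assumed input size: one confidence + coords per box plus class scores per cell */
    int inputs = side*side*(n*(coords + 1) + classes);
    return make_detection_layer(batch, inputs, n, side, classes, coords, rescore);
}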
-------------------------------------------------------------------------------- /src/dice.c: -------------------------------------------------------------------------------- 1 | #include "network.h" 2 | #include "utils.h" 3 | #include "parser.h" 4 | 5 | char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; 6 | 7 | void train_dice(char *cfgfile, char *weightfile) 8 | { 9 | data_seed = time(0); 10 | srand(time(0)); 11 | float avg_loss = -1; 12 | char *base = basecfg(cfgfile); 13 | char *backup_directory = "/home/pjreddie/backup/"; 14 | printf("%s\n", base); 15 | network net = parse_network_cfg(cfgfile); 16 | if(weightfile){ 17 | load_weights(&net, weightfile); 18 | } 19 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); 20 | int imgs = 1024; 21 | int i = *net.seen/imgs; 22 | char **labels = dice_labels; 23 | list *plist = get_paths("data/dice/dice.train.list"); 24 | char **paths = (char **)list_to_array(plist); 25 | printf("%d\n", plist->size); 26 | clock_t time; 27 | while(1){ 28 | ++i; 29 | time=clock(); 30 | data train = load_data(paths, imgs, plist->size, labels, 6, net.w, net.h); 31 | printf("Loaded: %lf seconds\n", sec(clock()-time)); 32 | 33 | time=clock(); 34 | float loss = train_network(net, train); 35 | if(avg_loss == -1) avg_loss = loss; 36 | avg_loss = avg_loss*.9 + loss*.1; 37 | printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); 38 | free_data(train); 39 | if((i % 100) == 0) net.learning_rate *= .1; 40 | if(i%100==0){ 41 | char buff[256]; 42 | sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); 43 | save_weights(net, buff); 44 | } 45 | } 46 | } 47 | 48 | void validate_dice(char *filename, char *weightfile) 49 | { 50 | network net = parse_network_cfg(filename); 51 | if(weightfile){ 52 | load_weights(&net, weightfile); 53 | } 54 | srand(time(0)); 55 | 56 | char **labels = dice_labels; 57 | list *plist = get_paths("data/dice/dice.val.list"); 58 | 59 | char **paths = (char **)list_to_array(plist); 60 | int m = plist->size; 61 | free_list(plist); 62 | 63 | data val = load_data(paths, m, 0, labels, 6, net.w, net.h); 64 | float *acc = network_accuracies(net, val, 2); 65 | printf("Validation Accuracy: %f, %d images\n", acc[0], m); 66 | free_data(val); 67 | } 68 | 69 | void test_dice(char *cfgfile, char *weightfile, char *filename) 70 | { 71 | network net = parse_network_cfg(cfgfile); 72 | if(weightfile){ 73 | load_weights(&net, weightfile); 74 | } 75 | set_batch_network(&net, 1); 76 | srand(2222222); 77 | int i = 0; 78 | char **names = dice_labels; 79 | char buff[256]; 80 | char *input = buff; 81 | int indexes[6]; 82 | while(1){ 83 | if(filename){ 84 | strncpy(input, filename, 256); 85 | }else{ 86 | printf("Enter Image Path: "); 87 | fflush(stdout); 88 | input = fgets(input, 256, stdin); 89 | if(!input) return; 90 | strtok(input, "\n"); 91 | } 92 | image im = load_image_color(input, net.w, net.h); 93 | float *X = im.data; 94 | float *predictions = network_predict(net, X); 95 | top_predictions(net, 6, indexes); 96 | for(i = 0; i < 6; ++i){ 97 | int index = indexes[i]; 98 | printf("%s: %f\n", names[index], predictions[index]); 99 | } 100 | free_image(im); 101 | if (filename) break; 102 | } 103 | } 104 | 105 | void run_dice(int argc, char **argv) 106 | { 107 | if(argc < 4){ 108 | fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); 109 | return; 110 | } 111 | 112 | char *cfg = argv[3]; 113 | char *weights = (argc > 4) 
? argv[4] : 0; 114 | char *filename = (argc > 5) ? argv[5]: 0; 115 | if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); 116 | else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); 117 | else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); 118 | } 119 | 120 | -------------------------------------------------------------------------------- /src/dropout_layer.c: -------------------------------------------------------------------------------- 1 | #include "dropout_layer.h" 2 | #include "params.h" 3 | #include "utils.h" 4 | #include "cuda.h" 5 | #include 6 | #include 7 | 8 | dropout_layer make_dropout_layer(int batch, int inputs, float probability) 9 | { 10 | fprintf(stderr, "Dropout Layer: %d inputs, %f probability\n", inputs, probability); 11 | dropout_layer l = {0}; 12 | l.type = DROPOUT; 13 | l.probability = probability; 14 | l.inputs = inputs; 15 | l.outputs = inputs; 16 | l.batch = batch; 17 | l.rand = calloc(inputs*batch, sizeof(float)); 18 | l.scale = 1./(1.-probability); 19 | #ifdef GPU 20 | l.rand_gpu = cuda_make_array(l.rand, inputs*batch); 21 | #endif 22 | return l; 23 | } 24 | 25 | void resize_dropout_layer(dropout_layer *l, int inputs) 26 | { 27 | l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); 28 | #ifdef GPU 29 | cuda_free(l->rand_gpu); 30 | 31 | l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); 32 | #endif 33 | } 34 | 35 | void forward_dropout_layer(dropout_layer l, network_state state) 36 | { 37 | int i; 38 | if (!state.train) return; 39 | for(i = 0; i < l.batch * l.inputs; ++i){ 40 | float r = rand_uniform(); 41 | l.rand[i] = r; 42 | if(r < l.probability) state.input[i] = 0; 43 | else state.input[i] *= l.scale; 44 | } 45 | } 46 | 47 | void backward_dropout_layer(dropout_layer l, network_state state) 48 | { 49 | int i; 50 | if(!state.delta) return; 51 | for(i = 0; i < l.batch * l.inputs; ++i){ 52 | float r = l.rand[i]; 53 | if(r < l.probability) state.delta[i] = 0; 54 | else state.delta[i] *= l.scale; 55 | } 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/dropout_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DROPOUT_LAYER_H 2 | #define DROPOUT_LAYER_H 3 | 4 | #include "params.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | typedef layer dropout_layer; 9 | 10 | dropout_layer make_dropout_layer(int batch, int inputs, float probability); 11 | 12 | void forward_dropout_layer(dropout_layer l, network_state state); 13 | void backward_dropout_layer(dropout_layer l, network_state state); 14 | void resize_dropout_layer(dropout_layer *l, int inputs); 15 | 16 | #ifdef GPU 17 | void forward_dropout_layer_gpu(dropout_layer l, network_state state); 18 | void backward_dropout_layer_gpu(dropout_layer l, network_state state); 19 | 20 | #endif 21 | #endif 22 | -------------------------------------------------------------------------------- /src/dropout_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "dropout_layer.h" 7 | #include "cuda.h" 8 | #include "utils.h" 9 | #include "params.h" 10 | } 11 | 12 | __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) 13 | { 14 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 15 | if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; 16 | } 17 | 18 | void forward_dropout_layer_gpu(dropout_layer layer, network_state state) 19 | { 20 | if (!state.train) return; 21 | int size = layer.inputs*layer.batch; 22 | cuda_random(layer.rand_gpu, size); 23 | /* 24 | int i; 25 | for(i = 0; i < size; ++i){ 26 | layer.rand[i] = rand_uniform(); 27 | } 28 | cuda_push_array(layer.rand_gpu, layer.rand, size); 29 | */ 30 | 31 | yoloswag420blazeit360noscope<<>>(state.input, size, layer.rand_gpu, layer.probability, layer.scale); 32 | check_error(cudaPeekAtLastError()); 33 | } 34 | 35 | void backward_dropout_layer_gpu(dropout_layer layer, network_state state) 36 | { 37 | if(!state.delta) return; 38 | int size = layer.inputs*layer.batch; 39 | 40 | yoloswag420blazeit360noscope<<>>(state.delta, size, layer.rand_gpu, layer.probability, layer.scale); 41 | check_error(cudaPeekAtLastError()); 42 | } 43 | -------------------------------------------------------------------------------- /src/gemm.h: -------------------------------------------------------------------------------- 1 | #ifndef GEMM_H 2 | #define GEMM_H 3 | 4 | void gemm(int TA, int TB, int M, int N, int K, float ALPHA, 5 | float *A, int lda, 6 | float *B, int ldb, 7 | float BETA, 8 | float *C, int ldc); 9 | 10 | void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, 11 | float *A, int lda, 12 | float *B, int ldb, 13 | float BETA, 14 | float *C, int ldc); 15 | 16 | #ifdef GPU 17 | void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA, 18 | float *A_gpu, int lda, 19 | float *B_gpu, int ldb, 20 | float BETA, 21 | float *C_gpu, int ldc); 22 | 23 | void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, 24 | float *A, int lda, 25 | float *B, int ldb, 26 | float BETA, 27 | float *C, int ldc); 28 | #endif 29 | #endif 30 | -------------------------------------------------------------------------------- /src/im2col.c: -------------------------------------------------------------------------------- 1 | #include "im2col.h" 2 | #include 3 | float im2col_get_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || col < 0 || 10 | row >= height || col >= width) return 0; 11 | return im[col + width*(row + height*channel)]; 12 | } 13 | 14 | //From Berkeley Vision's Caffe! 
15 | //https://github.com/BVLC/caffe/blob/master/LICENSE 16 | void im2col_cpu(float* data_im, 17 | int channels, int height, int width, 18 | int ksize, int stride, int pad, float* data_col) 19 | { 20 | int c,h,w; 21 | int height_col = (height - ksize) / stride + 1; 22 | int width_col = (width - ksize) / stride + 1; 23 | if (pad){ 24 | height_col = 1 + (height-1) / stride; 25 | width_col = 1 + (width-1) / stride; 26 | pad = ksize/2; 27 | } 28 | int channels_col = channels * ksize * ksize; 29 | for (c = 0; c < channels_col; ++c) { 30 | int w_offset = c % ksize; 31 | int h_offset = (c / ksize) % ksize; 32 | int c_im = c / ksize / ksize; 33 | for (h = 0; h < height_col; ++h) { 34 | for (w = 0; w < width_col; ++w) { 35 | int im_row = h_offset + h * stride; 36 | int im_col = w_offset + w * stride; 37 | int col_index = (c * height_col + h) * width_col + w; 38 | data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, 39 | im_row, im_col, c_im, pad); 40 | } 41 | } 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /src/im2col.h: -------------------------------------------------------------------------------- 1 | #ifndef IM2COL_H 2 | #define IM2COL_H 3 | 4 | void im2col_cpu(float* data_im, 5 | int channels, int height, int width, 6 | int ksize, int stride, int pad, float* data_col); 7 | 8 | #ifdef GPU 9 | 10 | void im2col_ongpu(float *im, 11 | int channels, int height, int width, 12 | int ksize, int stride, int pad,float *data_col); 13 | 14 | #endif 15 | #endif 16 | -------------------------------------------------------------------------------- /src/im2col_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "im2col.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void im2col_gpu_kernel(const int n, const float* data_im, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_col) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | int w_out = index % width_col; 22 | int h_index = index / width_col; 23 | int h_out = h_index % height_col; 24 | int channel_in = h_index / height_col; 25 | int channel_out = channel_in * ksize * ksize; 26 | int h_in = h_out * stride - pad; 27 | int w_in = w_out * stride - pad; 28 | float* data_col_ptr = data_col; 29 | data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; 30 | const float* data_im_ptr = data_im; 31 | data_im_ptr += (channel_in * height + h_in) * width + w_in; 32 | for (int i = 0; i < ksize; ++i) { 33 | for (int j = 0; j < ksize; ++j) { 34 | int h = h_in + i; 35 | int w = w_in + j; 36 | *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? 37 | data_im_ptr[i * width + j] : 0; 38 | data_col_ptr += height_col * width_col; 39 | } 40 | } 41 | } 42 | } 43 | 44 | void im2col_ongpu(float *im, 45 | int channels, int height, int width, 46 | int ksize, int stride, int pad, float *data_col){ 47 | // We are going to launch channels * height_col * width_col kernels, each 48 | // kernel responsible for copying a single-channel grid. 49 | pad = pad ? 
ksize/2 : 0; 50 | int height_col = (height + 2 * pad - ksize) / stride + 1; 51 | int width_col = (width + 2 * pad - ksize) / stride + 1; 52 | int num_kernels = channels * height_col * width_col; 53 | im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 54 | BLOCK>>>( 55 | num_kernels, im, height, width, ksize, pad, 56 | stride, height_col, 57 | width_col, data_col); 58 | } 59 | /* 60 | __global__ void im2col_pad_kernel(float *im, 61 | int channels, int height, int width, 62 | int ksize, int stride, float *data_col) 63 | { 64 | int c,h,w; 65 | int height_col = 1 + (height-1) / stride; 66 | int width_col = 1 + (width-1) / stride; 67 | int channels_col = channels * ksize * ksize; 68 | 69 | int pad = ksize/2; 70 | 71 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 72 | int col_size = height_col*width_col*channels_col; 73 | if (id >= col_size) return; 74 | 75 | int col_index = id; 76 | w = id % width_col; 77 | id /= width_col; 78 | h = id % height_col; 79 | id /= height_col; 80 | c = id % channels_col; 81 | id /= channels_col; 82 | 83 | int w_offset = c % ksize; 84 | int h_offset = (c / ksize) % ksize; 85 | int im_channel = c / ksize / ksize; 86 | int im_row = h_offset + h * stride - pad; 87 | int im_col = w_offset + w * stride - pad; 88 | 89 | int im_index = im_col + width*(im_row + height*im_channel); 90 | float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index]; 91 | 92 | data_col[col_index] = val; 93 | } 94 | 95 | __global__ void im2col_nopad_kernel(float *im, 96 | int channels, int height, int width, 97 | int ksize, int stride, float *data_col) 98 | { 99 | int c,h,w; 100 | int height_col = (height - ksize) / stride + 1; 101 | int width_col = (width - ksize) / stride + 1; 102 | int channels_col = channels * ksize * ksize; 103 | 104 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 105 | int col_size = height_col*width_col*channels_col; 106 | if (id >= col_size) return; 107 | 108 | int col_index = id; 109 | w = id % width_col; 110 | id /= width_col; 111 | h = id % height_col; 112 | id /= height_col; 113 | c = id % channels_col; 114 | id /= channels_col; 115 | 116 | int w_offset = c % ksize; 117 | int h_offset = (c / ksize) % ksize; 118 | int im_channel = c / ksize / ksize; 119 | int im_row = h_offset + h * stride; 120 | int im_col = w_offset + w * stride; 121 | 122 | int im_index = im_col + width*(im_row + height*im_channel); 123 | float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 
0 : im[im_index]; 124 | 125 | data_col[col_index] = val; 126 | } 127 | 128 | extern "C" void im2col_ongpu(float *im, 129 | int channels, int height, int width, 130 | int ksize, int stride, int pad, float *data_col) 131 | { 132 | 133 | int height_col = (height - ksize) / stride + 1; 134 | int width_col = (width - ksize) / stride + 1; 135 | int channels_col = channels * ksize * ksize; 136 | 137 | if (pad){ 138 | height_col = 1 + (height-1) / stride; 139 | width_col = 1 + (width-1) / stride; 140 | } 141 | 142 | size_t n = channels_col*height_col*width_col; 143 | 144 | if(pad)im2col_pad_kernel<<>>(im, channels, height, width, ksize, stride, data_col); 145 | else im2col_nopad_kernel<<>>(im, channels, height, width, ksize, stride, data_col); 146 | check_error(cudaPeekAtLastError()); 147 | } 148 | */ 149 | -------------------------------------------------------------------------------- /src/image.h: -------------------------------------------------------------------------------- 1 | #ifndef IMAGE_H 2 | #define IMAGE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "box.h" 10 | 11 | typedef struct { 12 | int h; 13 | int w; 14 | int c; 15 | float *data; 16 | } image; 17 | 18 | float get_color(int c, int x, int max); 19 | void flip_image(image a); 20 | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); 21 | void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); 22 | void draw_bbox(image a, box bbox, int w, float r, float g, float b); 23 | void draw_label(image a, int r, int c, image label, const float *rgb); 24 | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes); 25 | image image_distance(image a, image b); 26 | void scale_image(image m, float s); 27 | image crop_image(image im, int dx, int dy, int w, int h); 28 | image resize_image(image im, int w, int h); 29 | image resize_image2(image im, int w, int h); 30 | void translate_image(image m, float s); 31 | void normalize_image(image p); 32 | image rotate_image(image m, float rad); 33 | void embed_image(image source, image dest, int dx, int dy); 34 | void saturate_image(image im, float sat); 35 | void exposure_image(image im, float sat); 36 | void saturate_exposure_image(image im, float sat, float exposure); 37 | void hsv_to_rgb(image im); 38 | void rgbgr_image(image im); 39 | void constrain_image(image im); 40 | 41 | image grayscale_image(image im); 42 | image threshold_image(image im, float thresh); 43 | 44 | image collapse_image_layers(image source, int border); 45 | image collapse_images_horz(image *ims, int n); 46 | image collapse_images_vert(image *ims, int n); 47 | 48 | void show_image(image p, const char *name); 49 | void save_image(image p, const char *name); 50 | void show_images(image *ims, int n, char *window); 51 | void show_image_layers(image p, char *name); 52 | void show_image_collapsed(image p, char *name); 53 | 54 | #ifdef OPENCV 55 | void save_image_jpg(image p, char *name); 56 | #endif 57 | 58 | void print_image(image m); 59 | 60 | image make_image(int w, int h, int c); 61 | image make_empty_image(int w, int h, int c); 62 | image float_to_image(int w, int h, int c, float *data); 63 | image copy_image(image p); 64 | image load_image(char *filename, int w, int h, int c); 65 | image load_image_color(char *filename, int w, int h); 66 | 67 | float get_pixel(image m, int x, int y, int c); 68 | float get_pixel_extend(image m, int x, int y, int c); 69 | void 
set_pixel(image m, int x, int y, int c, float val); 70 | void add_pixel(image m, int x, int y, int c, float val); 71 | float bilinear_interpolate(image im, float x, float y, int c); 72 | 73 | image get_image_layer(image m, int l); 74 | 75 | void free_image(image m); 76 | void test_resize(char *filename); 77 | #endif 78 | 79 | -------------------------------------------------------------------------------- /src/layer.c: -------------------------------------------------------------------------------- 1 | #include "layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | void free_layer(layer l) 6 | { 7 | if(l.type == DROPOUT){ 8 | if(l.rand) free(l.rand); 9 | #ifdef GPU 10 | if(l.rand_gpu) cuda_free(l.rand_gpu); 11 | #endif 12 | return; 13 | } 14 | if(l.indexes) free(l.indexes); 15 | if(l.rand) free(l.rand); 16 | if(l.cost) free(l.cost); 17 | if(l.filters) free(l.filters); 18 | if(l.filter_updates) free(l.filter_updates); 19 | if(l.biases) free(l.biases); 20 | if(l.bias_updates) free(l.bias_updates); 21 | if(l.weights) free(l.weights); 22 | if(l.weight_updates) free(l.weight_updates); 23 | if(l.col_image) free(l.col_image); 24 | if(l.input_layers) free(l.input_layers); 25 | if(l.input_sizes) free(l.input_sizes); 26 | if(l.delta) free(l.delta); 27 | if(l.output) free(l.output); 28 | if(l.squared) free(l.squared); 29 | if(l.norms) free(l.norms); 30 | 31 | #ifdef GPU 32 | if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu); 33 | if(l.filters_gpu) cuda_free(l.filters_gpu); 34 | if(l.filter_updates_gpu) cuda_free(l.filter_updates_gpu); 35 | if(l.col_image_gpu) cuda_free(l.col_image_gpu); 36 | if(l.weights_gpu) cuda_free(l.weights_gpu); 37 | if(l.biases_gpu) cuda_free(l.biases_gpu); 38 | if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); 39 | if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); 40 | if(l.output_gpu) cuda_free(l.output_gpu); 41 | if(l.delta_gpu) cuda_free(l.delta_gpu); 42 | if(l.rand_gpu) cuda_free(l.rand_gpu); 43 | if(l.squared_gpu) cuda_free(l.squared_gpu); 44 | if(l.norms_gpu) cuda_free(l.norms_gpu); 45 | #endif 46 | } 47 | -------------------------------------------------------------------------------- /src/layer.h: -------------------------------------------------------------------------------- 1 | #ifndef BASE_LAYER_H 2 | #define BASE_LAYER_H 3 | 4 | #include "activations.h" 5 | 6 | struct layer; 7 | typedef struct layer layer; 8 | 9 | typedef enum { 10 | CONVOLUTIONAL, 11 | DECONVOLUTIONAL, 12 | CONNECTED, 13 | MAXPOOL, 14 | SOFTMAX, 15 | DETECTION, 16 | DROPOUT, 17 | CROP, 18 | ROUTE, 19 | COST, 20 | NORMALIZATION, 21 | AVGPOOL, 22 | LOCAL, 23 | SHORTCUT 24 | } LAYER_TYPE; 25 | 26 | typedef enum{ 27 | SSE, MASKED 28 | } COST_TYPE; 29 | 30 | struct layer{ 31 | LAYER_TYPE type; 32 | ACTIVATION activation; 33 | COST_TYPE cost_type; 34 | int batch_normalize; 35 | int batch; 36 | int forced; 37 | int flipped; 38 | int inputs; 39 | int outputs; 40 | int truths; 41 | int h,w,c; 42 | int out_h, out_w, out_c; 43 | int n; 44 | int groups; 45 | int size; 46 | int side; 47 | int stride; 48 | int pad; 49 | int crop_width; 50 | int crop_height; 51 | int sqrt; 52 | int flip; 53 | int index; 54 | float angle; 55 | float jitter; 56 | float saturation; 57 | float exposure; 58 | float shift; 59 | int softmax; 60 | int classes; 61 | int coords; 62 | int background; 63 | int rescore; 64 | int objectness; 65 | int does_cost; 66 | int joint; 67 | int noadjust; 68 | 69 | float alpha; 70 | float beta; 71 | float kappa; 72 | 73 | float coord_scale; 74 | float object_scale; 75 | float 
noobject_scale; 76 | float class_scale; 77 | 78 | int dontload; 79 | int dontloadscales; 80 | 81 | float probability; 82 | float scale; 83 | 84 | int *indexes; 85 | float *rand; 86 | float *cost; 87 | float *filters; 88 | float *filter_updates; 89 | 90 | float *biases; 91 | float *bias_updates; 92 | 93 | float *scales; 94 | float *scale_updates; 95 | 96 | float *weights; 97 | float *weight_updates; 98 | 99 | float *col_image; 100 | int * input_layers; 101 | int * input_sizes; 102 | float * delta; 103 | float * output; 104 | float * squared; 105 | float * norms; 106 | 107 | float * spatial_mean; 108 | float * mean; 109 | float * variance; 110 | 111 | float * rolling_mean; 112 | float * rolling_variance; 113 | 114 | #ifdef GPU 115 | int *indexes_gpu; 116 | float * filters_gpu; 117 | float * filter_updates_gpu; 118 | 119 | float * spatial_mean_gpu; 120 | float * spatial_variance_gpu; 121 | 122 | float * mean_gpu; 123 | float * variance_gpu; 124 | 125 | float * rolling_mean_gpu; 126 | float * rolling_variance_gpu; 127 | 128 | float * spatial_mean_delta_gpu; 129 | float * spatial_variance_delta_gpu; 130 | 131 | float * variance_delta_gpu; 132 | float * mean_delta_gpu; 133 | 134 | float * col_image_gpu; 135 | 136 | float * x_gpu; 137 | float * x_norm_gpu; 138 | float * weights_gpu; 139 | float * weight_updates_gpu; 140 | 141 | float * biases_gpu; 142 | float * bias_updates_gpu; 143 | 144 | float * scales_gpu; 145 | float * scale_updates_gpu; 146 | 147 | float * output_gpu; 148 | float * delta_gpu; 149 | float * rand_gpu; 150 | float * squared_gpu; 151 | float * norms_gpu; 152 | #endif 153 | }; 154 | 155 | void free_layer(layer); 156 | 157 | #endif 158 | -------------------------------------------------------------------------------- /src/list.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "list.h" 4 | 5 | list *make_list() 6 | { 7 | list *l = malloc(sizeof(list)); 8 | l->size = 0; 9 | l->front = 0; 10 | l->back = 0; 11 | return l; 12 | } 13 | 14 | /* 15 | void transfer_node(list *s, list *d, node *n) 16 | { 17 | node *prev, *next; 18 | prev = n->prev; 19 | next = n->next; 20 | if(prev) prev->next = next; 21 | if(next) next->prev = prev; 22 | --s->size; 23 | if(s->front == n) s->front = next; 24 | if(s->back == n) s->back = prev; 25 | } 26 | */ 27 | 28 | void *list_pop(list *l){ 29 | if(!l->back) return 0; 30 | node *b = l->back; 31 | void *val = b->val; 32 | l->back = b->prev; 33 | if(l->back) l->back->next = 0; 34 | free(b); 35 | --l->size; 36 | 37 | return val; 38 | } 39 | 40 | void list_insert(list *l, void *val) 41 | { 42 | node *new = malloc(sizeof(node)); 43 | new->val = val; 44 | new->next = 0; 45 | 46 | if(!l->back){ 47 | l->front = new; 48 | new->prev = 0; 49 | }else{ 50 | l->back->next = new; 51 | new->prev = l->back; 52 | } 53 | l->back = new; 54 | ++l->size; 55 | } 56 | 57 | void free_node(node *n) 58 | { 59 | node *next; 60 | while(n) { 61 | next = n->next; 62 | free(n); 63 | n = next; 64 | } 65 | } 66 | 67 | void free_list(list *l) 68 | { 69 | free_node(l->front); 70 | free(l); 71 | } 72 | 73 | void free_list_contents(list *l) 74 | { 75 | node *n = l->front; 76 | while(n){ 77 | free(n->val); 78 | n = n->next; 79 | } 80 | } 81 | 82 | void **list_to_array(list *l) 83 | { 84 | void **a = calloc(l->size, sizeof(void*)); 85 | int count = 0; 86 | node *n = l->front; 87 | while(n){ 88 | a[count++] = n->val; 89 | n = n->next; 90 | } 91 | return a; 92 | } 93 | 
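A minimal usage sketch (not part of the repository) of the list API defined above: insert heap-allocated values, flatten to an array with list_to_array(), then free contents and nodes separately. The file paths are arbitrary example strings; the calls and ownership rules follow list.c as written.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "list.h"

int main()
{
    int i;
    list *l = make_list();
    list_insert(l, strdup("example1.jpg"));   /* values must be heap-allocated */
    list_insert(l, strdup("example2.jpg"));   /* if free_list_contents() is used */

    char **paths = (char **)list_to_array(l); /* caller owns the returned array */
    for(i = 0; i < l->size; ++i) printf("%s\n", paths[i]);

    free(paths);
    free_list_contents(l); /* frees the strdup'd strings */
    free_list(l);          /* frees the nodes and the list struct */
    return 0;
}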
-------------------------------------------------------------------------------- /src/list.h: -------------------------------------------------------------------------------- 1 | #ifndef LIST_H 2 | #define LIST_H 3 | 4 | typedef struct node{ 5 | void *val; 6 | struct node *next; 7 | struct node *prev; 8 | } node; 9 | 10 | typedef struct list{ 11 | int size; 12 | node *front; 13 | node *back; 14 | } list; 15 | 16 | list *make_list(); 17 | int list_find(list *l, void *val); 18 | 19 | void list_insert(list *, void *); 20 | 21 | void **list_to_array(list *l); 22 | 23 | void free_list(list *l); 24 | void free_list_contents(list *l); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/local_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCAL_LAYER_H 2 | #define LOCAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer local_layer; 11 | 12 | #ifdef GPU 13 | void forward_local_layer_gpu(local_layer layer, network_state state); 14 | void backward_local_layer_gpu(local_layer layer, network_state state); 15 | void update_local_layer_gpu(local_layer layer, int batch, float learning_rate, float momentum, float decay); 16 | 17 | void push_local_layer(local_layer layer); 18 | void pull_local_layer(local_layer layer); 19 | #endif 20 | 21 | local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); 22 | 23 | void forward_local_layer(const local_layer layer, network_state state); 24 | void backward_local_layer(local_layer layer, network_state state); 25 | void update_local_layer(local_layer layer, int batch, float learning_rate, float momentum, float decay); 26 | 27 | void bias_output(float *output, float *biases, int batch, int n, int size); 28 | void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /src/matrix.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include "utils.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | void free_matrix(matrix m) 10 | { 11 | int i; 12 | for(i = 0; i < m.rows; ++i) free(m.vals[i]); 13 | free(m.vals); 14 | } 15 | 16 | float matrix_topk_accuracy(matrix truth, matrix guess, int k) 17 | { 18 | int *indexes = calloc(k, sizeof(int)); 19 | int n = truth.cols; 20 | int i,j; 21 | int correct = 0; 22 | for(i = 0; i < truth.rows; ++i){ 23 | top_k(guess.vals[i], n, k, indexes); 24 | for(j = 0; j < k; ++j){ 25 | int class = indexes[j]; 26 | if(truth.vals[i][class]){ 27 | ++correct; 28 | break; 29 | } 30 | } 31 | } 32 | free(indexes); 33 | return (float)correct/truth.rows; 34 | } 35 | 36 | void matrix_add_matrix(matrix from, matrix to) 37 | { 38 | assert(from.rows == to.rows && from.cols == to.cols); 39 | int i,j; 40 | for(i = 0; i < from.rows; ++i){ 41 | for(j = 0; j < from.cols; ++j){ 42 | to.vals[i][j] += from.vals[i][j]; 43 | } 44 | } 45 | } 46 | 47 | matrix make_matrix(int rows, int cols) 48 | { 49 | int i; 50 | matrix m; 51 | m.rows = rows; 52 | m.cols = cols; 53 | m.vals = calloc(m.rows, sizeof(float *)); 54 | for(i = 0; i < m.rows; ++i){ 55 | m.vals[i] = calloc(m.cols, sizeof(float)); 56 | } 57 | return m; 58 | } 59 | 60 | matrix hold_out_matrix(matrix *m, int n) 61 | { 62 | int 
i; 63 | matrix h; 64 | h.rows = n; 65 | h.cols = m->cols; 66 | h.vals = calloc(h.rows, sizeof(float *)); 67 | for(i = 0; i < n; ++i){ 68 | int index = rand()%m->rows; 69 | h.vals[i] = m->vals[index]; 70 | m->vals[index] = m->vals[--(m->rows)]; 71 | } 72 | return h; 73 | } 74 | 75 | float *pop_column(matrix *m, int c) 76 | { 77 | float *col = calloc(m->rows, sizeof(float)); 78 | int i, j; 79 | for(i = 0; i < m->rows; ++i){ 80 | col[i] = m->vals[i][c]; 81 | for(j = c; j < m->cols-1; ++j){ 82 | m->vals[i][j] = m->vals[i][j+1]; 83 | } 84 | } 85 | --m->cols; 86 | return col; 87 | } 88 | 89 | matrix csv_to_matrix(char *filename) 90 | { 91 | FILE *fp = fopen(filename, "r"); 92 | if(!fp) file_error(filename); 93 | 94 | matrix m; 95 | m.cols = -1; 96 | 97 | char *line; 98 | 99 | int n = 0; 100 | int size = 1024; 101 | m.vals = calloc(size, sizeof(float*)); 102 | while((line = fgetl(fp))){ 103 | if(m.cols == -1) m.cols = count_fields(line); 104 | if(n == size){ 105 | size *= 2; 106 | m.vals = realloc(m.vals, size*sizeof(float*)); 107 | } 108 | m.vals[n] = parse_fields(line, m.cols); 109 | free(line); 110 | ++n; 111 | } 112 | m.vals = realloc(m.vals, n*sizeof(float*)); 113 | m.rows = n; 114 | return m; 115 | } 116 | 117 | void print_matrix(matrix m) 118 | { 119 | int i, j; 120 | printf("%d X %d Matrix:\n",m.rows, m.cols); 121 | printf(" __"); 122 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 123 | printf("__ \n"); 124 | 125 | printf("| "); 126 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 127 | printf(" |\n"); 128 | 129 | for(i = 0; i < m.rows; ++i){ 130 | printf("| "); 131 | for(j = 0; j < m.cols; ++j){ 132 | printf("%15.7f ", m.vals[i][j]); 133 | } 134 | printf(" |\n"); 135 | } 136 | printf("|__"); 137 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 138 | printf("__|\n"); 139 | } 140 | -------------------------------------------------------------------------------- /src/matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H 2 | #define MATRIX_H 3 | typedef struct matrix{ 4 | int rows, cols; 5 | float **vals; 6 | } matrix; 7 | 8 | matrix make_matrix(int rows, int cols); 9 | void free_matrix(matrix m); 10 | void print_matrix(matrix m); 11 | 12 | matrix csv_to_matrix(char *filename); 13 | matrix hold_out_matrix(matrix *m, int n); 14 | float matrix_topk_accuracy(matrix truth, matrix guess, int k); 15 | void matrix_add_matrix(matrix from, matrix to); 16 | 17 | float *pop_column(matrix *m, int c); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/maxpool_layer.c: -------------------------------------------------------------------------------- 1 | #include "maxpool_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | image get_maxpool_image(maxpool_layer l) 6 | { 7 | int h = l.out_h; 8 | int w = l.out_w; 9 | int c = l.c; 10 | return float_to_image(w,h,c,l.output); 11 | } 12 | 13 | image get_maxpool_delta(maxpool_layer l) 14 | { 15 | int h = l.out_h; 16 | int w = l.out_w; 17 | int c = l.c; 18 | return float_to_image(w,h,c,l.delta); 19 | } 20 | 21 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride) 22 | { 23 | fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d size, %d stride\n", h,w,c,size,stride); 24 | maxpool_layer l = {0}; 25 | l.type = MAXPOOL; 26 | l.batch = batch; 27 | l.h = h; 28 | l.w = w; 29 | l.c = c; 30 | l.out_w = (w-1)/stride + 1; 31 | l.out_h = (h-1)/stride + 1; 32 | l.out_c = c; 33 | l.outputs = l.out_h * l.out_w * l.out_c; 34 | 
l.inputs = h*w*c; 35 | l.size = size; 36 | l.stride = stride; 37 | int output_size = l.out_h * l.out_w * l.out_c * batch; 38 | l.indexes = calloc(output_size, sizeof(int)); 39 | l.output = calloc(output_size, sizeof(float)); 40 | l.delta = calloc(output_size, sizeof(float)); 41 | #ifdef GPU 42 | l.indexes_gpu = cuda_make_int_array(output_size); 43 | l.output_gpu = cuda_make_array(l.output, output_size); 44 | l.delta_gpu = cuda_make_array(l.delta, output_size); 45 | #endif 46 | return l; 47 | } 48 | 49 | void resize_maxpool_layer(maxpool_layer *l, int w, int h) 50 | { 51 | int stride = l->stride; 52 | l->h = h; 53 | l->w = w; 54 | 55 | l->out_w = (w-1)/stride + 1; 56 | l->out_h = (h-1)/stride + 1; 57 | l->outputs = l->out_w * l->out_h * l->c; 58 | int output_size = l->outputs * l->batch; 59 | 60 | l->indexes = realloc(l->indexes, output_size * sizeof(int)); 61 | l->output = realloc(l->output, output_size * sizeof(float)); 62 | l->delta = realloc(l->delta, output_size * sizeof(float)); 63 | 64 | #ifdef GPU 65 | cuda_free((float *)l->indexes_gpu); 66 | cuda_free(l->output_gpu); 67 | cuda_free(l->delta_gpu); 68 | l->indexes_gpu = cuda_make_int_array(output_size); 69 | l->output_gpu = cuda_make_array(l->output, output_size); 70 | l->delta_gpu = cuda_make_array(l->delta, output_size); 71 | #endif 72 | } 73 | 74 | void forward_maxpool_layer(const maxpool_layer l, network_state state) 75 | { 76 | int b,i,j,k,m,n; 77 | int w_offset = (-l.size-1)/2 + 1; 78 | int h_offset = (-l.size-1)/2 + 1; 79 | 80 | int h = (l.h-1)/l.stride + 1; 81 | int w = (l.w-1)/l.stride + 1; 82 | int c = l.c; 83 | 84 | for(b = 0; b < l.batch; ++b){ 85 | for(k = 0; k < c; ++k){ 86 | for(i = 0; i < h; ++i){ 87 | for(j = 0; j < w; ++j){ 88 | int out_index = j + w*(i + h*(k + c*b)); 89 | float max = -FLT_MAX; 90 | int max_i = -1; 91 | for(n = 0; n < l.size; ++n){ 92 | for(m = 0; m < l.size; ++m){ 93 | int cur_h = h_offset + i*l.stride + n; 94 | int cur_w = w_offset + j*l.stride + m; 95 | int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); 96 | int valid = (cur_h >= 0 && cur_h < l.h && 97 | cur_w >= 0 && cur_w < l.w); 98 | float val = (valid != 0) ? state.input[index] : -FLT_MAX; 99 | max_i = (val > max) ? index : max_i; 100 | max = (val > max) ? 
val : max; 101 | } 102 | } 103 | l.output[out_index] = max; 104 | l.indexes[out_index] = max_i; 105 | } 106 | } 107 | } 108 | } 109 | } 110 | 111 | void backward_maxpool_layer(const maxpool_layer l, network_state state) 112 | { 113 | int i; 114 | int h = (l.h-1)/l.stride + 1; 115 | int w = (l.w-1)/l.stride + 1; 116 | int c = l.c; 117 | for(i = 0; i < h*w*c*l.batch; ++i){ 118 | int index = l.indexes[i]; 119 | state.delta[index] += l.delta[i]; 120 | } 121 | } 122 | 123 | -------------------------------------------------------------------------------- /src/maxpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef MAXPOOL_LAYER_H 2 | #define MAXPOOL_LAYER_H 3 | 4 | #include "image.h" 5 | #include "params.h" 6 | #include "cuda.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer maxpool_layer; 11 | 12 | image get_maxpool_image(maxpool_layer l); 13 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride); 14 | void resize_maxpool_layer(maxpool_layer *l, int w, int h); 15 | void forward_maxpool_layer(const maxpool_layer l, network_state state); 16 | void backward_maxpool_layer(const maxpool_layer l, network_state state); 17 | 18 | #ifdef GPU 19 | void forward_maxpool_layer_gpu(maxpool_layer l, network_state state); 20 | void backward_maxpool_layer_gpu(maxpool_layer l, network_state state); 21 | #endif 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src/maxpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "maxpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *input, float *output, int *indexes) 11 | { 12 | int h = (in_h-1)/stride + 1; 13 | int w = (in_w-1)/stride + 1; 14 | int c = in_c; 15 | 16 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 17 | if(id >= n) return; 18 | 19 | int j = id % w; 20 | id /= w; 21 | int i = id % h; 22 | id /= h; 23 | int k = id % c; 24 | id /= c; 25 | int b = id; 26 | 27 | int w_offset = (-size-1)/2 + 1; 28 | int h_offset = (-size-1)/2 + 1; 29 | 30 | int out_index = j + w*(i + h*(k + c*b)); 31 | float max = -INFINITY; 32 | int max_i = -1; 33 | int l, m; 34 | for(l = 0; l < size; ++l){ 35 | for(m = 0; m < size; ++m){ 36 | int cur_h = h_offset + i*stride + l; 37 | int cur_w = w_offset + j*stride + m; 38 | int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); 39 | int valid = (cur_h >= 0 && cur_h < in_h && 40 | cur_w >= 0 && cur_w < in_w); 41 | float val = (valid != 0) ? input[index] : -INFINITY; 42 | max_i = (val > max) ? index : max_i; 43 | max = (val > max) ? 
val : max; 44 | } 45 | } 46 | output[out_index] = max; 47 | indexes[out_index] = max_i; 48 | } 49 | 50 | __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *delta, float *prev_delta, int *indexes) 51 | { 52 | int h = (in_h-1)/stride + 1; 53 | int w = (in_w-1)/stride + 1; 54 | int c = in_c; 55 | int area = (size-1)/stride; 56 | 57 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 58 | if(id >= n) return; 59 | 60 | int index = id; 61 | int j = id % in_w; 62 | id /= in_w; 63 | int i = id % in_h; 64 | id /= in_h; 65 | int k = id % in_c; 66 | id /= in_c; 67 | int b = id; 68 | 69 | int w_offset = (-size-1)/2 + 1; 70 | int h_offset = (-size-1)/2 + 1; 71 | 72 | float d = 0; 73 | int l, m; 74 | for(l = -area; l < area+1; ++l){ 75 | for(m = -area; m < area+1; ++m){ 76 | int out_w = (j-w_offset)/stride + m; 77 | int out_h = (i-h_offset)/stride + l; 78 | int out_index = out_w + w*(out_h + h*(k + c*b)); 79 | int valid = (out_w >= 0 && out_w < w && 80 | out_h >= 0 && out_h < h); 81 | d += (valid && indexes[out_index] == index) ? delta[out_index] : 0; 82 | } 83 | } 84 | prev_delta[index] += d; 85 | } 86 | 87 | extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state) 88 | { 89 | int h = (layer.h-1)/layer.stride + 1; 90 | int w = (layer.w-1)/layer.stride + 1; 91 | int c = layer.c; 92 | 93 | size_t n = h*w*c*layer.batch; 94 | 95 | forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, state.input, layer.output_gpu, layer.indexes_gpu); 96 | check_error(cudaPeekAtLastError()); 97 | } 98 | 99 | extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state) 100 | { 101 | size_t n = layer.h*layer.w*layer.c*layer.batch; 102 | 103 | backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, state.delta, layer.indexes_gpu); 104 | check_error(cudaPeekAtLastError()); 105 | } 106 | 107 | -------------------------------------------------------------------------------- /src/network.h: -------------------------------------------------------------------------------- 1 | // Oh boy, why am I about to do this....
2 | #ifndef NETWORK_H 3 | #define NETWORK_H 4 | 5 | #include "image.h" 6 | #include "layer.h" 7 | #include "data.h" 8 | #include "params.h" 9 | 10 | typedef enum { 11 | CONSTANT, STEP, EXP, POLY, STEPS, SIG 12 | } learning_rate_policy; 13 | 14 | typedef struct network{ 15 | int n; 16 | int batch; 17 | int *seen; 18 | float epoch; 19 | int subdivisions; 20 | float momentum; 21 | float decay; 22 | layer *layers; 23 | int outputs; 24 | float *output; 25 | learning_rate_policy policy; 26 | 27 | float learning_rate; 28 | float gamma; 29 | float scale; 30 | float power; 31 | int step; 32 | int max_batches; 33 | float *scales; 34 | int *steps; 35 | int num_steps; 36 | 37 | int inputs; 38 | int h, w, c; 39 | 40 | #ifdef GPU 41 | float **input_gpu; 42 | float **truth_gpu; 43 | #endif 44 | } network; 45 | 46 | typedef struct network_state { 47 | float *truth; 48 | float *input; 49 | float *delta; 50 | int train; 51 | int index; 52 | network net; 53 | } network_state; 54 | 55 | #ifdef GPU 56 | float train_network_datum_gpu(network net, float *x, float *y); 57 | float *network_predict_gpu(network net, float *input); 58 | float * get_network_output_gpu_layer(network net, int i); 59 | float * get_network_delta_gpu_layer(network net, int i); 60 | float *get_network_output_gpu(network net); 61 | void forward_network_gpu(network net, network_state state); 62 | void backward_network_gpu(network net, network_state state); 63 | void update_network_gpu(network net); 64 | #endif 65 | 66 | float get_current_rate(network net); 67 | int get_current_batch(network net); 68 | void free_network(network net); 69 | void compare_networks(network n1, network n2, data d); 70 | char *get_layer_string(LAYER_TYPE a); 71 | 72 | network make_network(int n); 73 | void forward_network(network net, network_state state); 74 | void backward_network(network net, network_state state); 75 | void update_network(network net); 76 | 77 | float train_network(network net, data d); 78 | float train_network_batch(network net, data d, int n); 79 | float train_network_sgd(network net, data d, int n); 80 | 81 | matrix network_predict_data(network net, data test); 82 | float *network_predict(network net, float *input); 83 | float network_accuracy(network net, data d); 84 | float *network_accuracies(network net, data d, int n); 85 | float network_accuracy_multi(network net, data d, int n); 86 | void top_predictions(network net, int n, int *index); 87 | float *get_network_output(network net); 88 | float *get_network_output_layer(network net, int i); 89 | float *get_network_delta_layer(network net, int i); 90 | float *get_network_delta(network net); 91 | int get_network_output_size_layer(network net, int i); 92 | int get_network_output_size(network net); 93 | image get_network_image(network net); 94 | image get_network_image_layer(network net, int i); 95 | int get_predicted_class_network(network net); 96 | void print_network(network net); 97 | void visualize_network(network net); 98 | int resize_network(network *net, int w, int h); 99 | void set_batch_network(network *net, int b); 100 | int get_network_input_size(network net); 101 | float get_network_cost(network net); 102 | 103 | int get_network_nuisance(network net); 104 | int get_network_background(network net); 105 | 106 | #endif 107 | 108 | -------------------------------------------------------------------------------- /src/normalization_layer.c: -------------------------------------------------------------------------------- 1 | #include "normalization_layer.h" 2 | #include "blas.h" 3 | 
#include 4 | 5 | layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) 6 | { 7 | fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); 8 | layer layer = {0}; 9 | layer.type = NORMALIZATION; 10 | layer.batch = batch; 11 | layer.h = layer.out_h = h; 12 | layer.w = layer.out_w = w; 13 | layer.c = layer.out_c = c; 14 | layer.kappa = kappa; 15 | layer.size = size; 16 | layer.alpha = alpha; 17 | layer.beta = beta; 18 | layer.output = calloc(h * w * c * batch, sizeof(float)); 19 | layer.delta = calloc(h * w * c * batch, sizeof(float)); 20 | layer.squared = calloc(h * w * c * batch, sizeof(float)); 21 | layer.norms = calloc(h * w * c * batch, sizeof(float)); 22 | layer.inputs = w*h*c; 23 | layer.outputs = layer.inputs; 24 | #ifdef GPU 25 | layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); 26 | layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); 27 | layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); 28 | layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); 29 | #endif 30 | return layer; 31 | } 32 | 33 | void resize_normalization_layer(layer *layer, int w, int h) 34 | { 35 | int c = layer->c; 36 | int batch = layer->batch; 37 | layer->h = h; 38 | layer->w = w; 39 | layer->out_h = h; 40 | layer->out_w = w; 41 | layer->inputs = w*h*c; 42 | layer->outputs = layer->inputs; 43 | layer->output = realloc(layer->output, h * w * c * batch * sizeof(float)); 44 | layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); 45 | layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); 46 | layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); 47 | #ifdef GPU 48 | cuda_free(layer->output_gpu); 49 | cuda_free(layer->delta_gpu); 50 | cuda_free(layer->squared_gpu); 51 | cuda_free(layer->norms_gpu); 52 | layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); 53 | layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); 54 | layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); 55 | layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); 56 | #endif 57 | } 58 | 59 | void forward_normalization_layer(const layer layer, network_state state) 60 | { 61 | int k,b; 62 | int w = layer.w; 63 | int h = layer.h; 64 | int c = layer.c; 65 | scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1); 66 | 67 | for(b = 0; b < layer.batch; ++b){ 68 | float *squared = layer.squared + w*h*c*b; 69 | float *norms = layer.norms + w*h*c*b; 70 | float *input = state.input + w*h*c*b; 71 | pow_cpu(w*h*c, 2, input, 1, squared, 1); 72 | 73 | const_cpu(w*h, layer.kappa, norms, 1); 74 | for(k = 0; k < layer.size/2; ++k){ 75 | axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); 76 | } 77 | 78 | for(k = 1; k < layer.c; ++k){ 79 | copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); 80 | int prev = k - ((layer.size-1)/2) - 1; 81 | int next = k + (layer.size/2); 82 | if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); 83 | if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); 84 | } 85 | } 86 | pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); 87 | mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1); 88 | } 89 | 90 | void backward_normalization_layer(const layer layer, network_state state) 91 | { 92 | // TODO This is approximate ;-) 93 | // Also this should add in to delta 
instead of overwritting. 94 | 95 | int w = layer.w; 96 | int h = layer.h; 97 | int c = layer.c; 98 | pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1); 99 | mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1); 100 | } 101 | 102 | #ifdef GPU 103 | void forward_normalization_layer_gpu(const layer layer, network_state state) 104 | { 105 | int k,b; 106 | int w = layer.w; 107 | int h = layer.h; 108 | int c = layer.c; 109 | scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1); 110 | 111 | for(b = 0; b < layer.batch; ++b){ 112 | float *squared = layer.squared_gpu + w*h*c*b; 113 | float *norms = layer.norms_gpu + w*h*c*b; 114 | float *input = state.input + w*h*c*b; 115 | pow_ongpu(w*h*c, 2, input, 1, squared, 1); 116 | 117 | const_ongpu(w*h, layer.kappa, norms, 1); 118 | for(k = 0; k < layer.size/2; ++k){ 119 | axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); 120 | } 121 | 122 | for(k = 1; k < layer.c; ++k){ 123 | copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); 124 | int prev = k - ((layer.size-1)/2) - 1; 125 | int next = k + (layer.size/2); 126 | if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); 127 | if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); 128 | } 129 | } 130 | pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1); 131 | mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1); 132 | } 133 | 134 | void backward_normalization_layer_gpu(const layer layer, network_state state) 135 | { 136 | // TODO This is approximate ;-) 137 | 138 | int w = layer.w; 139 | int h = layer.h; 140 | int c = layer.c; 141 | pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1); 142 | mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1); 143 | } 144 | #endif 145 | -------------------------------------------------------------------------------- /src/normalization_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef NORMALIZATION_LAYER_H 2 | #define NORMALIZATION_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); 9 | void resize_normalization_layer(layer *layer, int h, int w); 10 | void forward_normalization_layer(const layer layer, network_state state); 11 | void backward_normalization_layer(const layer layer, network_state state); 12 | void visualize_normalization_layer(layer layer, char *window); 13 | 14 | #ifdef GPU 15 | void forward_normalization_layer_gpu(const layer layer, network_state state); 16 | void backward_normalization_layer_gpu(const layer layer, network_state state); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/option_list.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "option_list.h" 5 | 6 | int read_option(char *s, list *options) 7 | { 8 | size_t i; 9 | size_t len = strlen(s); 10 | char *val = 0; 11 | for(i = 0; i < len; ++i){ 12 | if(s[i] == '='){ 13 | s[i] = '\0'; 14 | val = s+i+1; 15 | break; 16 | } 17 | } 18 | if(i == len-1) return 0; 19 | char *key = s; 20 | option_insert(options, key, val); 21 | return 1; 22 | } 23 | 24 | void option_insert(list *l, char *key, char *val) 25 | { 26 | kvp *p = malloc(sizeof(kvp)); 27 | p->key = key; 28 
| p->val = val; 29 | p->used = 0; 30 | list_insert(l, p); 31 | } 32 | 33 | void option_unused(list *l) 34 | { 35 | node *n = l->front; 36 | while(n){ 37 | kvp *p = (kvp *)n->val; 38 | if(!p->used){ 39 | fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); 40 | } 41 | n = n->next; 42 | } 43 | } 44 | 45 | char *option_find(list *l, char *key) 46 | { 47 | node *n = l->front; 48 | while(n){ 49 | kvp *p = (kvp *)n->val; 50 | if(strcmp(p->key, key) == 0){ 51 | p->used = 1; 52 | return p->val; 53 | } 54 | n = n->next; 55 | } 56 | return 0; 57 | } 58 | char *option_find_str(list *l, char *key, char *def) 59 | { 60 | char *v = option_find(l, key); 61 | if(v) return v; 62 | if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); 63 | return def; 64 | } 65 | 66 | int option_find_int(list *l, char *key, int def) 67 | { 68 | char *v = option_find(l, key); 69 | if(v) return atoi(v); 70 | fprintf(stderr, "%s: Using default '%d'\n", key, def); 71 | return def; 72 | } 73 | 74 | int option_find_int_quiet(list *l, char *key, int def) 75 | { 76 | char *v = option_find(l, key); 77 | if(v) return atoi(v); 78 | return def; 79 | } 80 | 81 | float option_find_float_quiet(list *l, char *key, float def) 82 | { 83 | char *v = option_find(l, key); 84 | if(v) return atof(v); 85 | return def; 86 | } 87 | 88 | float option_find_float(list *l, char *key, float def) 89 | { 90 | char *v = option_find(l, key); 91 | if(v) return atof(v); 92 | fprintf(stderr, "%s: Using default '%lf'\n", key, def); 93 | return def; 94 | } 95 | -------------------------------------------------------------------------------- /src/option_list.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTION_LIST_H 2 | #define OPTION_LIST_H 3 | #include "list.h" 4 | 5 | typedef struct{ 6 | char *key; 7 | char *val; 8 | int used; 9 | } kvp; 10 | 11 | 12 | int read_option(char *s, list *options); 13 | void option_insert(list *l, char *key, char *val); 14 | char *option_find(list *l, char *key); 15 | char *option_find_str(list *l, char *key, char *def); 16 | int option_find_int(list *l, char *key, int def); 17 | int option_find_int_quiet(list *l, char *key, int def); 18 | float option_find_float(list *l, char *key, float def); 19 | float option_find_float_quiet(list *l, char *key, float def); 20 | void option_unused(list *l); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/params.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | #include "network.h" 4 | 5 | network parse_network_cfg(char *filename); 6 | void save_network(network net, char *filename); 7 | void save_weights(network net, char *filename); 8 | void save_weights_upto(network net, char *filename, int cutoff); 9 | void save_weights_double(network net, char *filename); 10 | void load_weights(network *net, char *filename); 11 | void load_weights_upto(network *net, char *filename, int cutoff); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/route_layer.c: -------------------------------------------------------------------------------- 1 | #include "route_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | #include 5 | 6 | route_layer make_route_layer(int 
batch, int n, int *input_layers, int *input_sizes) 7 | { 8 | fprintf(stderr,"Route Layer:"); 9 | route_layer l = {0}; 10 | l.type = ROUTE; 11 | l.batch = batch; 12 | l.n = n; 13 | l.input_layers = input_layers; 14 | l.input_sizes = input_sizes; 15 | int i; 16 | int outputs = 0; 17 | for(i = 0; i < n; ++i){ 18 | fprintf(stderr," %d", input_layers[i]); 19 | outputs += input_sizes[i]; 20 | } 21 | fprintf(stderr, "\n"); 22 | l.outputs = outputs; 23 | l.inputs = outputs; 24 | l.delta = calloc(outputs*batch, sizeof(float)); 25 | l.output = calloc(outputs*batch, sizeof(float));; 26 | #ifdef GPU 27 | l.delta_gpu = cuda_make_array(l.delta, outputs*batch); 28 | l.output_gpu = cuda_make_array(l.output, outputs*batch); 29 | #endif 30 | return l; 31 | } 32 | 33 | void forward_route_layer(const route_layer l, network net) 34 | { 35 | int i, j; 36 | int offset = 0; 37 | for(i = 0; i < l.n; ++i){ 38 | int index = l.input_layers[i]; 39 | float *input = net.layers[index].output; 40 | int input_size = l.input_sizes[i]; 41 | for(j = 0; j < l.batch; ++j){ 42 | copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); 43 | } 44 | offset += input_size; 45 | } 46 | } 47 | 48 | void backward_route_layer(const route_layer l, network net) 49 | { 50 | int i, j; 51 | int offset = 0; 52 | for(i = 0; i < l.n; ++i){ 53 | int index = l.input_layers[i]; 54 | float *delta = net.layers[index].delta; 55 | int input_size = l.input_sizes[i]; 56 | for(j = 0; j < l.batch; ++j){ 57 | axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); 58 | } 59 | offset += input_size; 60 | } 61 | } 62 | 63 | #ifdef GPU 64 | void forward_route_layer_gpu(const route_layer l, network net) 65 | { 66 | int i, j; 67 | int offset = 0; 68 | for(i = 0; i < l.n; ++i){ 69 | int index = l.input_layers[i]; 70 | float *input = net.layers[index].output_gpu; 71 | int input_size = l.input_sizes[i]; 72 | for(j = 0; j < l.batch; ++j){ 73 | copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); 74 | } 75 | offset += input_size; 76 | } 77 | } 78 | 79 | void backward_route_layer_gpu(const route_layer l, network net) 80 | { 81 | int i, j; 82 | int offset = 0; 83 | for(i = 0; i < l.n; ++i){ 84 | int index = l.input_layers[i]; 85 | float *delta = net.layers[index].delta_gpu; 86 | int input_size = l.input_sizes[i]; 87 | for(j = 0; j < l.batch; ++j){ 88 | axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); 89 | } 90 | offset += input_size; 91 | } 92 | } 93 | #endif 94 | -------------------------------------------------------------------------------- /src/route_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ROUTE_LAYER_H 2 | #define ROUTE_LAYER_H 3 | #include "network.h" 4 | #include "layer.h" 5 | 6 | typedef layer route_layer; 7 | 8 | route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); 9 | void forward_route_layer(const route_layer l, network net); 10 | void backward_route_layer(const route_layer l, network net); 11 | 12 | #ifdef GPU 13 | void forward_route_layer_gpu(const route_layer l, network net); 14 | void backward_route_layer_gpu(const route_layer l, network net); 15 | #endif 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/server.h: -------------------------------------------------------------------------------- 1 | #include "network.h" 2 | 3 | void client_update(network net, char *address); 4 | 
void server_update(network net); 5 | -------------------------------------------------------------------------------- /src/shortcut_layer.c: -------------------------------------------------------------------------------- 1 | #include "shortcut_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | #include <stdio.h> 5 | #include <assert.h> 6 | 7 | layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) 8 | { 9 | fprintf(stderr,"Shortcut Layer: %d\n", index); 10 | layer l = {0}; 11 | l.type = SHORTCUT; 12 | l.batch = batch; 13 | l.w = w; 14 | l.h = h; 15 | l.c = c; 16 | l.out_w = w; 17 | l.out_h = h; 18 | l.out_c = c; 19 | l.outputs = w*h*c; 20 | l.inputs = w*h*c; 21 | int stride = w2 / w; 22 | 23 | assert(stride * w == w2); 24 | assert(stride * h == h2); 25 | assert(c >= c2); 26 | 27 | l.stride = stride; 28 | l.n = c2; 29 | l.index = index; 30 | 31 | l.delta = calloc(l.outputs*batch, sizeof(float)); 32 | l.output = calloc(l.outputs*batch, sizeof(float)); 33 | #ifdef GPU 34 | l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); 35 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 36 | #endif 37 | return l; 38 | } 39 | 40 | void forward_shortcut_layer(const layer l, network_state state) 41 | { 42 | copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); 43 | shortcut_cpu(l.output, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output, l.stride, l.n); 44 | } 45 | 46 | void backward_shortcut_layer(const layer l, network_state state) 47 | { 48 | copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1); 49 | shortcut_cpu(state.net.layers[l.index].delta, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta, 1, l.c); 50 | } 51 | 52 | #ifdef GPU 53 | void forward_shortcut_layer_gpu(const layer l, network_state state) 54 | { 55 | copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); 56 | shortcut_gpu(l.output_gpu, l.w, l.h, l.c, l.batch, 1, state.net.layers[l.index].output_gpu, l.stride, l.n); 57 | } 58 | 59 | void backward_shortcut_layer_gpu(const layer l, network_state state) 60 | { 61 | copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); 62 | shortcut_gpu(state.net.layers[l.index].delta_gpu, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta_gpu, 1, l.c); 63 | } 64 | #endif 65 | -------------------------------------------------------------------------------- /src/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef SHORTCUT_LAYER_H 2 | #define SHORTCUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); 8 | void forward_shortcut_layer(const layer l, network_state state); 9 | void backward_shortcut_layer(const layer l, network_state state); 10 | 11 | #ifdef GPU 12 | void forward_shortcut_layer_gpu(const layer l, network_state state); 13 | void backward_shortcut_layer_gpu(const layer l, network_state state); 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/softmax_layer.c: -------------------------------------------------------------------------------- 1 | #include "softmax_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | #include <float.h> 5 | #include <math.h> 6 | #include <stdlib.h> 7 | #include <stdio.h> 8 | #include <assert.h> 9 | 10 | softmax_layer make_softmax_layer(int batch, int inputs, int groups) 11 | { 12 | assert(inputs%groups == 0); 13 | fprintf(stderr, "Softmax Layer: %d inputs\n", inputs); 14 | softmax_layer l =
{0}; 15 | l.type = SOFTMAX; 16 | l.batch = batch; 17 | l.groups = groups; 18 | l.inputs = inputs; 19 | l.outputs = inputs; 20 | l.output = calloc(inputs*batch, sizeof(float)); 21 | l.delta = calloc(inputs*batch, sizeof(float)); 22 | #ifdef GPU 23 | l.output_gpu = cuda_make_array(l.output, inputs*batch); 24 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 25 | #endif 26 | return l; 27 | } 28 | 29 | void softmax_array(float *input, int n, float *output) 30 | { 31 | int i; 32 | float sum = 0; 33 | float largest = -FLT_MAX; 34 | for(i = 0; i < n; ++i){ 35 | if(input[i] > largest) largest = input[i]; 36 | } 37 | for(i = 0; i < n; ++i){ 38 | sum += exp(input[i]-largest); 39 | } 40 | if(sum) sum = largest+log(sum); 41 | else sum = largest-100; 42 | for(i = 0; i < n; ++i){ 43 | output[i] = exp(input[i]-sum); 44 | } 45 | } 46 | 47 | void forward_softmax_layer(const softmax_layer l, network_state state) 48 | { 49 | int b; 50 | int inputs = l.inputs / l.groups; 51 | int batch = l.batch * l.groups; 52 | for(b = 0; b < batch; ++b){ 53 | softmax_array(state.input+b*inputs, inputs, l.output+b*inputs); 54 | } 55 | } 56 | 57 | void backward_softmax_layer(const softmax_layer l, network_state state) 58 | { 59 | int i; 60 | for(i = 0; i < l.inputs*l.batch; ++i){ 61 | state.delta[i] += l.delta[i]; 62 | } 63 | } 64 | 65 | -------------------------------------------------------------------------------- /src/softmax_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef SOFTMAX_LAYER_H 2 | #define SOFTMAX_LAYER_H 3 | #include "params.h" 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer softmax_layer; 8 | 9 | void softmax_array(float *input, int n, float *output); 10 | softmax_layer make_softmax_layer(int batch, int inputs, int groups); 11 | void forward_softmax_layer(const softmax_layer l, network_state state); 12 | void backward_softmax_layer(const softmax_layer l, network_state state); 13 | 14 | #ifdef GPU 15 | void pull_softmax_layer_output(const softmax_layer l); 16 | void forward_softmax_layer_gpu(const softmax_layer l, network_state state); 17 | void backward_softmax_layer_gpu(const softmax_layer l, network_state state); 18 | #endif 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/softmax_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "softmax_layer.h" 7 | #include "cuda.h" 8 | #include "blas.h" 9 | } 10 | 11 | __global__ void forward_softmax_layer_kernel(int n, int batch, float *input, float *output) 12 | { 13 | int b = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 14 | if(b >= batch) return; 15 | 16 | int i; 17 | float sum = 0; 18 | float largest = -INFINITY; 19 | for(i = 0; i < n; ++i){ 20 | int val = input[i+b*n]; 21 | largest = (val>largest) ? val : largest; 22 | } 23 | for(i = 0; i < n; ++i){ 24 | sum += exp(input[i+b*n]-largest); 25 | } 26 | sum = (sum != 0) ? 
largest+log(sum) : largest-100; 27 | for(i = 0; i < n; ++i){ 28 | output[i+b*n] = exp(input[i+b*n]-sum); 29 | } 30 | } 31 | 32 | extern "C" void pull_softmax_layer_output(const softmax_layer layer) 33 | { 34 | cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); 35 | } 36 | 37 | extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, network_state state) 38 | { 39 | int inputs = layer.inputs / layer.groups; 40 | int batch = layer.batch * layer.groups; 41 | forward_softmax_layer_kernel<<<cuda_gridsize(batch), BLOCK>>>(inputs, batch, state.input, layer.output_gpu); 42 | check_error(cudaPeekAtLastError()); 43 | } 44 | 45 | extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, network_state state) 46 | { 47 | axpy_ongpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, state.delta, 1); 48 | } 49 | 50 | /* This is if you want softmax w/o log-loss classification. You probably don't. 51 | int i,j,b; 52 | for(b = 0; b < layer.batch; ++b){ 53 | for(i = 0; i < layer.inputs; ++i){ 54 | for(j = 0; j < layer.inputs; ++j){ 55 | int d = (i==j); 56 | layer.jacobian[b*layer.inputs*layer.inputs + i*layer.inputs + j] = 57 | layer.output[b*layer.inputs + i] * (d - layer.output[b*layer.inputs + j]); 58 | } 59 | } 60 | } 61 | for(b = 0; b < layer.batch; ++b){ 62 | int M = layer.inputs; 63 | int N = 1; 64 | int K = layer.inputs; 65 | float *A = layer.jacobian + b*layer.inputs*layer.inputs; 66 | float *B = layer.delta + b*layer.inputs; 67 | float *C = delta + b*layer.inputs; 68 | gemm(0,0,M,N,K,1,A,K,B,N,0,C,N); 69 | } 70 | */ 71 | -------------------------------------------------------------------------------- /src/swag.c: -------------------------------------------------------------------------------- 1 | #include "network.h" 2 | #include "detection_layer.h" 3 | #include "cost_layer.h" 4 | #include "utils.h" 5 | #include "parser.h" 6 | #include "box.h" 7 | 8 | #ifdef OPENCV 9 | #include "opencv2/highgui/highgui_c.h" 10 | #endif 11 | 12 | void train_swag(char *cfgfile, char *weightfile) 13 | { 14 | char *train_images = "data/voc.0712.trainval"; 15 | char *backup_directory = "/home/pjreddie/backup/"; 16 | srand(time(0)); 17 | data_seed = time(0); 18 | char *base = basecfg(cfgfile); 19 | printf("%s\n", base); 20 | float avg_loss = -1; 21 | network net = parse_network_cfg(cfgfile); 22 | if(weightfile){ 23 | load_weights(&net, weightfile); 24 | } 25 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); 26 | int imgs = net.batch*net.subdivisions; 27 | int i = *net.seen/imgs; 28 | data train, buffer; 29 | 30 | layer l = net.layers[net.n - 1]; 31 | 32 | int side = l.side; 33 | int classes = l.classes; 34 | float jitter = l.jitter; 35 | 36 | list *plist = get_paths(train_images); 37 | //int N = plist->size; 38 | char **paths = (char **)list_to_array(plist); 39 | 40 | load_args args = {0}; 41 | args.w = net.w; 42 | args.h = net.h; 43 | args.paths = paths; 44 | args.n = imgs; 45 | args.m = plist->size; 46 | args.classes = classes; 47 | args.jitter = jitter; 48 | args.num_boxes = side; 49 | args.d = &buffer; 50 | args.type = REGION_DATA; 51 | 52 | pthread_t load_thread = load_data_in_thread(args); 53 | clock_t time; 54 | //while(i*imgs < N*120){ 55 | while(get_current_batch(net) < net.max_batches){ 56 | i += 1; 57 | time=clock(); 58 | pthread_join(load_thread, 0); 59 | train = buffer; 60 | load_thread = load_data_in_thread(args); 61 | 62 | printf("Loaded: %lf seconds\n", sec(clock()-time)); 63 | 64 | time=clock(); 65 | float loss = train_network(net,
train); 66 | if (avg_loss < 0) avg_loss = loss; 67 | avg_loss = avg_loss*.9 + loss*.1; 68 | 69 | printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); 70 | if(i%1000==0 || i == 600){ 71 | char buff[256]; 72 | sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); 73 | save_weights(net, buff); 74 | } 75 | free_data(train); 76 | } 77 | char buff[256]; 78 | sprintf(buff, "%s/%s_final.weights", backup_directory, base); 79 | save_weights(net, buff); 80 | } 81 | 82 | void run_swag(int argc, char **argv) 83 | { 84 | if(argc < 4){ 85 | fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); 86 | return; 87 | } 88 | 89 | char *cfg = argv[3]; 90 | char *weights = (argc > 4) ? argv[4] : 0; 91 | if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); 92 | } 93 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | #include <stdio.h> 4 | #include <time.h> 5 | #include "list.h" 6 | 7 | #define SECRET_NUM -1234 8 | 9 | void shuffle(void *arr, size_t n, size_t size); 10 | void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); 11 | void free_ptrs(void **ptrs, int n); 12 | char *basecfg(char *cfgfile); 13 | int alphanum_to_int(char c); 14 | char int_to_alphanum(int i); 15 | void read_all(int fd, char *buffer, size_t bytes); 16 | void write_all(int fd, char *buffer, size_t bytes); 17 | char *find_replace(char *str, char *orig, char *rep); 18 | void error(const char *s); 19 | void malloc_error(); 20 | void file_error(char *s); 21 | void strip(char *s); 22 | void strip_char(char *s, char bad); 23 | void top_k(float *a, int n, int k, int *index); 24 | list *split_str(char *s, char delim); 25 | char *fgetl(FILE *fp); 26 | list *parse_csv_line(char *line); 27 | char *copy_string(char *s); 28 | int count_fields(char *line); 29 | float *parse_fields(char *line, int n); 30 | void normalize_array(float *a, int n); 31 | void scale_array(float *a, int n, float s); 32 | void translate_array(float *a, int n, float s); 33 | int max_index(float *a, int n); 34 | float constrain(float min, float max, float a); 35 | float mse_array(float *a, int n); 36 | float rand_normal(); 37 | float rand_uniform(); 38 | float sum_array(float *a, int n); 39 | float mean_array(float *a, int n); 40 | void mean_arrays(float **a, int n, int els, float *avg); 41 | float variance_array(float *a, int n); 42 | float mag_array(float *a, int n); 43 | float **one_hot_encode(float *a, int n, int k); 44 | float sec(clock_t clocks); 45 | int find_int_arg(int argc, char **argv, char *arg, int def); 46 | float find_float_arg(int argc, char **argv, char *arg, float def); 47 | int find_arg(int argc, char* argv[], char *arg); 48 | char *find_char_arg(int argc, char **argv, char *arg, char *def); 49 | 50 | #endif 51 | 52 | -------------------------------------------------------------------------------- /src/writing.c: -------------------------------------------------------------------------------- 1 | #include "network.h" 2 | #include "utils.h" 3 | #include "parser.h" 4 | 5 | #ifdef OPENCV 6 | #include "opencv2/highgui/highgui_c.h" 7 | #endif 8 | 9 | void train_writing(char *cfgfile, char *weightfile) 10 | { 11 | char *backup_directory = "/home/pjreddie/backup/"; 12 | data_seed = time(0); 13 | srand(time(0)); 14 | float avg_loss = -1; 15 | char *base = basecfg(cfgfile); 16
| printf("%s\n", base); 17 | network net = parse_network_cfg(cfgfile); 18 | if(weightfile){ 19 | load_weights(&net, weightfile); 20 | } 21 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); 22 | int imgs = net.batch*net.subdivisions; 23 | list *plist = get_paths("figures.list"); 24 | char **paths = (char **)list_to_array(plist); 25 | clock_t time; 26 | int N = plist->size; 27 | printf("N: %d\n", N); 28 | image out = get_network_image(net); 29 | 30 | data train, buffer; 31 | 32 | load_args args = {0}; 33 | args.w = net.w; 34 | args.h = net.h; 35 | args.out_w = out.w; 36 | args.out_h = out.h; 37 | args.paths = paths; 38 | args.n = imgs; 39 | args.m = N; 40 | args.d = &buffer; 41 | args.type = WRITING_DATA; 42 | 43 | pthread_t load_thread = load_data_in_thread(args); 44 | int epoch = (*net.seen)/N; 45 | while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ 46 | time=clock(); 47 | pthread_join(load_thread, 0); 48 | train = buffer; 49 | load_thread = load_data_in_thread(args); 50 | printf("Loaded %lf seconds\n",sec(clock()-time)); 51 | 52 | time=clock(); 53 | float loss = train_network(net, train); 54 | 55 | /* 56 | image pred = float_to_image(64, 64, 1, out); 57 | print_image(pred); 58 | */ 59 | 60 | /* 61 | image im = float_to_image(256, 256, 3, train.X.vals[0]); 62 | image lab = float_to_image(64, 64, 1, train.y.vals[0]); 63 | image pred = float_to_image(64, 64, 1, out); 64 | show_image(im, "image"); 65 | show_image(lab, "label"); 66 | print_image(lab); 67 | show_image(pred, "pred"); 68 | cvWaitKey(0); 69 | */ 70 | 71 | if(avg_loss == -1) avg_loss = loss; 72 | avg_loss = avg_loss*.9 + loss*.1; 73 | printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); 74 | free_data(train); 75 | if(get_current_batch(net)%100 == 0){ 76 | char buff[256]; 77 | sprintf(buff, "%s/%s_batch_%d.weights", backup_directory, base, get_current_batch(net)); 78 | save_weights(net, buff); 79 | } 80 | if(*net.seen/N > epoch){ 81 | epoch = *net.seen/N; 82 | char buff[256]; 83 | sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); 84 | save_weights(net, buff); 85 | } 86 | } 87 | } 88 | 89 | void test_writing(char *cfgfile, char *weightfile, char *filename) 90 | { 91 | network net = parse_network_cfg(cfgfile); 92 | if(weightfile){ 93 | load_weights(&net, weightfile); 94 | } 95 | set_batch_network(&net, 1); 96 | srand(2222222); 97 | clock_t time; 98 | char buff[256]; 99 | char *input = buff; 100 | while(1){ 101 | if(filename){ 102 | strncpy(input, filename, 256); 103 | }else{ 104 | printf("Enter Image Path: "); 105 | fflush(stdout); 106 | input = fgets(input, 256, stdin); 107 | if(!input) return; 108 | strtok(input, "\n"); 109 | } 110 | 111 | image im = load_image_color(input, 0, 0); 112 | resize_network(&net, im.w, im.h); 113 | printf("%d %d %d\n", im.h, im.w, im.c); 114 | float *X = im.data; 115 | time=clock(); 116 | network_predict(net, X); 117 | printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); 118 | image pred = get_network_image(net); 119 | 120 | image upsampled = resize_image(pred, im.w, im.h); 121 | image thresh = threshold_image(upsampled, .5); 122 | pred = thresh; 123 | 124 | show_image(pred, "prediction"); 125 | show_image(im, "orig"); 126 | #ifdef OPENCV 127 | cvWaitKey(0); 128 | cvDestroyAllWindows(); 129 | #endif 130 | 131 | free_image(upsampled); 132 | free_image(thresh); 133 | 
free_image(im); 134 | if (filename) break; 135 | } 136 | } 137 | 138 | void run_writing(int argc, char **argv) 139 | { 140 | if(argc < 4){ 141 | fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); 142 | return; 143 | } 144 | 145 | char *cfg = argv[3]; 146 | char *weights = (argc > 4) ? argv[4] : 0; 147 | char *filename = (argc > 5) ? argv[5] : 0; 148 | if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); 149 | else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); 150 | } 151 | 152 | -------------------------------------------------------------------------------- /src/yolo_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "network.h" 7 | #include "detection_layer.h" 8 | #include "cost_layer.h" 9 | #include "utils.h" 10 | #include "parser.h" 11 | #include "box.h" 12 | #include "image.h" 13 | #include 14 | } 15 | 16 | /* Change class number here */ 17 | #define CLS_NUM 2 18 | 19 | #ifdef OPENCV 20 | #include "opencv2/highgui/highgui.hpp" 21 | #include "opencv2/imgproc/imgproc.hpp" 22 | extern "C" IplImage* image_to_Ipl(image img, int w, int h, int depth, int c, int step); 23 | extern "C" image ipl_to_image(IplImage* src); 24 | extern "C" void convert_yolo_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); 25 | extern "C" void draw_yolo(image im, int num, float thresh, box *boxes, float **probs); 26 | 27 | extern "C" char *voc_names[]; 28 | extern "C" image voc_labels[]; 29 | 30 | static float **probs; 31 | static box *boxes; 32 | static network net; 33 | static image in ; 34 | static image in_s ; 35 | static image det ; 36 | static image det_s; 37 | static image disp ; 38 | static cv::VideoCapture cap; 39 | static cv::VideoWriter cap_out; 40 | static float fps = 0; 41 | static float demo_thresh = 0; 42 | static int w, h, depth, c, step= 0; 43 | static int MODE = -1; 44 | 45 | void *fetch_in_thread(void *ptr) 46 | { 47 | cv::Mat frame_m; 48 | cap >> frame_m; 49 | IplImage frame = frame_m; 50 | 51 | if(step == 0) 52 | { 53 | w = frame.width; 54 | h = frame.height; 55 | c = frame.nChannels; 56 | depth= frame.depth; 57 | step = frame.widthStep; 58 | } 59 | 60 | in = ipl_to_image(&frame); 61 | rgbgr_image(in); 62 | in_s = resize_image(in, net.w, net.h); 63 | return 0; 64 | } 65 | 66 | void *detect_in_thread(void *ptr) 67 | { 68 | float nms = .4; 69 | 70 | detection_layer l = net.layers[net.n-1]; 71 | float *X = det_s.data; 72 | float *predictions = network_predict(net, X); 73 | free_image(det_s); 74 | convert_yolo_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, demo_thresh, probs, boxes, 0); 75 | if (nms > 0) do_nms(boxes, probs, l.side*l.side*l.n, l.classes, nms); 76 | printf("\033[2J"); 77 | printf("\033[1;1H"); 78 | printf("\nFPS:%.0f\n",fps); 79 | printf("Objects:\n\n"); 80 | draw_detections(det, l.side*l.side*l.n, demo_thresh, boxes, probs, voc_names, voc_labels, CLS_NUM); 81 | 82 | if(MODE == 1) 83 | { 84 | IplImage* outputIpl= image_to_Ipl(det, w, h, depth, c, step); 85 | cv::Mat outputMat = cv::cvarrToMat(outputIpl, true); 86 | /* 87 | cvNamedWindow("image", CV_WINDOW_AUTOSIZE); 88 | cvShowImage("image", outputIpl); 89 | cvWaitKey(1); 90 | */ 91 | cvReleaseImage(&outputIpl); 92 | cap_out << outputMat; 93 | outputMat.release(); 94 | } 95 | 96 | 
return 0; 97 | } 98 | 99 | extern "C" void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index, char *videofile) 100 | { 101 | demo_thresh = thresh; 102 | printf("YOLO demo\n"); 103 | net = parse_network_cfg(cfgfile); 104 | if(weightfile){ 105 | load_weights(&net, weightfile); 106 | } 107 | set_batch_network(&net, 1); 108 | 109 | srand(2222222); 110 | 111 | if(cam_index != -1) 112 | { 113 | MODE = 0; 114 | cv::VideoCapture cam(cam_index); 115 | cap = cam; 116 | if(!cap.isOpened()) error("Couldn't connect to webcam.\n"); 117 | } 118 | else 119 | { 120 | MODE = 1; 121 | printf("Video File name is: %s\n", videofile); 122 | cv::VideoCapture videoCap(videofile); 123 | cap = videoCap; 124 | if(!cap.isOpened()) error("Couldn't read video file.\n"); 125 | 126 | cv::Size S = cv::Size((int)videoCap.get(CV_CAP_PROP_FRAME_WIDTH), (int)videoCap.get(CV_CAP_PROP_FRAME_HEIGHT)); 127 | cv::VideoWriter outputVideo("out.avi", CV_FOURCC('D','I','V','X'), videoCap.get(CV_CAP_PROP_FPS), S, true); 128 | if(!outputVideo.isOpened()) error("Couldn't write video file.\n"); 129 | cap_out = outputVideo; 130 | } 131 | 132 | detection_layer l = net.layers[net.n-1]; 133 | int j; 134 | 135 | boxes = (box *)calloc(l.side*l.side*l.n, sizeof(box)); 136 | probs = (float **)calloc(l.side*l.side*l.n, sizeof(float *)); 137 | for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *)); 138 | 139 | pthread_t fetch_thread; 140 | pthread_t detect_thread; 141 | 142 | fetch_in_thread(0); 143 | det = in; 144 | det_s = in_s; 145 | 146 | fetch_in_thread(0); 147 | detect_in_thread(0); 148 | disp = det; 149 | det = in; 150 | det_s = in_s; 151 | 152 | while(1){ 153 | struct timeval tval_before, tval_after, tval_result; 154 | gettimeofday(&tval_before, NULL); 155 | if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); 156 | if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); 157 | show_image(disp, "YOLO"); 158 | free_image(disp); 159 | cvWaitKey(1); 160 | pthread_join(fetch_thread, 0); 161 | pthread_join(detect_thread, 0); 162 | 163 | disp = det; 164 | det = in; 165 | det_s = in_s; 166 | 167 | gettimeofday(&tval_after, NULL); 168 | timersub(&tval_after, &tval_before, &tval_result); 169 | float curr = 1000000.f/((long int)tval_result.tv_usec); 170 | fps = .9*fps + .1*curr; 171 | } 172 | } 173 | #else 174 | extern "C" void demo_yolo(char *cfgfile, char *weightfile, float thresh, int cam_index){ 175 | fprintf(stderr, "YOLO demo needs OpenCV for webcam images.\n"); 176 | } 177 | #endif 178 | --------------------------------------------------------------------------------
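
The headers shown above (parser.h, network.h, utils.h, image.h) form the library's high-level inference API, but no single file demonstrates them end to end. The following is a rough usage sketch only, not a file from the repository: it is assembled from the declarations shown here and the pattern used by test_writing() in writing.c, and the cfg, weights, and image paths are placeholders.

#include <stdio.h>
#include "network.h"
#include "parser.h"
#include "utils.h"
#include "image.h"

int main(void)
{
    /* Build the network from a config file and load trained weights
       (both paths are placeholders). */
    network net = parse_network_cfg("cfg/yolo-tiny.cfg");
    load_weights(&net, "yolo-tiny.weights");
    set_batch_network(&net, 1);            /* one image per forward pass */

    /* Load an image, resize the network to it, and run a forward pass. */
    image im = load_image_color("data/dog.jpg", 0, 0);
    resize_network(&net, im.w, im.h);
    float *predictions = network_predict(net, im.data);

    /* For classification-style outputs, report the highest-scoring index. */
    int best = max_index(predictions, get_network_output_size(net));
    printf("top class index: %d\n", best);

    free_image(im);
    free_network(net);
    return 0;
}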