├── src ├── classifier.h ├── layer.h ├── demo.h ├── parser.h ├── list.h ├── tree.h ├── box.h ├── matrix.h ├── col2im.h ├── im2col.h ├── l2norm_layer.h ├── cuda.h ├── logistic_layer.h ├── option_list.h ├── upsample_layer.h ├── activation_layer.h ├── region_layer.h ├── batchnorm_layer.h ├── shortcut_layer.h ├── crop_layer.h ├── lstm_layer.h ├── reorg_layer.h ├── route_layer.h ├── dropout_layer.h ├── yolo_layer.h ├── detection_layer.h ├── cost_layer.h ├── softmax_layer.h ├── gru_layer.h ├── avgpool_layer.h ├── network.h ├── rnn_layer.h ├── maxpool_layer.h ├── normalization_layer.h ├── crnn_layer.h ├── connected_layer.h ├── deconvolutional_layer.h ├── gemm.h ├── local_layer.h ├── dropout_layer_kernels.cu ├── col2im.c ├── im2col.c ├── list.c ├── dropout_layer.c ├── utils.h ├── avgpool_layer_kernels.cu ├── activation_layer.c ├── l2norm_layer.c ├── avgpool_layer.c ├── data.h ├── logistic_layer.c ├── convolutional_layer.h ├── im2col_kernels.cu ├── col2im_kernels.cu ├── image.h ├── activations.h ├── crop_layer.c ├── shortcut_layer.c ├── maxpool_layer_kernels.cu ├── upsample_layer.c ├── option_list.c ├── softmax_layer.c ├── activations.c ├── tree.c ├── maxpool_layer.c ├── route_layer.c ├── layer.c ├── cuda.c ├── matrix.c ├── deconvolutional_kernels.cu └── reorg_layer.c ├── jiyan ├── data │ ├── yolo.names │ ├── train │ │ ├── 1530942834.jpg │ │ ├── 1530942839.jpg │ │ ├── 1530942839.txt │ │ └── 1530942834.txt │ ├── valid │ │ ├── 1530951687.jpg │ │ ├── 1530951693.jpg │ │ ├── 1530951687.txt │ │ ├── 1530951693.txt │ │ ├── 1530951693.xml │ │ └── 1530951687.xml │ ├── train.txt │ └── valid.txt └── get_pic.py ├── darknet ├── libdarknet.a ├── libdarknet.so ├── python ├── valid.txt ├── crack.jpg ├── valid │ ├── verifyCode1531873508.jpg │ └── verifyCode1531873518.jpg ├── hanzi_img │ ├── 15342939838998_label.jpg │ ├── 15342939839007_label.jpg │ ├── 15342939839016_label.jpg │ └── 15342939839026_label.jpg ├── segment.py ├── crack_pro.py └── recog_order.py ├── doc_images └── gsxt.png ├── 
chinese_classify ├── data │ ├── train │ │ ├── 15308364989589_u6709.jpg │ │ └── 15308364989591_u4e4b.jpg │ ├── valid │ │ ├── 15310991235992_u6795.jpg │ │ └── 15310991238021_u9002.jpg │ ├── train.list │ └── valid.list └── label_hanzi.py ├── cfg ├── yolov3.data ├── yolo-origin.data ├── chinese.data ├── chinese_character.cfg └── yolo-origin.cfg ├── scripts ├── gen_tactic.sh ├── imagenet_label.sh ├── dice_label.sh ├── get_coco_dataset.sh └── voc_label.py ├── tools ├── list.py ├── voc_label.py ├── generate_anchorsv2.py └── generate_anchorsv3.py ├── results └── anchor.txt ├── LICENSE.meta ├── LICENSE.v1 ├── LICENSE.fuck ├── LICENSE ├── README.md ├── examples ├── detector.py ├── art.c ├── swag.c ├── dice.c ├── super.c ├── tag.c ├── writing.c └── voxel.c ├── LICENSE.mit ├── install-opencv.sh └── Makefile /src/classifier.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /jiyan/data/yolo.names: -------------------------------------------------------------------------------- 1 | hanzi -------------------------------------------------------------------------------- /src/layer.h: -------------------------------------------------------------------------------- 1 | #include "darknet.h" 2 | -------------------------------------------------------------------------------- /darknet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/darknet -------------------------------------------------------------------------------- /libdarknet.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/libdarknet.a -------------------------------------------------------------------------------- /libdarknet.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/libdarknet.so -------------------------------------------------------------------------------- /python/valid.txt: -------------------------------------------------------------------------------- 1 | verifyCode1531873508.jpg--哺乳动物 2 | verifyCode1531873518.jpg--应届毕业生 3 | -------------------------------------------------------------------------------- /python/crack.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/python/crack.jpg -------------------------------------------------------------------------------- /src/demo.h: -------------------------------------------------------------------------------- 1 | #ifndef DEMO_H 2 | #define DEMO_H 3 | 4 | #include "image.h" 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /doc_images/gsxt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/doc_images/gsxt.png -------------------------------------------------------------------------------- /jiyan/data/train/1530942834.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/jiyan/data/train/1530942834.jpg -------------------------------------------------------------------------------- /jiyan/data/train/1530942839.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/jiyan/data/train/1530942839.jpg -------------------------------------------------------------------------------- /jiyan/data/valid/1530951687.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/jiyan/data/valid/1530951687.jpg 
-------------------------------------------------------------------------------- /jiyan/data/valid/1530951693.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/jiyan/data/valid/1530951693.jpg -------------------------------------------------------------------------------- /jiyan/data/train.txt: -------------------------------------------------------------------------------- 1 | /home/geng/darknet/jiyan/data/train/1530942834.jpg 2 | /home/geng/darknet/jiyan/data/train/1530942839.jpg 3 | -------------------------------------------------------------------------------- /jiyan/data/valid.txt: -------------------------------------------------------------------------------- 1 | /home/geng/darknet/jiyan/data/valid/1530951687.jpg 2 | /home/geng/darknet/jiyan/data/valid/1530951693.jpg 3 | -------------------------------------------------------------------------------- /python/valid/verifyCode1531873508.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/python/valid/verifyCode1531873508.jpg -------------------------------------------------------------------------------- /python/valid/verifyCode1531873518.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/python/valid/verifyCode1531873518.jpg -------------------------------------------------------------------------------- /python/hanzi_img/15342939838998_label.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/python/hanzi_img/15342939838998_label.jpg -------------------------------------------------------------------------------- /python/hanzi_img/15342939839007_label.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/python/hanzi_img/15342939839007_label.jpg -------------------------------------------------------------------------------- /python/hanzi_img/15342939839016_label.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/python/hanzi_img/15342939839016_label.jpg -------------------------------------------------------------------------------- /python/hanzi_img/15342939839026_label.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/python/hanzi_img/15342939839026_label.jpg -------------------------------------------------------------------------------- /chinese_classify/data/train/15308364989589_u6709.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/chinese_classify/data/train/15308364989589_u6709.jpg -------------------------------------------------------------------------------- /chinese_classify/data/train/15308364989591_u4e4b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/chinese_classify/data/train/15308364989591_u4e4b.jpg -------------------------------------------------------------------------------- /chinese_classify/data/valid/15310991235992_u6795.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/chinese_classify/data/valid/15310991235992_u6795.jpg -------------------------------------------------------------------------------- /chinese_classify/data/valid/15310991238021_u9002.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RunningGump/gsxt_captcha/HEAD/chinese_classify/data/valid/15310991238021_u9002.jpg -------------------------------------------------------------------------------- /cfg/yolov3.data: -------------------------------------------------------------------------------- 1 | classes= 1 2 | train = jiyan/data/train.txt 3 | valid = jiyan/data/valid.txt 4 | names = /home/geng/darknet/jiyan/data/yolo.names 5 | backup = jiyan/backup 6 | 7 | -------------------------------------------------------------------------------- /cfg/yolo-origin.data: -------------------------------------------------------------------------------- 1 | classes= 1 2 | train = jiyan/data/train.txt 3 | valid = jiyan/data/valid.txt 4 | names = /home/geng/darknet/jiyan/data/yolo.names 5 | backup = jiyan/backup 6 | 7 | -------------------------------------------------------------------------------- /chinese_classify/data/train.list: -------------------------------------------------------------------------------- 1 | /home/geng/darknet/chinese_classify/data/train/15308364989589_u6709.jpg 2 | /home/geng/darknet/chinese_classify/data/train/15308364989591_u4e4b.jpg 3 | -------------------------------------------------------------------------------- /chinese_classify/data/valid.list: -------------------------------------------------------------------------------- 1 | /home/geng/darknet/chinese_classify/data/valid/15310991235992_u6795.jpg 2 | /home/geng/darknet/chinese_classify/data/valid/15310991238021_u9002.jpg 3 | -------------------------------------------------------------------------------- /scripts/gen_tactic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # wget http://pjreddie.com/media/files/peek.weights 4 | # scripts/gen_tactic.sh < data/goal.txt 5 | ./darknet rnn generatetactic cfg/gru.cfg peek.weights 2>/dev/null 6 | 
-------------------------------------------------------------------------------- /cfg/chinese.data: -------------------------------------------------------------------------------- 1 | classes=3604 2 | train = chinese_classify/data/train.list 3 | valid = chinese_classify/data/valid.list 4 | labels = /home/geng/darknet/chinese_classify/data/labels.txt 5 | backup = chinese_classify/backup 6 | top=100 7 | -------------------------------------------------------------------------------- /src/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | #include "darknet.h" 4 | #include "network.h" 5 | 6 | void save_network(network net, char *filename); 7 | void save_weights_double(network net, char *filename); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /src/list.h: -------------------------------------------------------------------------------- 1 | #ifndef LIST_H 2 | #define LIST_H 3 | #include "darknet.h" 4 | 5 | list *make_list(); 6 | int list_find(list *l, void *val); 7 | 8 | void list_insert(list *, void *); 9 | 10 | 11 | void free_list_contents(list *l); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/tree.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_H 2 | #define TREE_H 3 | #include "darknet.h" 4 | 5 | int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); 6 | float get_hierarchy_probability(float *x, tree *hier, int c, int stride); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /src/box.h: -------------------------------------------------------------------------------- 1 | #ifndef BOX_H 2 | #define BOX_H 3 | #include "darknet.h" 4 | 5 | typedef struct{ 6 | float dx, dy, dw, dh; 7 | } dbox; 8 | 9 | float box_rmse(box a, box b); 10 | 
dbox diou(box a, box b); 11 | box decode_box(box b, box anchor); 12 | box encode_box(box b, box anchor); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /jiyan/data/train/1530942839.txt: -------------------------------------------------------------------------------- 1 | 0 0.09447674418604651 0.1328125 0.13662790697674418 0.125 2 | 0 0.5145348837209303 0.609375 0.20348837209302326 0.15625 3 | 0 0.8895348837209303 0.21354166666666666 0.18023255813953487 0.15104166666666666 4 | 0 0.8197674418604651 0.7708333333333333 0.20348837209302326 0.17708333333333331 5 | -------------------------------------------------------------------------------- /src/matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H 2 | #define MATRIX_H 3 | #include "darknet.h" 4 | 5 | matrix copy_matrix(matrix m); 6 | void print_matrix(matrix m); 7 | 8 | matrix hold_out_matrix(matrix *m, int n); 9 | matrix resize_matrix(matrix m, int size); 10 | 11 | float *pop_column(matrix *m, int c); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /jiyan/data/valid/1530951687.txt: -------------------------------------------------------------------------------- 1 | 0 0.38226744186046513 0.19401041666666666 0.18313953488372092 0.18489583333333331 2 | 0 0.6831395348837209 0.22916666666666666 0.18023255813953487 0.140625 3 | 0 0.8401162790697674 0.56640625 0.19767441860465115 0.1796875 4 | 0 0.6395348837209303 0.7747395833333333 0.22093023255813954 0.1640625 5 | -------------------------------------------------------------------------------- /jiyan/data/valid/1530951693.txt: -------------------------------------------------------------------------------- 1 | 0 0.38372093023255816 0.21614583333333331 0.1686046511627907 0.17708333333333331 2 | 0 0.23401162790697674 0.47526041666666663 0.17151162790697674 0.18489583333333331 3 | 0 0.5523255813953488 
0.44791666666666663 0.13953488372093023 0.140625 4 | 0 0.2238372093023256 0.8098958333333333 0.18023255813953487 0.13020833333333331 5 | -------------------------------------------------------------------------------- /tools/list.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | IMG_DIR = "/home/lihui/Projects/ai/captcha_crack/jiyan/images" 4 | def run(): 5 | files = os.listdir(IMG_DIR) 6 | for f in files: 7 | if f.endswith("jpeg"): 8 | path = os.path.join(IMG_DIR, f) 9 | print(path) 10 | 11 | 12 | if __name__ == '__main__': 13 | run() -------------------------------------------------------------------------------- /results/anchor.txt: -------------------------------------------------------------------------------- 1 | 65.51749965323431, 50.49991716367795, 72.64219125903084, 64.91186242815965, 88.97009966777478, 75.17609771181226, 55.56961101262676, 55.019245142005275, 64.27906976743384, 67.55792682926086, 81.9873071071471, 63.621028037382146, 73.89348430402681, 56.44545675413395, 76.68315536234596, 73.0964912280708, 64.90395128093877, 59.29117811279056 2 | 0.926015 3 | -------------------------------------------------------------------------------- /LICENSE.meta: -------------------------------------------------------------------------------- 1 | META-LICENSE 2 | Version 1, June 21 2017 3 | 4 | Any and all licenses may be applied to the software either individually 5 | or in concert. Any issues, ambiguities, paradoxes, or metaphysical quandries 6 | arising from this combination should be discussed with a local faith leader, 7 | hermit, or guru. The Oxford comma shall be used. 
8 | 9 | -------------------------------------------------------------------------------- /src/col2im.h: -------------------------------------------------------------------------------- 1 | #ifndef COL2IM_H 2 | #define COL2IM_H 3 | 4 | void col2im_cpu(float* data_col, 5 | int channels, int height, int width, 6 | int ksize, int stride, int pad, float* data_im); 7 | 8 | #ifdef GPU 9 | void col2im_gpu(float *data_col, 10 | int channels, int height, int width, 11 | int ksize, int stride, int pad, float *data_im); 12 | #endif 13 | #endif 14 | -------------------------------------------------------------------------------- /scripts/imagenet_label.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p labelled 4 | wd=`pwd` 5 | 6 | for f in val/*.xml; 7 | do 8 | label=`grep -m1 "<name>" $f | grep -oP '<name>\K[^<]*'` 9 | im=`echo $f | sed 's/val/imgs/; s/xml/JPEG/'` 10 | out=`echo $im | sed 's/JPEG/'${label}'.JPEG/; s/imgs/labelled/'` 11 | ln -s ${wd}/$im ${wd}/$out 12 | done 13 | 14 | find ${wd}/labelled -name \*.JPEG > inet.val.list 15 | 16 | -------------------------------------------------------------------------------- /src/im2col.h: -------------------------------------------------------------------------------- 1 | #ifndef IM2COL_H 2 | #define IM2COL_H 3 | 4 | void im2col_cpu(float* data_im, 5 | int channels, int height, int width, 6 | int ksize, int stride, int pad, float* data_col); 7 | 8 | #ifdef GPU 9 | 10 | void im2col_gpu(float *im, 11 | int channels, int height, int width, 12 | int ksize, int stride, int pad,float *data_col); 13 | 14 | #endif 15 | #endif 16 | -------------------------------------------------------------------------------- /jiyan/data/train/1530942834.txt: -------------------------------------------------------------------------------- 1 | 0 0.15261627906976744 0.78125 0.2005813953488372 0.16666666666666666 2 | 0 0.34011627906976744 0.28385416666666663 0.19186046511627908 0.13541666666666666 3
| 0 0.5668604651162791 0.24479166666666666 0.19767441860465115 0.18229166666666666 4 | 0 0.877906976744186 0.171875 0.18023255813953487 0.13541666666666666 5 | 0 0.8459302325581395 0.5065104166666666 0.19767441860465115 0.1640625 6 | -------------------------------------------------------------------------------- /src/l2norm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef L2NORM_LAYER_H 2 | #define L2NORM_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | layer make_l2norm_layer(int batch, int inputs); 7 | void forward_l2norm_layer(const layer l, network net); 8 | void backward_l2norm_layer(const layer l, network net); 9 | 10 | #ifdef GPU 11 | void forward_l2norm_layer_gpu(const layer l, network net); 12 | void backward_l2norm_layer_gpu(const layer l, network net); 13 | #endif 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/cuda.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDA_H 2 | #define CUDA_H 3 | 4 | #include "darknet.h" 5 | 6 | #ifdef GPU 7 | 8 | void check_error(cudaError_t status); 9 | cublasHandle_t blas_handle(); 10 | int *cuda_make_int_array(int *x, size_t n); 11 | void cuda_random(float *x_gpu, size_t n); 12 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s); 13 | dim3 cuda_gridsize(size_t n); 14 | 15 | #ifdef CUDNN 16 | cudnnHandle_t cudnn_handle(); 17 | #endif 18 | 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /src/logistic_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGISTIC_LAYER_H 2 | #define LOGISTIC_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | layer make_logistic_layer(int batch, int inputs); 7 | void forward_logistic_layer(const layer l, network net); 8 | void backward_logistic_layer(const layer l, network net); 9 | 10 
| #ifdef GPU 11 | void forward_logistic_layer_gpu(const layer l, network net); 12 | void backward_logistic_layer_gpu(const layer l, network net); 13 | #endif 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/option_list.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTION_LIST_H 2 | #define OPTION_LIST_H 3 | #include "list.h" 4 | 5 | typedef struct{ 6 | char *key; 7 | char *val; 8 | int used; 9 | } kvp; 10 | 11 | 12 | int read_option(char *s, list *options); 13 | void option_insert(list *l, char *key, char *val); 14 | char *option_find(list *l, char *key); 15 | float option_find_float(list *l, char *key, float def); 16 | float option_find_float_quiet(list *l, char *key, float def); 17 | void option_unused(list *l); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /LICENSE.v1: -------------------------------------------------------------------------------- 1 | YOLO LICENSE 2 | Version 1, July 10 2015 3 | 4 | THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER 5 | SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN 6 | TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES 7 | LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO 8 | THE FOLLOWING CONDITIONS: 9 | 10 | 1. #yolo 11 | 2. #swag 12 | 3. #blazeit 13 | 14 | -------------------------------------------------------------------------------- /LICENSE.fuck: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 
9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 14 | -------------------------------------------------------------------------------- /src/upsample_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef UPSAMPLE_LAYER_H 2 | #define UPSAMPLE_LAYER_H 3 | #include "darknet.h" 4 | 5 | layer make_upsample_layer(int batch, int w, int h, int c, int stride); 6 | void forward_upsample_layer(const layer l, network net); 7 | void backward_upsample_layer(const layer l, network net); 8 | void resize_upsample_layer(layer *l, int w, int h); 9 | 10 | #ifdef GPU 11 | void forward_upsample_layer_gpu(const layer l, network net); 12 | void backward_upsample_layer_gpu(const layer l, network net); 13 | #endif 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/activation_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATION_LAYER_H 2 | #define ACTIVATION_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation); 9 | 10 | void forward_activation_layer(layer l, network net); 11 | void backward_activation_layer(layer l, network net); 12 | 13 | #ifdef GPU 14 | void forward_activation_layer_gpu(layer l, network net); 15 | void backward_activation_layer_gpu(layer l, network net); 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YOLO LICENSE 2 | Version 2, July 29 2016 3 | 4 | THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER 5 | SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN 6 | 
TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES 7 | LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY. NOW HERE'S 8 | THE REAL LICENSE: 9 | 10 | 0. Darknet is public domain. 11 | 1. Do whatever you want with it. 12 | 2. Stop emailing me about it! 13 | -------------------------------------------------------------------------------- /chinese_classify/label_hanzi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib.pyplot as plt 3 | import matplotlib.image as mpimg 4 | import json 5 | plt.ion() 6 | pic_list = os.listdir('./old_img') 7 | for pic in pic_list: 8 | img = mpimg.imread('/home/geng/darknet/chinese_classify/old_img/' + pic) 9 | plt.imshow(img) 10 | character = input('输入正确的汉字:') 11 | plt.close() 12 | unicod = json.dumps(character) 13 | print(unicod) 14 | os.rename('./old_img/' + pic, 15 | './new_img/' + pic[0:15] + unicod[2:7] + '.jpg') 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /src/region_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REGION_LAYER_H 2 | #define REGION_LAYER_H 3 | 4 | #include "darknet.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_region_layer(int batch, int w, int h, int n, int classes, int coords); 9 | void forward_region_layer(const layer l, network net); 10 | void backward_region_layer(const layer l, network net); 11 | void resize_region_layer(layer *l, int w, int h); 12 | 13 | #ifdef GPU 14 | void forward_region_layer_gpu(const layer l, network net); 15 | void backward_region_layer_gpu(layer l, network net); 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCHNORM_LAYER_H 2 | #define BATCHNORM_LAYER_H 3 | 4 | 
#include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_batchnorm_layer(int batch, int w, int h, int c); 9 | void forward_batchnorm_layer(layer l, network net); 10 | void backward_batchnorm_layer(layer l, network net); 11 | 12 | #ifdef GPU 13 | void forward_batchnorm_layer_gpu(layer l, network net); 14 | void backward_batchnorm_layer_gpu(layer l, network net); 15 | void pull_batchnorm_layer(layer l); 16 | void push_batchnorm_layer(layer l); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef SHORTCUT_LAYER_H 2 | #define SHORTCUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); 8 | void forward_shortcut_layer(const layer l, network net); 9 | void backward_shortcut_layer(const layer l, network net); 10 | void resize_shortcut_layer(layer *l, int w, int h); 11 | 12 | #ifdef GPU 13 | void forward_shortcut_layer_gpu(const layer l, network net); 14 | void backward_shortcut_layer_gpu(const layer l, network net); 15 | #endif 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/crop_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CROP_LAYER_H 2 | #define CROP_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | typedef layer crop_layer; 9 | 10 | image get_crop_image(crop_layer l); 11 | crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); 12 | void forward_crop_layer(const crop_layer l, network net); 13 | void resize_crop_layer(layer *l, int w, int h); 14 | 15 | #ifdef GPU 16 | void forward_crop_layer_gpu(crop_layer l, 
network net); 17 | #endif 18 | 19 | #endif 20 | 21 | -------------------------------------------------------------------------------- /src/lstm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LSTM_LAYER_H 2 | #define LSTM_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | #define USET 8 | 9 | layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); 10 | 11 | void forward_lstm_layer(layer l, network net); 12 | void update_lstm_layer(layer l, update_args a); 13 | 14 | #ifdef GPU 15 | void forward_lstm_layer_gpu(layer l, network net); 16 | void backward_lstm_layer_gpu(layer l, network net); 17 | void update_lstm_layer_gpu(layer l, update_args a); 18 | 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /src/reorg_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REORG_LAYER_H 2 | #define REORG_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra); 10 | void resize_reorg_layer(layer *l, int w, int h); 11 | void forward_reorg_layer(const layer l, network net); 12 | void backward_reorg_layer(const layer l, network net); 13 | 14 | #ifdef GPU 15 | void forward_reorg_layer_gpu(layer l, network net); 16 | void backward_reorg_layer_gpu(layer l, network net); 17 | #endif 18 | 19 | #endif 20 | 21 | -------------------------------------------------------------------------------- /src/route_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ROUTE_LAYER_H 2 | #define ROUTE_LAYER_H 3 | #include "network.h" 4 | #include "layer.h" 5 | 6 | typedef layer route_layer; 7 | 8 | route_layer make_route_layer(int batch, int n, int 
*input_layers, int *input_size); 9 | void forward_route_layer(const route_layer l, network net); 10 | void backward_route_layer(const route_layer l, network net); 11 | void resize_route_layer(route_layer *l, network *net); 12 | 13 | #ifdef GPU 14 | void forward_route_layer_gpu(const route_layer l, network net); 15 | void backward_route_layer_gpu(const route_layer l, network net); 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/dropout_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DROPOUT_LAYER_H 2 | #define DROPOUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer dropout_layer; 8 | 9 | dropout_layer make_dropout_layer(int batch, int inputs, float probability); 10 | 11 | void forward_dropout_layer(dropout_layer l, network net); 12 | void backward_dropout_layer(dropout_layer l, network net); 13 | void resize_dropout_layer(dropout_layer *l, int inputs); 14 | 15 | #ifdef GPU 16 | void forward_dropout_layer_gpu(dropout_layer l, network net); 17 | void backward_dropout_layer_gpu(dropout_layer l, network net); 18 | 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /src/yolo_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_LAYER_H 2 | #define YOLO_LAYER_H 3 | 4 | #include "darknet.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); 9 | void forward_yolo_layer(const layer l, network net); 10 | void backward_yolo_layer(const layer l, network net); 11 | void resize_yolo_layer(layer *l, int w, int h); 12 | int yolo_num_detections(layer l, float thresh); 13 | 14 | #ifdef GPU 15 | void forward_yolo_layer_gpu(const layer l, network net); 16 | void backward_yolo_layer_gpu(layer l, network net); 17 | #endif 18 | 
19 | #endif 20 | -------------------------------------------------------------------------------- /src/detection_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DETECTION_LAYER_H 2 | #define DETECTION_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer detection_layer; 8 | 9 | detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); 10 | void forward_detection_layer(const detection_layer l, network net); 11 | void backward_detection_layer(const detection_layer l, network net); 12 | 13 | #ifdef GPU 14 | void forward_detection_layer_gpu(const detection_layer l, network net); 15 | void backward_detection_layer_gpu(detection_layer l, network net); 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 破解极验点击验证码 2 | 3 | [国家企业信用信息公示系统](http://www.gsxt.gov.cn/index.html)中的验证码是按语序点击汉字,如下图所示: 4 | 5 | ![验证码](./doc_images/gsxt.png) 6 | 7 | 即,如果依次点击:‘无’,‘意’,‘中’,‘发’,‘现’,就会通过验证。 8 | 9 | 本项目的**破解思路**主要分为以下步骤: 10 | 11 | 1. 使用目标探测网络YOLOV2进行**汉字定位** 12 | 2. 设计算法进行**汉字切割** 13 | 3. 使用darknet的分类器进行**汉字识别** 14 | 4. 
设计算法进行**汉字纠错与语序识别** 15 | 16 | 说明:该项目对于破解5个字及5个字以下的验证码效果不错,本人测试了400张图片正确率达到了0.85。 17 | 18 | 这里提供一个提升破解速度和准确率的思路:在以后破解gsxt网站的时候,可以将识别正确的词语加入到字典库中,这样随着时间的推移,正确率和速度都会越来越高。 19 | 20 | # 免责声明 21 | 22 | **该项目仅用于学术交流,不得任何商业使用!** 23 | 24 | **如果该项目对您有帮助,记得点一个star哦!!** 25 | 26 | 详细文档请见[个人博客](https://runninggump.github.io/2018/11/19/破解含语序问题的点击验证码) 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/cost_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef COST_LAYER_H 2 | #define COST_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | typedef layer cost_layer; 7 | 8 | COST_TYPE get_cost_type(char *s); 9 | char *get_cost_string(COST_TYPE a); 10 | cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); 11 | void forward_cost_layer(const cost_layer l, network net); 12 | void backward_cost_layer(const cost_layer l, network net); 13 | void resize_cost_layer(cost_layer *l, int inputs); 14 | 15 | #ifdef GPU 16 | void forward_cost_layer_gpu(cost_layer l, network net); 17 | void backward_cost_layer_gpu(const cost_layer l, network net); 18 | #endif 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/softmax_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef SOFTMAX_LAYER_H 2 | #define SOFTMAX_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | typedef layer softmax_layer; 7 | 8 | void softmax_array(float *input, int n, float temp, float *output); 9 | softmax_layer make_softmax_layer(int batch, int inputs, int groups); 10 | void forward_softmax_layer(const softmax_layer l, network net); 11 | void backward_softmax_layer(const softmax_layer l, network net); 12 | 13 | #ifdef GPU 14 | void pull_softmax_layer_output(const softmax_layer l); 15 | void forward_softmax_layer_gpu(const softmax_layer l, network net); 16 | void 
backward_softmax_layer_gpu(const softmax_layer l, network net); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/gru_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef GRU_LAYER_H 3 | #define GRU_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam); 10 | 11 | void forward_gru_layer(layer l, network state); 12 | void backward_gru_layer(layer l, network state); 13 | void update_gru_layer(layer l, update_args a); 14 | 15 | #ifdef GPU 16 | void forward_gru_layer_gpu(layer l, network state); 17 | void backward_gru_layer_gpu(layer l, network state); 18 | void update_gru_layer_gpu(layer l, update_args a); 19 | void push_gru_layer(layer l); 20 | void pull_gru_layer(layer l); 21 | #endif 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src/avgpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef AVGPOOL_LAYER_H 2 | #define AVGPOOL_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | typedef layer avgpool_layer; 10 | 11 | image get_avgpool_image(avgpool_layer l); 12 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); 13 | void resize_avgpool_layer(avgpool_layer *l, int w, int h); 14 | void forward_avgpool_layer(const avgpool_layer l, network net); 15 | void backward_avgpool_layer(const avgpool_layer l, network net); 16 | 17 | #ifdef GPU 18 | void forward_avgpool_layer_gpu(avgpool_layer l, network net); 19 | void backward_avgpool_layer_gpu(avgpool_layer l, network net); 20 | #endif 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /src/network.h: 
-------------------------------------------------------------------------------- 1 | // Oh boy, why am I about to do this.... 2 | #ifndef NETWORK_H 3 | #define NETWORK_H 4 | #include "darknet.h" 5 | 6 | #include "image.h" 7 | #include "layer.h" 8 | #include "data.h" 9 | #include "tree.h" 10 | 11 | 12 | #ifdef GPU 13 | void pull_network_output(network *net); 14 | #endif 15 | 16 | void compare_networks(network *n1, network *n2, data d); 17 | char *get_layer_string(LAYER_TYPE a); 18 | 19 | network *make_network(int n); 20 | 21 | 22 | float network_accuracy_multi(network *net, data d, int n); 23 | int get_predicted_class_network(network *net); 24 | void print_network(network *net); 25 | int resize_network(network *net, int w, int h); 26 | void calc_network_cost(network *net); 27 | 28 | #endif 29 | 30 | -------------------------------------------------------------------------------- /scripts/dice_label.sh: -------------------------------------------------------------------------------- 1 | mkdir -p images 2 | mkdir -p images/orig 3 | mkdir -p images/train 4 | mkdir -p images/val 5 | 6 | ffmpeg -i Face1.mp4 images/orig/face1_%6d.jpg 7 | ffmpeg -i Face2.mp4 images/orig/face2_%6d.jpg 8 | ffmpeg -i Face3.mp4 images/orig/face3_%6d.jpg 9 | ffmpeg -i Face4.mp4 images/orig/face4_%6d.jpg 10 | ffmpeg -i Face5.mp4 images/orig/face5_%6d.jpg 11 | ffmpeg -i Face6.mp4 images/orig/face6_%6d.jpg 12 | 13 | mogrify -resize 100x100^ -gravity center -crop 100x100+0+0 +repage images/orig/* 14 | 15 | ls images/orig/* | shuf | head -n 1000 | xargs mv -t images/val 16 | mv images/orig/* images/train 17 | 18 | find `pwd`/images/train > dice.train.list -name \*.jpg 19 | find `pwd`/images/val > dice.val.list -name \*.jpg 20 | 21 | -------------------------------------------------------------------------------- /src/rnn_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef RNN_LAYER_H 3 | #define RNN_LAYER_H 4 | 5 | #include "activations.h" 6 | 
#include "layer.h" 7 | #include "network.h" 8 | #define USET 9 | 10 | layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam); 11 | 12 | void forward_rnn_layer(layer l, network net); 13 | void backward_rnn_layer(layer l, network net); 14 | void update_rnn_layer(layer l, update_args a); 15 | 16 | #ifdef GPU 17 | void forward_rnn_layer_gpu(layer l, network net); 18 | void backward_rnn_layer_gpu(layer l, network net); 19 | void update_rnn_layer_gpu(layer l, update_args a); 20 | void push_rnn_layer(layer l); 21 | void pull_rnn_layer(layer l); 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/maxpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef MAXPOOL_LAYER_H 2 | #define MAXPOOL_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | typedef layer maxpool_layer; 10 | 11 | image get_maxpool_image(maxpool_layer l); 12 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); 13 | void resize_maxpool_layer(maxpool_layer *l, int w, int h); 14 | void forward_maxpool_layer(const maxpool_layer l, network net); 15 | void backward_maxpool_layer(const maxpool_layer l, network net); 16 | 17 | #ifdef GPU 18 | void forward_maxpool_layer_gpu(maxpool_layer l, network net); 19 | void backward_maxpool_layer_gpu(maxpool_layer l, network net); 20 | #endif 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /src/normalization_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef NORMALIZATION_LAYER_H 2 | #define NORMALIZATION_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_normalization_layer(int batch, int w, int h, int c, int size, float 
alpha, float beta, float kappa); 9 | void resize_normalization_layer(layer *layer, int h, int w); 10 | void forward_normalization_layer(const layer layer, network net); 11 | void backward_normalization_layer(const layer layer, network net); 12 | void visualize_normalization_layer(layer layer, char *window); 13 | 14 | #ifdef GPU 15 | void forward_normalization_layer_gpu(const layer layer, network net); 16 | void backward_normalization_layer_gpu(const layer layer, network net); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/crnn_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CRNN_LAYER_H 3 | #define CRNN_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); 10 | 11 | void forward_crnn_layer(layer l, network net); 12 | void backward_crnn_layer(layer l, network net); 13 | void update_crnn_layer(layer l, update_args a); 14 | 15 | #ifdef GPU 16 | void forward_crnn_layer_gpu(layer l, network net); 17 | void backward_crnn_layer_gpu(layer l, network net); 18 | void update_crnn_layer_gpu(layer l, update_args a); 19 | void push_crnn_layer(layer l); 20 | void pull_crnn_layer(layer l); 21 | #endif 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src/connected_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONNECTED_LAYER_H 2 | #define CONNECTED_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam); 9 | 10 | void forward_connected_layer(layer l, network net); 11 | void 
backward_connected_layer(layer l, network net); 12 | void update_connected_layer(layer l, update_args a); 13 | 14 | #ifdef GPU 15 | void forward_connected_layer_gpu(layer l, network net); 16 | void backward_connected_layer_gpu(layer l, network net); 17 | void update_connected_layer_gpu(layer l, update_args a); 18 | void push_connected_layer(layer l); 19 | void pull_connected_layer(layer l); 20 | #endif 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /examples/detector.py: -------------------------------------------------------------------------------- 1 | # Stupid python path shit. 2 | # Instead just add darknet.py to somewhere in your python path 3 | # OK actually that might not be a great idea, idk, work in progress 4 | # Use at your own risk. or don't, i don't care 5 | 6 | import sys, os 7 | sys.path.append(os.path.join(os.getcwd(),'python/')) 8 | 9 | import darknet as dn 10 | import pdb 11 | 12 | dn.set_gpu(0) 13 | net = dn.load_net("cfg/yolo-thor.cfg", "/home/pjreddie/backup/yolo-thor_final.weights", 0) 14 | meta = dn.load_meta("cfg/thor.data") 15 | r = dn.detect(net, meta, "data/bedroom.jpg") 16 | print r 17 | 18 | # And then down here you could detect a lot more images like: 19 | r = dn.detect(net, meta, "data/eagle.jpg") 20 | print r 21 | r = dn.detect(net, meta, "data/giraffe.jpg") 22 | print r 23 | r = dn.detect(net, meta, "data/horses.jpg") 24 | print r 25 | r = dn.detect(net, meta, "data/person.jpg") 26 | print r 27 | 28 | -------------------------------------------------------------------------------- /scripts/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Clone COCO API 4 | git clone https://github.com/pdollar/coco 5 | cd coco 6 | 7 | mkdir images 8 | cd images 9 | 10 | # Download Images 11 | wget -c https://pjreddie.com/media/files/train2014.zip 12 | wget -c https://pjreddie.com/media/files/val2014.zip 13 | 14 | 
# Unzip 15 | unzip -q train2014.zip 16 | unzip -q val2014.zip 17 | 18 | cd .. 19 | 20 | # Download COCO Metadata 21 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 22 | wget -c https://pjreddie.com/media/files/coco/5k.part 23 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 24 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 25 | tar xzf labels.tgz 26 | unzip -q instances_train-val2014.zip 27 | 28 | # Set Up Image Lists 29 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 30 | paste <(awk "{print \"$PWD\"}" trainvalno5k.txt 31 | 32 | -------------------------------------------------------------------------------- /src/deconvolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DECONVOLUTIONAL_LAYER_H 2 | #define DECONVOLUTIONAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | #ifdef GPU 11 | void forward_deconvolutional_layer_gpu(layer l, network net); 12 | void backward_deconvolutional_layer_gpu(layer l, network net); 13 | void update_deconvolutional_layer_gpu(layer l, update_args a); 14 | void push_deconvolutional_layer(layer l); 15 | void pull_deconvolutional_layer(layer l); 16 | #endif 17 | 18 | layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam); 19 | void resize_deconvolutional_layer(layer *l, int h, int w); 20 | void forward_deconvolutional_layer(const layer l, network net); 21 | void update_deconvolutional_layer(layer l, update_args a); 22 | void backward_deconvolutional_layer(layer l, network net); 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/gemm.h: -------------------------------------------------------------------------------- 1 | #ifndef GEMM_H 2 | #define 
GEMM_H 3 | 4 | void gemm_bin(int M, int N, int K, float ALPHA, 5 | char *A, int lda, 6 | float *B, int ldb, 7 | float *C, int ldc); 8 | 9 | void gemm(int TA, int TB, int M, int N, int K, float ALPHA, 10 | float *A, int lda, 11 | float *B, int ldb, 12 | float BETA, 13 | float *C, int ldc); 14 | 15 | void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, 16 | float *A, int lda, 17 | float *B, int ldb, 18 | float BETA, 19 | float *C, int ldc); 20 | 21 | #ifdef GPU 22 | void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, 23 | float *A_gpu, int lda, 24 | float *B_gpu, int ldb, 25 | float BETA, 26 | float *C_gpu, int ldc); 27 | 28 | void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, 29 | float *A, int lda, 30 | float *B, int ldb, 31 | float BETA, 32 | float *C, int ldc); 33 | #endif 34 | #endif 35 | -------------------------------------------------------------------------------- /src/local_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCAL_LAYER_H 2 | #define LOCAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer local_layer; 11 | 12 | #ifdef GPU 13 | void forward_local_layer_gpu(local_layer layer, network net); 14 | void backward_local_layer_gpu(local_layer layer, network net); 15 | void update_local_layer_gpu(local_layer layer, update_args a); 16 | 17 | void push_local_layer(local_layer layer); 18 | void pull_local_layer(local_layer layer); 19 | #endif 20 | 21 | local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); 22 | 23 | void forward_local_layer(const local_layer layer, network net); 24 | void backward_local_layer(local_layer layer, network net); 25 | void update_local_layer(local_layer layer, update_args a); 26 | 27 | void bias_output(float *output, float *biases, int batch, int n, int size); 28 | 
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /LICENSE.mit: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Joseph Redmon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /cfg/chinese_character.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | #batch=128 3 | batch=64 4 | subdivisions=1 5 | height=64 6 | width=64 7 | channels=3 8 | max_crop=64 9 | min_crop=64 10 | angle=7 11 | hue=.1 12 | saturation=.75 13 | exposure=.75 14 | 15 | learning_rate=0.1 16 | policy=poly 17 | power=4 18 | max_batches = 45000 19 | momentum=0.9 20 | decay=0.0005 21 | 22 | 23 | [convolutional] 24 | batch_normalize=1 25 | filters=128 26 | size=3 27 | stride=1 28 | pad=1 29 | activation=leaky 30 | 31 | [maxpool] 32 | size=2 33 | stride=2 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=256 38 | size=3 39 | stride=1 40 | pad=1 41 | activation=leaky 42 | 43 | [maxpool] 44 | size=2 45 | stride=2 46 | 47 | [convolutional] 48 | batch_normalize=1 49 | filters=512 50 | size=3 51 | stride=1 52 | pad=1 53 | activation=leaky 54 | 55 | [maxpool] 56 | size=2 57 | stride=2 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=1024 62 | size=3 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | 68 | [convolutional] 69 | filters=3604 70 | size=1 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [avgpool] 76 | 77 | [softmax] 78 | groups=1 79 | 80 | [cost] 81 | type=sse 82 | 83 | 84 | -------------------------------------------------------------------------------- /src/dropout_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "dropout_layer.h" 7 | #include "cuda.h" 8 | #include "utils.h" 9 | } 10 | 11 | __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) 12 | { 13 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 14 | if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; 15 | } 16 | 17 | void forward_dropout_layer_gpu(dropout_layer layer, network net) 18 | { 19 | if (!net.train) return; 20 | int size = layer.inputs*layer.batch; 21 | cuda_random(layer.rand_gpu, size); 22 | /* 23 | int i; 24 | for(i = 0; i < size; ++i){ 25 | layer.rand[i] = rand_uniform(); 26 | } 27 | cuda_push_array(layer.rand_gpu, layer.rand, size); 28 | */ 29 | 30 | yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale); 31 | check_error(cudaPeekAtLastError()); 32 | } 33 | 34 | void backward_dropout_layer_gpu(dropout_layer layer, network net) 35 | { 36 | if(!net.delta_gpu) return; 37 | int size = layer.inputs*layer.batch; 38 | 39 | yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale); 40 | check_error(cudaPeekAtLastError()); 41 | } 42 | -------------------------------------------------------------------------------- /jiyan/data/valid/1530951693.xml: -------------------------------------------------------------------------------- 1 | 2 | unlable 3 | 1530951693.jpeg 4 | /home/bytest/unlable/1530951693.jpeg 5 | 6 | Unknown 7 | 8 | 9 | 344 10 | 384 11 | 3 12 | 13 | 0 14 | 15 | hanzi 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 104 21 | 50 22 | 162 23 | 118 24 | 25 | 26 | 27 | hanzi 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 52 33 | 148 34 | 111 35 | 219 36 | 37 | 38 | 39 | hanzi 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 167 45 | 146 46 | 215 47 | 200 48 | 49 | 50 | 51 | hanzi 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 47 57 | 287 58 | 109 59 | 337 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /jiyan/data/valid/1530951687.xml: -------------------------------------------------------------------------------- 1 | 2 | unlable 3 | 1530951687.jpeg 4 | /home/bytest/unlable/1530951687.jpeg 5 | 6 | Unknown 7 | 8 | 9 | 344 10 | 384 11 | 3 12 | 13 | 0 14 | 15 | hanzi 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 101 21 | 40 
22 | 164 23 | 111 24 | 25 | 26 | 27 | hanzi 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 205 33 | 62 34 | 267 35 | 116 36 | 37 | 38 | 39 | hanzi 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 256 45 | 184 46 | 324 47 | 253 48 | 49 | 50 | 51 | hanzi 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 183 57 | 267 58 | 259 59 | 330 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /src/col2im.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | void col2im_add_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad, float val) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || col < 0 || 10 | row >= height || col >= width) return; 11 | im[col + width*(row + height*channel)] += val; 12 | } 13 | //This one might be too, can't remember. 14 | void col2im_cpu(float* data_col, 15 | int channels, int height, int width, 16 | int ksize, int stride, int pad, float* data_im) 17 | { 18 | int c,h,w; 19 | int height_col = (height + 2*pad - ksize) / stride + 1; 20 | int width_col = (width + 2*pad - ksize) / stride + 1; 21 | 22 | int channels_col = channels * ksize * ksize; 23 | for (c = 0; c < channels_col; ++c) { 24 | int w_offset = c % ksize; 25 | int h_offset = (c / ksize) % ksize; 26 | int c_im = c / ksize / ksize; 27 | for (h = 0; h < height_col; ++h) { 28 | for (w = 0; w < width_col; ++w) { 29 | int im_row = h_offset + h * stride; 30 | int im_col = w_offset + w * stride; 31 | int col_index = (c * height_col + h) * width_col + w; 32 | double val = data_col[col_index]; 33 | col2im_add_pixel(data_im, height, width, channels, 34 | im_row, im_col, c_im, pad, val); 35 | } 36 | } 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /src/im2col.c: -------------------------------------------------------------------------------- 1 | #include "im2col.h" 2 | #include 3 | float 
im2col_get_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || col < 0 || 10 | row >= height || col >= width) return 0; 11 | return im[col + width*(row + height*channel)]; 12 | } 13 | 14 | //From Berkeley Vision's Caffe! 15 | //https://github.com/BVLC/caffe/blob/master/LICENSE 16 | void im2col_cpu(float* data_im, 17 | int channels, int height, int width, 18 | int ksize, int stride, int pad, float* data_col) 19 | { 20 | int c,h,w; 21 | int height_col = (height + 2*pad - ksize) / stride + 1; 22 | int width_col = (width + 2*pad - ksize) / stride + 1; 23 | 24 | int channels_col = channels * ksize * ksize; 25 | for (c = 0; c < channels_col; ++c) { 26 | int w_offset = c % ksize; 27 | int h_offset = (c / ksize) % ksize; 28 | int c_im = c / ksize / ksize; 29 | for (h = 0; h < height_col; ++h) { 30 | for (w = 0; w < width_col; ++w) { 31 | int im_row = h_offset + h * stride; 32 | int im_col = w_offset + w * stride; 33 | int col_index = (c * height_col + h) * width_col + w; 34 | data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, 35 | im_row, im_col, c_im, pad); 36 | } 37 | } 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /examples/art.c: -------------------------------------------------------------------------------- 1 | #include "darknet.h" 2 | 3 | #include 4 | 5 | void demo_art(char *cfgfile, char *weightfile, int cam_index) 6 | { 7 | #ifdef OPENCV 8 | network *net = load_network(cfgfile, weightfile, 0); 9 | set_batch_network(net, 1); 10 | 11 | srand(2222222); 12 | CvCapture * cap; 13 | 14 | cap = cvCaptureFromCAM(cam_index); 15 | 16 | char *window = "ArtJudgementBot9000!!!"; 17 | if(!cap) error("Couldn't connect to webcam.\n"); 18 | cvNamedWindow(window, CV_WINDOW_NORMAL); 19 | cvResizeWindow(window, 512, 512); 20 | int i; 21 | int idx[] = {37, 401, 434}; 22 | int n = 
sizeof(idx)/sizeof(idx[0]); 23 | 24 | while(1){ 25 | image in = get_image_from_stream(cap); 26 | image in_s = resize_image(in, net->w, net->h); 27 | show_image(in, window); 28 | 29 | float *p = network_predict(net, in_s.data); 30 | 31 | printf("\033[2J"); 32 | printf("\033[1;1H"); 33 | 34 | float score = 0; 35 | for(i = 0; i < n; ++i){ 36 | float s = p[idx[i]]; 37 | if (s > score) score = s; 38 | } 39 | score = score; 40 | printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); 41 | printf("["); 42 | int upper = 30; 43 | for(i = 0; i < upper; ++i){ 44 | printf("%c", ((i+.5) < score*upper) ? 219 : ' '); 45 | } 46 | printf("]\n"); 47 | 48 | free_image(in_s); 49 | free_image(in); 50 | 51 | cvWaitKey(1); 52 | } 53 | #endif 54 | } 55 | 56 | 57 | void run_art(int argc, char **argv) 58 | { 59 | int cam_index = find_int_arg(argc, argv, "-c", 0); 60 | char *cfg = argv[2]; 61 | char *weights = argv[3]; 62 | demo_art(cfg, weights, cam_index); 63 | } 64 | 65 | -------------------------------------------------------------------------------- /src/list.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "list.h" 4 | 5 | list *make_list() 6 | { 7 | list *l = malloc(sizeof(list)); 8 | l->size = 0; 9 | l->front = 0; 10 | l->back = 0; 11 | return l; 12 | } 13 | 14 | /* 15 | void transfer_node(list *s, list *d, node *n) 16 | { 17 | node *prev, *next; 18 | prev = n->prev; 19 | next = n->next; 20 | if(prev) prev->next = next; 21 | if(next) next->prev = prev; 22 | --s->size; 23 | if(s->front == n) s->front = next; 24 | if(s->back == n) s->back = prev; 25 | } 26 | */ 27 | 28 | void *list_pop(list *l){ 29 | if(!l->back) return 0; 30 | node *b = l->back; 31 | void *val = b->val; 32 | l->back = b->prev; 33 | if(l->back) l->back->next = 0; 34 | free(b); 35 | --l->size; 36 | 37 | return val; 38 | } 39 | 40 | void list_insert(list *l, void *val) 41 | { 42 | node *new = malloc(sizeof(node)); 43 | new->val = val; 44 | 
new->next = 0; 45 | 46 | if(!l->back){ 47 | l->front = new; 48 | new->prev = 0; 49 | }else{ 50 | l->back->next = new; 51 | new->prev = l->back; 52 | } 53 | l->back = new; 54 | ++l->size; 55 | } 56 | 57 | void free_node(node *n) 58 | { 59 | node *next; 60 | while(n) { 61 | next = n->next; 62 | free(n); 63 | n = next; 64 | } 65 | } 66 | 67 | void free_list(list *l) 68 | { 69 | free_node(l->front); 70 | free(l); 71 | } 72 | 73 | void free_list_contents(list *l) 74 | { 75 | node *n = l->front; 76 | while(n){ 77 | free(n->val); 78 | n = n->next; 79 | } 80 | } 81 | 82 | void **list_to_array(list *l) 83 | { 84 | void **a = calloc(l->size, sizeof(void*)); 85 | int count = 0; 86 | node *n = l->front; 87 | while(n){ 88 | a[count++] = n->val; 89 | n = n->next; 90 | } 91 | return a; 92 | } 93 | -------------------------------------------------------------------------------- /src/dropout_layer.c: -------------------------------------------------------------------------------- 1 | #include "dropout_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include 5 | #include 6 | 7 | dropout_layer make_dropout_layer(int batch, int inputs, float probability) 8 | { 9 | dropout_layer l = {0}; 10 | l.type = DROPOUT; 11 | l.probability = probability; 12 | l.inputs = inputs; 13 | l.outputs = inputs; 14 | l.batch = batch; 15 | l.rand = calloc(inputs*batch, sizeof(float)); 16 | l.scale = 1./(1.-probability); 17 | l.forward = forward_dropout_layer; 18 | l.backward = backward_dropout_layer; 19 | #ifdef GPU 20 | l.forward_gpu = forward_dropout_layer_gpu; 21 | l.backward_gpu = backward_dropout_layer_gpu; 22 | l.rand_gpu = cuda_make_array(l.rand, inputs*batch); 23 | #endif 24 | fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); 25 | return l; 26 | } 27 | 28 | void resize_dropout_layer(dropout_layer *l, int inputs) 29 | { 30 | l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float)); 31 | #ifdef GPU 32 | cuda_free(l->rand_gpu); 33 | 34 | l->rand_gpu = 
cuda_make_array(l->rand, inputs*l->batch); 35 | #endif 36 | } 37 | 38 | void forward_dropout_layer(dropout_layer l, network net) 39 | { 40 | int i; 41 | if (!net.train) return; 42 | for(i = 0; i < l.batch * l.inputs; ++i){ 43 | float r = rand_uniform(0, 1); 44 | l.rand[i] = r; 45 | if(r < l.probability) net.input[i] = 0; 46 | else net.input[i] *= l.scale; 47 | } 48 | } 49 | 50 | void backward_dropout_layer(dropout_layer l, network net) 51 | { 52 | int i; 53 | if(!net.delta) return; 54 | for(i = 0; i < l.batch * l.inputs; ++i){ 55 | float r = l.rand[i]; 56 | if(r < l.probability) net.delta[i] = 0; 57 | else net.delta[i] *= l.scale; 58 | } 59 | } 60 | 61 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | #include 4 | #include 5 | #include "darknet.h" 6 | #include "list.h" 7 | 8 | #define TIME(a) \ 9 | do { \ 10 | double start = what_time_is_it_now(); \ 11 | a; \ 12 | printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ 13 | } while (0) 14 | 15 | #define TWO_PI 6.2831853071795864769252866f 16 | 17 | double what_time_is_it_now(); 18 | void shuffle(void *arr, size_t n, size_t size); 19 | void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); 20 | void free_ptrs(void **ptrs, int n); 21 | int alphanum_to_int(char c); 22 | char int_to_alphanum(int i); 23 | int read_int(int fd); 24 | void write_int(int fd, int n); 25 | void read_all(int fd, char *buffer, size_t bytes); 26 | void write_all(int fd, char *buffer, size_t bytes); 27 | int read_all_fail(int fd, char *buffer, size_t bytes); 28 | int write_all_fail(int fd, char *buffer, size_t bytes); 29 | void find_replace(char *str, char *orig, char *rep, char *output); 30 | void malloc_error(); 31 | void file_error(char *s); 32 | void strip(char *s); 33 | void strip_char(char *s, char bad); 34 | list *split_str(char *s, char 
delim); 35 | char *fgetl(FILE *fp); 36 | list *parse_csv_line(char *line); 37 | char *copy_string(char *s); 38 | int count_fields(char *line); 39 | float *parse_fields(char *line, int n); 40 | void translate_array(float *a, int n, float s); 41 | float constrain(float min, float max, float a); 42 | int constrain_int(int a, int min, int max); 43 | float rand_scale(float s); 44 | int rand_int(int min, int max); 45 | void mean_arrays(float **a, int n, int els, float *avg); 46 | float dist_array(float *a, float *b, int n, int sub); 47 | float **one_hot_encode(float *a, int n, int k); 48 | float sec(clock_t clocks); 49 | void print_statistics(float *a, int n); 50 | int int_index(int *a, int val, int n); 51 | 52 | #endif 53 | 54 | -------------------------------------------------------------------------------- /src/avgpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "avgpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) 11 | { 12 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 13 | if(id >= n) return; 14 | 15 | int k = id % c; 16 | id /= c; 17 | int b = id; 18 | 19 | int i; 20 | int out_index = (k + c*b); 21 | output[out_index] = 0; 22 | for(i = 0; i < w*h; ++i){ 23 | int in_index = i + h*w*(k + b*c); 24 | output[out_index] += input[in_index]; 25 | } 26 | output[out_index] /= w*h; 27 | } 28 | 29 | __global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) 30 | { 31 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 32 | if(id >= n) return; 33 | 34 | int k = id % c; 35 | id /= c; 36 | int b = id; 37 | 38 | int i; 39 | int out_index = (k + c*b); 40 | for(i = 0; i < w*h; ++i){ 41 | int in_index = i + h*w*(k + 
b*c); 42 | in_delta[in_index] += out_delta[out_index] / (w*h); 43 | } 44 | } 45 | 46 | extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net) 47 | { 48 | size_t n = layer.c*layer.batch; 49 | 50 | forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu); 51 | check_error(cudaPeekAtLastError()); 52 | } 53 | 54 | extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net) 55 | { 56 | size_t n = layer.c*layer.batch; 57 | 58 | backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu); 59 | check_error(cudaPeekAtLastError()); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /src/activation_layer.c: -------------------------------------------------------------------------------- 1 | #include "activation_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include "blas.h" 5 | #include "gemm.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation) 13 | { 14 | layer l = {0}; 15 | l.type = ACTIVE; 16 | 17 | l.inputs = inputs; 18 | l.outputs = inputs; 19 | l.batch=batch; 20 | 21 | l.output = calloc(batch*inputs, sizeof(float*)); 22 | l.delta = calloc(batch*inputs, sizeof(float*)); 23 | 24 | l.forward = forward_activation_layer; 25 | l.backward = backward_activation_layer; 26 | #ifdef GPU 27 | l.forward_gpu = forward_activation_layer_gpu; 28 | l.backward_gpu = backward_activation_layer_gpu; 29 | 30 | l.output_gpu = cuda_make_array(l.output, inputs*batch); 31 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 32 | #endif 33 | l.activation = activation; 34 | fprintf(stderr, "Activation Layer: %d inputs\n", inputs); 35 | return l; 36 | } 37 | 38 | void forward_activation_layer(layer l, network net) 39 | { 40 | copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); 41 | activate_array(l.output, l.outputs*l.batch, 
def convert(size, box):
    """Turn a pixel box into centre/size values scaled by the image size.

    size: (image width, image height).
    box:  (xmin, xmax, ymin, ymax).

    Returns (x, y, w, h): the box centre with 1 subtracted from each
    coordinate, and the box width and height, with x and w multiplied by
    1/width and y and h multiplied by 1/height.
    """
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]

    center_x = (xmin + xmax) / 2.0 - 1
    center_y = (ymin + ymax) / 2.0 - 1
    box_w = xmax - xmin
    box_h = ymax - ymin

    return (center_x * inv_w, center_y * inv_h, box_w * inv_w, box_h * inv_h)
classes.index(cls) 42 | xmlbox = obj.find('bndbox') 43 | b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) 44 | bb = convert((w,h), b) 45 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 46 | 47 | wd = getcwd() 48 | 49 | 50 | # 若是.jpeg则保存图片路径;若是.xml则将其转换.txt 51 | def run(): 52 | list_file = open("valid.txt", "w") #train.txt用来保存图片的路径 53 | files = os.listdir(IMG_DIR) # 返回指定的文件夹包含的文件或文件夹的名字的列表 54 | for f in files: 55 | if f.endswith(".jpg"): 56 | list_file.write('%s/%s\n' %(IMG_DIR, f)) 57 | if f.endswith(".xml"): 58 | convert_annotation(f) 59 | list_file.close() 60 | 61 | if __name__ == '__main__': 62 | run() 63 | -------------------------------------------------------------------------------- /src/l2norm_layer.c: -------------------------------------------------------------------------------- 1 | #include "l2norm_layer.h" 2 | #include "activations.h" 3 | #include "blas.h" 4 | #include "cuda.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_l2norm_layer(int batch, int inputs) 13 | { 14 | fprintf(stderr, "l2norm %4d\n", inputs); 15 | layer l = {0}; 16 | l.type = L2NORM; 17 | l.batch = batch; 18 | l.inputs = inputs; 19 | l.outputs = inputs; 20 | l.output = calloc(inputs*batch, sizeof(float)); 21 | l.scales = calloc(inputs*batch, sizeof(float)); 22 | l.delta = calloc(inputs*batch, sizeof(float)); 23 | 24 | l.forward = forward_l2norm_layer; 25 | l.backward = backward_l2norm_layer; 26 | #ifdef GPU 27 | l.forward_gpu = forward_l2norm_layer_gpu; 28 | l.backward_gpu = backward_l2norm_layer_gpu; 29 | 30 | l.output_gpu = cuda_make_array(l.output, inputs*batch); 31 | l.scales_gpu = cuda_make_array(l.output, inputs*batch); 32 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 33 | #endif 34 | return l; 35 | } 36 | 37 | void forward_l2norm_layer(const layer l, network net) 38 | { 39 | copy_cpu(l.outputs*l.batch, 
net.input, 1, l.output, 1); 40 | l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w*l.out_h); 41 | } 42 | 43 | void backward_l2norm_layer(const layer l, network net) 44 | { 45 | //axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); 46 | axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); 47 | } 48 | 49 | #ifdef GPU 50 | 51 | void forward_l2norm_layer_gpu(const layer l, network net) 52 | { 53 | copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); 54 | l2normalize_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_w*l.out_h); 55 | } 56 | 57 | void backward_l2norm_layer_gpu(const layer l, network net) 58 | { 59 | axpy_gpu(l.batch*l.inputs, 1, l.scales_gpu, 1, l.delta_gpu, 1); 60 | axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); 61 | } 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /src/avgpool_layer.c: -------------------------------------------------------------------------------- 1 | #include "avgpool_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) 6 | { 7 | fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); 8 | avgpool_layer l = {0}; 9 | l.type = AVGPOOL; 10 | l.batch = batch; 11 | l.h = h; 12 | l.w = w; 13 | l.c = c; 14 | l.out_w = 1; 15 | l.out_h = 1; 16 | l.out_c = c; 17 | l.outputs = l.out_c; 18 | l.inputs = h*w*c; 19 | int output_size = l.outputs * batch; 20 | l.output = calloc(output_size, sizeof(float)); 21 | l.delta = calloc(output_size, sizeof(float)); 22 | l.forward = forward_avgpool_layer; 23 | l.backward = backward_avgpool_layer; 24 | #ifdef GPU 25 | l.forward_gpu = forward_avgpool_layer_gpu; 26 | l.backward_gpu = backward_avgpool_layer_gpu; 27 | l.output_gpu = cuda_make_array(l.output, output_size); 28 | l.delta_gpu = cuda_make_array(l.delta, output_size); 29 | #endif 30 | return l; 31 | } 32 | 33 | void resize_avgpool_layer(avgpool_layer *l, int w, int h) 34 | { 35 | 
/*
 * Maps a position x in a span of length max to a value that is small near
 * either end and capped at 1 toward the middle.
 *
 * Computed as 2 * (max/2 + 1 - |max/2 - x|) / max using integer arithmetic
 * for the numerator, then limited to at most 1.
 */
static inline float distance_from_edge(int x, int max)
{
    const int half = max / 2;
    int from_center = half - x;
    if (from_center < 0) from_center = -from_center;

    int doubled = (half + 1 - from_center);
    doubled *= 2;

    float dist = (float)doubled / max;
    return (dist > 1) ? 1 : dist;
}
float saturation, float exposure); 29 | matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); 30 | data load_data_super(char **paths, int n, int m, int w, int h, int scale); 31 | data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center); 32 | data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); 33 | data load_go(char *filename); 34 | 35 | 36 | data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); 37 | 38 | void get_random_batch(data d, int n, float *X, float *y); 39 | data get_data_part(data d, int part, int total); 40 | data get_random_data(data d, int num); 41 | data load_categorical_data_csv(char *filename, int target, int k); 42 | void normalize_data_rows(data d); 43 | void scale_data_rows(data d, float s); 44 | void translate_data_rows(data d, float s); 45 | void randomize_data(data d); 46 | data *split_data(data d, int part, int total); 47 | data concat_datas(data *d, int n); 48 | void fill_truth(char *path, char **labels, int k, float *truth); 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/logistic_layer.c: -------------------------------------------------------------------------------- 1 | #include "logistic_layer.h" 2 | #include "activations.h" 3 | #include "blas.h" 4 | #include "cuda.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_logistic_layer(int batch, int inputs) 13 | { 14 | fprintf(stderr, "logistic x entropy %4d\n", inputs); 15 | layer l = {0}; 16 | l.type = LOGXENT; 17 | l.batch = batch; 18 | l.inputs = inputs; 19 | l.outputs = inputs; 20 | 
l.loss = calloc(inputs*batch, sizeof(float)); 21 | l.output = calloc(inputs*batch, sizeof(float)); 22 | l.delta = calloc(inputs*batch, sizeof(float)); 23 | l.cost = calloc(1, sizeof(float)); 24 | 25 | l.forward = forward_logistic_layer; 26 | l.backward = backward_logistic_layer; 27 | #ifdef GPU 28 | l.forward_gpu = forward_logistic_layer_gpu; 29 | l.backward_gpu = backward_logistic_layer_gpu; 30 | 31 | l.output_gpu = cuda_make_array(l.output, inputs*batch); 32 | l.loss_gpu = cuda_make_array(l.loss, inputs*batch); 33 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 34 | #endif 35 | return l; 36 | } 37 | 38 | void forward_logistic_layer(const layer l, network net) 39 | { 40 | copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); 41 | activate_array(l.output, l.outputs*l.batch, LOGISTIC); 42 | if(net.truth){ 43 | logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); 44 | l.cost[0] = sum_array(l.loss, l.batch*l.inputs); 45 | } 46 | } 47 | 48 | void backward_logistic_layer(const layer l, network net) 49 | { 50 | axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); 51 | } 52 | 53 | #ifdef GPU 54 | 55 | void forward_logistic_layer_gpu(const layer l, network net) 56 | { 57 | copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); 58 | activate_array_gpu(l.output_gpu, l.outputs*l.batch, LOGISTIC); 59 | if(net.truth){ 60 | logistic_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); 61 | cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); 62 | l.cost[0] = sum_array(l.loss, l.batch*l.inputs); 63 | } 64 | } 65 | 66 | void backward_logistic_layer_gpu(const layer l, network net) 67 | { 68 | axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); 69 | } 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /src/convolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONVOLUTIONAL_LAYER_H 
2 | #define CONVOLUTIONAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer convolutional_layer; 11 | 12 | #ifdef GPU 13 | void forward_convolutional_layer_gpu(convolutional_layer layer, network net); 14 | void backward_convolutional_layer_gpu(convolutional_layer layer, network net); 15 | void update_convolutional_layer_gpu(convolutional_layer layer, update_args a); 16 | 17 | void push_convolutional_layer(convolutional_layer layer); 18 | void pull_convolutional_layer(convolutional_layer layer); 19 | 20 | void add_bias_gpu(float *output, float *biases, int batch, int n, int size); 21 | void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); 22 | void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); 23 | #ifdef CUDNN 24 | void cudnn_convolutional_setup(layer *l); 25 | #endif 26 | #endif 27 | 28 | convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); 29 | void resize_convolutional_layer(convolutional_layer *layer, int w, int h); 30 | void forward_convolutional_layer(const convolutional_layer layer, network net); 31 | void update_convolutional_layer(convolutional_layer layer, update_args a); 32 | image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); 33 | void binarize_weights(float *weights, int n, int size, float *binary); 34 | void swap_binary(convolutional_layer *l); 35 | void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); 36 | 37 | void backward_convolutional_layer(convolutional_layer layer, network net); 38 | 39 | void add_bias(float *output, float *biases, int batch, int n, int size); 40 | void 
def convert_annotation(year, image_id):
    """Write the label file for one annotation XML.

    Reads 'VOCdevkit/VOC<year>/Annotations/<image_id>.xml' and writes one
    line per kept object to 'VOCdevkit/VOC<year>/labels/<image_id>.txt' in
    the form '<class index> <x> <y> <w> <h>', with the box values produced by
    convert(). Objects whose name is not in `classes`, or whose 'difficult'
    flag is 1, are skipped.
    """
    in_path = 'VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)
    out_path = 'VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id)

    # Parse first and close the input promptly; the label file is only
    # created once the XML has been read successfully.
    with open(in_path) as in_file:
        tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # `with` guarantees the output is flushed and closed on every exit path.
    with open(out_path, 'w') as out_file:
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult)==1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w,h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
int channel_out = channel_in * ksize * ksize; 26 | int h_in = h_out * stride - pad; 27 | int w_in = w_out * stride - pad; 28 | float* data_col_ptr = data_col; 29 | data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; 30 | const float* data_im_ptr = data_im; 31 | data_im_ptr += (channel_in * height + h_in) * width + w_in; 32 | for (int i = 0; i < ksize; ++i) { 33 | for (int j = 0; j < ksize; ++j) { 34 | int h = h_in + i; 35 | int w = w_in + j; 36 | 37 | *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? 38 | data_im_ptr[i * width + j] : 0; 39 | 40 | //*data_col_ptr = data_im_ptr[ii * width + jj]; 41 | 42 | data_col_ptr += height_col * width_col; 43 | } 44 | } 45 | } 46 | } 47 | 48 | void im2col_gpu(float *im, 49 | int channels, int height, int width, 50 | int ksize, int stride, int pad, float *data_col){ 51 | // We are going to launch channels * height_col * width_col kernels, each 52 | // kernel responsible for copying a single-channel grid. 53 | int height_col = (height + 2 * pad - ksize) / stride + 1; 54 | int width_col = (width + 2 * pad - ksize) / stride + 1; 55 | int num_kernels = channels * height_col * width_col; 56 | im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 57 | BLOCK>>>( 58 | num_kernels, im, height, width, ksize, pad, 59 | stride, height_col, 60 | width_col, data_col); 61 | } 62 | -------------------------------------------------------------------------------- /src/col2im_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "col2im.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void col2im_gpu_kernel(const int n, const float* data_col, 14 | const int height, const int width, const int ksize, 
// Host launcher for col2im_gpu_kernel.
// One work item is launched per image element (channels * height * width),
// in blocks of BLOCK threads; the column-buffer dimensions are derived from
// the image size, kernel size, stride and padding and passed to the kernel.
void col2im_gpu(float *data_col,
        int channels, int height, int width,
        int ksize, int stride, int pad, float *data_im){
    const int height_col = (height + 2 * pad - ksize) / stride + 1;
    const int width_col = (width + 2 * pad - ksize) / stride + 1;
    const int num_kernels = channels * height * width;
    const int num_blocks = (num_kernels+BLOCK-1)/BLOCK;

    col2im_gpu_kernel<<<num_blocks, BLOCK>>>(
                num_kernels, data_col, height, width, ksize, pad,
                stride, height_col,
                width_col, data_im);
}
image dest, int dx, int dy); 36 | void place_image(image im, int w, int h, int dx, int dy, image canvas); 37 | void saturate_image(image im, float sat); 38 | void exposure_image(image im, float sat); 39 | void distort_image(image im, float hue, float sat, float val); 40 | void saturate_exposure_image(image im, float sat, float exposure); 41 | void rgb_to_hsv(image im); 42 | void hsv_to_rgb(image im); 43 | void yuv_to_rgb(image im); 44 | void rgb_to_yuv(image im); 45 | 46 | 47 | image collapse_image_layers(image source, int border); 48 | image collapse_images_horz(image *ims, int n); 49 | image collapse_images_vert(image *ims, int n); 50 | 51 | void show_image_normalized(image im, const char *name); 52 | void show_images(image *ims, int n, char *window); 53 | void show_image_layers(image p, char *name); 54 | void show_image_collapsed(image p, char *name); 55 | 56 | void print_image(image m); 57 | 58 | image make_empty_image(int w, int h, int c); 59 | void copy_image_into(image src, image dest); 60 | 61 | image get_image_layer(image m, int l); 62 | 63 | #endif 64 | 65 | -------------------------------------------------------------------------------- /install-opencv.sh: -------------------------------------------------------------------------------- 1 | ###################################### 2 | # INSTALL OPENCV ON UBUNTU OR DEBIAN # 3 | ###################################### 4 | 5 | # | THIS SCRIPT IS TESTED CORRECTLY ON | 6 | # |----------------------------------------------------| 7 | # | OS | OpenCV | Test | Last test | 8 | # |----------------|--------------|------|-------------| 9 | # | Ubuntu 17.04 | OpenCV 3.4.1 | OK | 14 Mar 2018 | 10 | # | Debian 9.3 | OpenCV 3.4.0 | OK | 17 Feb 2018 | 11 | # | Ubuntu 16.04.2 | OpenCV 3.2.0 | OK | 20 May 2017 | 12 | # | Debian 8.8 | OpenCV 3.2.0 | OK | 20 May 2017 | 13 | # | Debian 9.0 | OpenCV 3.2.0 | OK | 25 Jun 2017 | 14 | 15 | # VERSION TO BE INSTALLED 16 | 17 | OPENCV_VERSION='3.4.0' 18 | 19 | 20 | # 1. 
KEEP UBUNTU OR DEBIAN UP TO DATE 21 | 22 | sudo apt-get -y update 23 | sudo apt-get -y upgrade 24 | sudo apt-get -y dist-upgrade 25 | sudo apt-get -y autoremove 26 | 27 | 28 | # 2. INSTALL THE DEPENDENCIES 29 | 30 | # Build tools: 31 | sudo apt-get install -y build-essential cmake 32 | 33 | # GUI (if you want to use GTK instead of Qt, replace 'qt5-default' with 'libgtkglext1-dev' and remove '-DWITH_QT=ON' option in CMake): 34 | sudo apt-get install -y qt5-default libvtk6-dev 35 | 36 | # Media I/O: 37 | sudo apt-get install -y zlib1g-dev libjpeg-dev libwebp-dev libpng-dev libtiff5-dev libjasper-dev libopenexr-dev libgdal-dev 38 | 39 | # Video I/O: 40 | sudo apt-get install -y libdc1394-22-dev libavcodec-dev libavformat-dev libswscale-dev libtheora-dev libvorbis-dev libxvidcore-dev libx264-dev yasm libopencore-amrnb-dev libopencore-amrwb-dev libv4l-dev libxine2-dev 41 | 42 | # Parallelism and linear algebra libraries: 43 | sudo apt-get install -y libtbb-dev libeigen3-dev 44 | 45 | # Python: 46 | sudo apt-get install -y python-dev python-tk python-numpy python3-dev python3-tk python3-numpy 47 | 48 | # Java: 49 | sudo apt-get install -y ant default-jdk 50 | 51 | # Documentation: 52 | sudo apt-get install -y doxygen 53 | 54 | 55 | # 3. INSTALL THE LIBRARY 56 | 57 | sudo apt-get install -y unzip wget 58 | wget https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip 59 | unzip ${OPENCV_VERSION}.zip 60 | rm ${OPENCV_VERSION}.zip 61 | mv opencv-${OPENCV_VERSION} OpenCV 62 | cd OpenCV 63 | mkdir build 64 | cd build 65 | cmake -DWITH_QT=ON -DWITH_OPENGL=ON -DFORCE_VTK=ON -DWITH_TBB=ON -DWITH_GDAL=ON -DWITH_XINE=ON -DBUILD_EXAMPLES=ON -DENABLE_PRECOMPILED_HEADERS=OFF .. 66 | make -j4 67 | sudo make install 68 | sudo ldconfig 69 | 70 | 71 | # 4. EXECUTE SOME OPENCV EXAMPLES AND COMPILE A DEMONSTRATION 72 | 73 | # To complete this step, please visit 'http://milq.github.io/install-opencv-ubuntu-debian'. 
74 | 75 | -------------------------------------------------------------------------------- /examples/swag.c: -------------------------------------------------------------------------------- 1 | #include "darknet.h" 2 | #include 3 | 4 | void train_swag(char *cfgfile, char *weightfile) 5 | { 6 | char *train_images = "data/voc.0712.trainval"; 7 | char *backup_directory = "/home/pjreddie/backup/"; 8 | srand(time(0)); 9 | char *base = basecfg(cfgfile); 10 | printf("%s\n", base); 11 | float avg_loss = -1; 12 | network net = parse_network_cfg(cfgfile); 13 | if(weightfile){ 14 | load_weights(&net, weightfile); 15 | } 16 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); 17 | int imgs = net.batch*net.subdivisions; 18 | int i = *net.seen/imgs; 19 | data train, buffer; 20 | 21 | layer l = net.layers[net.n - 1]; 22 | 23 | int side = l.side; 24 | int classes = l.classes; 25 | float jitter = l.jitter; 26 | 27 | list *plist = get_paths(train_images); 28 | //int N = plist->size; 29 | char **paths = (char **)list_to_array(plist); 30 | 31 | load_args args = {0}; 32 | args.w = net.w; 33 | args.h = net.h; 34 | args.paths = paths; 35 | args.n = imgs; 36 | args.m = plist->size; 37 | args.classes = classes; 38 | args.jitter = jitter; 39 | args.num_boxes = side; 40 | args.d = &buffer; 41 | args.type = REGION_DATA; 42 | 43 | pthread_t load_thread = load_data_in_thread(args); 44 | clock_t time; 45 | //while(i*imgs < N*120){ 46 | while(get_current_batch(net) < net.max_batches){ 47 | i += 1; 48 | time=clock(); 49 | pthread_join(load_thread, 0); 50 | train = buffer; 51 | load_thread = load_data_in_thread(args); 52 | 53 | printf("Loaded: %lf seconds\n", sec(clock()-time)); 54 | 55 | time=clock(); 56 | float loss = train_network(net, train); 57 | if (avg_loss < 0) avg_loss = loss; 58 | avg_loss = avg_loss*.9 + loss*.1; 59 | 60 | printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), 
sec(clock()-time), i*imgs); 61 | if(i%1000==0 || i == 600){ 62 | char buff[256]; 63 | sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); 64 | save_weights(net, buff); 65 | } 66 | free_data(train); 67 | } 68 | char buff[256]; 69 | sprintf(buff, "%s/%s_final.weights", backup_directory, base); 70 | save_weights(net, buff); 71 | } 72 | 73 | void run_swag(int argc, char **argv) 74 | { 75 | if(argc < 4){ 76 | fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); 77 | return; 78 | } 79 | 80 | char *cfg = argv[3]; 81 | char *weights = (argc > 4) ? argv[4] : 0; 82 | if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); 83 | } 84 | -------------------------------------------------------------------------------- /python/segment.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- #不能忘记! 2 | from darknet import load_net, load_meta, detect 3 | import cv2 4 | import time 5 | 6 | # 返回整数时间戳 7 | def timestamp(): 8 | return int(time.time()*10000) 9 | def fix(x, y, x_plus_w, y_plus_h ): 10 | x = 0 if x < 0 else x 11 | y = 0 if y < 0 else y 12 | x_plus_w = 384 if x_plus_w > 384 else x_plus_w 13 | y_plus_h = 344 if y_plus_h > 344 else y_plus_h 14 | return x, y, x_plus_w, y_plus_h 15 | 16 | 17 | # 切割汉字 18 | def seg_one_img(img_path, rets): 19 | img = cv2.imread(img_path) 20 | hanzi_list = [] 21 | for ret in rets: 22 | per_dict = {} 23 | if ret[1] > 0.5: 24 | coordinate = ret[2] 25 | center = (int(coordinate[0]*344), int(coordinate[1]*384)) 26 | origin = (coordinate[0] - coordinate[2]/2, 27 | coordinate[1] - coordinate[3]/2) 28 | 29 | x = int(origin[0]*344 - 2) 30 | x_plus_w =int((origin[0] + coordinate[2])*344 + 4) 31 | y = int(origin[1]*384 - 2) 32 | y_plus_h = int((origin[1] + coordinate[3])*384 + 4) 33 | x, y, x_plus_w, y_plus_h = fix(x,y,x_plus_w,y_plus_h) 34 | try: 35 | hanzi_img = img[y:y_plus_h, x:x_plus_w] 36 | normal_img = cv2.resize(hanzi_img, (65,65), 37 | 
def seg_all_img(path_file, net, meta):
    """Run detection and character cropping on every image listed in path_file.

    path_file holds one image path per line. Each path is passed to
    detect(net, meta, path) and the detections are handed to seg_one_img.
    """
    # Read the whole listing first; every line still carries its trailing
    # newline, so strip it to get a usable path.
    with open(path_file, 'r') as listing:
        image_paths = [entry.strip() for entry in listing.readlines()]

    # Crop the detected characters out of each image in turn.
    for img_path in image_paths:
        detections = detect(net, meta, img_path)
        seg_one_img(img_path, detections)
/*
 * Scalar activation functions and their derivatives.
 *
 * The *_activate helpers map a pre-activation value to the activation value.
 * NOTE(review): the *_gradient helpers look like they expect the activation's
 * OUTPUT as their argument (e.g. logistic_gradient is (1-x)*x) -- confirm
 * against the callers of gradient().
 */

/* Staircase: even integer parts give floor(x/2); odd ones add the
 * fractional part of x on top of floor(x/2). */
static inline float stair_activate(float x)
{
    int n = floor(x);
    if (n%2 == 0) return floor(x/2.);
    else return (x - n) + floor(x/2.);
}

/* Clamp to [-1, 1]. */
static inline float hardtan_activate(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}

/* Identity. */
static inline float linear_activate(float x){return x;}

/* Logistic sigmoid 1/(1+e^-x). */
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}

/* Sigmoid rescaled to (-1, 1). */
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}

/* max(x, 0), written branch-free. */
static inline float relu_activate(float x){return x*(x>0);}

/* ELU: x for x >= 0, e^x - 1 otherwise.
 * Written as a branch rather than (x >= 0)*x + (x < 0)*(exp(x)-1): once
 * exp(x) overflows to +inf the masked form evaluates 0*inf and returns NaN
 * for large positive inputs. Results are unchanged for every input where the
 * old expression was finite. */
static inline float elu_activate(float x){return (x >= 0) ? x : exp(x)-1;}

/* Leaky ReLU with slope .01 on the negative side. */
static inline float relie_activate(float x){return (x>0) ? x : .01*x;}

/* ReLU plus a .1*x term applied everywhere. */
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}

/* Leaky ReLU with slope .1 on the negative side. */
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}

/* Hyperbolic tangent.
 * Uses the library tanh() instead of (e^2x - 1)/(e^2x + 1), which turns into
 * inf/inf = NaN as soon as exp(2*x) overflows; tanh() saturates at +/-1. */
static inline float tanh_activate(float x){return tanh(x);}

/* Piecewise-linear sigmoid approximation: slope .125 on [-4, 4],
 * slope .01 outside. */
static inline float plse_activate(float x)
{
    if(x < -4) return .01 * (x + 4);
    if(x > 4)  return .01 * (x - 4) + 1;
    return .125*x + .5;
}

/* Leaky hard-tanh on [0, 1]: slope 1 inside, .001 outside. */
static inline float lhtan_activate(float x)
{
    if(x < 0) return .001*x;
    if(x > 1) return .001*(x-1) + 1;
    return x;
}

/* 1 strictly inside (0, 1), .001 elsewhere. */
static inline float lhtan_gradient(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001;
}

/* 1 strictly inside (-1, 1), 0 elsewhere. */
static inline float hardtan_gradient(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}

/* Constant 1. */
static inline float linear_gradient(float x){return 1;}

/* (1 - x) * x. */
static inline float logistic_gradient(float x){return (1-x)*x;}

/* Maps x from (-1, 1) back to y in (0, 1), then returns 2*(1-y)*y. */
static inline float loggy_gradient(float x)
{
    float y = (x+1.)/2.;
    return 2*(1-y)*y;
}

/* 0 at exact integers, 1 everywhere else. */
static inline float stair_gradient(float x)
{
    if (floor(x) == x) return 0;
    return 1;
}
/* 1 for strictly positive x, 0 otherwise. */
static inline float relu_gradient(float x)
{
    return (x > 0) ? 1.f : 0.f;
}

/* 1 for x >= 0, x + 1 for x < 0. Kept in masked arithmetic form so that
 * non-finite inputs propagate exactly as before. */
static inline float elu_gradient(float x)
{
    const int is_nonneg = (x >= 0);
    const int is_neg = (x < 0);
    return is_nonneg + is_neg*(x + 1);
}

/* 1 for strictly positive x, .01 otherwise. */
static inline float relie_gradient(float x)
{
    if (x > 0) return 1;
    return .01;
}

/* .1 everywhere, plus 1 for strictly positive x. */
static inline float ramp_gradient(float x)
{
    const int is_pos = (x > 0);
    return is_pos + .1;
}

/* 1 for strictly positive x, .1 otherwise. */
static inline float leaky_gradient(float x)
{
    if (x > 0) return 1;
    return .1;
}

/* 1 - x^2. */
static inline float tanh_gradient(float x)
{
    return 1 - x*x;
}

/* .125 on [0, 1], .01 outside that interval. */
static inline float plse_gradient(float x)
{
    const int outside = (x < 0 || x > 1);
    if (outside) return .01;
    return .125;
}
45 | print(img_link) 46 | # driver.get_screenshot_as_file('.//static/1.png') 47 | print('*' * 100) 48 | # logger.info('出现验证码--{}'.format(uuid)) 49 | img_path = '../jiyan/crawled_img/verifyCode{0}.jpg'.format( 50 | int(time.time())) 51 | urllib.request.urlretrieve(img_link[0], img_path) 52 | else: 53 | btn_retry = driver.find_element_by_xpath('/html/body/div[7]/div[2]/div[4]/div[3]') 54 | print(btn_retry) 55 | btn_retry.click() 56 | time.sleep(6) 57 | login_text = driver.page_source 58 | login_html = etree.HTML(login_text) 59 | img_link1 = login_html.xpath('//*[@class="geetest_item_img"]/@src') # 获取验证码链接 60 | img_path = '../jiyan/crawled_img/verifyCode{0}.jpg'.format(int(time.time())) 61 | urllib.request.urlretrieve(img_link1[0], img_path) 62 | except: 63 | driver.quit() 64 | get_pic() 65 | 66 | # except: 67 | # btn_retry = driver.find_element_by_xpath('//*[@class="geetest_panel_content"]') 68 | # print(btn_retry) 69 | # btn_retry.click() 70 | # # btn_retry.send_keys(Keys.ENTER) 71 | # time.sleep(2) 72 | # img_link = login_html.xpath('//*[@class="geetest_item_img"]/@src') # 获取验证码链接 73 | # urllib.request.urlretrieve(img_link[0], img_path) 74 | # # except: 75 | # driver.quit() 76 | # get_pic() 77 | 78 | btn = driver.find_element_by_xpath('//*[@class="geetest_commit"]') 79 | btn.send_keys(Keys.ENTER) 80 | time.sleep(5) 81 | 82 | if __name__ == '__main__': 83 | 84 | get_pic() 85 | -------------------------------------------------------------------------------- /src/crop_layer.c: -------------------------------------------------------------------------------- 1 | #include "crop_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | image get_crop_image(crop_layer l) 6 | { 7 | int h = l.out_h; 8 | int w = l.out_w; 9 | int c = l.out_c; 10 | return float_to_image(w,h,c,l.output); 11 | } 12 | 13 | void backward_crop_layer(const crop_layer l, network net){} 14 | void backward_crop_layer_gpu(const crop_layer l, network net){} 15 | 16 | crop_layer make_crop_layer(int batch, int h, 
int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) 17 | { 18 | fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); 19 | crop_layer l = {0}; 20 | l.type = CROP; 21 | l.batch = batch; 22 | l.h = h; 23 | l.w = w; 24 | l.c = c; 25 | l.scale = (float)crop_height / h; 26 | l.flip = flip; 27 | l.angle = angle; 28 | l.saturation = saturation; 29 | l.exposure = exposure; 30 | l.out_w = crop_width; 31 | l.out_h = crop_height; 32 | l.out_c = c; 33 | l.inputs = l.w * l.h * l.c; 34 | l.outputs = l.out_w * l.out_h * l.out_c; 35 | l.output = calloc(l.outputs*batch, sizeof(float)); 36 | l.forward = forward_crop_layer; 37 | l.backward = backward_crop_layer; 38 | 39 | #ifdef GPU 40 | l.forward_gpu = forward_crop_layer_gpu; 41 | l.backward_gpu = backward_crop_layer_gpu; 42 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 43 | l.rand_gpu = cuda_make_array(0, l.batch*8); 44 | #endif 45 | return l; 46 | } 47 | 48 | void resize_crop_layer(layer *l, int w, int h) 49 | { 50 | l->w = w; 51 | l->h = h; 52 | 53 | l->out_w = l->scale*w; 54 | l->out_h = l->scale*h; 55 | 56 | l->inputs = l->w * l->h * l->c; 57 | l->outputs = l->out_h * l->out_w * l->out_c; 58 | 59 | l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); 60 | #ifdef GPU 61 | cuda_free(l->output_gpu); 62 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 63 | #endif 64 | } 65 | 66 | 67 | void forward_crop_layer(const crop_layer l, network net) 68 | { 69 | int i,j,c,b,row,col; 70 | int index; 71 | int count = 0; 72 | int flip = (l.flip && rand()%2); 73 | int dh = rand()%(l.h - l.out_h + 1); 74 | int dw = rand()%(l.w - l.out_w + 1); 75 | float scale = 2; 76 | float trans = -1; 77 | if(l.noadjust){ 78 | scale = 1; 79 | trans = 0; 80 | } 81 | if(!net.train){ 82 | flip = 0; 83 | dh = (l.h - l.out_h)/2; 84 | dw = (l.w - l.out_w)/2; 85 | } 86 | for(b = 0; b < l.batch; ++b){ 87 | for(c = 0; c < l.c; ++c){ 
88 | for(i = 0; i < l.out_h; ++i){ 89 | for(j = 0; j < l.out_w; ++j){ 90 | if(flip){ 91 | col = l.w - dw - j - 1; 92 | }else{ 93 | col = j + dw; 94 | } 95 | row = i + dh; 96 | index = col+l.w*(row+l.h*(c + l.c*b)); 97 | l.output[count++] = net.input[index]*scale + trans; 98 | } 99 | } 100 | } 101 | } 102 | } 103 | 104 | -------------------------------------------------------------------------------- /src/shortcut_layer.c: -------------------------------------------------------------------------------- 1 | #include "shortcut_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | #include "activations.h" 5 | 6 | #include 7 | #include 8 | 9 | layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) 10 | { 11 | fprintf(stderr, "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n",index, w2,h2,c2, w,h,c); 12 | layer l = {0}; 13 | l.type = SHORTCUT; 14 | l.batch = batch; 15 | l.w = w2; 16 | l.h = h2; 17 | l.c = c2; 18 | l.out_w = w; 19 | l.out_h = h; 20 | l.out_c = c; 21 | l.outputs = w*h*c; 22 | l.inputs = l.outputs; 23 | 24 | l.index = index; 25 | 26 | l.delta = calloc(l.outputs*batch, sizeof(float)); 27 | l.output = calloc(l.outputs*batch, sizeof(float));; 28 | 29 | l.forward = forward_shortcut_layer; 30 | l.backward = backward_shortcut_layer; 31 | #ifdef GPU 32 | l.forward_gpu = forward_shortcut_layer_gpu; 33 | l.backward_gpu = backward_shortcut_layer_gpu; 34 | 35 | l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); 36 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 37 | #endif 38 | return l; 39 | } 40 | 41 | void resize_shortcut_layer(layer *l, int w, int h) 42 | { 43 | assert(l->w == l->out_w); 44 | assert(l->h == l->out_h); 45 | l->w = l->out_w = w; 46 | l->h = l->out_h = h; 47 | l->outputs = w*h*l->out_c; 48 | l->inputs = l->outputs; 49 | l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); 50 | l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); 51 | 52 | #ifdef GPU 53 | 
cuda_free(l->output_gpu); 54 | cuda_free(l->delta_gpu); 55 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 56 | l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); 57 | #endif 58 | 59 | } 60 | 61 | 62 | void forward_shortcut_layer(const layer l, network net) 63 | { 64 | copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1); 65 | shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output); 66 | activate_array(l.output, l.outputs*l.batch, l.activation); 67 | } 68 | 69 | void backward_shortcut_layer(const layer l, network net) 70 | { 71 | gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); 72 | axpy_cpu(l.outputs*l.batch, l.alpha, l.delta, 1, net.delta, 1); 73 | shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta); 74 | } 75 | 76 | #ifdef GPU 77 | void forward_shortcut_layer_gpu(const layer l, network net) 78 | { 79 | copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1); 80 | shortcut_gpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.alpha, l.beta, l.output_gpu); 81 | activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation); 82 | } 83 | 84 | void backward_shortcut_layer_gpu(const layer l, network net) 85 | { 86 | gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); 87 | axpy_gpu(l.outputs*l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); 88 | shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, 1, l.beta, net.layers[l.index].delta_gpu); 89 | } 90 | #endif 91 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GPU=0 2 | CUDNN=0 3 | OPENCV=1 4 | OPENMP=0 5 | DEBUG=0 6 | 7 | ARCH= -D_FORCE_INLINES -gencode arch=compute_30,code=sm_30 \ 8 | -gencode arch=compute_35,code=sm_35 \ 
9 | -gencode arch=compute_50,code=[sm_50,compute_50] \ 10 | -gencode arch=compute_52,code=[sm_52,compute_52] 11 | # -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated? 12 | 13 | # This is what I use, uncomment if you know your arch and want to specify 14 | # ARCH= -gencode arch=compute_52,code=compute_52 15 | 16 | VPATH=./src/:./examples 17 | SLIB=libdarknet.so 18 | ALIB=libdarknet.a 19 | EXEC=darknet 20 | OBJDIR=./obj/ 21 | 22 | CC=gcc 23 | NVCC=nvcc 24 | AR=ar 25 | ARFLAGS=rcs 26 | OPTS=-Ofast 27 | LDFLAGS= -lm -pthread 28 | COMMON= -Iinclude/ -Isrc/ 29 | CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC 30 | 31 | ifeq ($(OPENMP), 1) 32 | CFLAGS+= -fopenmp 33 | endif 34 | 35 | ifeq ($(DEBUG), 1) 36 | OPTS=-O0 -g 37 | endif 38 | 39 | CFLAGS+=$(OPTS) 40 | 41 | ifeq ($(OPENCV), 1) 42 | COMMON+= -DOPENCV 43 | CFLAGS+= -DOPENCV 44 | LDFLAGS+= `pkg-config --libs opencv` 45 | COMMON+= `pkg-config --cflags opencv` 46 | endif 47 | 48 | ifeq ($(GPU), 1) 49 | COMMON+= -DGPU -I/usr/local/cuda/include/ 50 | CFLAGS+= -DGPU 51 | LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand 52 | endif 53 | 54 | ifeq ($(CUDNN), 1) 55 | COMMON+= -DCUDNN 56 | CFLAGS+= -DCUDNN 57 | LDFLAGS+= -lcudnn 58 | endif 59 | 60 | OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o 61 | EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o darknet.o 62 
| ifeq ($(GPU), 1) 63 | LDFLAGS+= -lstdc++ 64 | OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o 65 | endif 66 | 67 | EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA)) 68 | OBJS = $(addprefix $(OBJDIR), $(OBJ)) 69 | DEPS = $(wildcard src/*.h) Makefile include/darknet.h 70 | 71 | all: obj backup results $(SLIB) $(ALIB) $(EXEC) 72 | #all: obj results $(SLIB) $(ALIB) $(EXEC) 73 | 74 | 75 | $(EXEC): $(EXECOBJ) $(ALIB) 76 | $(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB) 77 | 78 | $(ALIB): $(OBJS) 79 | $(AR) $(ARFLAGS) $@ $^ 80 | 81 | $(SLIB): $(OBJS) 82 | $(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS) 83 | 84 | $(OBJDIR)%.o: %.c $(DEPS) 85 | $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ 86 | 87 | $(OBJDIR)%.o: %.cu $(DEPS) 88 | $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ 89 | 90 | obj: 91 | mkdir -p obj 92 | backup: 93 | mkdir -p backup 94 | results: 95 | mkdir -p results 96 | 97 | .PHONY: clean 98 | 99 | clean: 100 | rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ) $(OBJDIR)/* 101 | 102 | -------------------------------------------------------------------------------- /src/maxpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "maxpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) 11 | { 12 | int h = (in_h + 2*pad)/stride; 13 | int w = (in_w + 2*pad)/stride; 14 | int c = in_c; 15 | 16 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 17 | if(id >= n) return; 18 | 19 | int j = id % w; 20 | id /= w; 21 | int i = id % h; 22 | id /= h; 23 | int k = 
id % c; 24 | id /= c; 25 | int b = id; 26 | 27 | int w_offset = -pad; 28 | int h_offset = -pad; 29 | 30 | int out_index = j + w*(i + h*(k + c*b)); 31 | float max = -INFINITY; 32 | int max_i = -1; 33 | int l, m; 34 | for(l = 0; l < size; ++l){ 35 | for(m = 0; m < size; ++m){ 36 | int cur_h = h_offset + i*stride + l; 37 | int cur_w = w_offset + j*stride + m; 38 | int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); 39 | int valid = (cur_h >= 0 && cur_h < in_h && 40 | cur_w >= 0 && cur_w < in_w); 41 | float val = (valid != 0) ? input[index] : -INFINITY; 42 | max_i = (val > max) ? index : max_i; 43 | max = (val > max) ? val : max; 44 | } 45 | } 46 | output[out_index] = max; 47 | indexes[out_index] = max_i; 48 | } 49 | 50 | __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) 51 | { 52 | int h = (in_h + 2*pad)/stride; 53 | int w = (in_w + 2*pad)/stride; 54 | int c = in_c; 55 | int area = (size-1)/stride; 56 | 57 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 58 | if(id >= n) return; 59 | 60 | int index = id; 61 | int j = id % in_w; 62 | id /= in_w; 63 | int i = id % in_h; 64 | id /= in_h; 65 | int k = id % in_c; 66 | id /= in_c; 67 | int b = id; 68 | 69 | int w_offset = -pad; 70 | int h_offset = -pad; 71 | 72 | float d = 0; 73 | int l, m; 74 | for(l = -area; l < area+1; ++l){ 75 | for(m = -area; m < area+1; ++m){ 76 | int out_w = (j-w_offset)/stride + m; 77 | int out_h = (i-h_offset)/stride + l; 78 | int out_index = out_w + w*(out_h + h*(k + c*b)); 79 | int valid = (out_w >= 0 && out_w < w && 80 | out_h >= 0 && out_h < h); 81 | d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; 82 | } 83 | } 84 | prev_delta[index] += d; 85 | } 86 | 87 | extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net) 88 | { 89 | int h = layer.out_h; 90 | int w = layer.out_w; 91 | int c = layer.c; 92 | 93 | size_t n = h*w*c*layer.batch; 94 | 95 | forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu); 96 | check_error(cudaPeekAtLastError()); 97 | } 98 | 99 | extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net) 100 | { 101 | size_t n = layer.h*layer.w*layer.c*layer.batch; 102 | 103 | backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu); 104 | check_error(cudaPeekAtLastError()); 105 | } 106 | 107 | -------------------------------------------------------------------------------- /src/upsample_layer.c: -------------------------------------------------------------------------------- 1 | #include "upsample_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | 5 | #include 6 | 7 | layer make_upsample_layer(int batch, int w, int h, int c, int stride) 8 | { 9 | layer l = {0}; 10 | l.type = UPSAMPLE; 11 | l.batch = batch; 12 | l.w = w; 13 | l.h = h; 14 | l.c = c; 15 | l.out_w = w*stride; 16 | l.out_h = h*stride; 17 | l.out_c = c; 18 | if(stride < 0){ 19 | stride = -stride; 20 | l.reverse=1; 21 | l.out_w = w/stride; 22 | l.out_h = h/stride; 23 | } 24 | l.stride = stride; 25 | l.outputs = l.out_w*l.out_h*l.out_c; 26 | l.inputs = l.w*l.h*l.c; 27 | l.delta = calloc(l.outputs*batch, sizeof(float)); 28 | l.output = calloc(l.outputs*batch, sizeof(float));; 29 | 30 | l.forward = forward_upsample_layer; 31 | l.backward = backward_upsample_layer; 32 | #ifdef GPU 33 | l.forward_gpu = forward_upsample_layer_gpu; 34 | l.backward_gpu = backward_upsample_layer_gpu; 35 | 36 | l.delta_gpu = cuda_make_array(l.delta, 
l.outputs*batch); 37 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 38 | #endif 39 | if(l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); 40 | else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); 41 | return l; 42 | } 43 | 44 | void resize_upsample_layer(layer *l, int w, int h) 45 | { 46 | l->w = w; 47 | l->h = h; 48 | l->out_w = w*l->stride; 49 | l->out_h = h*l->stride; 50 | if(l->reverse){ 51 | l->out_w = w/l->stride; 52 | l->out_h = h/l->stride; 53 | } 54 | l->outputs = l->out_w*l->out_h*l->out_c; 55 | l->inputs = l->h*l->w*l->c; 56 | l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); 57 | l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); 58 | 59 | #ifdef GPU 60 | cuda_free(l->output_gpu); 61 | cuda_free(l->delta_gpu); 62 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 63 | l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); 64 | #endif 65 | 66 | } 67 | 68 | void forward_upsample_layer(const layer l, network net) 69 | { 70 | fill_cpu(l.outputs*l.batch, 0, l.output, 1); 71 | if(l.reverse){ 72 | upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input); 73 | }else{ 74 | upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); 75 | } 76 | } 77 | 78 | void backward_upsample_layer(const layer l, network net) 79 | { 80 | if(l.reverse){ 81 | upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta); 82 | }else{ 83 | upsample_cpu(net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); 84 | } 85 | } 86 | 87 | #ifdef GPU 88 | void forward_upsample_layer_gpu(const layer l, network net) 89 | { 90 | fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); 91 | if(l.reverse){ 92 | upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input_gpu); 93 | 
}else{ 94 | upsample_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu); 95 | } 96 | } 97 | 98 | void backward_upsample_layer_gpu(const layer l, network net) 99 | { 100 | if(l.reverse){ 101 | upsample_gpu(l.delta_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, net.delta_gpu); 102 | }else{ 103 | upsample_gpu(net.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta_gpu); 104 | } 105 | } 106 | #endif 107 | -------------------------------------------------------------------------------- /src/option_list.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "option_list.h" 5 | #include "utils.h" 6 | 7 | list *read_data_cfg(char *filename) 8 | { 9 | FILE *file = fopen(filename, "r"); 10 | if(file == 0) file_error(filename); 11 | char *line; 12 | int nu = 0; 13 | list *options = make_list(); 14 | while((line=fgetl(file)) != 0){ 15 | ++ nu; 16 | strip(line); 17 | switch(line[0]){ 18 | case '\0': 19 | case '#': 20 | case ';': 21 | free(line); 22 | break; 23 | default: 24 | if(!read_option(line, options)){ 25 | fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); 26 | free(line); 27 | } 28 | break; 29 | } 30 | } 31 | fclose(file); 32 | return options; 33 | } 34 | 35 | metadata get_metadata(char *file) 36 | { 37 | metadata m = {0}; 38 | list *options = read_data_cfg(file); 39 | 40 | char *name_list = option_find_str(options, "names", 0); 41 | if(!name_list) name_list = option_find_str(options, "labels", 0); 42 | if(!name_list) { 43 | fprintf(stderr, "No names or labels found\n"); 44 | } else { 45 | m.names = get_labels(name_list); 46 | } 47 | m.classes = option_find_int(options, "classes", 2); 48 | free_list(options); 49 | return m; 50 | } 51 | 52 | int read_option(char *s, list *options) 53 | { 54 | size_t i; 55 | size_t len = strlen(s); 56 | char *val = 0; 57 | for(i = 0; i < len; ++i){ 58 | if(s[i] == '='){ 59 | s[i] = 
'\0'; 60 | val = s+i+1; 61 | break; 62 | } 63 | } 64 | if(i == len-1) return 0; 65 | char *key = s; 66 | option_insert(options, key, val); 67 | return 1; 68 | } 69 | 70 | void option_insert(list *l, char *key, char *val) 71 | { 72 | kvp *p = malloc(sizeof(kvp)); 73 | p->key = key; 74 | p->val = val; 75 | p->used = 0; 76 | list_insert(l, p); 77 | } 78 | 79 | void option_unused(list *l) 80 | { 81 | node *n = l->front; 82 | while(n){ 83 | kvp *p = (kvp *)n->val; 84 | if(!p->used){ 85 | fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); 86 | } 87 | n = n->next; 88 | } 89 | } 90 | 91 | char *option_find(list *l, char *key) 92 | { 93 | node *n = l->front; 94 | while(n){ 95 | kvp *p = (kvp *)n->val; 96 | if(strcmp(p->key, key) == 0){ 97 | p->used = 1; 98 | return p->val; 99 | } 100 | n = n->next; 101 | } 102 | return 0; 103 | } 104 | char *option_find_str(list *l, char *key, char *def) 105 | { 106 | char *v = option_find(l, key); 107 | if(v) return v; 108 | if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); 109 | return def; 110 | } 111 | 112 | int option_find_int(list *l, char *key, int def) 113 | { 114 | char *v = option_find(l, key); 115 | if(v) return atoi(v); 116 | fprintf(stderr, "%s: Using default '%d'\n", key, def); 117 | return def; 118 | } 119 | 120 | int option_find_int_quiet(list *l, char *key, int def) 121 | { 122 | char *v = option_find(l, key); 123 | if(v) return atoi(v); 124 | return def; 125 | } 126 | 127 | float option_find_float_quiet(list *l, char *key, float def) 128 | { 129 | char *v = option_find(l, key); 130 | if(v) return atof(v); 131 | return def; 132 | } 133 | 134 | float option_find_float(list *l, char *key, float def) 135 | { 136 | char *v = option_find(l, key); 137 | if(v) return atof(v); 138 | fprintf(stderr, "%s: Using default '%lf'\n", key, def); 139 | return def; 140 | } 141 | -------------------------------------------------------------------------------- /src/softmax_layer.c: 
-------------------------------------------------------------------------------- 1 | #include "softmax_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | softmax_layer make_softmax_layer(int batch, int inputs, int groups) 12 | { 13 | assert(inputs%groups == 0); 14 | fprintf(stderr, "softmax %4d\n", inputs); 15 | softmax_layer l = {0}; 16 | l.type = SOFTMAX; 17 | l.batch = batch; 18 | l.groups = groups; 19 | l.inputs = inputs; 20 | l.outputs = inputs; 21 | l.loss = calloc(inputs*batch, sizeof(float)); 22 | l.output = calloc(inputs*batch, sizeof(float)); 23 | l.delta = calloc(inputs*batch, sizeof(float)); 24 | l.cost = calloc(1, sizeof(float)); 25 | 26 | l.forward = forward_softmax_layer; 27 | l.backward = backward_softmax_layer; 28 | #ifdef GPU 29 | l.forward_gpu = forward_softmax_layer_gpu; 30 | l.backward_gpu = backward_softmax_layer_gpu; 31 | 32 | l.output_gpu = cuda_make_array(l.output, inputs*batch); 33 | l.loss_gpu = cuda_make_array(l.loss, inputs*batch); 34 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 35 | #endif 36 | return l; 37 | } 38 | 39 | void forward_softmax_layer(const softmax_layer l, network net) 40 | { 41 | if(l.softmax_tree){ 42 | int i; 43 | int count = 0; 44 | for (i = 0; i < l.softmax_tree->groups; ++i) { 45 | int group_size = l.softmax_tree->group_size[i]; 46 | softmax_cpu(net.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count); 47 | count += group_size; 48 | } 49 | } else { 50 | softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); 51 | } 52 | 53 | if(net.truth){ 54 | softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); 55 | l.cost[0] = sum_array(l.loss, l.batch*l.inputs); 56 | } 57 | } 58 | 59 | void backward_softmax_layer(const softmax_layer l, network net) 60 | { 61 | axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); 62 | 
} 63 | 64 | #ifdef GPU 65 | 66 | void pull_softmax_layer_output(const softmax_layer layer) 67 | { 68 | cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); 69 | } 70 | 71 | void forward_softmax_layer_gpu(const softmax_layer l, network net) 72 | { 73 | if(l.softmax_tree){ 74 | softmax_tree(net.input_gpu, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); 75 | /* 76 | int i; 77 | int count = 0; 78 | for (i = 0; i < l.softmax_tree->groups; ++i) { 79 | int group_size = l.softmax_tree->group_size[i]; 80 | softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); 81 | count += group_size; 82 | } 83 | */ 84 | } else { 85 | if(l.spatial){ 86 | softmax_gpu(net.input_gpu, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); 87 | }else{ 88 | softmax_gpu(net.input_gpu, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); 89 | } 90 | } 91 | if(net.truth){ 92 | softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth_gpu, l.delta_gpu, l.loss_gpu); 93 | if(l.softmax_tree){ 94 | mask_gpu(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); 95 | mask_gpu(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth_gpu, 0); 96 | } 97 | cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); 98 | l.cost[0] = sum_array(l.loss, l.batch*l.inputs); 99 | } 100 | } 101 | 102 | void backward_softmax_layer_gpu(const softmax_layer layer, network net) 103 | { 104 | axpy_gpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, net.delta_gpu, 1); 105 | } 106 | 107 | #endif 108 | -------------------------------------------------------------------------------- /examples/dice.c: -------------------------------------------------------------------------------- 1 | #include "darknet.h" 2 | 3 | char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; 4 | 5 | void train_dice(char *cfgfile, char *weightfile) 6 | { 7 | srand(time(0)); 
8 | float avg_loss = -1; 9 | char *base = basecfg(cfgfile); 10 | char *backup_directory = "/home/pjreddie/backup/"; 11 | printf("%s\n", base); 12 | network net = parse_network_cfg(cfgfile); 13 | if(weightfile){ 14 | load_weights(&net, weightfile); 15 | } 16 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); 17 | int imgs = 1024; 18 | int i = *net.seen/imgs; 19 | char **labels = dice_labels; 20 | list *plist = get_paths("data/dice/dice.train.list"); 21 | char **paths = (char **)list_to_array(plist); 22 | printf("%d\n", plist->size); 23 | clock_t time; 24 | while(1){ 25 | ++i; 26 | time=clock(); 27 | data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h); 28 | printf("Loaded: %lf seconds\n", sec(clock()-time)); 29 | 30 | time=clock(); 31 | float loss = train_network(net, train); 32 | if(avg_loss == -1) avg_loss = loss; 33 | avg_loss = avg_loss*.9 + loss*.1; 34 | printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); 35 | free_data(train); 36 | if((i % 100) == 0) net.learning_rate *= .1; 37 | if(i%100==0){ 38 | char buff[256]; 39 | sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); 40 | save_weights(net, buff); 41 | } 42 | } 43 | } 44 | 45 | void validate_dice(char *filename, char *weightfile) 46 | { 47 | network net = parse_network_cfg(filename); 48 | if(weightfile){ 49 | load_weights(&net, weightfile); 50 | } 51 | srand(time(0)); 52 | 53 | char **labels = dice_labels; 54 | list *plist = get_paths("data/dice/dice.val.list"); 55 | 56 | char **paths = (char **)list_to_array(plist); 57 | int m = plist->size; 58 | free_list(plist); 59 | 60 | data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h); 61 | float *acc = network_accuracies(net, val, 2); 62 | printf("Validation Accuracy: %f, %d images\n", acc[0], m); 63 | free_data(val); 64 | } 65 | 66 | void test_dice(char *cfgfile, char *weightfile, char *filename) 67 | { 68 | network net = 
parse_network_cfg(cfgfile); 69 | if(weightfile){ 70 | load_weights(&net, weightfile); 71 | } 72 | set_batch_network(&net, 1); 73 | srand(2222222); 74 | int i = 0; 75 | char **names = dice_labels; 76 | char buff[256]; 77 | char *input = buff; 78 | int indexes[6]; 79 | while(1){ 80 | if(filename){ 81 | strncpy(input, filename, 256); 82 | }else{ 83 | printf("Enter Image Path: "); 84 | fflush(stdout); 85 | input = fgets(input, 256, stdin); 86 | if(!input) return; 87 | strtok(input, "\n"); 88 | } 89 | image im = load_image_color(input, net.w, net.h); 90 | float *X = im.data; 91 | float *predictions = network_predict(net, X); 92 | top_predictions(net, 6, indexes); 93 | for(i = 0; i < 6; ++i){ 94 | int index = indexes[i]; 95 | printf("%s: %f\n", names[index], predictions[index]); 96 | } 97 | free_image(im); 98 | if (filename) break; 99 | } 100 | } 101 | 102 | void run_dice(int argc, char **argv) 103 | { 104 | if(argc < 4){ 105 | fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); 106 | return; 107 | } 108 | 109 | char *cfg = argv[3]; 110 | char *weights = (argc > 4) ? argv[4] : 0; 111 | char *filename = (argc > 5) ? 
argv[5]: 0; 112 | if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); 113 | else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); 114 | else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); 115 | } 116 | 117 | -------------------------------------------------------------------------------- /examples/super.c: -------------------------------------------------------------------------------- 1 | #include "darknet.h" 2 | 3 | void train_super(char *cfgfile, char *weightfile, int clear) 4 | { 5 | char *train_images = "/data/imagenet/imagenet1k.train.list"; 6 | char *backup_directory = "/home/pjreddie/backup/"; 7 | srand(time(0)); 8 | char *base = basecfg(cfgfile); 9 | printf("%s\n", base); 10 | float avg_loss = -1; 11 | network *net = load_network(cfgfile, weightfile, clear); 12 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); 13 | int imgs = net->batch*net->subdivisions; 14 | int i = *net->seen/imgs; 15 | data train, buffer; 16 | 17 | 18 | list *plist = get_paths(train_images); 19 | //int N = plist->size; 20 | char **paths = (char **)list_to_array(plist); 21 | 22 | load_args args = {0}; 23 | args.w = net->w; 24 | args.h = net->h; 25 | args.scale = 4; 26 | args.paths = paths; 27 | args.n = imgs; 28 | args.m = plist->size; 29 | args.d = &buffer; 30 | args.type = SUPER_DATA; 31 | 32 | pthread_t load_thread = load_data_in_thread(args); 33 | clock_t time; 34 | //while(i*imgs < N*120){ 35 | while(get_current_batch(net) < net->max_batches){ 36 | i += 1; 37 | time=clock(); 38 | pthread_join(load_thread, 0); 39 | train = buffer; 40 | load_thread = load_data_in_thread(args); 41 | 42 | printf("Loaded: %lf seconds\n", sec(clock()-time)); 43 | 44 | time=clock(); 45 | float loss = train_network(net, train); 46 | if (avg_loss < 0) avg_loss = loss; 47 | avg_loss = avg_loss*.9 + loss*.1; 48 | 49 | printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), 
sec(clock()-time), i*imgs); 50 | if(i%1000==0){ 51 | char buff[256]; 52 | sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); 53 | save_weights(net, buff); 54 | } 55 | if(i%100==0){ 56 | char buff[256]; 57 | sprintf(buff, "%s/%s.backup", backup_directory, base); 58 | save_weights(net, buff); 59 | } 60 | free_data(train); 61 | } 62 | char buff[256]; 63 | sprintf(buff, "%s/%s_final.weights", backup_directory, base); 64 | save_weights(net, buff); 65 | } 66 | 67 | void test_super(char *cfgfile, char *weightfile, char *filename) 68 | { 69 | network *net = load_network(cfgfile, weightfile, 0); 70 | set_batch_network(net, 1); 71 | srand(2222222); 72 | 73 | clock_t time; 74 | char buff[256]; 75 | char *input = buff; 76 | while(1){ 77 | if(filename){ 78 | strncpy(input, filename, 256); 79 | }else{ 80 | printf("Enter Image Path: "); 81 | fflush(stdout); 82 | input = fgets(input, 256, stdin); 83 | if(!input) return; 84 | strtok(input, "\n"); 85 | } 86 | image im = load_image_color(input, 0, 0); 87 | resize_network(net, im.w, im.h); 88 | printf("%d %d\n", im.w, im.h); 89 | 90 | float *X = im.data; 91 | time=clock(); 92 | network_predict(net, X); 93 | image out = get_network_image(net); 94 | printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); 95 | save_image(out, "out"); 96 | show_image(out, "out"); 97 | 98 | free_image(im); 99 | if (filename) break; 100 | } 101 | } 102 | 103 | 104 | void run_super(int argc, char **argv) 105 | { 106 | if(argc < 4){ 107 | fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); 108 | return; 109 | } 110 | 111 | char *cfg = argv[3]; 112 | char *weights = (argc > 4) ? argv[4] : 0; 113 | char *filename = (argc > 5) ? 
argv[5] : 0; 114 | int clear = find_arg(argc, argv, "-clear"); 115 | if(0==strcmp(argv[2], "train")) train_super(cfg, weights, clear); 116 | else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename); 117 | /* 118 | else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights); 119 | */ 120 | } 121 | -------------------------------------------------------------------------------- /src/activations.c: -------------------------------------------------------------------------------- 1 | #include "activations.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | char *get_activation_string(ACTIVATION a) 9 | { 10 | switch(a){ 11 | case LOGISTIC: 12 | return "logistic"; 13 | case LOGGY: 14 | return "loggy"; 15 | case RELU: 16 | return "relu"; 17 | case ELU: 18 | return "elu"; 19 | case RELIE: 20 | return "relie"; 21 | case RAMP: 22 | return "ramp"; 23 | case LINEAR: 24 | return "linear"; 25 | case TANH: 26 | return "tanh"; 27 | case PLSE: 28 | return "plse"; 29 | case LEAKY: 30 | return "leaky"; 31 | case STAIR: 32 | return "stair"; 33 | case HARDTAN: 34 | return "hardtan"; 35 | case LHTAN: 36 | return "lhtan"; 37 | default: 38 | break; 39 | } 40 | return "relu"; 41 | } 42 | 43 | ACTIVATION get_activation(char *s) 44 | { 45 | if (strcmp(s, "logistic")==0) return LOGISTIC; 46 | if (strcmp(s, "loggy")==0) return LOGGY; 47 | if (strcmp(s, "relu")==0) return RELU; 48 | if (strcmp(s, "elu")==0) return ELU; 49 | if (strcmp(s, "relie")==0) return RELIE; 50 | if (strcmp(s, "plse")==0) return PLSE; 51 | if (strcmp(s, "hardtan")==0) return HARDTAN; 52 | if (strcmp(s, "lhtan")==0) return LHTAN; 53 | if (strcmp(s, "linear")==0) return LINEAR; 54 | if (strcmp(s, "ramp")==0) return RAMP; 55 | if (strcmp(s, "leaky")==0) return LEAKY; 56 | if (strcmp(s, "tanh")==0) return TANH; 57 | if (strcmp(s, "stair")==0) return STAIR; 58 | fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); 59 | return RELU; 60 | } 61 | 62 | float 
activate(float x, ACTIVATION a) 63 | { 64 | switch(a){ 65 | case LINEAR: 66 | return linear_activate(x); 67 | case LOGISTIC: 68 | return logistic_activate(x); 69 | case LOGGY: 70 | return loggy_activate(x); 71 | case RELU: 72 | return relu_activate(x); 73 | case ELU: 74 | return elu_activate(x); 75 | case RELIE: 76 | return relie_activate(x); 77 | case RAMP: 78 | return ramp_activate(x); 79 | case LEAKY: 80 | return leaky_activate(x); 81 | case TANH: 82 | return tanh_activate(x); 83 | case PLSE: 84 | return plse_activate(x); 85 | case STAIR: 86 | return stair_activate(x); 87 | case HARDTAN: 88 | return hardtan_activate(x); 89 | case LHTAN: 90 | return lhtan_activate(x); 91 | } 92 | return 0; 93 | } 94 | 95 | void activate_array(float *x, const int n, const ACTIVATION a) 96 | { 97 | int i; 98 | for(i = 0; i < n; ++i){ 99 | x[i] = activate(x[i], a); 100 | } 101 | } 102 | 103 | float gradient(float x, ACTIVATION a) 104 | { 105 | switch(a){ 106 | case LINEAR: 107 | return linear_gradient(x); 108 | case LOGISTIC: 109 | return logistic_gradient(x); 110 | case LOGGY: 111 | return loggy_gradient(x); 112 | case RELU: 113 | return relu_gradient(x); 114 | case ELU: 115 | return elu_gradient(x); 116 | case RELIE: 117 | return relie_gradient(x); 118 | case RAMP: 119 | return ramp_gradient(x); 120 | case LEAKY: 121 | return leaky_gradient(x); 122 | case TANH: 123 | return tanh_gradient(x); 124 | case PLSE: 125 | return plse_gradient(x); 126 | case STAIR: 127 | return stair_gradient(x); 128 | case HARDTAN: 129 | return hardtan_gradient(x); 130 | case LHTAN: 131 | return lhtan_gradient(x); 132 | } 133 | return 0; 134 | } 135 | 136 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) 137 | { 138 | int i; 139 | for(i = 0; i < n; ++i){ 140 | delta[i] *= gradient(x[i], a); 141 | } 142 | } 143 | 144 | -------------------------------------------------------------------------------- /src/tree.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tree.h" 4 | #include "utils.h" 5 | #include "data.h" 6 | 7 | void change_leaves(tree *t, char *leaf_list) 8 | { 9 | list *llist = get_paths(leaf_list); 10 | char **leaves = (char **)list_to_array(llist); 11 | int n = llist->size; 12 | int i,j; 13 | int found = 0; 14 | for(i = 0; i < t->n; ++i){ 15 | t->leaf[i] = 0; 16 | for(j = 0; j < n; ++j){ 17 | if (0==strcmp(t->name[i], leaves[j])){ 18 | t->leaf[i] = 1; 19 | ++found; 20 | break; 21 | } 22 | } 23 | } 24 | fprintf(stderr, "Found %d leaves.\n", found); 25 | } 26 | 27 | float get_hierarchy_probability(float *x, tree *hier, int c, int stride) 28 | { 29 | float p = 1; 30 | while(c >= 0){ 31 | p = p * x[c*stride]; 32 | c = hier->parent[c]; 33 | } 34 | return p; 35 | } 36 | 37 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride) 38 | { 39 | int j; 40 | for(j = 0; j < n; ++j){ 41 | int parent = hier->parent[j]; 42 | if(parent >= 0){ 43 | predictions[j*stride] *= predictions[parent*stride]; 44 | } 45 | } 46 | if(only_leaves){ 47 | for(j = 0; j < n; ++j){ 48 | if(!hier->leaf[j]) predictions[j*stride] = 0; 49 | } 50 | } 51 | } 52 | 53 | int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) 54 | { 55 | float p = 1; 56 | int group = 0; 57 | int i; 58 | while(1){ 59 | float max = 0; 60 | int max_i = 0; 61 | 62 | for(i = 0; i < hier->group_size[group]; ++i){ 63 | int index = i + hier->group_offset[group]; 64 | float val = predictions[(i + hier->group_offset[group])*stride]; 65 | if(val > max){ 66 | max_i = index; 67 | max = val; 68 | } 69 | } 70 | if(p*max > thresh){ 71 | p = p*max; 72 | group = hier->child[max_i]; 73 | if(hier->child[max_i] < 0) return max_i; 74 | } else if (group == 0){ 75 | return max_i; 76 | } else { 77 | return hier->parent[hier->group_offset[group]]; 78 | } 79 | } 80 | return 0; 81 | } 82 | 83 | tree 
*read_tree(char *filename) 84 | { 85 | tree t = {0}; 86 | FILE *fp = fopen(filename, "r"); 87 | 88 | char *line; 89 | int last_parent = -1; 90 | int group_size = 0; 91 | int groups = 0; 92 | int n = 0; 93 | while((line=fgetl(fp)) != 0){ 94 | char *id = calloc(256, sizeof(char)); 95 | int parent = -1; 96 | sscanf(line, "%s %d", id, &parent); 97 | t.parent = realloc(t.parent, (n+1)*sizeof(int)); 98 | t.parent[n] = parent; 99 | 100 | t.child = realloc(t.child, (n+1)*sizeof(int)); 101 | t.child[n] = -1; 102 | 103 | t.name = realloc(t.name, (n+1)*sizeof(char *)); 104 | t.name[n] = id; 105 | if(parent != last_parent){ 106 | ++groups; 107 | t.group_offset = realloc(t.group_offset, groups * sizeof(int)); 108 | t.group_offset[groups - 1] = n - group_size; 109 | t.group_size = realloc(t.group_size, groups * sizeof(int)); 110 | t.group_size[groups - 1] = group_size; 111 | group_size = 0; 112 | last_parent = parent; 113 | } 114 | t.group = realloc(t.group, (n+1)*sizeof(int)); 115 | t.group[n] = groups; 116 | if (parent >= 0) { 117 | t.child[parent] = groups; 118 | } 119 | ++n; 120 | ++group_size; 121 | } 122 | ++groups; 123 | t.group_offset = realloc(t.group_offset, groups * sizeof(int)); 124 | t.group_offset[groups - 1] = n - group_size; 125 | t.group_size = realloc(t.group_size, groups * sizeof(int)); 126 | t.group_size[groups - 1] = group_size; 127 | t.n = n; 128 | t.groups = groups; 129 | t.leaf = calloc(n, sizeof(int)); 130 | int i; 131 | for(i = 0; i < n; ++i) t.leaf[i] = 1; 132 | for(i = 0; i < n; ++i) if(t.parent[i] >= 0) t.leaf[t.parent[i]] = 0; 133 | 134 | fclose(fp); 135 | tree *tree_ptr = calloc(1, sizeof(tree)); 136 | *tree_ptr = t; 137 | //error(0); 138 | return tree_ptr; 139 | } 140 | -------------------------------------------------------------------------------- /src/maxpool_layer.c: -------------------------------------------------------------------------------- 1 | #include "maxpool_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | image 
get_maxpool_image(maxpool_layer l) 6 | { 7 | int h = l.out_h; 8 | int w = l.out_w; 9 | int c = l.c; 10 | return float_to_image(w,h,c,l.output); 11 | } 12 | 13 | image get_maxpool_delta(maxpool_layer l) 14 | { 15 | int h = l.out_h; 16 | int w = l.out_w; 17 | int c = l.c; 18 | return float_to_image(w,h,c,l.delta); 19 | } 20 | 21 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) 22 | { 23 | maxpool_layer l = {0}; 24 | l.type = MAXPOOL; 25 | l.batch = batch; 26 | l.h = h; 27 | l.w = w; 28 | l.c = c; 29 | l.pad = padding; 30 | l.out_w = (w + 2*padding)/stride; 31 | l.out_h = (h + 2*padding)/stride; 32 | l.out_c = c; 33 | l.outputs = l.out_h * l.out_w * l.out_c; 34 | l.inputs = h*w*c; 35 | l.size = size; 36 | l.stride = stride; 37 | int output_size = l.out_h * l.out_w * l.out_c * batch; 38 | l.indexes = calloc(output_size, sizeof(int)); 39 | l.output = calloc(output_size, sizeof(float)); 40 | l.delta = calloc(output_size, sizeof(float)); 41 | l.forward = forward_maxpool_layer; 42 | l.backward = backward_maxpool_layer; 43 | #ifdef GPU 44 | l.forward_gpu = forward_maxpool_layer_gpu; 45 | l.backward_gpu = backward_maxpool_layer_gpu; 46 | l.indexes_gpu = cuda_make_int_array(0, output_size); 47 | l.output_gpu = cuda_make_array(l.output, output_size); 48 | l.delta_gpu = cuda_make_array(l.delta, output_size); 49 | #endif 50 | fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); 51 | return l; 52 | } 53 | 54 | void resize_maxpool_layer(maxpool_layer *l, int w, int h) 55 | { 56 | l->h = h; 57 | l->w = w; 58 | l->inputs = h*w*l->c; 59 | 60 | l->out_w = (w + 2*l->pad)/l->stride; 61 | l->out_h = (h + 2*l->pad)/l->stride; 62 | l->outputs = l->out_w * l->out_h * l->c; 63 | int output_size = l->outputs * l->batch; 64 | 65 | l->indexes = realloc(l->indexes, output_size * sizeof(int)); 66 | l->output = realloc(l->output, output_size * sizeof(float)); 67 | l->delta 
= realloc(l->delta, output_size * sizeof(float)); 68 | 69 | #ifdef GPU 70 | cuda_free((float *)l->indexes_gpu); 71 | cuda_free(l->output_gpu); 72 | cuda_free(l->delta_gpu); 73 | l->indexes_gpu = cuda_make_int_array(0, output_size); 74 | l->output_gpu = cuda_make_array(l->output, output_size); 75 | l->delta_gpu = cuda_make_array(l->delta, output_size); 76 | #endif 77 | } 78 | 79 | void forward_maxpool_layer(const maxpool_layer l, network net) 80 | { 81 | int b,i,j,k,m,n; 82 | int w_offset = -l.pad; 83 | int h_offset = -l.pad; 84 | 85 | int h = l.out_h; 86 | int w = l.out_w; 87 | int c = l.c; 88 | 89 | for(b = 0; b < l.batch; ++b){ 90 | for(k = 0; k < c; ++k){ 91 | for(i = 0; i < h; ++i){ 92 | for(j = 0; j < w; ++j){ 93 | int out_index = j + w*(i + h*(k + c*b)); 94 | float max = -FLT_MAX; 95 | int max_i = -1; 96 | for(n = 0; n < l.size; ++n){ 97 | for(m = 0; m < l.size; ++m){ 98 | int cur_h = h_offset + i*l.stride + n; 99 | int cur_w = w_offset + j*l.stride + m; 100 | int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); 101 | int valid = (cur_h >= 0 && cur_h < l.h && 102 | cur_w >= 0 && cur_w < l.w); 103 | float val = (valid != 0) ? net.input[index] : -FLT_MAX; 104 | max_i = (val > max) ? index : max_i; 105 | max = (val > max) ? 
val : max; 106 | } 107 | } 108 | l.output[out_index] = max; 109 | l.indexes[out_index] = max_i; 110 | } 111 | } 112 | } 113 | } 114 | } 115 | 116 | void backward_maxpool_layer(const maxpool_layer l, network net) 117 | { 118 | int i; 119 | int h = l.out_h; 120 | int w = l.out_w; 121 | int c = l.c; 122 | for(i = 0; i < h*w*c*l.batch; ++i){ 123 | int index = l.indexes[i]; 124 | net.delta[index] += l.delta[i]; 125 | } 126 | } 127 | 128 | -------------------------------------------------------------------------------- /src/route_layer.c: -------------------------------------------------------------------------------- 1 | #include "route_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | 5 | #include 6 | 7 | route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes) 8 | { 9 | fprintf(stderr,"route "); 10 | route_layer l = {0}; 11 | l.type = ROUTE; 12 | l.batch = batch; 13 | l.n = n; 14 | l.input_layers = input_layers; 15 | l.input_sizes = input_sizes; 16 | int i; 17 | int outputs = 0; 18 | for(i = 0; i < n; ++i){ 19 | fprintf(stderr," %d", input_layers[i]); 20 | outputs += input_sizes[i]; 21 | } 22 | fprintf(stderr, "\n"); 23 | l.outputs = outputs; 24 | l.inputs = outputs; 25 | l.delta = calloc(outputs*batch, sizeof(float)); 26 | l.output = calloc(outputs*batch, sizeof(float));; 27 | 28 | l.forward = forward_route_layer; 29 | l.backward = backward_route_layer; 30 | #ifdef GPU 31 | l.forward_gpu = forward_route_layer_gpu; 32 | l.backward_gpu = backward_route_layer_gpu; 33 | 34 | l.delta_gpu = cuda_make_array(l.delta, outputs*batch); 35 | l.output_gpu = cuda_make_array(l.output, outputs*batch); 36 | #endif 37 | return l; 38 | } 39 | 40 | void resize_route_layer(route_layer *l, network *net) 41 | { 42 | int i; 43 | layer first = net->layers[l->input_layers[0]]; 44 | l->out_w = first.out_w; 45 | l->out_h = first.out_h; 46 | l->out_c = first.out_c; 47 | l->outputs = first.outputs; 48 | l->input_sizes[0] = first.outputs; 49 | for(i = 1; i < l->n; 
++i){ 50 | int index = l->input_layers[i]; 51 | layer next = net->layers[index]; 52 | l->outputs += next.outputs; 53 | l->input_sizes[i] = next.outputs; 54 | if(next.out_w == first.out_w && next.out_h == first.out_h){ 55 | l->out_c += next.out_c; 56 | }else{ 57 | printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h); 58 | l->out_h = l->out_w = l->out_c = 0; 59 | } 60 | } 61 | l->inputs = l->outputs; 62 | l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); 63 | l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); 64 | 65 | #ifdef GPU 66 | cuda_free(l->output_gpu); 67 | cuda_free(l->delta_gpu); 68 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 69 | l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); 70 | #endif 71 | 72 | } 73 | 74 | void forward_route_layer(const route_layer l, network net) 75 | { 76 | int i, j; 77 | int offset = 0; 78 | for(i = 0; i < l.n; ++i){ 79 | int index = l.input_layers[i]; 80 | float *input = net.layers[index].output; 81 | int input_size = l.input_sizes[i]; 82 | for(j = 0; j < l.batch; ++j){ 83 | copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); 84 | } 85 | offset += input_size; 86 | } 87 | } 88 | 89 | void backward_route_layer(const route_layer l, network net) 90 | { 91 | int i, j; 92 | int offset = 0; 93 | for(i = 0; i < l.n; ++i){ 94 | int index = l.input_layers[i]; 95 | float *delta = net.layers[index].delta; 96 | int input_size = l.input_sizes[i]; 97 | for(j = 0; j < l.batch; ++j){ 98 | axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); 99 | } 100 | offset += input_size; 101 | } 102 | } 103 | 104 | #ifdef GPU 105 | void forward_route_layer_gpu(const route_layer l, network net) 106 | { 107 | int i, j; 108 | int offset = 0; 109 | for(i = 0; i < l.n; ++i){ 110 | int index = l.input_layers[i]; 111 | float *input = net.layers[index].output_gpu; 112 | int input_size = l.input_sizes[i]; 113 | for(j = 
0; j < l.batch; ++j){ 114 | copy_gpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); 115 | } 116 | offset += input_size; 117 | } 118 | } 119 | 120 | void backward_route_layer_gpu(const route_layer l, network net) 121 | { 122 | int i, j; 123 | int offset = 0; 124 | for(i = 0; i < l.n; ++i){ 125 | int index = l.input_layers[i]; 126 | float *delta = net.layers[index].delta_gpu; 127 | int input_size = l.input_sizes[i]; 128 | for(j = 0; j < l.batch; ++j){ 129 | axpy_gpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); 130 | } 131 | offset += input_size; 132 | } 133 | } 134 | #endif 135 | -------------------------------------------------------------------------------- /src/layer.c: -------------------------------------------------------------------------------- 1 | #include "layer.h" 2 | #include "cuda.h" 3 | 4 | #include 5 | 6 | void free_layer(layer l) 7 | { 8 | if(l.type == DROPOUT){ 9 | if(l.rand) free(l.rand); 10 | #ifdef GPU 11 | if(l.rand_gpu) cuda_free(l.rand_gpu); 12 | #endif 13 | return; 14 | } 15 | if(l.cweights) free(l.cweights); 16 | if(l.indexes) free(l.indexes); 17 | if(l.input_layers) free(l.input_layers); 18 | if(l.input_sizes) free(l.input_sizes); 19 | if(l.map) free(l.map); 20 | if(l.rand) free(l.rand); 21 | if(l.cost) free(l.cost); 22 | if(l.state) free(l.state); 23 | if(l.prev_state) free(l.prev_state); 24 | if(l.forgot_state) free(l.forgot_state); 25 | if(l.forgot_delta) free(l.forgot_delta); 26 | if(l.state_delta) free(l.state_delta); 27 | if(l.concat) free(l.concat); 28 | if(l.concat_delta) free(l.concat_delta); 29 | if(l.binary_weights) free(l.binary_weights); 30 | if(l.biases) free(l.biases); 31 | if(l.bias_updates) free(l.bias_updates); 32 | if(l.scales) free(l.scales); 33 | if(l.scale_updates) free(l.scale_updates); 34 | if(l.weights) free(l.weights); 35 | if(l.weight_updates) free(l.weight_updates); 36 | if(l.delta) free(l.delta); 37 | if(l.output) free(l.output); 38 | if(l.squared) 
free(l.squared); 39 | if(l.norms) free(l.norms); 40 | if(l.spatial_mean) free(l.spatial_mean); 41 | if(l.mean) free(l.mean); 42 | if(l.variance) free(l.variance); 43 | if(l.mean_delta) free(l.mean_delta); 44 | if(l.variance_delta) free(l.variance_delta); 45 | if(l.rolling_mean) free(l.rolling_mean); 46 | if(l.rolling_variance) free(l.rolling_variance); 47 | if(l.x) free(l.x); 48 | if(l.x_norm) free(l.x_norm); 49 | if(l.m) free(l.m); 50 | if(l.v) free(l.v); 51 | if(l.z_cpu) free(l.z_cpu); 52 | if(l.r_cpu) free(l.r_cpu); 53 | if(l.h_cpu) free(l.h_cpu); 54 | if(l.binary_input) free(l.binary_input); 55 | 56 | #ifdef GPU 57 | if(l.indexes_gpu) cuda_free((float *)l.indexes_gpu); 58 | 59 | if(l.z_gpu) cuda_free(l.z_gpu); 60 | if(l.r_gpu) cuda_free(l.r_gpu); 61 | if(l.h_gpu) cuda_free(l.h_gpu); 62 | if(l.m_gpu) cuda_free(l.m_gpu); 63 | if(l.v_gpu) cuda_free(l.v_gpu); 64 | if(l.prev_state_gpu) cuda_free(l.prev_state_gpu); 65 | if(l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); 66 | if(l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); 67 | if(l.state_gpu) cuda_free(l.state_gpu); 68 | if(l.state_delta_gpu) cuda_free(l.state_delta_gpu); 69 | if(l.gate_gpu) cuda_free(l.gate_gpu); 70 | if(l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); 71 | if(l.save_gpu) cuda_free(l.save_gpu); 72 | if(l.save_delta_gpu) cuda_free(l.save_delta_gpu); 73 | if(l.concat_gpu) cuda_free(l.concat_gpu); 74 | if(l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); 75 | if(l.binary_input_gpu) cuda_free(l.binary_input_gpu); 76 | if(l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); 77 | if(l.mean_gpu) cuda_free(l.mean_gpu); 78 | if(l.variance_gpu) cuda_free(l.variance_gpu); 79 | if(l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); 80 | if(l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu); 81 | if(l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); 82 | if(l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); 83 | if(l.x_gpu) cuda_free(l.x_gpu); 84 | if(l.x_norm_gpu) cuda_free(l.x_norm_gpu); 85 | 
if(l.weights_gpu) cuda_free(l.weights_gpu); 86 | if(l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); 87 | if(l.biases_gpu) cuda_free(l.biases_gpu); 88 | if(l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); 89 | if(l.scales_gpu) cuda_free(l.scales_gpu); 90 | if(l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); 91 | if(l.output_gpu) cuda_free(l.output_gpu); 92 | if(l.delta_gpu) cuda_free(l.delta_gpu); 93 | if(l.rand_gpu) cuda_free(l.rand_gpu); 94 | if(l.squared_gpu) cuda_free(l.squared_gpu); 95 | if(l.norms_gpu) cuda_free(l.norms_gpu); 96 | #endif 97 | } 98 | -------------------------------------------------------------------------------- /cfg/yolo-origin.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | #height=448 9 | #width=448 10 | height=416 11 | width=416 12 | channels=3 13 | momentum=0.9 14 | decay=0.0005 15 | angle=0 16 | saturation = 1.5 17 | exposure = 1.5 18 | hue=.1 19 | 20 | learning_rate=0.001 21 | burn_in=1000 22 | #max_batches = 50000 23 | max_batches = 80200 24 | policy=steps 25 | steps=40000,60000 26 | #steps=30000,45000 27 | scales=0.1,0.1 28 | 29 | [convolutional] 30 | batch_normalize=1 31 | filters=32 32 | size=3 33 | stride=1 34 | pad=1 35 | activation=leaky 36 | 37 | [maxpool] 38 | size=2 39 | stride=2 40 | 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=1 46 | pad=1 47 | activation=leaky 48 | 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | [convolutional] 54 | batch_normalize=1 55 | filters=128 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=leaky 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=64 64 | size=1 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [convolutional] 70 | batch_normalize=1 71 | filters=128 72 | size=3 73 | stride=1 74 | pad=1 75 | activation=leaky 76 | 77 | [maxpool] 78 | size=2 79 | stride=2 80 | 81 | 
[convolutional] 82 | batch_normalize=1 83 | filters=256 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [convolutional] 90 | batch_normalize=1 91 | filters=128 92 | size=1 93 | stride=1 94 | pad=1 95 | activation=leaky 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=256 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [maxpool] 106 | size=2 107 | stride=2 108 | 109 | [convolutional] 110 | batch_normalize=1 111 | filters=512 112 | size=3 113 | stride=1 114 | pad=1 115 | activation=leaky 116 | 117 | [convolutional] 118 | batch_normalize=1 119 | filters=256 120 | size=1 121 | stride=1 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | batch_normalize=1 127 | filters=512 128 | size=3 129 | stride=1 130 | pad=1 131 | activation=leaky 132 | 133 | [convolutional] 134 | batch_normalize=1 135 | filters=256 136 | size=1 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [convolutional] 142 | batch_normalize=1 143 | filters=512 144 | size=3 145 | stride=1 146 | pad=1 147 | activation=leaky 148 | 149 | [maxpool] 150 | size=2 151 | stride=2 152 | 153 | [convolutional] 154 | batch_normalize=1 155 | filters=1024 156 | size=3 157 | stride=1 158 | pad=1 159 | activation=leaky 160 | 161 | [convolutional] 162 | batch_normalize=1 163 | filters=512 164 | size=1 165 | stride=1 166 | pad=1 167 | activation=leaky 168 | 169 | [convolutional] 170 | batch_normalize=1 171 | filters=1024 172 | size=3 173 | stride=1 174 | pad=1 175 | activation=leaky 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=512 180 | size=1 181 | stride=1 182 | pad=1 183 | activation=leaky 184 | 185 | [convolutional] 186 | batch_normalize=1 187 | filters=1024 188 | size=3 189 | stride=1 190 | pad=1 191 | activation=leaky 192 | 193 | 194 | ####### 195 | 196 | [convolutional] 197 | batch_normalize=1 198 | size=3 199 | stride=1 200 | pad=1 201 | filters=1024 202 | activation=leaky 203 | 204 | [convolutional] 205 | 
batch_normalize=1 206 | size=3 207 | stride=1 208 | pad=1 209 | filters=1024 210 | activation=leaky 211 | 212 | [route] 213 | layers=-9 214 | 215 | [convolutional] 216 | batch_normalize=1 217 | size=1 218 | stride=1 219 | pad=1 220 | filters=64 221 | activation=leaky 222 | 223 | [reorg] 224 | stride=2 225 | 226 | [route] 227 | layers=-1,-4 228 | 229 | [convolutional] 230 | batch_normalize=1 231 | size=3 232 | stride=1 233 | pad=1 234 | filters=1024 235 | activation=leaky 236 | 237 | [convolutional] 238 | size=1 239 | stride=1 240 | pad=1 241 | filters=30 242 | activation=linear 243 | 244 | 245 | [region] 246 | anchors = 2.1882101346810248, 1.7273508247089326, 2.5877106474986844, 2.3101804619114943, 1.8459417510394331, 1.7319281925870702, 2.125632169606032, 2.0649405635139693, 2.458238797504399, 1.9738465447154578 247 | #anchors = 2.18218568314, 1.72591145833, 1.82391327972, 1.58272975078, 2.17396893169, 2.09366861979, 1.87342167214, 1.91000846754, 2.51557005105, 2.00755208333 248 | #anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 249 | #anchors = 1.10628851 , 0.74142205 , 1.46469359 , 1.10719697 , 1.52204367 , 0.91983528 , 1.77219672 , 1.25183029 , 1.94877405 , 1.59463705 250 | #anchors = 0.80628851 , 0.44142205 , 1.77219672 , 1.25183029 , 2.94877405 , 2.59463705 251 | bias_match=1 252 | classes=1 253 | coords=4 254 | num=5 255 | softmax=1 256 | jitter=.3 257 | rescore=1 258 | 259 | object_scale=5 260 | noobject_scale=1 261 | class_scale=1 262 | coord_scale=1 263 | 264 | absolute=1 265 | thresh=.6 266 | random=1 267 | -------------------------------------------------------------------------------- /src/cuda.c: -------------------------------------------------------------------------------- 1 | int gpu_index = 0; 2 | 3 | #ifdef GPU 4 | 5 | #include "cuda.h" 6 | #include "utils.h" 7 | #include "blas.h" 8 | #include 9 | #include 10 | #include 11 | 12 | void cuda_set_device(int n) 13 | { 14 | gpu_index = n; 15 | 
/*
 * Report a failed CUDA call.
 *
 * status  - return code of the CUDA call the caller just made.
 *
 * Besides `status`, the function also polls cudaGetLastError() so that an
 * error left pending by earlier work is reported as "CUDA Error Prev".
 * Each failure is printed, trips assert(0) in builds with assertions
 * enabled, and is then handed to error().
 */
void check_error(cudaError_t status)
{
    cudaError_t status2 = cudaGetLastError();
    if (status != cudaSuccess)
    {
        const char *s = cudaGetErrorString(status);
        char buffer[256];
        printf("CUDA Error: %s\n", s);
        assert(0);
        snprintf(buffer, 256, "CUDA Error: %s", s);
        error(buffer);
    }
    if (status2 != cudaSuccess)
    {
        /* Describe the pending error itself; the original looked up `status`
           here, which printed the wrong message for status2. */
        const char *s = cudaGetErrorString(status2);
        char buffer[256];
        printf("CUDA Error Prev: %s\n", s);
        assert(0);
        snprintf(buffer, 256, "CUDA Error Prev: %s", s);
        error(buffer);
    }
}
else { 100 | fill_gpu(n, 0, x_gpu, 1); 101 | } 102 | if(!x_gpu) error("Cuda malloc failed\n"); 103 | return x_gpu; 104 | } 105 | 106 | void cuda_random(float *x_gpu, size_t n) 107 | { 108 | static curandGenerator_t gen[16]; 109 | static int init[16] = {0}; 110 | int i = cuda_get_device(); 111 | if(!init[i]){ 112 | curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); 113 | curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); 114 | init[i] = 1; 115 | } 116 | curandGenerateUniform(gen[i], x_gpu, n); 117 | check_error(cudaPeekAtLastError()); 118 | } 119 | 120 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s) 121 | { 122 | float *tmp = calloc(n, sizeof(float)); 123 | cuda_pull_array(x_gpu, tmp, n); 124 | //int i; 125 | //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); 126 | axpy_cpu(n, -1, x, 1, tmp, 1); 127 | float err = dot_cpu(n, tmp, 1, tmp, 1); 128 | printf("Error %s: %f\n", s, sqrt(err/n)); 129 | free(tmp); 130 | return err; 131 | } 132 | 133 | int *cuda_make_int_array(int *x, size_t n) 134 | { 135 | int *x_gpu; 136 | size_t size = sizeof(int)*n; 137 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 138 | check_error(status); 139 | if(x){ 140 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 141 | check_error(status); 142 | } 143 | if(!x_gpu) error("Cuda malloc failed\n"); 144 | return x_gpu; 145 | } 146 | 147 | void cuda_free(float *x_gpu) 148 | { 149 | cudaError_t status = cudaFree(x_gpu); 150 | check_error(status); 151 | } 152 | 153 | void cuda_push_array(float *x_gpu, float *x, size_t n) 154 | { 155 | size_t size = sizeof(float)*n; 156 | cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 157 | check_error(status); 158 | } 159 | 160 | void cuda_pull_array(float *x_gpu, float *x, size_t n) 161 | { 162 | size_t size = sizeof(float)*n; 163 | cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); 164 | check_error(status); 165 | } 166 | 167 | float cuda_mag_array(float *x_gpu, 
size_t n) 168 | { 169 | float *temp = calloc(n, sizeof(float)); 170 | cuda_pull_array(x_gpu, temp, n); 171 | float m = mag_array(temp, n); 172 | free(temp); 173 | return m; 174 | } 175 | #else 176 | void cuda_set_device(int n){} 177 | 178 | #endif 179 | -------------------------------------------------------------------------------- /examples/tag.c: -------------------------------------------------------------------------------- 1 | #include "darknet.h" 2 | 3 | void train_tag(char *cfgfile, char *weightfile, int clear) 4 | { 5 | srand(time(0)); 6 | float avg_loss = -1; 7 | char *base = basecfg(cfgfile); 8 | char *backup_directory = "/home/pjreddie/backup/"; 9 | printf("%s\n", base); 10 | network *net = load_network(cfgfile, weightfile, clear); 11 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay); 12 | int imgs = 1024; 13 | list *plist = get_paths("/home/pjreddie/tag/train.list"); 14 | char **paths = (char **)list_to_array(plist); 15 | printf("%d\n", plist->size); 16 | int N = plist->size; 17 | clock_t time; 18 | pthread_t load_thread; 19 | data train; 20 | data buffer; 21 | 22 | load_args args = {0}; 23 | args.w = net->w; 24 | args.h = net->h; 25 | 26 | args.min = net->w; 27 | args.max = net->max_crop; 28 | args.size = net->w; 29 | 30 | args.paths = paths; 31 | args.classes = net->outputs; 32 | args.n = imgs; 33 | args.m = N; 34 | args.d = &buffer; 35 | args.type = TAG_DATA; 36 | 37 | args.angle = net->angle; 38 | args.exposure = net->exposure; 39 | args.saturation = net->saturation; 40 | args.hue = net->hue; 41 | 42 | fprintf(stderr, "%d classes\n", net->outputs); 43 | 44 | load_thread = load_data_in_thread(args); 45 | int epoch = (*net->seen)/N; 46 | while(get_current_batch(net) < net->max_batches || net->max_batches == 0){ 47 | time=clock(); 48 | pthread_join(load_thread, 0); 49 | train = buffer; 50 | 51 | load_thread = load_data_in_thread(args); 52 | printf("Loaded: %lf seconds\n", sec(clock()-time)); 53 | 
time=clock(); 54 | float loss = train_network(net, train); 55 | if(avg_loss == -1) avg_loss = loss; 56 | avg_loss = avg_loss*.9 + loss*.1; 57 | printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen); 58 | free_data(train); 59 | if(*net->seen/N > epoch){ 60 | epoch = *net->seen/N; 61 | char buff[256]; 62 | sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); 63 | save_weights(net, buff); 64 | } 65 | if(get_current_batch(net)%100 == 0){ 66 | char buff[256]; 67 | sprintf(buff, "%s/%s.backup",backup_directory,base); 68 | save_weights(net, buff); 69 | } 70 | } 71 | char buff[256]; 72 | sprintf(buff, "%s/%s.weights", backup_directory, base); 73 | save_weights(net, buff); 74 | 75 | pthread_join(load_thread, 0); 76 | free_data(buffer); 77 | free_network(net); 78 | free_ptrs((void**)paths, plist->size); 79 | free_list(plist); 80 | free(base); 81 | } 82 | 83 | void test_tag(char *cfgfile, char *weightfile, char *filename) 84 | { 85 | network *net = load_network(cfgfile, weightfile, 0); 86 | set_batch_network(net, 1); 87 | srand(2222222); 88 | int i = 0; 89 | char **names = get_labels("data/tags.txt"); 90 | clock_t time; 91 | int indexes[10]; 92 | char buff[256]; 93 | char *input = buff; 94 | int size = net->w; 95 | while(1){ 96 | if(filename){ 97 | strncpy(input, filename, 256); 98 | }else{ 99 | printf("Enter Image Path: "); 100 | fflush(stdout); 101 | input = fgets(input, 256, stdin); 102 | if(!input) return; 103 | strtok(input, "\n"); 104 | } 105 | image im = load_image_color(input, 0, 0); 106 | image r = resize_min(im, size); 107 | resize_network(net, r.w, r.h); 108 | printf("%d %d\n", r.w, r.h); 109 | 110 | float *X = r.data; 111 | time=clock(); 112 | float *predictions = network_predict(net, X); 113 | top_predictions(net, 10, indexes); 114 | printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); 115 | for(i = 0; i < 
/*
 * Command-line entry point for the tag example.
 *
 * Expects argv[2] to be the mode ("train" or "test") and argv[3] the cfg
 * file; argv[4] (weights) and argv[5] (image file name) are optional.
 * Prints a usage line to stderr and returns when fewer than four
 * arguments are given. Any other mode is silently ignored.
 */
void run_tag(int argc, char **argv)
{
    int clear;
    char *mode;
    char *cfg;
    char *weights = 0;
    char *filename = 0;

    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    /* The flag lookup stays ahead of the positional reads, as in the original. */
    clear = find_arg(argc, argv, "-clear");
    cfg = argv[3];
    if(argc > 4) weights = argv[4];
    if(argc > 5) filename = argv[5];
    mode = argv[2];

    if(strcmp(mode, "train") == 0){
        train_tag(cfg, weights, clear);
    }else if(strcmp(mode, "test") == 0){
        test_tag(cfg, weights, filename);
    }
}
load_data_in_thread(args); 37 | int epoch = (*net.seen)/N; 38 | while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ 39 | time=clock(); 40 | pthread_join(load_thread, 0); 41 | train = buffer; 42 | load_thread = load_data_in_thread(args); 43 | printf("Loaded %lf seconds\n",sec(clock()-time)); 44 | 45 | time=clock(); 46 | float loss = train_network(net, train); 47 | 48 | /* 49 | image pred = float_to_image(64, 64, 1, out); 50 | print_image(pred); 51 | */ 52 | 53 | /* 54 | image im = float_to_image(256, 256, 3, train.X.vals[0]); 55 | image lab = float_to_image(64, 64, 1, train.y.vals[0]); 56 | image pred = float_to_image(64, 64, 1, out); 57 | show_image(im, "image"); 58 | show_image(lab, "label"); 59 | print_image(lab); 60 | show_image(pred, "pred"); 61 | cvWaitKey(0); 62 | */ 63 | 64 | if(avg_loss == -1) avg_loss = loss; 65 | avg_loss = avg_loss*.9 + loss*.1; 66 | printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); 67 | free_data(train); 68 | if(get_current_batch(net)%100 == 0){ 69 | char buff[256]; 70 | sprintf(buff, "%s/%s_batch_%ld.weights", backup_directory, base, get_current_batch(net)); 71 | save_weights(net, buff); 72 | } 73 | if(*net.seen/N > epoch){ 74 | epoch = *net.seen/N; 75 | char buff[256]; 76 | sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); 77 | save_weights(net, buff); 78 | } 79 | } 80 | } 81 | 82 | void test_writing(char *cfgfile, char *weightfile, char *filename) 83 | { 84 | network net = parse_network_cfg(cfgfile); 85 | if(weightfile){ 86 | load_weights(&net, weightfile); 87 | } 88 | set_batch_network(&net, 1); 89 | srand(2222222); 90 | clock_t time; 91 | char buff[256]; 92 | char *input = buff; 93 | while(1){ 94 | if(filename){ 95 | strncpy(input, filename, 256); 96 | }else{ 97 | printf("Enter Image Path: "); 98 | fflush(stdout); 99 | input = fgets(input, 256, stdin); 100 | 
if(!input) return; 101 | strtok(input, "\n"); 102 | } 103 | 104 | image im = load_image_color(input, 0, 0); 105 | resize_network(&net, im.w, im.h); 106 | printf("%d %d %d\n", im.h, im.w, im.c); 107 | float *X = im.data; 108 | time=clock(); 109 | network_predict(net, X); 110 | printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); 111 | image pred = get_network_image(net); 112 | 113 | image upsampled = resize_image(pred, im.w, im.h); 114 | image thresh = threshold_image(upsampled, .5); 115 | pred = thresh; 116 | 117 | show_image(pred, "prediction"); 118 | show_image(im, "orig"); 119 | #ifdef OPENCV 120 | cvWaitKey(0); 121 | cvDestroyAllWindows(); 122 | #endif 123 | 124 | free_image(upsampled); 125 | free_image(thresh); 126 | free_image(im); 127 | if (filename) break; 128 | } 129 | } 130 | 131 | void run_writing(int argc, char **argv) 132 | { 133 | if(argc < 4){ 134 | fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); 135 | return; 136 | } 137 | 138 | char *cfg = argv[3]; 139 | char *weights = (argc > 4) ? argv[4] : 0; 140 | char *filename = (argc > 5) ? 
argv[5] : 0; 141 | if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); 142 | else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); 143 | } 144 | 145 | -------------------------------------------------------------------------------- /src/matrix.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include "utils.h" 3 | #include "blas.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | void free_matrix(matrix m) 11 | { 12 | int i; 13 | for(i = 0; i < m.rows; ++i) free(m.vals[i]); 14 | free(m.vals); 15 | } 16 | 17 | float matrix_topk_accuracy(matrix truth, matrix guess, int k) 18 | { 19 | int *indexes = calloc(k, sizeof(int)); 20 | int n = truth.cols; 21 | int i,j; 22 | int correct = 0; 23 | for(i = 0; i < truth.rows; ++i){ 24 | top_k(guess.vals[i], n, k, indexes); 25 | for(j = 0; j < k; ++j){ 26 | int class = indexes[j]; 27 | if(truth.vals[i][class]){ 28 | ++correct; 29 | break; 30 | } 31 | } 32 | } 33 | free(indexes); 34 | return (float)correct/truth.rows; 35 | } 36 | 37 | void scale_matrix(matrix m, float scale) 38 | { 39 | int i,j; 40 | for(i = 0; i < m.rows; ++i){ 41 | for(j = 0; j < m.cols; ++j){ 42 | m.vals[i][j] *= scale; 43 | } 44 | } 45 | } 46 | 47 | matrix resize_matrix(matrix m, int size) 48 | { 49 | int i; 50 | if (m.rows == size) return m; 51 | if (m.rows < size) { 52 | m.vals = realloc(m.vals, size*sizeof(float*)); 53 | for (i = m.rows; i < size; ++i) { 54 | m.vals[i] = calloc(m.cols, sizeof(float)); 55 | } 56 | } else if (m.rows > size) { 57 | for (i = size; i < m.rows; ++i) { 58 | free(m.vals[i]); 59 | } 60 | m.vals = realloc(m.vals, size*sizeof(float*)); 61 | } 62 | m.rows = size; 63 | return m; 64 | } 65 | 66 | void matrix_add_matrix(matrix from, matrix to) 67 | { 68 | assert(from.rows == to.rows && from.cols == to.cols); 69 | int i,j; 70 | for(i = 0; i < from.rows; ++i){ 71 | for(j = 0; j < from.cols; ++j){ 72 | to.vals[i][j] += 
from.vals[i][j]; 73 | } 74 | } 75 | } 76 | 77 | matrix copy_matrix(matrix m) 78 | { 79 | matrix c = {0}; 80 | c.rows = m.rows; 81 | c.cols = m.cols; 82 | c.vals = calloc(c.rows, sizeof(float *)); 83 | int i; 84 | for(i = 0; i < c.rows; ++i){ 85 | c.vals[i] = calloc(c.cols, sizeof(float)); 86 | copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1); 87 | } 88 | return c; 89 | } 90 | 91 | matrix make_matrix(int rows, int cols) 92 | { 93 | int i; 94 | matrix m; 95 | m.rows = rows; 96 | m.cols = cols; 97 | m.vals = calloc(m.rows, sizeof(float *)); 98 | for(i = 0; i < m.rows; ++i){ 99 | m.vals[i] = calloc(m.cols, sizeof(float)); 100 | } 101 | return m; 102 | } 103 | 104 | matrix hold_out_matrix(matrix *m, int n) 105 | { 106 | int i; 107 | matrix h; 108 | h.rows = n; 109 | h.cols = m->cols; 110 | h.vals = calloc(h.rows, sizeof(float *)); 111 | for(i = 0; i < n; ++i){ 112 | int index = rand()%m->rows; 113 | h.vals[i] = m->vals[index]; 114 | m->vals[index] = m->vals[--(m->rows)]; 115 | } 116 | return h; 117 | } 118 | 119 | float *pop_column(matrix *m, int c) 120 | { 121 | float *col = calloc(m->rows, sizeof(float)); 122 | int i, j; 123 | for(i = 0; i < m->rows; ++i){ 124 | col[i] = m->vals[i][c]; 125 | for(j = c; j < m->cols-1; ++j){ 126 | m->vals[i][j] = m->vals[i][j+1]; 127 | } 128 | } 129 | --m->cols; 130 | return col; 131 | } 132 | 133 | matrix csv_to_matrix(char *filename) 134 | { 135 | FILE *fp = fopen(filename, "r"); 136 | if(!fp) file_error(filename); 137 | 138 | matrix m; 139 | m.cols = -1; 140 | 141 | char *line; 142 | 143 | int n = 0; 144 | int size = 1024; 145 | m.vals = calloc(size, sizeof(float*)); 146 | while((line = fgetl(fp))){ 147 | if(m.cols == -1) m.cols = count_fields(line); 148 | if(n == size){ 149 | size *= 2; 150 | m.vals = realloc(m.vals, size*sizeof(float*)); 151 | } 152 | m.vals[n] = parse_fields(line, m.cols); 153 | free(line); 154 | ++n; 155 | } 156 | m.vals = realloc(m.vals, n*sizeof(float*)); 157 | m.rows = n; 158 | return m; 159 | } 160 | 161 | void 
matrix_to_csv(matrix m) 162 | { 163 | int i, j; 164 | 165 | for(i = 0; i < m.rows; ++i){ 166 | for(j = 0; j < m.cols; ++j){ 167 | if(j > 0) printf(","); 168 | printf("%.17g", m.vals[i][j]); 169 | } 170 | printf("\n"); 171 | } 172 | } 173 | 174 | void print_matrix(matrix m) 175 | { 176 | int i, j; 177 | printf("%d X %d Matrix:\n",m.rows, m.cols); 178 | printf(" __"); 179 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 180 | printf("__ \n"); 181 | 182 | printf("| "); 183 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 184 | printf(" |\n"); 185 | 186 | for(i = 0; i < m.rows; ++i){ 187 | printf("| "); 188 | for(j = 0; j < m.cols; ++j){ 189 | printf("%15.7f ", m.vals[i][j]); 190 | } 191 | printf(" |\n"); 192 | } 193 | printf("|__"); 194 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 195 | printf("__|\n"); 196 | } 197 | -------------------------------------------------------------------------------- /src/deconvolutional_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "convolutional_layer.h" 7 | #include "deconvolutional_layer.h" 8 | #include "batchnorm_layer.h" 9 | #include "gemm.h" 10 | #include "blas.h" 11 | #include "im2col.h" 12 | #include "col2im.h" 13 | #include "utils.h" 14 | #include "cuda.h" 15 | } 16 | 17 | extern "C" void forward_deconvolutional_layer_gpu(layer l, network net) 18 | { 19 | int i; 20 | 21 | int m = l.size*l.size*l.n; 22 | int n = l.h*l.w; 23 | int k = l.c; 24 | 25 | fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); 26 | 27 | for(i = 0; i < l.batch; ++i){ 28 | float *a = l.weights_gpu; 29 | float *b = net.input_gpu + i*l.c*l.h*l.w; 30 | float *c = net.workspace; 31 | 32 | gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n); 33 | 34 | col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs); 35 | } 36 | if (l.batch_normalize) { 37 | 
forward_batchnorm_layer_gpu(l, net); 38 | } else { 39 | add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); 40 | } 41 | activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation); 42 | } 43 | 44 | extern "C" void backward_deconvolutional_layer_gpu(layer l, network net) 45 | { 46 | int i; 47 | 48 | //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); 49 | gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); 50 | 51 | if(l.batch_normalize){ 52 | backward_batchnorm_layer_gpu(l, net); 53 | } else { 54 | backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); 55 | } 56 | 57 | //if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float)); 58 | 59 | for(i = 0; i < l.batch; ++i){ 60 | int m = l.c; 61 | int n = l.size*l.size*l.n; 62 | int k = l.h*l.w; 63 | 64 | float *a = net.input_gpu + i*m*k; 65 | float *b = net.workspace; 66 | float *c = l.weight_updates_gpu; 67 | 68 | im2col_gpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, l.out_w, 69 | l.size, l.stride, l.pad, b); 70 | gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); 71 | 72 | if(net.delta_gpu){ 73 | int m = l.c; 74 | int n = l.h*l.w; 75 | int k = l.size*l.size*l.n; 76 | 77 | float *a = l.weights_gpu; 78 | float *b = net.workspace; 79 | float *c = net.delta_gpu + i*n*m; 80 | 81 | gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); 82 | } 83 | } 84 | } 85 | 86 | extern "C" void pull_deconvolutional_layer(layer l) 87 | { 88 | cuda_pull_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); 89 | cuda_pull_array(l.biases_gpu, l.biases, l.n); 90 | cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); 91 | cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); 92 | if (l.batch_normalize){ 93 | cuda_pull_array(l.scales_gpu, l.scales, l.n); 94 | cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); 95 | cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); 96 | } 97 | } 98 | 99 | extern "C" void 
push_deconvolutional_layer(layer l) 100 | { 101 | cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); 102 | cuda_push_array(l.biases_gpu, l.biases, l.n); 103 | cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); 104 | cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); 105 | if (l.batch_normalize){ 106 | cuda_push_array(l.scales_gpu, l.scales, l.n); 107 | cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); 108 | cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); 109 | } 110 | } 111 | 112 | void update_deconvolutional_layer_gpu(layer l, update_args a) 113 | { 114 | float learning_rate = a.learning_rate*l.learning_rate_scale; 115 | float momentum = a.momentum; 116 | float decay = a.decay; 117 | int batch = a.batch; 118 | 119 | if(a.adam){ 120 | adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); 121 | adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); 122 | if(l.scales_gpu){ 123 | adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); 124 | } 125 | }else{ 126 | axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); 127 | axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); 128 | scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); 129 | 130 | axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); 131 | scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); 132 | 133 | if(l.scales_gpu){ 134 | axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); 135 | scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); 136 | } 137 | } 138 | } 139 | 140 | -------------------------------------------------------------------------------- /tools/generate_anchorsv2.py: 
def iou(x, centroids):
    """Return the IoU of one box against every centroid.

    Boxes are compared by size only, as if they shared a corner.

    Args:
        x: a ``(w, h)`` pair.
        centroids: iterable of ``(c_w, c_h)`` pairs.

    Returns:
        ``np.ndarray`` with one IoU value per centroid, in input order.
    """
    w, h = x
    scores = []
    for c_w, c_h in centroids:
        if c_w >= w and c_h >= h:
            # The box fits entirely inside the centroid.
            score = w * h / (c_w * c_h)
        elif c_w >= w and c_h <= h:
            # Centroid is wider but not taller: overlap is w x c_h.
            score = w * c_h / (w * h + (c_w - w) * c_h)
        elif c_w <= w and c_h >= h:
            # Centroid is taller but not wider: overlap is c_w x h.
            score = c_w * h / (w * h + c_w * (c_h - h))
        else:
            # The centroid fits entirely inside the box.
            score = (c_w * c_h) / (w * h)
        scores.append(score)
    return np.array(scores)


def avg_iou(x, centroids):
    """Return the mean, over all boxes in ``x``, of the best IoU with any centroid.

    Args:
        x: array of shape ``(n, 2)`` holding ``(w, h)`` rows.
        centroids: iterable of ``(c_w, c_h)`` pairs.
    """
    n = x.shape[0]
    total = 0.
    for box in x:
        total += max(iou(box, centroids))
    return total / n


def write_anchors_to_file(centroids, distance, anchor_file, input_size=416, stride=32):
    """Print the clustering result and write the anchors to ``anchor_file``.

    The anchors are ``centroids * input_size / stride``. With the defaults this
    is the original ``416 / 32`` scaling.
    NOTE(review): presumably this converts normalised box sizes into grid cells
    of a 416-pixel network with a 32-pixel stride -- confirm against the cfg.

    Args:
        centroids: ``(k, 2)`` array of cluster centres.
        distance: average IoU, written on the second line of the file.
        anchor_file: path of the output file (overwritten).
        input_size: scale numerator (default 416, the previously fixed value).
        stride: scale denominator (default 32, the previously fixed value).
    """
    anchors = centroids * input_size / stride
    anchors = [str(i) for i in anchors.ravel()]
    print(
        "\n",
        "Cluster Result:\n",
        "Clusters:", len(centroids), "\n",
        "Average IoU:", distance, "\n",
        "Anchors:\n",
        ", ".join(anchors)
    )

    with open(anchor_file, 'w') as f:
        f.write(", ".join(anchors))
        f.write('\n%f\n' % distance)


def k_means(x, n_clusters, eps):
    """Cluster ``(w, h)`` boxes with k-means using ``1 - IoU`` as the distance.

    Centroids are seeded from randomly chosen rows of ``x`` (duplicates are
    possible). Iteration stops when the summed absolute change of the distance
    matrix drops below ``eps`` or after more than 1000 iterations.

    Args:
        x: array-like of shape ``(n, 2)``.
        n_clusters: number of centroids.
        eps: convergence threshold on the change of the distance matrix.

    Returns:
        ``(n_clusters, 2)`` float array of centroids.
    """
    # Work on floats so centroid means are never truncated by an integer dtype.
    x = np.asarray(x, dtype=float)
    init_index = [random.randrange(x.shape[0]) for _ in range(n_clusters)]
    centroids = x[init_index]  # fancy indexing copies, so x is not modified

    old_d = None
    iterations = 0
    diff = 1e10

    while True:
        iterations += 1
        d = np.array([1 - iou(box, centroids) for box in x])
        if old_d is not None:
            diff = np.sum(np.abs(d - old_d))

        print('diff = %f' % diff)

        if diff < eps or iterations > 1000:
            print("Number of iterations took = %d" % iterations)
            print("Centroids = ", centroids)
            return centroids

        # Assign every sample to its nearest centroid.
        belonging_centroids = np.argmin(d, axis=1)

        # Move each centroid to the mean of its members. A cluster with no
        # members keeps its previous position: dividing by a zero count would
        # give NaN, and a NaN centroid breaks every later assignment.
        for j in range(n_clusters):
            members = x[belonging_centroids == j]
            if len(members) > 0:
                centroids[j] = members.mean(axis=0)

        old_d = d


def get_file_content(fnm):
    """Return the non-empty, whitespace-stripped lines of the text file ``fnm``.

    Blank lines are dropped because callers treat every entry as a path or as a
    five-field label record.
    """
    with open(fnm) as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]
def iou(x, centroids):
    """Return the IoU of one box against every centroid.

    Boxes are compared by size only, as if they shared a corner.

    Args:
        x: a ``(w, h)`` pair.
        centroids: iterable of ``(c_w, c_h)`` pairs.

    Returns:
        ``np.ndarray`` with one IoU value per centroid, in input order.
    """
    w, h = x
    scores = []
    for c_w, c_h in centroids:
        if c_w >= w and c_h >= h:
            # The box fits entirely inside the centroid.
            score = w * h / (c_w * c_h)
        elif c_w >= w and c_h <= h:
            # Centroid is wider but not taller: overlap is w x c_h.
            score = w * c_h / (w * h + (c_w - w) * c_h)
        elif c_w <= w and c_h >= h:
            # Centroid is taller but not wider: overlap is c_w x h.
            score = c_w * h / (w * h + c_w * (c_h - h))
        else:
            # The centroid fits entirely inside the box.
            score = (c_w * c_h) / (w * h)
        scores.append(score)
    return np.array(scores)


def avg_iou(x, centroids):
    """Return the mean, over all boxes in ``x``, of the best IoU with any centroid.

    Args:
        x: array of shape ``(n, 2)`` holding ``(w, h)`` rows.
        centroids: iterable of ``(c_w, c_h)`` pairs.
    """
    n = x.shape[0]
    total = 0.
    for box in x:
        total += max(iou(box, centroids))
    return total / n


def write_anchors_to_file(centroids, distance, anchor_file, input_size=416):
    """Print the clustering result and write the anchors to ``anchor_file``.

    The anchors are ``centroids * input_size``. With the default this is the
    original fixed factor of 416.
    NOTE(review): presumably this converts normalised box sizes into pixels of
    a 416-pixel network input -- confirm against the cfg in use.

    Args:
        centroids: ``(k, 2)`` array of cluster centres.
        distance: average IoU, written on the second line of the file.
        anchor_file: path of the output file (overwritten).
        input_size: scale factor (default 416, the previously fixed value).
    """
    anchors = centroids * input_size
    anchors = [str(i) for i in anchors.ravel()]
    print(
        "\n",
        "Cluster Result:\n",
        "Clusters:", len(centroids), "\n",
        "Average IoU:", distance, "\n",
        "Anchors:\n",
        ", ".join(anchors)
    )

    with open(anchor_file, 'w') as f:
        f.write(", ".join(anchors))
        f.write('\n%f\n' % distance)


def k_means(x, n_clusters, eps):
    """Cluster ``(w, h)`` boxes with k-means using ``1 - IoU`` as the distance.

    Centroids are seeded from randomly chosen rows of ``x`` (duplicates are
    possible). Iteration stops when the summed absolute change of the distance
    matrix drops below ``eps`` or after more than 1000 iterations.

    Args:
        x: array-like of shape ``(n, 2)``.
        n_clusters: number of centroids.
        eps: convergence threshold on the change of the distance matrix.

    Returns:
        ``(n_clusters, 2)`` float array of centroids.
    """
    # Work on floats so centroid means are never truncated by an integer dtype.
    x = np.asarray(x, dtype=float)
    init_index = [random.randrange(x.shape[0]) for _ in range(n_clusters)]
    centroids = x[init_index]  # fancy indexing copies, so x is not modified

    old_d = None
    iterations = 0
    diff = 1e10

    while True:
        iterations += 1
        d = np.array([1 - iou(box, centroids) for box in x])
        if old_d is not None:
            diff = np.sum(np.abs(d - old_d))

        print('diff = %f' % diff)

        if diff < eps or iterations > 1000:
            print("Number of iterations took = %d" % iterations)
            print("Centroids = ", centroids)
            return centroids

        # Assign every sample to its nearest centroid.
        belonging_centroids = np.argmin(d, axis=1)

        # Move each centroid to the mean of its members. A cluster with no
        # members keeps its previous position: dividing by a zero count would
        # give NaN, and a NaN centroid breaks every later assignment.
        for j in range(n_clusters):
            members = x[belonging_centroids == j]
            if len(members) > 0:
                centroids[j] = members.mean(axis=0)

        old_d = d


def get_file_content(fnm):
    """Return the non-empty, whitespace-stripped lines of the text file ``fnm``.

    Blank lines are dropped because callers treat every entry as a path or as a
    five-field label record.
    """
    with open(fnm) as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]
| file_list.extend(get_file_content(f)) 109 | 110 | data = [] 111 | for one_file in tqdm(file_list): 112 | one_file = one_file.replace('images', 'labels') \ 113 | .replace('JPEGImages', 'labels') \ 114 | .replace('.png', '.txt') \ 115 | .replace('.jpg', '.txt') 116 | #print("one_file is:\n",one_file) 117 | for line in get_file_content(one_file): 118 | clazz, xx, yy, w, h = line.split() 119 | data.append([float(w), float(h)]) 120 | 121 | data = np.array(data) 122 | if args.engine.startswith("sklearn"): 123 | if args.engine == "sklearn": 124 | km = cluster.KMeans( 125 | n_clusters=args.num_clusters, tol=args.tol, verbose=True) 126 | elif args.engine == "sklearn-mini": 127 | km = cluster.MiniBatchKMeans( 128 | n_clusters=args.num_clusters, tol=args.tol, verbose=True) 129 | km.fit(data) 130 | result = km.cluster_centers_ 131 | # distance = km.inertia_ / data.shape[0] 132 | distance = avg_iou(data, result) 133 | else: 134 | result = k_means(data, args.num_clusters, args.tol) 135 | distance = avg_iou(data, result) 136 | 137 | write_anchors_to_file(result, distance, args.output) 138 | 139 | 140 | if "__main__" == __name__: 141 | parser = argparse.ArgumentParser() # 创建解析器 142 | parser.add_argument('file_list', nargs='+', help='TrainList') 143 | parser.add_argument('--num_clusters', '-n', default=9, 144 | type=int, help='Number of Clusters') 145 | parser.add_argument( 146 | '--output', '-o', default='../results/anchor.txt', type=str, help='Result Output File') 147 | parser.add_argument('--tol', '-t', default=0.005, 148 | type=float, help='Tolerate') 149 | parser.add_argument('--engine', '-m', default='sklearn', type=str, 150 | choices=['original', 'sklearn', 'sklearn-mini'], help='Method to use') 151 | 152 | args = parser.parse_args() # 解析参数 153 | 154 | main(args) 155 | -------------------------------------------------------------------------------- /examples/voxel.c: -------------------------------------------------------------------------------- 1 | #include "darknet.h" 2 
| 3 | void extract_voxel(char *lfile, char *rfile, char *prefix) 4 | { 5 | #ifdef OPENCV 6 | int w = 1920; 7 | int h = 1080; 8 | int shift = 0; 9 | int count = 0; 10 | CvCapture *lcap = cvCaptureFromFile(lfile); 11 | CvCapture *rcap = cvCaptureFromFile(rfile); 12 | while(1){ 13 | image l = get_image_from_stream(lcap); 14 | image r = get_image_from_stream(rcap); 15 | if(!l.w || !r.w) break; 16 | if(count%100 == 0) { 17 | shift = best_3d_shift_r(l, r, -l.h/100, l.h/100); 18 | printf("%d\n", shift); 19 | } 20 | image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); 21 | image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h); 22 | char buff[256]; 23 | sprintf(buff, "%s_%05d_l", prefix, count); 24 | save_image(ls, buff); 25 | sprintf(buff, "%s_%05d_r", prefix, count); 26 | save_image(rs, buff); 27 | free_image(l); 28 | free_image(r); 29 | free_image(ls); 30 | free_image(rs); 31 | ++count; 32 | } 33 | 34 | #else 35 | printf("need OpenCV for extraction\n"); 36 | #endif 37 | } 38 | 39 | void train_voxel(char *cfgfile, char *weightfile) 40 | { 41 | char *train_images = "/data/imagenet/imagenet1k.train.list"; 42 | char *backup_directory = "/home/pjreddie/backup/"; 43 | srand(time(0)); 44 | char *base = basecfg(cfgfile); 45 | printf("%s\n", base); 46 | float avg_loss = -1; 47 | network net = parse_network_cfg(cfgfile); 48 | if(weightfile){ 49 | load_weights(&net, weightfile); 50 | } 51 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); 52 | int imgs = net.batch*net.subdivisions; 53 | int i = *net.seen/imgs; 54 | data train, buffer; 55 | 56 | 57 | list *plist = get_paths(train_images); 58 | //int N = plist->size; 59 | char **paths = (char **)list_to_array(plist); 60 | 61 | load_args args = {0}; 62 | args.w = net.w; 63 | args.h = net.h; 64 | args.scale = 4; 65 | args.paths = paths; 66 | args.n = imgs; 67 | args.m = plist->size; 68 | args.d = &buffer; 69 | args.type = SUPER_DATA; 70 | 71 | pthread_t 
load_thread = load_data_in_thread(args); 72 | clock_t time; 73 | //while(i*imgs < N*120){ 74 | while(get_current_batch(net) < net.max_batches){ 75 | i += 1; 76 | time=clock(); 77 | pthread_join(load_thread, 0); 78 | train = buffer; 79 | load_thread = load_data_in_thread(args); 80 | 81 | printf("Loaded: %lf seconds\n", sec(clock()-time)); 82 | 83 | time=clock(); 84 | float loss = train_network(net, train); 85 | if (avg_loss < 0) avg_loss = loss; 86 | avg_loss = avg_loss*.9 + loss*.1; 87 | 88 | printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); 89 | if(i%1000==0){ 90 | char buff[256]; 91 | sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); 92 | save_weights(net, buff); 93 | } 94 | if(i%100==0){ 95 | char buff[256]; 96 | sprintf(buff, "%s/%s.backup", backup_directory, base); 97 | save_weights(net, buff); 98 | } 99 | free_data(train); 100 | } 101 | char buff[256]; 102 | sprintf(buff, "%s/%s_final.weights", backup_directory, base); 103 | save_weights(net, buff); 104 | } 105 | 106 | void test_voxel(char *cfgfile, char *weightfile, char *filename) 107 | { 108 | network net = parse_network_cfg(cfgfile); 109 | if(weightfile){ 110 | load_weights(&net, weightfile); 111 | } 112 | set_batch_network(&net, 1); 113 | srand(2222222); 114 | 115 | clock_t time; 116 | char buff[256]; 117 | char *input = buff; 118 | while(1){ 119 | if(filename){ 120 | strncpy(input, filename, 256); 121 | }else{ 122 | printf("Enter Image Path: "); 123 | fflush(stdout); 124 | input = fgets(input, 256, stdin); 125 | if(!input) return; 126 | strtok(input, "\n"); 127 | } 128 | image im = load_image_color(input, 0, 0); 129 | resize_network(&net, im.w, im.h); 130 | printf("%d %d\n", im.w, im.h); 131 | 132 | float *X = im.data; 133 | time=clock(); 134 | network_predict(net, X); 135 | image out = get_network_image(net); 136 | printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); 137 | save_image(out, "out"); 
def combination(*lists):
    """Return the Cartesian product of ``lists`` as a list of tuples.

    The first argument varies slowest, matching the order the original
    hand-written index arithmetic produced. If any argument is empty the
    result is ``[]``; with no arguments at all the result is ``[()]`` (the
    original raised ``TypeError`` in that case).

    Args:
        *lists: any number of sequences (lists, strings, ...).

    Returns:
        list of tuples, one element taken from each argument.
    """
    # Local import: the file only imports ``permutations`` from itertools.
    from itertools import product
    return list(product(*lists))


def load_classify_module(cfg, weights, data):
    """Load the classifier network and its metadata.

    Args:
        cfg: network configuration path, passed to ``load_net``.
        weights: weights path, passed to ``load_net``.
        data: data description path, passed to ``load_meta``.

    Returns:
        ``(net, meta)`` tuple.
    """
    net = load_net(cfg, weights, 0)
    meta = load_meta(data)
    return net, meta
def recordCoordinate(wordList, hanziList):
    """Map each character of ``wordList`` to the coordinate of its image.

    ``hanziList[i]`` is a dict whose first value is taken as the coordinate of
    the i-th character. If a character occurs more than once, the last
    occurrence wins.

    Args:
        wordList: string (or sequence) of characters, in detection order.
        hanziList: sequence of dicts, one per character, same order.

    Returns:
        dict mapping character -> coordinate.
    """
    coords = {}
    for idx, char in enumerate(wordList):
        coords[char] = list(hanziList[idx].values())[0]
    return coords


def _label_to_hanzi(label):
    """Decode a classifier label (bytes such as b'u6709') into its character."""
    return ('\\' + label.decode('utf-8')).encode('utf-8').decode('unicode_escape')


def crack(img_path, dtc_modu, classify_modu, k):
    """Solve one click-in-order captcha image.

    Steps: locate the characters, crop them, classify each crop, then work out
    the reading order, first with jieba and otherwise with a search engine.

    Args:
        img_path: path of the captcha image.
        dtc_modu: ``(net, meta)`` pair for the detector.
        classify_modu: ``(net, meta)`` pair for the classifier.
        k: maximum number of detected boxes that is still accepted.

    Returns:
        The recognised word in reading order, or ``0`` when the detector
        returns no boxes or more than ``k`` boxes.
    """
    # Locate the characters; the detector returns one box per character.
    print('\n'*2 + '定位汉字' + '\n' + '*'*80)
    d = time.time()
    rets = detect(dtc_modu[0], dtc_modu[1], img_path.encode())
    print('定位汉字耗时{}'.format(time.time() - d))
    l = len(rets)
    # Reject implausible detections. An empty result is rejected as well:
    # there is nothing to combine or order further down.
    if l == 0 or l > k:
        return 0

    # Crop one image per detected character.
    print('\n'*2 + '切割图片' + '\n' + '*'*80)
    s = time.time()
    hanzi_list = seg_one_img(img_path, rets)
    print('切割图片耗时{}'.format(time.time() - s))

    # Classify every crop.
    print('\n'*2 + '汉字识别' + '\n' + '*'*80)
    r = time.time()
    all_hanzi_lists = []  # one candidate list per character
    paths = []
    for per in hanzi_list:
        paths.extend(per.keys())

    for path in paths:
        img = load_image(path.encode(), 0, 0)
        res = classify(classify_modu[0], classify_modu[1], img)
        print(res[0:5])
        if res[0][1] < 0.95:
            # Low confidence: keep the top five guesses as alternatives.
            hanzis = [_label_to_hanzi(hz[0]) for hz in res[0:5]]
        else:
            hanzis = [_label_to_hanzi(res[0][0])]
        all_hanzi_lists.append(hanzis)

    # Every way of picking one candidate per character, joined into a string.
    hanzi_combination = combination(*all_hanzi_lists)
    hanzi_combination_connect = [''.join(words) for words in hanzi_combination]
    print('汉字识别耗时{}'.format(time.time() - r))

    # Work out the reading order.
    hanzi_center = []
    jieba_flag = 0
    # Started once, so the reported time covers every candidate that was
    # tried rather than only the last one.
    o = time.time()
    print('\n'*2 + '语序识别' + '\n' + '*'*80)
    for words in hanzi_combination_connect:
        # Remember which coordinate belongs to which character of this candidate.
        hanzi_center = recordCoordinate(words, hanzi_list)
        rec_word_possible = recog_order_jieba(words)
        if rec_word_possible:  # jieba found a real word
            jieba_flag = 1
            break
    if jieba_flag:
        rec_word = rec_word_possible
    else:
        # No candidate formed a known word: ask the search engine about the
        # most confident combination.
        hanzi_center = recordCoordinate(hanzi_combination_connect[0], hanzi_list)
        rec_word = search_engine_recog(hanzi_combination_connect[0])
    print('语序识别结果:{}'.format(rec_word))
    print('语序识别耗时{}'.format(time.time() - o))

    # Print the coordinates in reading order.
    print('\n'*2 + '最终结果' + '\n' + '*'*80)
    centers = [hanzi_center[i] for i in rec_word]
    print('正确语序的坐标:{}'.format(centers))
    print('总耗时{}'.format(time.time() - d))
    # Only the word is returned; a caller that needs the click positions has
    # to be given ``centers`` instead.
    return rec_word
print('正确率={}'.format(right/num)) 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /src/reorg_layer.c: -------------------------------------------------------------------------------- 1 | #include "reorg_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | 5 | #include 6 | 7 | 8 | layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra) 9 | { 10 | layer l = {0}; 11 | l.type = REORG; 12 | l.batch = batch; 13 | l.stride = stride; 14 | l.extra = extra; 15 | l.h = h; 16 | l.w = w; 17 | l.c = c; 18 | l.flatten = flatten; 19 | if(reverse){ 20 | l.out_w = w*stride; 21 | l.out_h = h*stride; 22 | l.out_c = c/(stride*stride); 23 | }else{ 24 | l.out_w = w/stride; 25 | l.out_h = h/stride; 26 | l.out_c = c*(stride*stride); 27 | } 28 | l.reverse = reverse; 29 | 30 | l.outputs = l.out_h * l.out_w * l.out_c; 31 | l.inputs = h*w*c; 32 | if(l.extra){ 33 | l.out_w = l.out_h = l.out_c = 0; 34 | l.outputs = l.inputs + l.extra; 35 | } 36 | 37 | if(extra){ 38 | fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, l.outputs); 39 | } else { 40 | fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); 41 | } 42 | int output_size = l.outputs * batch; 43 | l.output = calloc(output_size, sizeof(float)); 44 | l.delta = calloc(output_size, sizeof(float)); 45 | 46 | l.forward = forward_reorg_layer; 47 | l.backward = backward_reorg_layer; 48 | #ifdef GPU 49 | l.forward_gpu = forward_reorg_layer_gpu; 50 | l.backward_gpu = backward_reorg_layer_gpu; 51 | 52 | l.output_gpu = cuda_make_array(l.output, output_size); 53 | l.delta_gpu = cuda_make_array(l.delta, output_size); 54 | #endif 55 | return l; 56 | } 57 | 58 | void resize_reorg_layer(layer *l, int w, int h) 59 | { 60 | int stride = l->stride; 61 | int c = l->c; 62 | 63 | l->h = h; 64 | l->w = w; 65 | 66 | if(l->reverse){ 67 | l->out_w = w*stride; 68 | l->out_h = h*stride; 69 | l->out_c = 
c/(stride*stride); 70 | }else{ 71 | l->out_w = w/stride; 72 | l->out_h = h/stride; 73 | l->out_c = c*(stride*stride); 74 | } 75 | 76 | l->outputs = l->out_h * l->out_w * l->out_c; 77 | l->inputs = l->outputs; 78 | int output_size = l->outputs * l->batch; 79 | 80 | l->output = realloc(l->output, output_size * sizeof(float)); 81 | l->delta = realloc(l->delta, output_size * sizeof(float)); 82 | 83 | #ifdef GPU 84 | cuda_free(l->output_gpu); 85 | cuda_free(l->delta_gpu); 86 | l->output_gpu = cuda_make_array(l->output, output_size); 87 | l->delta_gpu = cuda_make_array(l->delta, output_size); 88 | #endif 89 | } 90 | 91 | void forward_reorg_layer(const layer l, network net) 92 | { 93 | int i; 94 | if(l.flatten){ 95 | memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); 96 | if(l.reverse){ 97 | flatten(l.output, l.w*l.h, l.c, l.batch, 0); 98 | }else{ 99 | flatten(l.output, l.w*l.h, l.c, l.batch, 1); 100 | } 101 | } else if (l.extra) { 102 | for(i = 0; i < l.batch; ++i){ 103 | copy_cpu(l.inputs, net.input + i*l.inputs, 1, l.output + i*l.outputs, 1); 104 | } 105 | } else if (l.reverse){ 106 | reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); 107 | } else { 108 | reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); 109 | } 110 | } 111 | 112 | void backward_reorg_layer(const layer l, network net) 113 | { 114 | int i; 115 | if(l.flatten){ 116 | memcpy(net.delta, l.delta, l.outputs*l.batch*sizeof(float)); 117 | if(l.reverse){ 118 | flatten(net.delta, l.w*l.h, l.c, l.batch, 1); 119 | }else{ 120 | flatten(net.delta, l.w*l.h, l.c, l.batch, 0); 121 | } 122 | } else if(l.reverse){ 123 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta); 124 | } else if (l.extra) { 125 | for(i = 0; i < l.batch; ++i){ 126 | copy_cpu(l.inputs, l.delta + i*l.outputs, 1, net.delta + i*l.inputs, 1); 127 | } 128 | }else{ 129 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta); 130 | } 131 | } 132 | 133 | #ifdef GPU 134 | void 
forward_reorg_layer_gpu(layer l, network net) 135 | { 136 | int i; 137 | if(l.flatten){ 138 | if(l.reverse){ 139 | flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 0, l.output_gpu); 140 | }else{ 141 | flatten_gpu(net.input_gpu, l.w*l.h, l.c, l.batch, 1, l.output_gpu); 142 | } 143 | } else if (l.extra) { 144 | for(i = 0; i < l.batch; ++i){ 145 | copy_gpu(l.inputs, net.input_gpu + i*l.inputs, 1, l.output_gpu + i*l.outputs, 1); 146 | } 147 | } else if (l.reverse) { 148 | reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); 149 | }else { 150 | reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); 151 | } 152 | } 153 | 154 | void backward_reorg_layer_gpu(layer l, network net) 155 | { 156 | if(l.flatten){ 157 | if(l.reverse){ 158 | flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 1, net.delta_gpu); 159 | }else{ 160 | flatten_gpu(l.delta_gpu, l.w*l.h, l.c, l.batch, 0, net.delta_gpu); 161 | } 162 | } else if (l.extra) { 163 | int i; 164 | for(i = 0; i < l.batch; ++i){ 165 | copy_gpu(l.inputs, l.delta_gpu + i*l.outputs, 1, net.delta_gpu + i*l.inputs, 1); 166 | } 167 | } else if(l.reverse){ 168 | reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta_gpu); 169 | } else { 170 | reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta_gpu); 171 | } 172 | } 173 | #endif 174 | -------------------------------------------------------------------------------- /python/recog_order.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | import jieba 3 | from itertools import permutations 4 | import requests 5 | from lxml import etree 6 | import threading 7 | # jieba.load_userdict('word.txt') #加载自定义词典 8 | import time 9 | 10 | flags = [] 11 | all_related = [] 12 | # 获得汉字所有排列方式 13 | def _permutation(str, r = None): 14 | word_list = list(permutations(str, r)) 15 | for i in range(len(word_list)): 16 | word_list[i] = ''.join(word_list[i]) 17 | return word_list 18 
# Serialises updates of the module-level ``all_related`` list made by the
# worker threads that search_engine_recog starts.
_related_lock = threading.Lock()


def file2dict(filename):
    """Load a whitespace-separated ``word count ...`` file into a dict.

    The first field of every line is the key and the second field, converted
    with ``int``, is the value. Blank lines are skipped.

    NOTE(review): the file is read as UTF-8, which assumes the dictionary is
    UTF-8 text like this source file -- confirm for the dict.txt in use.
    """
    word_counts = {}
    with open(filename, encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            word_counts[fields[0]] = int(fields[1])
    return word_counts


def sortedDictValues(di):
    """Return the ``(key, value)`` pairs of ``di`` sorted by key."""
    return sorted(di.items(), key=lambda item: item[0])


def highest_frequency(possible_words):
    """Return the candidate with the highest frequency in ``dict.txt``.

    Words missing from the dictionary count as frequency 0 (they used to
    raise ``KeyError``). On a tie the candidate that appears last wins, as
    before.

    Raises:
        IndexError: if ``possible_words`` is empty.
    """
    word_dict = file2dict('dict.txt')
    best_word = None
    best_freq = None
    for candidate in possible_words:
        freq = word_dict.get(candidate, 0)
        # ``>=`` keeps the old "later candidate wins a tie" behaviour.
        if best_freq is None or freq >= best_freq:
            best_word, best_freq = candidate, freq
    if best_word is None:
        raise IndexError('possible_words is empty')
    return best_word


def _full_length_candidates(str):
    """Collect every permutation of ``str`` that jieba segments as one whole word."""
    length = len(str)
    candidates = []
    for word in _permutation(str):
        seg_list = jieba.lcut(word, cut_all=True)  # full mode
        if not seg_list:
            continue
        longest = seg_list[find_longest(seg_list)]
        if len(longest) == length:
            candidates.append(longest)
    return candidates


def recog_order(str):
    """Recover the reading order of ``str`` with jieba, falling back to the search engine.

    Returns the single full-length word jieba finds, the most frequent one if
    there are several, or the result of ``search_engine_recog`` if there are
    none.
    """
    possible_words = _full_length_candidates(str)
    if len(possible_words) == 1:
        return possible_words[0]
    if len(possible_words) > 1:
        return highest_frequency(possible_words)
    return search_engine_recog(str)


def recog_order_jieba(str):
    """Recover the reading order of ``str`` with jieba only.

    Returns the recognised word, or ``0`` when no permutation is a full-length
    word.
    """
    possible_words = _full_length_candidates(str)
    if len(possible_words) == 1:
        return possible_words[0]
    if len(possible_words) > 1:
        return highest_frequency(possible_words)
    return 0


def find_longest(list):
    """Return the index of the first longest item of ``list`` (0 if it is empty)."""
    return max(range(len(list)), key=lambda i: len(list[i]), default=0)


def search_engine(word):
    """Query Baidu for ``word`` and return the related and highlighted strings.

    The result is the text of the links in the ``rs`` table followed by the
    text of the ``<em>`` elements inside links of the ``content_left`` block.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
    }
    # ``params`` lets requests escape the query; the timeout keeps one stalled
    # request from blocking search_engine_recog forever.
    r = requests.get('https://www.baidu.com/s', params={'wd': word},
                     headers=headers, timeout=10)
    html = etree.HTML(r.text)

    related_words1 = html.xpath('//*[@id="rs"]/table//tr//th/a/text()')
    related_words2 = html.xpath('//div[@id="content_left"]//a//em/text()')
    return related_words1 + related_words2


def search(word):
    """Thread target: add the search results for ``word`` to ``all_related``."""
    related_words = search_engine(word)
    # The list is extended in place under a lock. Rebinding the global from
    # several threads at once could lose results.
    with _related_lock:
        all_related.extend(related_words)


def search_engine_recog(str):
    """Recover the reading order of ``str`` by searching every permutation.

    One thread per permutation queries the search engine. The permutation
    contained in the largest number of returned strings is the answer; on a
    tie the earliest permutation wins. The per-permutation counts are left in
    the module-level ``flags`` and ``all_related`` is reset afterwards.
    """
    global flags
    global all_related

    word_list = _permutation(str)
    threads = [threading.Thread(target=search, args=[word]) for word in word_list]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    flags = [sum(1 for related_word in all_related if word in related_word)
             for word in word_list]
    all_related = []
    return word_list[flags.index(max(flags))]


def reverse(str):
    """Return ``str`` reversed."""
    return str[::-1]
search_engine_recog(word) 163 | print(rec_word, time.time() - start) 164 | # search_engine(word) 165 | 166 | 167 | 168 | # # # 结巴分词识别语序 169 | # # def recog_order(str): 170 | # # l = len(str) # l表示词语汉字个数 171 | # # word_list = _permutation(str) # 获得排列 172 | # # for word in word_list: 173 | # # # print('#'*50) 174 | # # seg_list = jieba.lcut(word, cut_all=True ) # 全模式 175 | # # index = find_longest(seg_list) 176 | # # if len(seg_list[index]) == l: 177 | # # return seg_list[index] 178 | # # return str 179 | 180 | 181 | # # # 寻找列表中最长的词 182 | # # def find_longest(list): 183 | # # l = 0 184 | # # index = 0 185 | # # for i,word in enumerate(list): 186 | # # if len(word) > l: 187 | # # l = len(word) 188 | # # index = i 189 | # # return index 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | --------------------------------------------------------------------------------