├── .gitignore ├── README.md ├── cfgs ├── __init__.py ├── config.py ├── config_crowdai.yaml ├── config_detrac.yaml ├── config_kitti.py ├── config_kitti.yaml ├── config_kitti_demo.yaml ├── config_v2.py ├── config_voc.py ├── config_voc.yaml └── exps │ ├── crowdai │ └── crowdai_baseline.yaml │ ├── detrac │ ├── detrac_baseline.yaml │ ├── detrac_baseline_2.yaml │ └── detrac_flow_center_w01.yaml │ ├── kitti │ ├── kitti_baseline.yaml │ ├── kitti_baseline_v3.yaml │ ├── kitti_baseline_v3_yf.yaml │ ├── kitti_joint_flow.yaml │ ├── kitti_joint_flow_low_lr.yaml │ ├── kitti_new_1.yaml │ ├── kitti_new_2.yaml │ ├── kitti_new_2_flow_center_ft.yaml │ ├── kitti_new_2_flow_center_ft_bbox_merge.yaml │ ├── kitti_new_2_flow_center_ft_cls.yaml │ ├── kitti_new_2_flow_center_ft_cls_bbox_merge.yaml │ ├── kitti_new_2_flow_center_ft_cls_flownet2.yaml │ ├── kitti_new_2_flow_center_ft_cls_merge.yaml │ ├── kitti_new_2_flow_center_ft_cls_merge_2x.yaml │ ├── kitti_new_2_flow_center_ft_cls_merge_ped.yaml │ ├── kitti_new_2_flow_center_ft_flownet2.yaml │ ├── kitti_new_2_flow_center_ft_flownet2_joint.yaml │ ├── kitti_new_2_flow_center_ft_half.yaml │ ├── kitti_new_2_flow_center_ft_iou_merge.yaml │ ├── kitti_new_2_flow_center_joint_ft.yaml │ ├── kitti_new_2_flow_center_joint_ft_low_lr.yaml │ ├── kitti_new_2_flow_center_joint_ft_w10.yaml │ ├── kitti_new_2_flow_dis.yaml │ ├── kitti_new_2_flow_ft.yaml │ ├── kitti_new_2_flow_ft_cls_bbox_merge.yaml │ ├── kitti_new_2_flow_ft_cls_merge.yaml │ ├── kitti_new_2_flow_ft_std_2_5.yaml │ ├── kitti_new_2_flow_ft_std_2_5_merge.yaml │ └── kitti_new_2_flow_spy.yaml │ ├── kitti_ft_exp3.py │ ├── voc0712 │ ├── voc0712_anchor.yaml │ ├── voc0712_baseline.yaml │ ├── voc0712_baseline_v3.yaml │ ├── voc0712_baseline_v3_rand.yaml │ ├── voc0712_box_mask_0.yaml │ ├── voc0712_low_lr.yaml │ ├── voc0712_mask.yaml │ ├── voc0712_multiple_anchors.yaml │ ├── voc0712_new.py │ ├── voc0712_obj_scale.yaml │ ├── voc0712_obj_scale_ft.yaml │ ├── voc0712_one_anchor.yaml │ ├── voc0712_overfit.yaml │ ├── voc0712_pred_raw.yaml │ ├── voc0712_template.yaml │ └── voc0712_trainval_ft_debug2.yaml │ └── voc0712_new_2.py ├── darknet.py ├── darknet_training_v3.py ├── darknet_v3.py ├── datasets ├── DataLoaderIterX.py ├── DataLoaderX.py ├── DetectionDataset.py ├── DetectionDatasetHelper.py ├── ImageFileDataset.py ├── ImageFileDataset_v2.py ├── KittiDataset.py ├── __init__.py ├── imdb.py ├── pascal_voc.py └── voc_eval.py ├── demo ├── demo_images_list.txt ├── detection_0030.jpg └── images │ ├── 000040.jpg │ ├── 000041.jpg │ ├── 000042.jpg │ ├── 000043.jpg │ ├── 000044.jpg │ ├── 000045.jpg │ ├── 000046.jpg │ ├── 000047.jpg │ ├── 000048.jpg │ ├── 000049.jpg │ ├── 000050.jpg │ ├── 000051.jpg │ ├── 000052.jpg │ ├── 000053.jpg │ ├── 000054.jpg │ ├── 000055.jpg │ ├── 000056.jpg │ ├── 000057.jpg │ ├── 000058.jpg │ ├── 000059.jpg │ ├── 000060.jpg │ ├── 000061.jpg │ ├── 000062.jpg │ ├── 000063.jpg │ ├── 000064.jpg │ ├── 000065.jpg │ ├── 000066.jpg │ ├── 000067.jpg │ ├── 000068.jpg │ ├── 000069.jpg │ ├── 000070.jpg │ ├── 000071.jpg │ ├── 000072.jpg │ ├── 000073.jpg │ ├── 000074.jpg │ ├── 000075.jpg │ ├── 000076.jpg │ ├── 000077.jpg │ ├── 000078.jpg │ └── 000079.jpg ├── flow ├── detection_analyzer.py ├── flow_generator.py ├── flow_util.py ├── flow_vis.py ├── flow_warper.py ├── gen_flow_images.py ├── gen_flow_images_cloudai.py ├── gen_flow_images_detrac.py ├── gen_val_from_all.sh ├── gen_warp_images_by_flow.py ├── img_diff.py ├── img_pairs.txt ├── kitti_train_labels.txt ├── kitti_val_images_warp.txt ├── plot_util.py ├── run_flow.sh ├── run_of.sh ├── 
shift_gt_by_flow.py ├── vis.jpg └── yolo_flow.py ├── layers ├── __init__.py ├── reorg │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── reorg_layer │ │ │ └── __init__.py │ ├── build.py │ ├── reorg_layer.py │ └── src │ │ ├── reorg_cpu.c │ │ ├── reorg_cpu.h │ │ ├── reorg_cuda.c │ │ ├── reorg_cuda.h │ │ ├── reorg_cuda_kernel.cu │ │ └── reorg_cuda_kernel.h └── roi_pooling │ ├── __init__.py │ ├── _ext │ ├── __init__.py │ └── roi_pooling │ │ └── __init__.py │ ├── build.py │ ├── roi_pool.py │ ├── roi_pool_py.py │ └── src │ ├── cuda │ ├── roi_pooling_kernel.cu │ └── roi_pooling_kernel.h │ ├── roi_pooling.c │ ├── roi_pooling.h │ ├── roi_pooling_cuda.c │ └── roi_pooling_cuda.h ├── make.sh ├── misc ├── kitti_detect.py ├── validate_dataset.py ├── vis.jpg ├── visualize_gt.py ├── voc_ap.py ├── voc_ap_v2.py ├── voc_data.py └── yolo_video_test.py ├── train ├── train_dataset_v3.py ├── train_util_v2.py └── yellowfin.py ├── train_data ├── gen_dashcam_train_data.py ├── gen_gtav_train_data.py ├── gen_kitti_det_train_data.py ├── gen_kitti_train_data.py └── gen_voc_train_data.py ├── utils ├── __init__.py ├── barrier.py ├── bbox.c ├── bbox.pyx ├── build.py ├── im_transform.py ├── network.py ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── nms_wrapper.py ├── plot_loss.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── timer.py ├── vis_util.py ├── yolo.c ├── yolo.py ├── yolo.pyx └── yolo_v2.py └── yolo_detect.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | .venv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | .idea 98 | 99 | *.o 100 | *.pkl 101 | models/* 102 | data/* 103 | train_data/* 104 | output/ 105 | flow/output/ 106 | flow/warp_classifier/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv2 in PyTorch 2 | This is a [PyTorch](https://github.com/pytorch/pytorch) 3 | implementation of YOLOv2. 4 | This project is forked from (https://github.com/longcw/yolo2-pytorch), but is not compatible with the original version. 5 | 6 | Currently, I train this model on the [KITTI Dataset](http://www.cvlibs.net/datasets/kitti/) as a demo. It predicts cars, pedestrians, and cyclists. If you want a general detector, please refer to [this](https://github.com/longcw/yolo2-pytorch). 7 | 8 | You can also use the [original YOLOv2](https://github.com/pjreddie/darknet) COCO model on KITTI. Here is a [demo video]( 9 | https://www.youtube.com/watch?v=mfB1C4QQJr4) 10 | 11 | For details about YOLO and YOLOv2, please refer to the [project page](https://pjreddie.com/darknet/yolo/) 12 | and the [paper](https://arxiv.org/abs/1612.08242): 13 | YOLO9000: Better, Faster, Stronger by Joseph Redmon and Ali Farhadi. 14 | 15 |
18 | 19 | ### System Environment 20 | + Ubuntu 16.04 21 | + CUDA 8.0 / cuDNN 5.1 22 | + Python 3.5 23 | + Numpy 1.12 24 | + PyTorch 0.1.12 25 | + OpenCV 3.2 26 | 27 | With a 1080Ti GPU, I get ~30 fps using this KITTI model (input size = 1216 x 352). 28 | 29 | ### Installation and demo 30 | 1. Clone this repository 31 | ```bash 32 | git clone git@github.com:cory8249/yolo2-pytorch.git 33 | ``` 34 | 35 | 2. Build the reorg layer (analogous to [`tf.extract_image_patches`](https://www.tensorflow.org/api_docs/python/tf/extract_image_patches)) 36 | ```bash 37 | cd yolo2-pytorch 38 | ./make.sh 39 | ``` 40 | 3. Download the trained model [kitti_baseline_v3_100.h5](https://drive.google.com/file/d/0B3IzhcU-mEUsWnBIcW00aUsteTQ) 41 | and set the model path in `yolo_detect.py` 42 | 4. Run the demo: `python3 yolo_detect.py`. 43 | 44 | Install any missing packages manually via pip. -------------------------------------------------------------------------------- /cfgs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/cfgs/__init__.py -------------------------------------------------------------------------------- /cfgs/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | if os.environ.get('DATASET') == 'kitti': 4 | from cfgs.config_kitti import * 5 | # from cfgs.config_voc import * 6 | from cfgs.exps.kitti_ft_exp3 import * 7 | else: 8 | from cfgs.config_voc import * 9 | from cfgs.exps.voc0712_new_2 import * 10 | 11 | # 10.5 ~ 11 ms yolo_flow detection only OpenBLAS 12 | # 0.75 s/batch 13 | 14 | # 16 ~ 17 ms anaconda 15 | # 1.55 s/batch 16 | 17 | label_names = label_names  # re-export the names pulled in by the star-imports above 18 | num_classes = len(label_names) 19 | 20 | 21 | def mkdir(path, max_depth=3): 22 | parent, child = os.path.split(path) 23 | if not os.path.exists(parent) and max_depth > 1: 24 | mkdir(parent, max_depth-1) 25 | 26 | if not os.path.exists(path): 27 | os.mkdir(path) 28 | 29 | 30 | # detection config 31 | ############################ 32 | thresh = 0.3 33 | 34 | 35 | # dir config 36 | ############################ 37 | ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 38 | DATA_DIR = os.path.join(ROOT_DIR, 'data') 39 | MODEL_DIR = os.path.join(ROOT_DIR, 'models') 40 | TRAIN_DIR = os.path.join(MODEL_DIR, 'training') 41 | TEST_DIR = os.path.join(MODEL_DIR, 'testing') 42 | 43 | trained_model = os.path.join(MODEL_DIR, h5_fname) 44 | pretrained_model = os.path.join(MODEL_DIR, pretrained_fname) 45 | train_output_dir = os.path.join(TRAIN_DIR, exp_name) 46 | test_output_dir = os.path.join(TEST_DIR, imdb_test, h5_fname) 47 | log_file = os.path.join(train_output_dir, 'train.log') 48 | check_point_file = os.path.join(train_output_dir, 'check_point.txt') 49 | mkdir(train_output_dir, max_depth=3) 50 | mkdir(test_output_dir, max_depth=4) 51 | 52 | rand_seed = 1024 53 | use_tensorboard = False 54 | 55 | log_interval = 50 56 | disp_interval = 50 -------------------------------------------------------------------------------- /cfgs/config_crowdai.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | # trained model 4 | h5_fname: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 5 | 6 | # Udacity crowdai 7 | label_names: [Car, Truck, Pedestrian] 8 | num_classes: 3 9 | 10 | # YOLO priors 11 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]] 12 | num_anchors: 5
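Two editorial sketches for the files above (examples, not repository code).

First, step 2 of the README builds the custom reorg layer. It is essentially a space-to-depth transform (hence the `tf.extract_image_patches` analogy): each `stride x stride` spatial block is moved into the channel dimension so high-resolution features can be concatenated with coarser ones. A minimal NumPy sketch of the shape change only (darknet's kernel in `layers/reorg/src` uses its own channel ordering):

```python
import numpy as np

def reorg(x, stride=2):
    # Space-to-depth: (N, C, H, W) -> (N, C*stride*stride, H//stride, W//stride)
    n, c, h, w = x.shape
    x = x.reshape(n, c, h // stride, stride, w // stride, stride)
    x = x.transpose(0, 3, 5, 1, 2, 4)  # gather the stride offsets into channels
    return x.reshape(n, c * stride * stride, h // stride, w // stride)

x = np.arange(2 * 64 * 26 * 26).reshape(2, 64, 26, 26)
assert reorg(x).shape == (2, 256, 13, 13)
```

Second, `cfgs/config.py` above selects its dataset branch from the `DATASET` environment variable at import time. A usage sketch, assuming the repository root is the working directory:

```python
import os

# Choose the KITTI branch before the first import of cfgs.config;
# any other value (or none) falls through to the VOC branch.
os.environ['DATASET'] = 'kitti'

from cfgs import config as cfg  # star-imports config_kitti + exps/kitti_ft_exp3
print(cfg.num_classes, cfg.label_names)  # 7 ('Car', 'Van', ..., 'Cyclist')
print(cfg.trained_model)                 # <repo>/models/yolo-voc.weights.h5
```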
13 | -------------------------------------------------------------------------------- /cfgs/config_detrac.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | # trained model 4 | h5_fname: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz 5 | 6 | # detrac 7 | label_names: [car, bus, van, others] 8 | num_classes: 4 9 | 10 | # YOLO priors 11 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]] 12 | num_anchors: 5 13 | -------------------------------------------------------------------------------- /cfgs/config_kitti.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # trained model 5 | h5_fname = 'yolo-voc.weights.h5' 6 | 7 | # KITTI 8 | label_names = ('Car', 'Van', 'Truck', 'Tram', 'Pedestrian', 'Person', 'Cyclist') 9 | num_classes = len(label_names) 10 | 11 | anchors = np.asarray([(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)], dtype=np.float) 12 | num_anchors = len(anchors) 13 | 14 | -------------------------------------------------------------------------------- /cfgs/config_kitti.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | # trained model 4 | h5_fname: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 5 | 6 | # kitti 7 | label_names: [Car, Van, Truck, Tram, Pedestrian, Person, Cyclist] 8 | num_classes: 7 9 | 10 | # YOLO priors 11 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]] 12 | num_anchors: 5 13 | -------------------------------------------------------------------------------- /cfgs/config_kitti_demo.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | # network input / output size 4 | inp_size: [1216, 352] 5 | out_size: [38, 11] # inp_size / 32 6 | 7 | # kitti 8 | label_names: [Car, Van, Truck, Tram, Pedestrian, Person, Cyclist] 9 | num_classes: 7 10 | 11 | # YOLO priors 12 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]] 13 | num_anchors: 5 14 | 15 | -------------------------------------------------------------------------------- /cfgs/config_v2.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | 4 | def add_cfg(cfg, yaml_file): 5 | # Read a YAML file and merge its keys into cfg (later files override earlier ones) 6 | try: 7 | cfg.update(yaml.safe_load(open(yaml_file, 'r'))) 8 | except Exception: 9 | print('Error: cannot parse cfg', yaml_file) 10 | raise 11 | 12 | 13 | def load_cfg_yamls(yaml_files): 14 | cfg = dict() 15 | for yf in yaml_files: 16 | add_cfg(cfg, yf) 17 | return cfg 18 | -------------------------------------------------------------------------------- /cfgs/config_voc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # trained model 5 | h5_fname = 'yolo-voc.weights.h5' 6 | 7 | # VOC 8 | label_names = ('aeroplane', 'bicycle', 'bird', 'boat', 9 | 'bottle', 'bus', 'car', 'cat', 'chair', 10 | 'cow', 'diningtable', 'dog', 'horse', 11 | 'motorbike', 'person', 'pottedplant', 12 | 'sheep', 'sofa', 'train', 'tvmonitor') 13 | num_classes = len(label_names) 14 | 15 | #anchors = np.asarray([(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)], dtype=np.float) 16 | # anchors = np.asarray([[1.3221, 1.73145], [3.19275, 4.00944], [5.05587, 8.09892], [9.47112, 4.84053], [11.2364, 10.0071]]) 17 | #anchors = 
np.asarray([[1.08, 1.19], [1.32, 1.73], [3.19, 4.01], [3.42, 4.41], [5.05, 8.09], 18 | # [6.63, 11.38], [9.47, 4.84], [11.23, 10.00], [16.62, 10.52]]) 19 | anchors = np.asarray([[1., 1.], [3., 3.], [5., 5.], [9., 9.], [13., 13.]]) 20 | num_anchors = len(anchors) 21 | 22 | -------------------------------------------------------------------------------- /cfgs/config_voc.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | # trained model 4 | h5_fname: /home/cory/project/yolo2-pytorch/models/yolo-voc.weights.h5 5 | 6 | # VOC 7 | label_names: [aeroplane, bicycle, bird, boat, 8 | bottle, bus, car, cat, chair, 9 | cow, diningtable, dog, horse, 10 | motorbike, person, pottedplant, 11 | sheep, sofa, train, tvmonitor] 12 | num_classes: 20 13 | 14 | # YOLO priors 15 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]] 16 | num_anchors: 5 17 | -------------------------------------------------------------------------------- /cfgs/exps/crowdai/crowdai_baseline.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: crowdai_baseline 4 | dataset_name: crowdai 5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/crowdai_baseline 7 | network_size_rand_period: 10 8 | inp_size: [992, 608] # img = 1920 x 1200 9 | out_size: [31, 19] # inp_size / 32 10 | inp_size_candidates: [[1056, 672], [992, 608], [960, 576], [928, 544]] 11 | optimizer: SGD # 'SGD, Adam' 12 | opt_param: all # 'all, conv345' 13 | 14 | start_step: 0 15 | lr_epoch: [0, 40, 80] 16 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 17 | max_epoch: 200 18 | 19 | # SGD only 20 | weight_decay: 0.0005 21 | momentum: 0.9 22 | 23 | # for training yolo2 24 | object_scale: 5. 25 | noobject_scale: 1. 26 | class_scale: 1. 27 | coord_scale: 1. 28 | iou_thresh: 0.6 29 | 30 | # dataset 31 | train_images: /home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_train_images.txt 32 | train_labels: /home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_train_labels.txt 33 | val_images: /home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_val_images.txt 34 | val_labels: /home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_val_labels.txt 35 | batch_size: 1 36 | train_batch_size: 8 37 | val_batch_size: 8 38 | 39 | # log & display 40 | disp_interval: 10 41 | -------------------------------------------------------------------------------- /cfgs/exps/detrac/detrac_baseline.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: detrac_baseline 4 | dataset_name: detrac 5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/detrac_baseline 7 | network_size_rand_period: 10 8 | inp_size: [960, 512] 9 | out_size: [30, 16] # inp_size / 32 10 | inp_size_candidates: [[960, 512]] 11 | optimizer: SGD # 'SGD, Adam' 12 | opt_param: all # 'all, conv345' 13 | 14 | start_step: 0 15 | lr_epoch: [0, 40, 80] 16 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 17 | max_epoch: 200 18 | 19 | # SGD only 20 | weight_decay: 0.0005 21 | momentum: 0.9 22 | 23 | # for training yolo2 24 | object_scale: 5. 25 | noobject_scale: 1. 26 | class_scale: 1. 27 | coord_scale: 1. 
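# (annotation, not part of the original detrac_baseline.yaml) object_scale,
# noobject_scale, class_scale, and coord_scale weight the four YOLOv2 loss
# terms: confidence loss for anchors matched to a ground-truth box, confidence
# loss for unmatched anchors, the classification term, and the box-regression
# term, respectively; iou_thresh below is the IoU cutoff used for that matching.
# The same block recurs, with identical values, in every experiment YAML below.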
28 | iou_thresh: 0.6 29 | 30 | # dataset 31 | train_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_images.txt 32 | train_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_labels.txt 33 | val_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_images.txt 34 | val_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_labels.txt 35 | batch_size: 1 36 | train_batch_size: 16 37 | val_batch_size: 8 38 | 39 | # log & display 40 | disp_interval: 10 41 | -------------------------------------------------------------------------------- /cfgs/exps/detrac/detrac_baseline_2.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: detrac_baseline_2 4 | dataset_name: detrac 5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/detrac_baseline_2 7 | network_size_rand_period: 10 8 | inp_size: [960, 512] 9 | out_size: [30, 16] # inp_size / 32 10 | inp_size_candidates: [[960, 512]] 11 | optimizer: SGD # 'SGD, Adam' 12 | opt_param: all # 'all, conv345' 13 | 14 | start_step: 0 15 | lr_epoch: [0, 40, 80] 16 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 17 | max_epoch: 200 18 | 19 | # SGD only 20 | weight_decay: 0.0005 21 | momentum: 0.9 22 | 23 | # for training yolo2 24 | object_scale: 5. 25 | noobject_scale: 1. 26 | class_scale: 1. 27 | coord_scale: 1. 28 | iou_thresh: 0.6 29 | 30 | # dataset 31 | train_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_images_2.txt 32 | train_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_labels_2.txt 33 | val_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_images_2.txt 34 | val_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_labels_2.txt 35 | batch_size: 1 36 | train_batch_size: 16 37 | val_batch_size: 8 38 | 39 | # log & display 40 | disp_interval: 10 41 | -------------------------------------------------------------------------------- /cfgs/exps/detrac/detrac_flow_center_w01.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: detrac_flow_center_w01 4 | dataset_name: detrac 5 | pretrained_model: /media/cory/data2/yolo2_models/detrac_baseline/detrac_baseline_20.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/detrac_flow_center_w01 7 | network_size_rand_period: 10 8 | inp_size: [960, 512] 9 | out_size: [30, 16] # inp_size / 32 10 | inp_size_candidates: [[960, 512]] 11 | optimizer: SGD # 'SGD, Adam' 12 | opt_param: all # 'all, conv345' 13 | 14 | start_step: 0 15 | lr_epoch: [0] 16 | lr_val: [!!float 1e-5] 17 | max_epoch: 100 18 | 19 | # SGD only 20 | weight_decay: 0.0005 21 | momentum: 0.9 22 | 23 | # for training yolo2 24 | object_scale: 5. 25 | noobject_scale: 1. 26 | class_scale: 1. 27 | coord_scale: 1. 
28 | iou_thresh: 0.6 29 | 30 | # dataset 31 | train_images: /home/cory/project/yolo2-pytorch/flow/detrac_w01_images.txt 32 | train_labels: /home/cory/project/yolo2-pytorch/flow/detrac_w01_labels.txt 33 | val_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_images.txt 34 | val_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_labels.txt 35 | batch_size: 1 36 | train_batch_size: 16 37 | val_batch_size: 8 38 | 39 | # log & display 40 | disp_interval: 10 41 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_baseline.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_baseline 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_baseline 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 40, 80] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 20 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_baseline_v3.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_baseline_v3 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_baseline_v3 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | optimizer: SGD # 'SGD, Adam' 13 | opt_param: all # 'all, conv345' 14 | 15 | start_step: 0 16 | lr_epoch: [0, 40, 80] 17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 18 | max_epoch: 200 19 | 20 | # SGD only 21 | weight_decay: 0.0005 22 | momentum: 0.9 23 | 24 | # for training yolo2 25 | object_scale: 5. 26 | noobject_scale: 1. 27 | class_scale: 1. 28 | coord_scale: 1. 
29 | iou_thresh: 0.6 30 | 31 | # dataset 32 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 33 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 34 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 35 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 36 | batch_size: 1 37 | train_batch_size: 12 38 | val_batch_size: 12 39 | 40 | # log & display 41 | disp_interval: 10 42 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_baseline_v3_yf.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_baseline_v3_yf 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_baseline_v3_yf 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | optimizer: YF # 'SGD, Adam, YF' 13 | opt_param: all # 'all, conv345' 14 | 15 | start_step: 0 16 | lr_epoch: [0, 40, 80] 17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 18 | max_epoch: 200 19 | 20 | # SGD only 21 | weight_decay: 0.0005 22 | momentum: 0.9 23 | 24 | # for training yolo2 25 | object_scale: 5. 26 | noobject_scale: 1. 27 | class_scale: 1. 28 | coord_scale: 1. 29 | iou_thresh: 0.6 30 | 31 | # dataset 32 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 33 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 34 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 35 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 36 | batch_size: 1 37 | train_batch_size: 12 38 | val_batch_size: 12 39 | 40 | # log & display 41 | disp_interval: 10 42 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_joint_flow.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_joint_flow 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_joint_flow 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | optimizer: SGD # 'SGD, Adam' 13 | opt_param: all # 'all, conv345' 14 | 15 | start_step: 0 16 | lr_epoch: [0, 40, 80] 17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 18 | max_epoch: 200 19 | 20 | # SGD only 21 | weight_decay: 0.0005 22 | momentum: 0.9 23 | 24 | # for training yolo2 25 | object_scale: 5. 26 | noobject_scale: 1. 27 | class_scale: 1. 28 | coord_scale: 1. 
29 | iou_thresh: 0.6 30 | 31 | # dataset 32 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images.txt 33 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt 34 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 35 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 36 | batch_size: 1 37 | train_batch_size: 12 38 | val_batch_size: 12 39 | 40 | # log & display 41 | disp_interval: 10 42 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_joint_flow_low_lr.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_joint_flow_low_lr 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_joint_flow_low_lr 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | optimizer: SGD # 'SGD, Adam' 13 | opt_param: all # 'all, conv345' 14 | 15 | start_step: 0 16 | lr_epoch: [0, 20, 40] 17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 18 | max_epoch: 200 19 | 20 | # SGD only 21 | weight_decay: 0.0005 22 | momentum: 0.9 23 | 24 | # for training yolo2 25 | object_scale: 5. 26 | noobject_scale: 1. 27 | class_scale: 1. 28 | coord_scale: 1. 29 | iou_thresh: 0.6 30 | 31 | # dataset 32 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images.txt 33 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt 34 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 35 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 36 | batch_size: 1 37 | train_batch_size: 12 38 | val_batch_size: 12 39 | 40 | # log & display 41 | disp_interval: 10 42 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_1.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_1 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_1 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1216, 352]] 11 | 12 | optimizer: SGD # 'SGD, Adam' 13 | opt_param: all # 'all, conv345' 14 | 15 | start_step: 0 16 | lr_epoch: [0, 60, 90] 17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 18 | max_epoch: 200 19 | 20 | # SGD only 21 | weight_decay: 0.0005 22 | momentum: 0.9 23 | 24 | # for training yolo2 25 | object_scale: 5. 26 | noobject_scale: 1. 27 | class_scale: 1. 28 | coord_scale: 1. 
29 | iou_thresh: 0.6 30 | 31 | # dataset 32 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 33 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 34 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 35 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 36 | batch_size: 1 37 | train_batch_size: 16 38 | 39 | # log & display 40 | disp_interval: 10 41 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_fixed 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_2_fixed 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 60, 90] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 20 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_bbox_merge.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_bbox_merge 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_bbox_merge 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt 35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_cls 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_bbox_merge.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_cls_bbox_merge 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_bbox_merge 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt 35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_flownet2.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_cls_flownet2 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_flownet2 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_flownet2.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_merge.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_cls_merge 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_merge 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt 35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_merge_2x.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_cls_merge_2x 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_merge_2x 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt 35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_merge_ped.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_cls_merge_ped 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_merge_ped 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt 35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_flownet2.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_flownet2 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_flownet2 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 
27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_flownet2.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_flownet2_joint.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_flownet2_joint 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_flownet2_joint 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_flownet2_joint.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_half.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_half 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_half 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_half.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_ft_iou_merge.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_ft_iou_merge 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_iou_merge 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt 35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_joint_ft.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_joint_ft 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_joint_ft 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_joint_ft_low_lr.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_joint_ft_low_lr 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_joint_ft_low_lr 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-6] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_center_joint_ft_w10.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_center_joint_ft_w10 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_joint_ft_w10 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images_w10.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels_w10.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_dis.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_dis 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_2_flow_dis 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 20, 40] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_joint_dis_flow_images.txt 34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_joint_dis_flow_labels.txt 35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 20 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_ft.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_ft 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_2_flow_ft 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
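# lr_epoch / lr_val above define a piecewise-constant schedule: the i-th rate
# takes effect at the i-th epoch boundary. A sketch of the lookup
# (hypothetical helper, not this repo's exact code):
#   def lr_at(epoch, lr_epoch, lr_val):
#       return [v for e, v in zip(lr_epoch, lr_val) if epoch >= e][-1]
# e.g. with lr_epoch=[0, 20, 40] and lr_val=[1e-3, 1e-4, 1e-5], lr_at(25)
# gives 1e-4; this fine-tune config pins a single rate of 1e-5 throughout.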
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_flow_images.txt 34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_ft_cls_bbox_merge.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_ft_cls_bbox_merge 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_ft_cls_bbox_merge 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt 35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_ft_cls_merge.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_ft_cls_merge 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_ft_cls_merge 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
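# Multi-scale training: every network_size_rand_period (10) batches the loader
# picks a new input size at random from inp_size_candidates above (see
# DetectionDataset.change_input_size_rand). Candidates should be multiples of
# the 32-px network stride; note that 354 in [1120, 354] is not, and looks
# like a typo for 352.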
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt 35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt 36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt 37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_ft_std_2_5.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_ft_std_2_5 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_ft_std_2_5 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/flow_std_labels.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_ft_std_2_5_merge.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_ft_std_2_5_merge 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_ft_std_2_5_merge 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0] 18 | lr_val: [!!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
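# Worked check of the out_size comment above: the darknet19 backbone
# downsamples by 32, so inp_size [1216, 352] yields a 1216/32 x 352/32
# = 38 x 11 output grid.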
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt 34 | train_labels: /home/cory/project/yolo2-pytorch/flow/flow_std_labels_merge.txt 35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 16 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 10 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti/kitti_new_2_flow_spy.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: kitti_new_2_flow_spy 4 | dataset_name: kitti 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_2_flow_spy 7 | network_size_rand_period: 10 8 | inp_size: [1216, 352] 9 | out_size: [38, 11] # inp_size / 32 10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384], 11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 20, 40] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_joint_spy_flow_images.txt 34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_joint_dis_flow_labels.txt 35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt 36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt 37 | batch_size: 1 38 | train_batch_size: 12 39 | val_batch_size: 8 40 | 41 | # log & display 42 | disp_interval: 20 43 | -------------------------------------------------------------------------------- /cfgs/exps/kitti_ft_exp3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | exp_name = 'kitti_ft_exp3_new' 4 | dataset_name = 'kitti' 5 | # pretrained_fname = '/home/cory/yolo2-pytorch/models/yolo-voc.weights.h5' 6 | # pretrained_fname = '/home/cory/yolo2-pytorch/models/training/voc0712_new_2/voc0712_new_2_160.h5' 7 | pretrained_fname = '/home/cory/yolo2-pytorch/data/darknet19.weights.npz' 8 | 9 | network_size_rand_period = 10 10 | inp_size_candidates = [(1216, 352)] 11 | inp_size = np.array([1216, 352], dtype=np.int) # w, h 12 | # inp_size = np.array([992, 544], dtype=np.int) 13 | out_size = inp_size / 32 14 | 15 | 16 | optimizer = 'SGD' # 'SGD, Adam' 17 | opt_param = 'all' # 'all, conv345' 18 | 19 | start_step = 0 20 | lr_epoch = (0, 60, 90) 21 | lr_val = (1e-3, 1e-4, 1e-5) 22 | 23 | max_epoch = 200 24 | 25 | # SGD only 26 | weight_decay = 0.0005 27 | momentum = 0.9 28 | 29 | # for training yolo2 30 | object_scale = 5. 31 | noobject_scale = 1. 32 | class_scale = 1. 33 | coord_scale = 1. 
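# note: `out_size = inp_size / 32` above is numpy true division, so out_size
# is a float array ([38., 11.]); if integer grid dimensions are needed
# downstream, floor division keeps it integral (sketch): inp_size // 32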
34 | iou_thresh = 0.6 35 | 36 | # dataset 37 | imdb_train = 'voc_2012_trainval' 38 | imdb_test = 'voc_2007_test' 39 | train_images = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_train_images.txt' 40 | train_labels = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt' 41 | val_images = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt' 42 | val_labels = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt' 43 | batch_size = 1 44 | train_batch_size = 16 45 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_anchor.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_anchor 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_anchor 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 20, 40] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 100 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5.0 27 | noobject_scale: 1.0 28 | class_scale: 1.0 29 | coord_scale: 1.0 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 12 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | 46 | anchors: [[1.3221, 1.73145], [3.19275, 4.00944], [5.05587, 8.09892], [9.47112, 4.84053], [11.2364, 10.0071]] 47 | 48 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_baseline.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_baseline 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_baseline 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 20, 40] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
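# train_images / train_labels (and the val_* pair) below are parallel list
# files: line i of the images list is an image path whose annotation file
# path sits on line i of the labels list (see DetectionDataset.load_dataset).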
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 12 41 | val_batch_size: 12 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_baseline_v3.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_baseline_v3 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_baseline_v3 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 20, 40] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 12 41 | val_batch_size: 12 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_baseline_v3_rand.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_baseline_v3_rand 4 | dataset_name: voc 5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/voc0712_baseline_v3_rand 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | optimizer: SGD # 'SGD, Adam' 13 | opt_param: all # 'all, conv345' 14 | 15 | start_step: 0 16 | lr_epoch: [0, 20, 40] 17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 18 | max_epoch: 200 19 | 20 | # SGD only 21 | weight_decay: 0.0005 22 | momentum: 0.9 23 | 24 | # for training yolo2 25 | object_scale: 5. 26 | noobject_scale: 1. 27 | class_scale: 1. 28 | coord_scale: 1. 
29 | iou_thresh: 0.6 30 | 31 | # dataset 32 | imdb_train: voc0712_trainval 33 | imdb_test: voc07_test 34 | train_images: /home/cory/project/yolo2-pytorch/train_data/voc/voc_train_images.txt 35 | train_labels: /home/cory/project/yolo2-pytorch/train_data/voc/voc_train_labels.txt 36 | val_images: /home/cory/project/yolo2-pytorch/train_data/voc/voc_test_images.txt 37 | val_labels: /home/cory/project/yolo2-pytorch/train_data/voc/voc_test_labels.txt 38 | batch_size: 1 39 | train_batch_size: 16 40 | val_batch_size: 16 41 | 42 | # log & display 43 | disp_interval: 10 44 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_box_mask_0.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_box_mask_0 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_box_mask_0 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 60, 90] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 201 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 16 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_low_lr.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_low_lr 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_low_lr 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 20, 30, 40] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5, !!float 1e-6] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
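# the schedule above, read as epoch ranges: 0-19 -> 1e-3, 20-29 -> 1e-4,
# 30-39 -> 1e-5, 40+ -> 1e-6; the extra 1e-6 stage is what distinguishes
# this config from the baseline (hence the _low_lr suffix).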
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 12 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_mask.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_mask 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/models/training/voc0712_template/voc0712_template_100.h5 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_mask 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 100] 18 | lr_val: [!!float 1e-5, !!float 1e-6] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 12 41 | 42 | # log & display 43 | disp_interval: 10 44 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_multiple_anchors.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_multiple_anchors 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_multiple_anchors 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 20, 100] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
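# the `anchors` list at the bottom of this file is in output-grid cell units;
# at the 32-px stride an anchor (w, h) spans roughly (32*w, 32*h) pixels,
# e.g. [1.08, 1.19] -> about 35 x 38 px (assuming the standard YOLOv2
# convention).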
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 12 41 | val_batch_size: 12 42 | 43 | # log & display 44 | disp_interval: 10 45 | 46 | anchors: [[1.08, 1.19], [1.32, 1.73], [3.19, 4.01], [3.42, 4.41], [5.05, 8.09], 47 | [6.63, 11.38], [9.47, 4.84], [11.23, 10.00], [16.62, 10.52]] 48 | num_anchors: 9 49 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_new.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | exp_name = 'voc0712_new' 4 | dataset_name = 'voc' 5 | pretrained_fname = '/home/cory/yolo2-pytorch/data/darknet19.weights.npz' 6 | 7 | network_size_rand_period = 10 8 | inp_size_candidates = [(320, 320), (352, 352), (384, 384), (416, 416), (448, 448), 9 | (480, 480), (512, 512), (544, 544), (576, 576), (608, 608)] 10 | inp_size = np.array([416, 416], dtype=np.int) 11 | out_size = inp_size / 32 12 | 13 | optimizer = 'SGD' # 'SGD, Adam' 14 | opt_param = 'all' # 'all, conv345' 15 | 16 | start_step = 0 17 | lr_epoch = (0, 60, 90) 18 | lr_val = (1E-3, 1E-4, 1E-5) 19 | 20 | max_epoch = 200 21 | 22 | # SGD only 23 | weight_decay = 0.0005 24 | momentum = 0.9 25 | 26 | # for training yolo2 27 | object_scale = 5. 28 | noobject_scale = 1. 29 | class_scale = 1. 30 | coord_scale = 1. 31 | iou_thresh = 0.6 32 | 33 | # dataset 34 | imdb_train = 'voc_2012_trainval' 35 | imdb_test = 'voc_2007_test' 36 | train_images = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt' 37 | train_labels = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt' 38 | val_images = '/home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt' 39 | val_labels = '/home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt' 40 | batch_size = 1 41 | train_batch_size = 16 42 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_obj_scale.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_obj_scale 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/models/yolo-voc.weights.h5 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_obj_scale 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 60] 18 | lr_val: [!!float 1e-6, !!float 1e-7] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 1. 27 | noobject_scale: 0.5 28 | class_scale: 1. 29 | coord_scale: 5. 
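# note how this experiment inverts the repo's usual weighting (object 5 /
# noobject 1 / coord 1): objectness is down-weighted and coordinates
# up-weighted, close to the original YOLO paper's lambda_coord = 5 and
# lambda_noobj = 0.5.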
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | log_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/train.log 46 | check_point_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/check_point.txt 47 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_obj_scale_ft.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_obj_scale_ft 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/models/training/voc0712_new_2/voc0712_new_2_160.h5 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_obj_scale_ft 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 30] 18 | lr_val: [!!float 1e-5, !!float 1e-6] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 1. 27 | noobject_scale: 0.5 28 | class_scale: 1. 29 | coord_scale: 5. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | val_batch_size: 8 42 | 43 | # log & display 44 | disp_interval: 10 45 | log_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/train.log 46 | check_point_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/check_point.txt 47 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_one_anchor.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_one_anchor 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_one_anchor 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 20, 40] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 
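# the anchors at the bottom of this file are hand-set squares (1 to 13 cells
# wide) instead of the clustered VOC priors used in voc0712_anchor.yaml;
# presumably an ablation on anchor shape.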
28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 12 41 | val_batch_size: 12 42 | 43 | # log & display 44 | disp_interval: 10 45 | 46 | anchors: [[1., 1.], [3., 3.], [5., 5.], [9., 9.], [13., 13.]] 47 | num_anchors: 5 48 | 49 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_overfit.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_overfit 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_overfit 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 100, 400] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 2000 20 | 21 | # SGD only 22 | weight_decay: 0.0 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_10_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_10_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 10 41 | val_batch_size: 10 42 | 43 | # log & display 44 | disp_interval: 1 45 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_pred_raw.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_pred_raw 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | # pretrained_model: /home/cory/yolo2-pytorch/models/training/voc0712_low_lr/voc0712_low_lr_30.h5 7 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_pred_raw 8 | network_size_rand_period: 10 9 | inp_size: [416, 416] 10 | out_size: [13, 13] # inp_size / 32 11 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 12 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 13 | 14 | optimizer: SGD # 'SGD, Adam' 15 | opt_param: all # 'all, conv345' 16 | 17 | start_step: 0 18 | lr_epoch: [0, 20, 40] 19 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 20 | max_epoch: 200 21 | 22 | # SGD only 23 | weight_decay: 0.0005 24 | momentum: 0.9 25 | 26 | # for training yolo2 27 | object_scale: 5. 28 | noobject_scale: 1. 29 | class_scale: 1. 30 | coord_scale: 1. 
31 | iou_thresh: 0.6 32 | 33 | # dataset 34 | imdb_train: voc0712_trainval 35 | imdb_test: voc07_test 36 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 37 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 38 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 39 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 40 | batch_size: 1 41 | train_batch_size: 16 42 | val_batch_size: 12 43 | 44 | # log & display 45 | disp_interval: 10 46 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_template.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_template 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_template 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 60, 90] 18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 12 41 | val_batch_size: 12 42 | 43 | # log & display 44 | disp_interval: 10 45 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712/voc0712_trainval_ft_debug2.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | exp_name: voc0712_ft 4 | dataset_name: voc 5 | pretrained_model: /home/cory/yolo2-pytorch/models/yolo-voc.weights.h5 6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_ft 7 | network_size_rand_period: 10 8 | inp_size: [416, 416] 9 | out_size: [13, 13] # inp_size / 32 10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448], 11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]] 12 | 13 | optimizer: SGD # 'SGD, Adam' 14 | opt_param: all # 'all, conv345' 15 | 16 | start_step: 0 17 | lr_epoch: [0, 60] 18 | lr_val: [!!float 1e-6, !!float 1e-7] 19 | max_epoch: 200 20 | 21 | # SGD only 22 | weight_decay: 0.0005 23 | momentum: 0.9 24 | 25 | # for training yolo2 26 | object_scale: 5. 27 | noobject_scale: 1. 28 | class_scale: 1. 29 | coord_scale: 1. 
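# fine-tuning setup: starts from fully trained yolo-voc weights rather than
# the darknet19 backbone, so the learning rate opens at 1e-6 instead of the
# 1e-3 used by the from-scratch configs.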
30 | iou_thresh: 0.6 31 | 32 | # dataset 33 | imdb_train: voc0712_trainval 34 | imdb_test: voc07_test 35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt 36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt 37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt 38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt 39 | batch_size: 1 40 | train_batch_size: 16 41 | 42 | # log & display 43 | disp_interval: 10 44 | log_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/train.log 45 | check_point_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/check_point.txt 46 | -------------------------------------------------------------------------------- /cfgs/exps/voc0712_new_2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | exp_name = 'voc0712_new_2' 4 | dataset_name = 'voc' 5 | pretrained_fname = '/home/cory/yolo2-pytorch/data/darknet19.weights.npz' 6 | 7 | network_size_rand_period = 10 8 | # inp_size_candidates = [(320, 320), (352, 352), (384, 384), (416, 416), (448, 448)] 9 | inp_size_candidates = [(320, 320), (352, 352), (384, 384), (416, 416), (448, 448), 10 | (480, 480), (512, 512), (544, 544), (576, 576), (608, 608)] 11 | inp_size = np.array([416, 416], dtype=np.int) 12 | out_size = inp_size / 32 13 | 14 | optimizer = 'SGD' # 'SGD, Adam' 15 | opt_param = 'all' # 'all, conv345' 16 | 17 | start_step = 0 18 | lr_epoch = (0, 60, 90) 19 | lr_val = (1e-3, 1e-4, 1e-5) 20 | 21 | max_epoch = 300 22 | 23 | # SGD only 24 | weight_decay = 0.0005 25 | momentum = 0.9 26 | 27 | # for training yolo2 28 | object_scale = 5. 29 | noobject_scale = 1. 30 | class_scale = 1. 31 | coord_scale = 1. 
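# sketch of how a module-style config like this one can be consumed as a
# plain dict (illustrative only; cfgs/config.py may read it differently):
#   import cfgs.exps.voc0712_new_2 as exp
#   cfg = {k: v for k, v in vars(exp).items() if not k.startswith('__')}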
32 | iou_thresh = 0.6
33 |
34 | # dataset
35 | imdb_train = 'voc_2012_trainval'
36 | imdb_test = 'voc_2007_test'
37 | train_images = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt'
38 | train_labels = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt'
39 | val_images = '/home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt'
40 | val_labels = '/home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt'
41 | batch_size = 1
42 | train_batch_size = 16  # epoch 1~200 batch_size 32
43 |
--------------------------------------------------------------------------------
/datasets/DataLoaderX.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import torch
4 | import torch.utils.data as data
5 | from torch.autograd import Variable
6 |
7 | from datasets.DetectionDataset import DetectionDataset
8 | from datasets.DataLoaderIterX import DataLoaderIterX
9 |
10 |
11 | # Workaround: modify /usr/local/lib/python3.5/dist-packages/torch/utils/data/__init__.py
12 | # and add this line: from .dataloader import DataLoaderIter
13 | # so that the data.DataLoaderIter class becomes publicly available to inherit from:
14 | # class DataLoaderIterX(data.DataLoaderIter):
15 | #     pass
16 |
17 |
18 | class DataLoaderX(data.DataLoader):
19 |     def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, num_workers=1,
20 |                  pin_memory=False, drop_last=False):
21 |         super(DataLoaderX, self).__init__(dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler,
22 |                                           num_workers=num_workers, pin_memory=pin_memory, drop_last=drop_last)
23 |
24 |     def __iter__(self):
25 |         return DataLoaderIterX(self)
26 |
27 |
28 | def test_detection_dataset():
29 |     from cfgs.config_v2 import add_cfg
30 |     dataset_yaml = '/home/cory/project/yolo2-pytorch/cfgs/config_detrac.yaml'
31 |     exp_yaml = '/home/cory/project/yolo2-pytorch/cfgs/exps/detrac/detrac_baseline.yaml'
32 |     cfg = dict()
33 |     add_cfg(cfg, dataset_yaml)
34 |     add_cfg(cfg, exp_yaml)
35 |     dataset = DetectionDataset(cfg)
36 |     num_workers = 4
37 |     batch_size = 16
38 |     dataloader = DataLoaderX(dataset, batch_size=batch_size,
39 |                              shuffle=True, num_workers=num_workers)
40 |
41 |     t0 = time.time()
42 |     for i, data in enumerate(dataloader):
43 |         if i > 100:
44 |             break
45 |
46 |         # get the inputs
47 |         inputs, labels = data
48 |         print(i, inputs.size(), labels.size())
49 |
50 |         # wrap them in Variable
51 |         inputs, labels = Variable(inputs.cuda()), labels
52 |         import numpy as np
53 |         assert np.sum(inputs.data.cpu().numpy()) > 0
54 |     t1 = time.time()
55 |     print(t1 - t0)
56 |
57 |
58 | if __name__ == '__main__':
59 |     test_detection_dataset()
60 |
--------------------------------------------------------------------------------
/datasets/DetectionDataset.py:
--------------------------------------------------------------------------------
1 | import PIL.Image as Image
2 | import torch
3 | import torch.utils.data as data
4 | from torch.autograd import Variable
5 |
6 | from datasets.DetectionDatasetHelper import *
7 |
8 |
9 | class DetectionDataset(data.Dataset):
10 |     def __init__(self, cfg, mode='train'):
11 |         self.cfg = cfg
12 |         self.mode = mode
13 |         if self.mode == 'train':
14 |             self.batch_size = cfg['train_batch_size']
15 |             self.image_list_file = cfg['train_images']
16 |             self.label_list_file = cfg['train_labels']
17 |         else:
18 |             self.batch_size = cfg['val_batch_size']
19 |             self.image_list_file = cfg['val_images']
20 |             self.label_list_file = cfg['val_labels']
21 |
22 |         self.image_paths = list()
23 |         self.annotations = list()
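        # image_paths, annotations and image_indexes are parallel containers;
        # load_dataset() just below fills them and drops images whose label
        # file contains no usable labels.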
24 |         self.image_indexes = list()
25 |         self.classes_ = cfg['label_names']
26 |         self.load_dataset(self.classes_)
27 |
28 |         # use cfg for the default input size; it is re-randomized every 10 batches (see DataLoaderX)
29 |         self.inp_size = cfg['inp_size']
30 |
31 |     def __getitem__(self, index):
32 |         raise NotImplementedError
33 |
34 |     def __len__(self):
35 |         return len(self.image_paths)
36 |
37 |     def get_train_data(self, index, network_size):
38 |         img = Image.open(self.image_paths[index]).convert('RGB')
39 |         gt = self.annotations[index]
40 |         gt.update({'img_size': img.size})
41 |
42 |         # random transforms (scale, color, flip)
43 |         im, boxes = affine_transform(img, gt['boxes'], network_size)
44 |         gt.update({'boxes': boxes})
45 |         target_np = encode_to_np(gt)
46 |         im_tensor = torch.from_numpy(im.transpose((2, 0, 1))).float()
47 |         return im_tensor, target_np
48 |
49 |     def input_size(self):
50 |         return self.inp_size
51 |
52 |     def change_input_size_rand(self):
53 |         # pick a new input size at random from cfg['inp_size_candidates']
54 |         # (called periodically to re-randomize the network input size)
55 |         rand_id = np.random.randint(0, len(self.cfg['inp_size_candidates']))
56 |         rand_network_size = self.cfg['inp_size_candidates'][rand_id]
57 |         self.inp_size = rand_network_size
58 |         # print('change_input_size_rand', rand_network_size)
59 |
60 |     def load_dataset(self, label_map):
61 |         remove_id_list = list()
62 |         try:
63 |             img_file = open(self.image_list_file)
64 |             self.image_paths = [line.strip() for line in img_file.readlines()]
65 |             gt_file = open(self.label_list_file)
66 |             for fi, label_file_name in enumerate(gt_file.readlines()):
67 |                 label_file_name = label_file_name.strip()
68 |                 label_dict = parse_label_file(label_file_name, label_map)
69 |                 if not label_dict['has_label']:
70 |                     remove_id_list.append(fi)
71 |                 self.annotations.append(label_dict)
72 |         except Exception as e:
73 |             raise e
74 |
75 |         self.image_paths = np.delete(self.image_paths, remove_id_list)
76 |         self.annotations = np.delete(self.annotations, remove_id_list)
77 |         print('dataset size =', len(self.image_paths), ' (deleted', len(remove_id_list), ')')
78 |         assert len(self.image_paths) == len(self.annotations)
79 |         self.image_indexes = range(len(self.image_paths))
80 |
81 |
82 | def test_detection_dataset():
83 |     from cfgs.config_v2 import add_cfg
84 |     dataset_yaml = '/home/cory/project/yolo2-pytorch/cfgs/config_kitti.yaml'
85 |     exp_yaml = '/home/cory/project/yolo2-pytorch/cfgs/exps/kitti/kitti_baseline_v3.yaml'
86 |     cfg = dict()
87 |     add_cfg(cfg, dataset_yaml)
88 |     add_cfg(cfg, exp_yaml)
89 |     dataset = DetectionDataset(cfg)
90 |     dataloader = torch.utils.data.DataLoader(dataset, batch_size=16,
91 |                                              shuffle=True, num_workers=4)
92 |     for i, data in enumerate(dataloader):
93 |         # get the inputs
94 |         print(i)
95 |         inputs, labels = data
96 |         print(inputs.size(), labels.size())
97 |
98 |         # wrap them in Variable
99 |         inputs, labels = Variable(inputs), labels
100 |
101 |
102 | if __name__ == '__main__':
103 |     test_detection_dataset()
104 |
--------------------------------------------------------------------------------
/datasets/DetectionDatasetHelper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from utils.im_transform import imcv2_affine_trans, imcv2_recolor
4 |
5 |
6 | def parse_label_file(label_file_path, label_map):
7 |     gt_classes = list()
8 |     boxes = list()
9 |     has_label = False
10 |     with open(label_file_path) as label_file:
11 |         for line in label_file.readlines():
12 |             if line == '\n':
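                # blank separator line: skip it. Non-blank lines are expected
                # to look like "<class> <x1> <y1> <x2> <y2> ..." (only
                # values[1:5] below are used as the box; extra fields are
                # ignored).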
13 |                 continue
14 |             values = line.strip().split(' ')
15 |             label = values[0]
16 |
17 |             try:
18 |                 label_id = label_map.index(label)
19 |             except ValueError:
20 |                 # label is not in label_map; mark it with -1
21 |                 label_id = -1
22 |             gt_classes.append(label_id)
23 |             bbox = [int(float(v)) for v in values[1:5]]
24 |             boxes.append(bbox)
25 |             has_label = True
26 |     assert len(gt_classes) == len(boxes)
27 |     return {'boxes': boxes, 'gt_classes': gt_classes, 'has_label': has_label}
28 |
29 |
30 | def clip_boxes(boxes, im_shape):
31 |     """
32 |     Clip boxes to image boundaries.
33 |     """
34 |     if boxes.shape[0] == 0:
35 |         return boxes
36 |
37 |     # x1 >= 0
38 |     boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
39 |     # y1 >= 0
40 |     boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
41 |     # x2 < im_shape[1]
42 |     boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
43 |     # y2 < im_shape[0]
44 |     boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
45 |     return boxes
46 |
47 |
48 | def offset_boxes(boxes, im_shape, scale, offs, flip):
49 |     if len(boxes) == 0:
50 |         return boxes
51 |     boxes = np.asarray(boxes, dtype=np.float)
52 |     boxes *= scale
53 |     boxes[:, 0::2] -= offs[0]
54 |     boxes[:, 1::2] -= offs[1]
55 |     boxes = clip_boxes(boxes, im_shape)
56 |
57 |     if flip:
58 |         boxes_x = np.copy(boxes[:, 0])
59 |         boxes[:, 0] = im_shape[1] - boxes[:, 2]
60 |         boxes[:, 2] = im_shape[1] - boxes_x
61 |
62 |     return boxes
63 |
64 |
65 | def affine_transform(img, boxes, net_inp_size):
66 |     if len(boxes) == 0:  # degenerate case: keep the (im, boxes) return contract instead of returning None
67 |         return imcv2_recolor(cv2.resize(np.asarray(img, dtype=np.uint8), tuple(net_inp_size))), np.zeros((0, 4), dtype=np.int)
68 |     im = np.asarray(img, dtype=np.uint8)
69 |     w, h = net_inp_size
70 |     im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
71 |     im, trans_param = imcv2_affine_trans(im)
72 |     scale, offs, flip = trans_param
73 |     boxes = offset_boxes(boxes, im.shape, scale, offs, flip)
74 |
75 |     boxes[:, 0::2] *= float(w) / im.shape[1]
76 |     boxes[:, 1::2] *= float(h) / im.shape[0]
77 |     np.clip(boxes[:, 0::2], 0, w - 1, out=boxes[:, 0::2])
78 |     np.clip(boxes[:, 1::2], 0, h - 1, out=boxes[:, 1::2])
79 |     im = cv2.resize(im, (w, h))
80 |
81 |     im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
82 |     im = imcv2_recolor(im)
83 |     boxes = np.asarray(boxes, dtype=np.int)
84 |
85 |     debug = False
86 |     if debug:
87 |         import matplotlib.pyplot as plt
88 |         for idx, box in enumerate(boxes):
89 |             # box = [xmin, ymin, xmax, ymax] with original pixel scale
90 |             bb = [int(b) for b in box]
91 |             im[bb[1]:bb[3], bb[0], :] = 1.
92 |             im[bb[1]:bb[3], bb[2], :] = 1.
93 |             im[bb[1], bb[0]:bb[2], :] = 1.
94 |             im[bb[3], bb[0]:bb[2], :] = 1.
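        # the four assignments above paint 1-px-wide box edges in white
        # (im is in the 0~1 float range after imcv2_recolor)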
95 | plt.imshow(im) 96 | plt.show() 97 | 98 | # im (pixels range 0~1) 99 | # boxes (pos range 0~max_img_size) 100 | return im, boxes 101 | 102 | 103 | def encode_to_np(gt): 104 | labels = gt['gt_classes'] 105 | bboxes = gt['boxes'] 106 | img_size = gt['img_size'] 107 | gt_size = len(labels) 108 | 109 | num_type = 8 # 1 + 1 + 2 + 4 110 | max_label_num_per_image = 50 111 | 112 | data_matrix = np.zeros([max_label_num_per_image, num_type], dtype=np.float32) 113 | data_matrix[0:gt_size, 0] = 1 # valid mask 114 | data_matrix[0:gt_size, 1] = labels 115 | data_matrix[0:gt_size, 2:4] = img_size 116 | data_matrix[0:gt_size, 4:8] = bboxes 117 | return data_matrix 118 | 119 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/imdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import PIL 3 | import numpy as np 4 | from multiprocessing import Pool 5 | 6 | 7 | def mkdir(path, max_depth=3): 8 | parent, child = os.path.split(path) 9 | if not os.path.exists(parent) and max_depth > 1: 10 | mkdir(parent, max_depth-1) 11 | 12 | if not os.path.exists(path): 13 | os.mkdir(path) 14 | 15 | 16 | class ImageDataset(object): 17 | def __init__(self, name, datadir, batch_size, im_processor, processes=2, shuffle=True, dst_size=None): 18 | self._name = name 19 | self._data_dir = datadir 20 | self._batch_size = batch_size 21 | self.dst_size = dst_size 22 | 23 | self._epoch = -1 24 | self._num_classes = 0 25 | self._classes = [] 26 | 27 | # load by self.load_dataset() 28 | self._image_indexes = [] 29 | self._image_names = [] 30 | self._annotations = [] 31 | # Use this dict for storing dataset specific config options 32 | self.config = {} 33 | 34 | # Pool 35 | self._shuffle = shuffle 36 | self._pool_processes = processes 37 | self.pool = Pool(self._pool_processes) 38 | self.gen = None 39 | self._im_processor = im_processor 40 | 41 | def next_batch(self): 42 | batch = {'images': [], 'gt_boxes': [], 'gt_classes': [], 'dontcare': [], 'origin_im': []} 43 | i = 0 44 | while i < self.batch_size: 45 | try: 46 | if self.gen is None: 47 | raise AttributeError 48 | images, gt_boxes, classes, dontcare, origin_im = next(self.gen) 49 | batch['images'].append(images) 50 | batch['gt_boxes'].append(gt_boxes) 51 | batch['gt_classes'].append(classes) 52 | batch['dontcare'].append(dontcare) 53 | batch['origin_im'].append(origin_im) 54 | i += 1 55 | except (StopIteration, AttributeError): 56 | indexes = np.arange(len(self.image_names), dtype=np.int) 57 | if self._shuffle: 58 | np.random.shuffle(indexes) 59 | self.gen = self.pool.imap(self._im_processor, 60 | ([self.image_names[i], self.get_annotation(i), self.dst_size] for i in indexes), 61 | chunksize=self.batch_size) 62 | self._epoch += 1 63 | print('epoch {} start...'.format(self._epoch)) 64 | batch['images'] = np.asarray(batch['images']) 65 | 66 | return batch 67 | 68 | def close(self): 69 | self.pool.terminate() 70 | self.pool.join() 71 | self.gen = None 72 | 73 | def load_dataset(self): 74 | raise NotImplementedError 75 | 76 | def evaluate_detections(self, all_boxes, output_dir=None): 77 | """ 78 | all_boxes is a list of length number-of-classes. 
79 | Each list element is a list of length number-of-images. 80 | Each of those list elements is either an empty list [] 81 | or a numpy array of detection. 82 | 83 | all_boxes[class][image] = [] or np.array of shape #dets x 5 84 | """ 85 | raise NotImplementedError 86 | 87 | def get_annotation(self, i): 88 | if self.annotations is None: 89 | return None 90 | return self.annotations[i] 91 | 92 | @property 93 | def name(self): 94 | return self._name 95 | 96 | @property 97 | def num_classes(self): 98 | return len(self._classes) 99 | 100 | @property 101 | def classes(self): 102 | return self._classes 103 | 104 | @property 105 | def image_names(self): 106 | return self._image_names 107 | 108 | @property 109 | def image_indexes(self): 110 | return self._image_indexes 111 | 112 | @property 113 | def annotations(self): 114 | return self._annotations 115 | 116 | @property 117 | def cache_path(self): 118 | cache_path = os.path.join(self._data_dir, 'cache') 119 | mkdir(cache_path) 120 | return cache_path 121 | 122 | @property 123 | def num_images(self): 124 | return len(self.image_names) 125 | 126 | @property 127 | def epoch(self): 128 | return self._epoch 129 | 130 | @epoch.setter 131 | def epoch(self, value): 132 | self._epoch = value 133 | 134 | @property 135 | def batch_size(self): 136 | return self._batch_size 137 | 138 | @property 139 | def batch_per_epoch(self): 140 | return self.num_images // self.batch_size 141 | 142 | 143 | -------------------------------------------------------------------------------- /demo/demo_images_list.txt: -------------------------------------------------------------------------------- 1 | demo/images/000040.jpg 2 | demo/images/000041.jpg 3 | demo/images/000042.jpg 4 | demo/images/000043.jpg 5 | demo/images/000044.jpg 6 | demo/images/000045.jpg 7 | demo/images/000046.jpg 8 | demo/images/000047.jpg 9 | demo/images/000048.jpg 10 | demo/images/000049.jpg 11 | demo/images/000050.jpg 12 | demo/images/000051.jpg 13 | demo/images/000052.jpg 14 | demo/images/000053.jpg 15 | demo/images/000054.jpg 16 | demo/images/000055.jpg 17 | demo/images/000056.jpg 18 | demo/images/000057.jpg 19 | demo/images/000058.jpg 20 | demo/images/000059.jpg 21 | demo/images/000060.jpg 22 | demo/images/000061.jpg 23 | demo/images/000062.jpg 24 | demo/images/000063.jpg 25 | demo/images/000064.jpg 26 | demo/images/000065.jpg 27 | demo/images/000066.jpg 28 | demo/images/000067.jpg 29 | demo/images/000068.jpg 30 | demo/images/000069.jpg 31 | demo/images/000070.jpg 32 | demo/images/000071.jpg 33 | demo/images/000072.jpg 34 | demo/images/000073.jpg 35 | demo/images/000074.jpg 36 | demo/images/000075.jpg 37 | demo/images/000076.jpg 38 | demo/images/000077.jpg 39 | demo/images/000078.jpg 40 | demo/images/000079.jpg 41 | -------------------------------------------------------------------------------- /demo/detection_0030.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/detection_0030.jpg -------------------------------------------------------------------------------- /demo/images/000040.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000040.jpg -------------------------------------------------------------------------------- /demo/images/000041.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000041.jpg -------------------------------------------------------------------------------- /demo/images/000042.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000042.jpg -------------------------------------------------------------------------------- /demo/images/000043.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000043.jpg -------------------------------------------------------------------------------- /demo/images/000044.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000044.jpg -------------------------------------------------------------------------------- /demo/images/000045.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000045.jpg -------------------------------------------------------------------------------- /demo/images/000046.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000046.jpg -------------------------------------------------------------------------------- /demo/images/000047.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000047.jpg -------------------------------------------------------------------------------- /demo/images/000048.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000048.jpg -------------------------------------------------------------------------------- /demo/images/000049.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000049.jpg -------------------------------------------------------------------------------- /demo/images/000050.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000050.jpg -------------------------------------------------------------------------------- /demo/images/000051.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000051.jpg -------------------------------------------------------------------------------- /demo/images/000052.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000052.jpg -------------------------------------------------------------------------------- /demo/images/000053.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000053.jpg -------------------------------------------------------------------------------- /demo/images/000054.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000054.jpg -------------------------------------------------------------------------------- /demo/images/000055.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000055.jpg -------------------------------------------------------------------------------- /demo/images/000056.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000056.jpg -------------------------------------------------------------------------------- /demo/images/000057.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000057.jpg -------------------------------------------------------------------------------- /demo/images/000058.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000058.jpg -------------------------------------------------------------------------------- /demo/images/000059.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000059.jpg -------------------------------------------------------------------------------- /demo/images/000060.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000060.jpg -------------------------------------------------------------------------------- /demo/images/000061.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000061.jpg -------------------------------------------------------------------------------- /demo/images/000062.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000062.jpg -------------------------------------------------------------------------------- /demo/images/000063.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000063.jpg 
-------------------------------------------------------------------------------- /demo/images/000064.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000064.jpg -------------------------------------------------------------------------------- /demo/images/000065.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000065.jpg -------------------------------------------------------------------------------- /demo/images/000066.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000066.jpg -------------------------------------------------------------------------------- /demo/images/000067.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000067.jpg -------------------------------------------------------------------------------- /demo/images/000068.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000068.jpg -------------------------------------------------------------------------------- /demo/images/000069.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000069.jpg -------------------------------------------------------------------------------- /demo/images/000070.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000070.jpg -------------------------------------------------------------------------------- /demo/images/000071.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000071.jpg -------------------------------------------------------------------------------- /demo/images/000072.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000072.jpg -------------------------------------------------------------------------------- /demo/images/000073.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000073.jpg -------------------------------------------------------------------------------- /demo/images/000074.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000074.jpg -------------------------------------------------------------------------------- /demo/images/000075.jpg: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000075.jpg
--------------------------------------------------------------------------------
/demo/images/000076.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000076.jpg
--------------------------------------------------------------------------------
/demo/images/000077.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000077.jpg
--------------------------------------------------------------------------------
/demo/images/000078.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000078.jpg
--------------------------------------------------------------------------------
/demo/images/000079.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000079.jpg
--------------------------------------------------------------------------------
/flow/detection_analyzer.py:
--------------------------------------------------------------------------------
1 | from yolo_detect import *
2 | 
3 | 
4 | def diff_detection(img1, img2, cfg, net, thresh):
5 |     bboxes_1, cls_inds_1, image_1, scores_1 = detect_image(cfg, img1, net, thresh)
6 |     bboxes_2, cls_inds_2, image_2, scores_2 = detect_image(cfg, img2, net, thresh)
7 |     is_cls_equal = False
8 |     if len(cls_inds_1) == len(cls_inds_2):
9 |         is_cls_equal = np.all(np.equal(cls_inds_1, cls_inds_2))
10 | 
11 |     if not is_cls_equal:
12 |         im2show = yolo_utils.draw_detection(image_1, bboxes_1, scores_1, cls_inds_1, cfg)
13 |         cv2.imshow('detection_1', im2show)
14 |         im2show = yolo_utils.draw_detection(image_2, bboxes_2, scores_2, cls_inds_2, cfg)
15 |         cv2.imshow('detection_2', im2show)
16 |         cv2.waitKey(0)
17 | 
18 |     return is_cls_equal
19 | 
20 | 
21 | def run():
22 |     net, cfg = init_network()
23 |     image_orig_paths = load_image_paths('/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt')
24 |     image_warp_paths = load_image_paths('/home/cory/project/yolo2-pytorch/flow/kitti_val_images_warp.txt')
25 |     n_img = len(image_orig_paths)
26 | 
27 |     thresh = 0.6
28 | 
29 |     for i in range(n_img - 1):
30 |         img_orig = image_orig_paths[i]
31 |         img_warp = image_warp_paths[i]
32 |         is_equal = diff_detection(img_orig, img_warp, cfg, net, thresh)
33 |         if not is_equal:
34 |             print('detection mismatch at frame {:d}'.format(i))
35 | 
36 | 
37 | if __name__ == '__main__':
38 |     run()
39 | 
--------------------------------------------------------------------------------
/flow/flow_generator.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | class FlowGenerator:
4 |     def __init__(self, input_images, output_dir, flow_warper):
5 |         self.imgs = input_images
6 |         self.output_dir = output_dir
7 |         self.warper = flow_warper
8 | 
9 |     def gen(self):
10 |         for i in range(len(self.imgs) - 1):  # stop one short: the last frame has no successor to pair with
11 |             img1 = self.imgs[i]
12 |             img2 = self.imgs[i + 1]
13 |             print(img1, img2)
14 |             img_w, flow_w = self.warper.warp(img1, img2)
15 |             # TODO: writing img_w / flow_w into self.output_dir is not implemented yet
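FlowGenerator above is only a skeleton: warp() comes from the injected warper object and nothing is written to output_dir yet. Below is a minimal usage sketch, assuming a concrete warper built on the dis_flow/warp_flow helpers from flow_util.py; the DisFlowWarper name and the image paths are illustrative, not part of the repo. It follows the convention used by gen_warp() in gen_flow_images.py further below: compute flow from the later frame back to the earlier one, then warp the earlier frame forward.

# Hypothetical wiring of FlowGenerator with a concrete warper (names and paths are illustrative).
# dis_flow() shells out to ./run_of.sh, so this must be run from the flow/ directory.
import cv2
from flow.flow_util import dis_flow, warp_flow
from flow.flow_generator import FlowGenerator


class DisFlowWarper:
    def warp(self, img_path1, img_path2):
        flow = dis_flow(img_path2, img_path1)            # flow(2 -> 1)
        warped = warp_flow(cv2.imread(img_path1), flow)  # warp frame 1 toward frame 2
        return warped, flow


paths = ['0001/000000.png', '0001/000001.png', '0001/000002.png']  # placeholder image paths
FlowGenerator(paths, output_dir='warp_out', flow_warper=DisFlowWarper()).gen()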
--------------------------------------------------------------------------------
/flow/flow_util.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import cv2
3 | import numpy as np
4 | from subprocess import check_output
5 | import sys
6 | import os
7 | 
8 | sys.path.append('/home/cory/project/spynet')
9 | os.environ['TERM'] = 'xterm-256color'
10 | # from spynet import Spynet
11 | 
12 | 
13 | def shift_filter(feature, flow):
14 |     # feature shape = (batch, filters, h, w)
15 |     shifted_feature = list()
16 |     for feat in feature:
17 |         # print(feat.shape)
18 |         for i in range(feat.shape[0]):
19 |             act2d = feat[i, ...]
20 |             act2d = act2d[:, :, np.newaxis]
21 |             res = warp_flow(act2d, flow)
22 |             shifted_feature.append(res)
23 | 
24 |             if False:  # flip to True for a one-off visual check of the shifted activation
25 |                 print('act2d', act2d.shape, sum(act2d.ravel()))
26 |                 print('flow', flow.shape, sum(flow.ravel()))
27 |                 plt.figure(11)
28 |                 plt.imshow(act2d[:, :, 0], cmap='gray')
29 |                 plt.figure(12)
30 |                 plt.imshow(flow[..., 0], cmap='gray')
31 |                 plt.figure(13)
32 |                 plt.imshow(flow[..., 1], cmap='gray')
33 |                 plt.figure(14)
34 |                 plt.imshow(res, cmap='gray')
35 |                 plt.show()
36 | 
37 | 
38 |     return np.asarray([shifted_feature])
39 | 
40 | 
41 | # spynet = Spynet()
42 | 
43 | 
44 | def spynet_flow(image_path1, image_path2):
45 |     import time
46 |     t1 = time.time()
47 |     flow = spynet.compute_flow(image_path1, image_path2)  # requires the Spynet import/instantiation above to be uncommented
48 |     t2 = time.time()
49 |     # print(t2 - t1)
50 |     flow = np.transpose(flow[0], (1, 2, 0))  # 2 x h x w --> h x w x 2
51 |     return flow
52 | 
53 | 
54 | def read_flo_file(filename):
55 |     with open(filename, 'rb') as f:
56 |         magic = np.fromfile(f, np.float32, count=1)
57 |         if 202021.25 != magic:
58 |             print('Magic number incorrect. Invalid .flo file')
59 |             return None
60 |         w = np.fromfile(f, np.int32, count=1)[0]
61 |         h = np.fromfile(f, np.int32, count=1)[0]
62 |         # print('Reading %d x %d flo file' % (w, h))
63 |         data = np.fromfile(f, np.float32, count=2 * w * h)
64 |         # Reshape data into 3D array (columns, rows, bands)
65 |         data2D = np.reshape(data, (h, w, 2))
66 |         return data2D
67 | 
68 | 
69 | def dis_flow(img_path1, img_path2):
70 |     out = check_output(['./run_of.sh ' + img_path1 + ' ' + img_path2], shell=True)
71 |     flow_val = read_flo_file('flow.flo')
72 |     return flow_val
73 |     # print(out)
74 | 
75 | 
76 | def flownet2_flow(img_path1, img_path2):  # NOTE: currently identical to dis_flow(); precomputed FlowNet2 .flo files are read elsewhere (see gen_warp_images_by_flow.py)
77 |     out = check_output(['./run_of.sh ' + img_path1 + ' ' + img_path2], shell=True)
78 |     flow_val = read_flo_file('flow.flo')
79 |     return flow_val
80 |     # print(out)
81 | 
82 | 
83 | def get_flow_for_filter(flow, feat_map_size):
84 |     filter_map_height = feat_map_size[0]
85 |     filter_map_width = feat_map_size[1]
86 |     flow_ratio_y = flow.shape[0] / filter_map_height
87 |     flow_ratio_x = flow.shape[1] / filter_map_width
88 |     flow_small = np.asarray([cv2.resize(src=flow[:, :, 0] / flow_ratio_x,  # channel 0 is the x displacement (see warp_flow), so scale by the width ratio
89 |                                         dsize=(filter_map_width, filter_map_height),
90 |                                         interpolation=cv2.INTER_CUBIC),
91 |                              cv2.resize(src=flow[:, :, 1] / flow_ratio_y,  # channel 1 is the y displacement, so scale by the height ratio
92 |                                         dsize=(filter_map_width, filter_map_height),
93 |                                         interpolation=cv2.INTER_CUBIC)])
94 |     flow_small = flow_small.transpose([1, 2, 0])
95 |     # print('flow_small.shape', flow_small.shape)
96 |     return flow_small
97 | 
98 | 
99 | def warp_flow(img, flow):
100 |     h, w = flow.shape[:2]
101 |     flow_map = flow.copy()
102 |     flow_map[:, :, 0] += np.arange(w)  # x sampling coordinates
103 |     flow_map[:, :, 1] += np.arange(h)[:, np.newaxis]  # y sampling coordinates
104 |     res = cv2.remap(src=img, map1=flow_map, map2=None, interpolation=cv2.INTER_LINEAR)
105 |     return res
106 | 
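A small self-check for warp_flow (a sketch, not repo code). It pins down the channel convention that get_flow_for_filter relies on: channel 0 of the flow field is the x displacement, because warp_flow offsets that channel by np.arange(w).

import numpy as np
from flow.flow_util import warp_flow

# A constant flow of +5 px in x makes remap sample from 5 columns to the
# right, i.e. image content shifts 5 px to the left.
img = np.zeros((8, 16, 1), np.float32)
img[:, 10] = 1.0                        # bright column at x = 10
flow = np.zeros((8, 16, 2), np.float32)
flow[..., 0] = 5.0                      # channel 0 = x displacement
warped = warp_flow(img, flow)
assert warped[4, 5] == 1.0              # the column is now at x = 5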
-------------------------------------------------------------------------------- /flow/flow_vis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from flow.plot_util import * 4 | from flow.flow_util import * 5 | 6 | pwd = '/home/cory/project/flownet2/output/' 7 | flos = sorted(os.listdir(pwd)) 8 | for flo in flos: 9 | ff = read_flo_file(pwd + flo) 10 | flow_hsv = draw_hsv(ff, ratio=4) 11 | cv2.imshow('flow', flow_hsv) 12 | cv2.waitKey(10) 13 | -------------------------------------------------------------------------------- /flow/flow_warper.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class FlowWarper: 4 | def __init__(self): 5 | pass 6 | 7 | def warp(self, img1, img2): 8 | pass -------------------------------------------------------------------------------- /flow/gen_flow_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | 5 | from flow import flow_util 6 | 7 | 8 | def find_tracklet_id(img_path): 9 | str_offset = img_path.rfind('/') 10 | tracklet_id = img_path[str_offset - 4: str_offset] 11 | return tracklet_id 12 | 13 | 14 | def gen_warp(img_path_0, img_path_1): 15 | # img_flow = flow_util.spynet_flow(img_path_1, img_path_0) 16 | img_flow = flow_util.dis_flow(img_path_1, img_path_0) 17 | img_0 = cv2.imread(img_path_0) 18 | img_warp = flow_util.warp_flow(img_0, img_flow / 2) 19 | return img_warp 20 | 21 | 22 | def parse_label_file(label_file_path): 23 | label_file = open(label_file_path) 24 | vlist = list() 25 | for l in label_file.readlines(): 26 | v = l.split(' ')[0:5] 27 | v[1:5] = list(map(float, v[1:5])) 28 | if v[1] < 50 or v[2] < 50 or v[3] > 1200 or v[4] > 350: 29 | v[0] = 'DontCare' 30 | vlist.append(v) 31 | # print(v) 32 | return vlist 33 | 34 | 35 | def write_to_file(labels, filename): 36 | curr_label = parse_label_file(labels) 37 | new_label_file = open(filename, 'w') 38 | for v in curr_label: 39 | line = ' '.join([str(x) for x in v]) 40 | # print(line) 41 | new_label_file.write(line + '\n') 42 | 43 | 44 | def gen_images(gen_label_only=False): 45 | 46 | img_files = open('/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt') 47 | image_abs_paths = img_files.readlines() 48 | image_abs_paths = [f.strip() for f in image_abs_paths] 49 | 50 | label_files = open('/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt') 51 | label_abs_paths = label_files.readlines() 52 | label_abs_paths = [f.strip() for f in label_abs_paths] 53 | 54 | out_img_dir = 'images_flow_warp_half' 55 | if not os.path.exists(out_img_dir): 56 | os.mkdir(out_img_dir) 57 | 58 | img_pairs = open('img_pairs.txt', 'w') 59 | 60 | for i in range(0, len(image_abs_paths)): 61 | curr_img_path = image_abs_paths[i] 62 | prev_img_path = image_abs_paths[i - 1] 63 | 64 | curr_tracklet_id = find_tracklet_id(curr_img_path) 65 | prev_tracklet_id = find_tracklet_id(prev_img_path) 66 | 67 | print(i, curr_img_path, curr_tracklet_id) 68 | 69 | tracklet_out_path = out_img_dir + '/' + curr_tracklet_id 70 | if not os.path.exists(tracklet_out_path): 71 | os.mkdir(tracklet_out_path) 72 | 73 | if curr_tracklet_id != prev_tracklet_id: 74 | prev_img_path = curr_img_path 75 | 76 | if not gen_label_only: 77 | # w(0 -> 1) = frame(0) * flow(1 -> 0) 78 | w01 = gen_warp(prev_img_path, curr_img_path) 79 | out_path = curr_img_path.replace('.png', '') 80 | out_path = tracklet_out_path + '/' + 
out_path[out_path.rfind('/') + 1:] 81 | w01_path = out_path + '_w01.png' 82 | 83 | write_to_file(label_abs_paths[i], out_path + '_w01_label.txt') 84 | 85 | # for flownet 2.0 86 | flo_id = out_path[out_path.find('/') + 1:].replace('/', '_') 87 | img_pairs.write(curr_img_path + ' ' + prev_img_path + ' ../output/' + flo_id + '.flo\n') 88 | 89 | out_path = prev_img_path.replace('.png', '') 90 | out_path = tracklet_out_path + '/' + out_path[out_path.rfind('/') + 1:] 91 | w10_path = out_path + '_w10.png' 92 | write_to_file(label_abs_paths[i], out_path + '_w10_label.txt') 93 | 94 | if not gen_label_only: 95 | # w(1 -> 0) = frame(1) * flow(0 -> 1) 96 | # w10 = gen_warp(curr_img_path, prev_img_path) 97 | cv2.imshow('w01', w01) 98 | # cv2.imshow('w10', w10) 99 | 100 | os.makedirs(w01_path[0: w01_path.rfind('/')], exist_ok=True) 101 | cv2.imwrite(w01_path, w01) 102 | # cv2.imwrite(w10_path, w10) 103 | cv2.waitKey(30) 104 | 105 | 106 | if __name__ == '__main__': 107 | gen_images() 108 | -------------------------------------------------------------------------------- /flow/gen_flow_images_cloudai.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | 5 | from flow import flow_util 6 | 7 | 8 | def find_tracklet_id(img_path): 9 | str_offset = img_path.rfind('/') 10 | tracklet_id = img_path[str_offset - 4: str_offset] 11 | return tracklet_id 12 | 13 | 14 | def gen_warp(img_path_0, img_path_1): 15 | # img_flow = flow_util.spynet_flow(img_path_1, img_path_0) 16 | img_flow = flow_util.dis_flow(img_path_1, img_path_0) 17 | img_0 = cv2.imread(img_path_0) 18 | img_warp = flow_util.warp_flow(img_0, img_flow / 2) 19 | return img_warp 20 | 21 | 22 | def parse_label_file(label_file_path): 23 | label_file = open(label_file_path) 24 | vlist = list() 25 | for l in label_file.readlines(): 26 | v = l.split(' ')[0:5] 27 | v[1:5] = list(map(float, v[1:5])) 28 | if v[1] < 50 or v[2] < 50 or v[3] > 1200 or v[4] > 350: 29 | v[0] = 'DontCare' 30 | vlist.append(v) 31 | # print(v) 32 | return vlist 33 | 34 | 35 | def write_to_file(labels, filename): 36 | curr_label = parse_label_file(labels) 37 | new_label_file = open(filename, 'w') 38 | for v in curr_label: 39 | line = ' '.join([str(x) for x in v]) 40 | # print(line) 41 | new_label_file.write(line + '\n') 42 | 43 | 44 | def gen_images(gen_label_only=False): 45 | 46 | img_files = open('/home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_train_images.txt') 47 | image_abs_paths = img_files.readlines() 48 | image_abs_paths = [f.strip() for f in image_abs_paths] 49 | 50 | label_files = open('/home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_train_labels.txt') 51 | label_abs_paths = label_files.readlines() 52 | label_abs_paths = [f.strip() for f in label_abs_paths] 53 | 54 | out_img_dir = 'images_flow_warp_crowdai' 55 | if not os.path.exists(out_img_dir): 56 | os.mkdir(out_img_dir) 57 | 58 | for i in range(0, len(image_abs_paths)): 59 | curr_img_path = image_abs_paths[i] 60 | prev_img_path = image_abs_paths[i - 1] 61 | 62 | w01 = gen_warp(prev_img_path, curr_img_path) 63 | 64 | cv2.imshow('w01', w01) 65 | cv2.waitKey(1) 66 | 67 | 68 | if __name__ == '__main__': 69 | gen_images() 70 | -------------------------------------------------------------------------------- /flow/gen_flow_images_detrac.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | 5 | from flow import flow_util 6 | 7 | 8 | def find_tracklet_id(img_path): 9 | str_offset 
= img_path.rfind('/') 10 | tracklet_id = img_path[str_offset - 5: str_offset] 11 | return tracklet_id 12 | 13 | 14 | def gen_warp(img_path_0, img_path_1): 15 | # img_flow = flow_util.spynet_flow(img_path_1, img_path_0) 16 | img_flow = flow_util.dis_flow(img_path_1, img_path_0) 17 | img_0 = cv2.imread(img_path_0) 18 | img_warp = flow_util.warp_flow(img_0, img_flow) 19 | return img_warp 20 | 21 | 22 | def parse_label_file(label_file_path): 23 | label_file = open(label_file_path) 24 | vlist = list() 25 | for l in label_file.readlines(): 26 | v = l.split(' ')[0:5] 27 | if len(v) <= 1: 28 | continue 29 | v[1:5] = list(map(float, v[1:5])) 30 | if v[1] < 50 or v[2] < 50 or v[3] > 900 or v[4] > 500: 31 | v[0] = 'DontCare' 32 | vlist.append(v) 33 | # print(v) 34 | return vlist 35 | 36 | 37 | def write_to_file(labels, filename): 38 | curr_label = parse_label_file(labels) 39 | new_label_file = open(filename, 'w') 40 | for v in curr_label: 41 | line = ' '.join([str(x) for x in v]) 42 | # print(line) 43 | new_label_file.write(line + '\n') 44 | 45 | 46 | def gen_images(gen_label_only=False): 47 | 48 | img_files = open('/home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_images.txt') 49 | image_abs_paths = img_files.readlines() 50 | image_abs_paths = [f.strip() for f in image_abs_paths] 51 | 52 | label_files = open('/home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_labels.txt') 53 | label_abs_paths = label_files.readlines() 54 | label_abs_paths = [f.strip() for f in label_abs_paths] 55 | 56 | out_img_dir = 'images_flow_warp_detrac' 57 | if not os.path.exists(out_img_dir): 58 | os.mkdir(out_img_dir) 59 | 60 | for i in range(0, len(image_abs_paths)): 61 | curr_img_path = image_abs_paths[i] 62 | prev_img_path = image_abs_paths[i - 1] 63 | 64 | curr_tracklet_id = find_tracklet_id(curr_img_path) 65 | prev_tracklet_id = find_tracklet_id(prev_img_path) 66 | 67 | print(i, curr_img_path, curr_tracklet_id) 68 | 69 | tracklet_out_path = out_img_dir + '/' + curr_tracklet_id 70 | if not os.path.exists(tracklet_out_path): 71 | os.mkdir(tracklet_out_path) 72 | 73 | if curr_tracklet_id != prev_tracklet_id: 74 | prev_img_path = curr_img_path 75 | 76 | if not gen_label_only: 77 | # w(0 -> 1) = frame(0) * flow(1 -> 0) 78 | w01 = gen_warp(prev_img_path, curr_img_path) 79 | out_path = curr_img_path.replace('.png', '') 80 | out_path = tracklet_out_path + '/' + out_path[out_path.rfind('/') + 1:] 81 | w01_path = out_path + '_w01.png' 82 | 83 | write_to_file(label_abs_paths[i], out_path + '_w01_label.txt') 84 | 85 | out_path = prev_img_path.replace('.png', '') 86 | out_path = tracklet_out_path + '/' + out_path[out_path.rfind('/') + 1:] 87 | w10_path = out_path + '_w10.png' 88 | write_to_file(label_abs_paths[i], out_path + '_w10_label.txt') 89 | 90 | if not gen_label_only: 91 | # w(1 -> 0) = frame(1) * flow(0 -> 1) 92 | # w10 = gen_warp(curr_img_path, prev_img_path) 93 | cv2.imshow('w01', w01) 94 | # cv2.imshow('w10', w10) 95 | 96 | os.makedirs(w01_path[0: w01_path.rfind('/')], exist_ok=True) 97 | cv2.imwrite(w01_path, w01) 98 | # cv2.imwrite(w10_path, w10) 99 | cv2.waitKey(1) 100 | 101 | 102 | if __name__ == '__main__': 103 | gen_images() 104 | -------------------------------------------------------------------------------- /flow/gen_val_from_all.sh: -------------------------------------------------------------------------------- 1 | realpath images_flow_warp/0001/*.png 2 | realpath images_flow_warp/0005/*.png 3 | realpath images_flow_warp/0013/*.png 4 | realpath images_flow_warp/0017/*.png 5 | 
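gen_val_from_all.sh only prints the absolute paths of the four held-out warp tracklets, so its output has to be captured into a list file to be useful, e.g. `bash gen_val_from_all.sh > kitti_val_images_warp.txt` run from the flow/ directory. The redirect target here is an assumption, inferred from the kitti_val_images_warp.txt list that detection_analyzer.py reads.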
--------------------------------------------------------------------------------
/flow/gen_warp_images_by_flow.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from flow.plot_util import *
4 | from flow.flow_util import *
5 | from flow.gen_flow_images import find_tracklet_id
6 | 
7 | 
8 | def gen_images():
9 | 
10 |     out_img_dir = 'images_flow_warp_flownet2'
11 |     if not os.path.exists(out_img_dir):
12 |         os.mkdir(out_img_dir)
13 | 
14 |     img_files = open('/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt')
15 |     image_abs_paths = img_files.readlines()
16 |     image_abs_paths = [f.strip() for f in image_abs_paths]
17 | 
18 |     pwd = '/home/cory/project/flownet2/output/'
19 |     flo_list = list()
20 |     for flo in sorted(os.listdir(pwd)):
21 |         ff = pwd + flo
22 |         flo_list.append(ff)
23 | 
24 |     for i in range(0, len(image_abs_paths)):
25 |         curr_img_path = image_abs_paths[i]
26 |         prev_img_path = image_abs_paths[i - 1]
27 | 
28 |         curr_tracklet_id = find_tracklet_id(curr_img_path)
29 |         prev_tracklet_id = find_tracklet_id(prev_img_path)
30 | 
31 |         print(i, curr_img_path, curr_tracklet_id)
32 | 
33 |         tracklet_out_path = out_img_dir + '/' + curr_tracklet_id
34 |         if not os.path.exists(tracklet_out_path):
35 |             os.mkdir(tracklet_out_path)
36 | 
37 |         if curr_tracklet_id != prev_tracklet_id:
38 |             prev_img_path = curr_img_path
39 | 
40 |         # w(0 -> 1) = frame(0) * flow(1 -> 0)
41 |         print(flo_list[i])
42 |         flo = read_flo_file(flo_list[i])
43 |         flow_hsv = draw_hsv(flo, ratio=2)
44 |         cv2.imshow('flow', flow_hsv)
45 | 
46 |         w01 = warp_flow(cv2.imread(prev_img_path), flo)
47 |         out_path = curr_img_path.replace('.png', '')
48 |         out_path = tracklet_out_path + '/' + out_path[out_path.rfind('/') + 1:]
49 |         w01_path = out_path + '_w01.png'
50 |         cv2.imshow('w01', w01)
51 | 
52 |         cv2.imwrite(out_path + '_flow.png', flow_hsv)
53 |         cv2.imwrite(w01_path, w01)
54 | 
55 |         cv2.waitKey(30)
56 | 
57 | 
58 | if __name__ == '__main__':
59 |     gen_images()
60 | 
--------------------------------------------------------------------------------
/flow/img_diff.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import random
3 | 
4 | 
5 | def diff(img1, img2, window_name=''):
6 |     df = cv2.absdiff(img1, img2)  # absdiff avoids the uint8 wraparound a raw img1 - img2 would produce
7 |     cv2.imshow(window_name, df)
8 |     cv2.imwrite(window_name + '.jpg', df)
9 | 
10 | 
11 | def main():
12 |     img0 = cv2.imread('/media/cory/c_disk/Project/KITTI_Dataset/data_tracking_image_2/training/image_02/0003/000035.png')
13 |     img1 = cv2.imread('/home/cory/project/yolo2-pytorch/flow/images_flow_warp/0003/000035_w01.png')
14 |     img2 = cv2.imread('/home/cory/project/yolo2-pytorch/flow/images_flow_warp/0003/000035_w10.png')
15 |     cv2.imshow('0', img0)
16 |     cv2.imshow('1', img1)
17 |     cv2.imshow('2', img2)
18 |     diff(img1, img0, '1-0')
19 |     diff(img2, img0, '2-0')
20 |     diff(img1, img2, '1-2')
21 |     cv2.waitKey(0)
22 | 
23 | 
24 | if __name__ == '__main__':
25 |     main()
26 | 
--------------------------------------------------------------------------------
/flow/plot_util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import cv2
4 | import math
5 | 
6 | 
7 | def imshow_fig(img, title='', **kwargs):
8 |     h = img.shape[0]
9 |     w = img.shape[1]
10 |     dpi = 96
11 |     fig = plt.figure(num=0, figsize=(w / dpi, h / dpi))
12 |     fig.add_axes([0., 0., 1., 1.])
13 |     fig.canvas.set_window_title(title)
14 |     plt.imshow(img, **kwargs)
15 |     plt.axis('off')
16 | 
return fig 17 | 18 | 19 | def plot_feature_map(features, border=2, resize_ratio=2): 20 | num_channel = features.shape[1] 21 | feat_h = features.shape[2] 22 | feat_w = features.shape[3] 23 | map_border_num = int(math.ceil(math.sqrt(num_channel))) 24 | map_h = (feat_h + border) * map_border_num 25 | map_w = (feat_w + border) * map_border_num 26 | # print('create act map {:d} x {:d}'.format(map_h, map_w)) 27 | feature_map_all = np.zeros((map_h, map_w)) 28 | 29 | # print(features.shape) 30 | all_sum = 0 31 | idx = 0 32 | max_val = np.max(features.ravel()) 33 | for i_y in range(0, map_h, feat_h+border): 34 | for i_x in range(0, map_w, feat_w+border): 35 | if idx >= num_channel: 36 | break 37 | act = features[0, idx, :, :] 38 | idx += 1 39 | if border != 0: 40 | act_pad = np.lib.pad(array=act, 41 | pad_width=((0, border), (0, border)), 42 | mode='constant', 43 | constant_values=max_val/6) 44 | else: 45 | act_pad = act 46 | feature_map_all[i_y: i_y + feat_h + border, i_x: i_x + feat_w + border] = act_pad 47 | act_sum = sum(act.ravel()) 48 | all_sum += act_sum 49 | # print('filter-{:d} act_sum={:f}'.format(idx, act_sum)) 50 | 51 | # print('all_sum = {:f}'.format(all_sum)) 52 | # min max normalization 53 | feature_map_all /= feature_map_all.max() 54 | feature_map_all = cv2.resize(feature_map_all, (feature_map_all.shape[1] * resize_ratio, 55 | feature_map_all.shape[0] * resize_ratio)) 56 | return feature_map_all 57 | 58 | 59 | def draw_hsv(flow, ratio=4): 60 | h, w = flow.shape[:2] 61 | fx, fy = flow[:, :, 0], flow[:, :, 1] 62 | ang = np.arctan2(fy, fx) + np.pi 63 | v = np.sqrt(fx * fx + fy * fy) 64 | hsv = np.zeros((h, w, 3), np.uint8) 65 | hsv[..., 0] = ang * (180 / np.pi / 2) 66 | hsv[..., 1] = v * ratio 67 | hsv[..., 2] = 255 68 | bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) 69 | return bgr 70 | -------------------------------------------------------------------------------- /flow/run_flow.sh: -------------------------------------------------------------------------------- 1 | cd /home/cory/project/spynet 2 | th run_flow.lua $1 $2 && cp ./flow.npy ~/yolo2-pytorch 3 | -------------------------------------------------------------------------------- /flow/run_of.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo $1 $2 3 | ~/project/OF_DIS/run_OF_RGB $1 $2 flow.flo 4 | -------------------------------------------------------------------------------- /flow/shift_gt_by_flow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from flow.flow_util import dis_flow 3 | from flow.gen_flow_images import parse_label_file, write_to_file 4 | from misc.visualize_gt import plot_vis 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def flow_avg_in_rectangle(flow, pos): 9 | pos = list(map(int, pos)) 10 | crop = flow[pos[1]: pos[3], pos[0]: pos[2]] 11 | avg_x = np.average(crop[:, :, 1]) 12 | avg_y = np.average(crop[:, :, 0]) 13 | return avg_x, avg_y 14 | 15 | 16 | def flow_std_in_rectangle(flow, pos): 17 | pos = list(map(int, pos)) 18 | crop = flow[pos[1]: pos[3], pos[0]: pos[2]] 19 | std_x = np.std(crop[:, :, 1]) 20 | std_y = np.std(crop[:, :, 0]) 21 | return std_x, std_y 22 | 23 | 24 | def gt_save_to_file(gt, filepath): 25 | print(filepath) 26 | out_file = open(filepath, 'w') 27 | for g in gt: 28 | gs = [str(int(x)) for x in g[1: 5]] 29 | wline = g[0] + ' ' + ' '.join(gs) + ' 0 0 0\n' 30 | out_file.write(wline) 31 | 32 | 33 | def shift_gt_by_flow(): 34 | img_list_filename = 'w01_images.txt' 35 
| # gt_list_filename = 'w01_center_labels.txt' 36 | gt_list_filename = 'kitti_train_labels.txt' 37 | img_list_file = open(img_list_filename) 38 | gt_list_file = open(gt_list_filename) 39 | 40 | img_paths = [f.strip() for f in img_list_file.readlines()] 41 | gt_paths = [f.strip() for f in gt_list_file.readlines()] 42 | 43 | total_num = len(img_paths) 44 | print(total_num) 45 | 46 | pt_x = list() 47 | pt_y = list() 48 | for i in range(total_num - 1): 49 | img_file = open(img_paths[i]) 50 | out_gt_filepath = gt_paths[i].replace('.txt', '_shift.txt') 51 | gts = parse_label_file(gt_paths[i]) 52 | # print(gts) 53 | print(i) 54 | 55 | flow = dis_flow(img_paths[i + 1], img_paths[i]) 56 | for gt in gts: 57 | std_flow = flow_std_in_rectangle(flow, gt[1:5]) 58 | pt_x.append(std_flow[0]) 59 | pt_y.append(std_flow[1]) 60 | if abs(std_flow[0]) > 2 or abs(std_flow[1]) > 5: 61 | print(gt[0], std_flow[0], std_flow[1]) 62 | gt[0] = 'DontCare' 63 | print(gt) 64 | 65 | # r = plot_vis(img_paths[i], gts) 66 | gt_save_to_file(gts, out_gt_filepath) 67 | 68 | plt.plot(pt_x, pt_y, '*') 69 | plt.show() 70 | 71 | if __name__ == '__main__': 72 | shift_gt_by_flow() 73 | -------------------------------------------------------------------------------- /flow/vis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/flow/vis.jpg -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/__init__.py -------------------------------------------------------------------------------- /layers/reorg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/reorg/__init__.py -------------------------------------------------------------------------------- /layers/reorg/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/reorg/_ext/__init__.py -------------------------------------------------------------------------------- /layers/reorg/_ext/reorg_layer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._reorg_layer import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /layers/reorg/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/reorg_cpu.c'] 7 | headers = ['src/reorg_cpu.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/reorg_cuda.c'] 14 | headers += ['src/reorg_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = 
os.path.dirname(os.path.realpath(__file__)) 19 | # print(this_file) 20 | extra_objects = ['src/reorg_cuda_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.reorg_layer', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /layers/reorg/reorg_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ._ext import reorg_layer 4 | 5 | 6 | class ReorgFunction(Function): 7 | def __init__(self, stride=2): 8 | super().__init__() 9 | self.stride = stride 10 | 11 | def forward(self, x): 12 | stride = self.stride 13 | 14 | bsize, c, h, w = x.size() 15 | out_w, out_h, out_c = int(w / stride), int(h / stride), c * (stride * stride) 16 | out = torch.FloatTensor(bsize, out_c, out_h, out_w) 17 | 18 | if x.is_cuda: 19 | out = out.cuda() 20 | reorg_layer.reorg_cuda(x, out_w, out_h, out_c, bsize, stride, 0, out) 21 | else: 22 | reorg_layer.reorg_cpu(x, out_w, out_h, out_c, bsize, stride, 0, out) 23 | 24 | return out 25 | 26 | def backward(self, grad_top): 27 | stride = self.stride 28 | bsize, c, h, w = grad_top.size() 29 | 30 | out_w, out_h, out_c = w * stride, h * stride, c // (stride * stride) 31 | grad_bottom = torch.FloatTensor(bsize, out_c, out_h, out_w) 32 | 33 | # rev_stride = 1. / stride # reverse 34 | if grad_top.is_cuda: 35 | grad_bottom = grad_bottom.cuda() 36 | reorg_layer.reorg_cuda(grad_top, w, h, c, bsize, stride, 1, grad_bottom) 37 | else: 38 | reorg_layer.reorg_cpu(grad_top, w, h, c, bsize, stride, 1, grad_bottom) 39 | 40 | return grad_bottom 41 | 42 | 43 | class ReorgLayer(torch.nn.Module): 44 | def __init__(self, stride): 45 | super(ReorgLayer, self).__init__() 46 | self.stride = stride 47 | 48 | def forward(self, x): 49 | x = ReorgFunction(self.stride)(x) 50 | return x 51 | -------------------------------------------------------------------------------- /layers/reorg/src/reorg_cpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int reorg_cpu(THFloatTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THFloatTensor *out_tensor) 4 | { 5 | // Grab the tensor 6 | float * x = THFloatTensor_data(x_tensor); 7 | float * out = THFloatTensor_data(out_tensor); 8 | 9 | // https://github.com/pjreddie/darknet/blob/master/src/blas.c 10 | int b,i,j,k; 11 | int out_c = c/(stride*stride); 12 | 13 | for(b = 0; b < batch; ++b){ 14 | for(k = 0; k < c; ++k){ 15 | for(j = 0; j < h; ++j){ 16 | for(i = 0; i < w; ++i){ 17 | int in_index = i + w*(j + h*(k + c*b)); 18 | int c2 = k % out_c; 19 | int offset = k / out_c; 20 | int w2 = i*stride + offset % stride; 21 | int h2 = j*stride + offset / stride; 22 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 23 | if(forward) out[out_index] = x[in_index]; 24 | else out[in_index] = x[out_index]; 25 | } 26 | } 27 | } 28 | } 29 | 30 | return 1; 31 | } -------------------------------------------------------------------------------- /layers/reorg/src/reorg_cpu.h: -------------------------------------------------------------------------------- 1 | int reorg_cpu(THFloatTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, 
THFloatTensor *out_tensor); -------------------------------------------------------------------------------- /layers/reorg/src/reorg_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "reorg_cuda_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | int reorg_cuda(THCudaTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THCudaTensor *out_tensor) 7 | { 8 | float * x = THCudaTensor_data(state, x_tensor); 9 | float * out = THCudaTensor_data(state, out_tensor); 10 | 11 | cudaStream_t stream = THCState_getCurrentStream(state); 12 | reorg_ongpu(x, w, h, c, batch, stride, forward, out, stream); 13 | 14 | return 1; 15 | } -------------------------------------------------------------------------------- /layers/reorg/src/reorg_cuda.h: -------------------------------------------------------------------------------- 1 | int reorg_cuda(THCudaTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THCudaTensor *out_tensor); -------------------------------------------------------------------------------- /layers/reorg/src/reorg_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include 6 | #include 7 | #include 8 | #include "reorg_cuda_kernel.h" 9 | 10 | #define BLOCK 512 11 | 12 | dim3 cuda_gridsize(int n) 13 | { 14 | int k = (n-1) / BLOCK + 1; 15 | int x = k; 16 | int y = 1; 17 | if(x > 65535){ 18 | x = ceil(sqrt(k)); 19 | y = (n-1)/(x*BLOCK) + 1; 20 | } 21 | dim3 d(x, y, 1); 22 | //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 23 | return d; 24 | } 25 | 26 | __global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) 27 | { 28 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 29 | if(i >= N) return; 30 | int in_index = i; 31 | int in_w = i%w; 32 | i = i/w; 33 | int in_h = i%h; 34 | i = i/h; 35 | int in_c = i%c; 36 | i = i/c; 37 | int b = i%batch; 38 | 39 | int out_c = c/(stride*stride); 40 | 41 | int c2 = in_c % out_c; 42 | int offset = in_c / out_c; 43 | int w2 = in_w*stride + offset % stride; 44 | int h2 = in_h*stride + offset / stride; 45 | //printf("%d\n", offset); 46 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 47 | 48 | // printf("%d %d %d\n", w2, h2, c2); 49 | //printf("%d %d\n", in_index, out_index); 50 | //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); 51 | 52 | if(forward) out[out_index] = x[in_index]; 53 | else out[in_index] = x[out_index]; 54 | //if(forward) out[1] = x[1]; 55 | //else out[0] = x[0]; 56 | } 57 | 58 | void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out, cudaStream_t stream) 59 | { 60 | int size = w*h*c*batch; 61 | cudaError_t err; 62 | 63 | reorg_kernel<<>>(size, x, w, h, c, batch, stride, forward, out); 64 | 65 | err = cudaGetLastError(); 66 | if(cudaSuccess != err) 67 | { 68 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 69 | exit( -1 ); 70 | } 71 | } 72 | 73 | 74 | 75 | #ifdef __cplusplus 76 | } 77 | #endif 78 | -------------------------------------------------------------------------------- /layers/reorg/src/reorg_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _REORG_CUDA_KERNEL 2 | #define _REORG_CUDA_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void reorg_ongpu(float *x, int w, int h, int 
c, int batch, int stride, int forward, float *out, cudaStream_t stream); 9 | 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /layers/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/roi_pooling/__init__.py -------------------------------------------------------------------------------- /layers/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /layers/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /layers/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/roi_pooling.c'] 7 | headers = ['src/roi_pooling.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/roi_pooling_cuda.c'] 14 | headers += ['src/roi_pooling_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/roi_pooling_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.roi_pooling', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /layers/roi_pooling/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from _ext import roi_pooling 4 | 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.output = None 12 | self.argmax = None 13 | self.rois = None 14 | self.feature_size = None 15 | 16 | def forward(self, features, rois): 17 | batch_size, num_channels, data_height, data_width = features.size() 18 | num_rois = rois.size()[0] 19 | output = torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width) 20 | argmax = torch.IntTensor(num_rois, num_channels, self.pooled_height, self.pooled_width).zero_() 21 | 22 | if not features.is_cuda: 23 | _features = features.permute(0, 2, 3, 1) 24 | 
roi_pooling.roi_pooling_forward(self.pooled_height, self.pooled_width, self.spatial_scale, 25 | _features, rois, output) 26 | # output = output.cuda() 27 | else: 28 | output = output.cuda() 29 | argmax = argmax.cuda() 30 | roi_pooling.roi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 31 | features, rois, output, argmax) 32 | self.output = output 33 | self.argmax = argmax 34 | self.rois = rois 35 | self.feature_size = features.size() 36 | 37 | return output 38 | 39 | def backward(self, grad_output): 40 | assert(self.feature_size is not None and grad_output.is_cuda) 41 | 42 | batch_size, num_channels, data_height, data_width = self.feature_size 43 | 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda() 45 | roi_pooling.roi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 46 | grad_output, self.rois, grad_input, self.argmax) 47 | 48 | # print grad_input 49 | 50 | return grad_input, None 51 | 52 | 53 | class RoIPool(torch.nn.Module): 54 | def __init__(self, pooled_height, pooled_width, spatial_scale): 55 | super(RoIPool, self).__init__() 56 | 57 | self.pooled_width = int(pooled_width) 58 | self.pooled_height = int(pooled_height) 59 | self.spatial_scale = float(spatial_scale) 60 | 61 | def forward(self, features, rois): 62 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 63 | -------------------------------------------------------------------------------- /layers/roi_pooling/roi_pool_py.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | 7 | class RoIPool(nn.Module): 8 | def __init__(self, pooled_height, pooled_width, spatial_scale): 9 | super(RoIPool, self).__init__() 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | batch_size, num_channels, data_height, data_width = features.size() 16 | num_rois = rois.size()[0] 17 | outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda() 18 | 19 | for roi_ind, roi in enumerate(rois): 20 | batch_ind = int(roi[0].data[0]) 21 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round( 22 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int) 23 | roi_width = max(roi_end_w - roi_start_w + 1, 1) 24 | roi_height = max(roi_end_h - roi_start_h + 1, 1) 25 | bin_size_w = float(roi_width) / float(self.pooled_width) 26 | bin_size_h = float(roi_height) / float(self.pooled_height) 27 | 28 | for ph in range(self.pooled_height): 29 | hstart = int(np.floor(ph * bin_size_h)) 30 | hend = int(np.ceil((ph + 1) * bin_size_h)) 31 | hstart = min(data_height, max(0, hstart + roi_start_h)) 32 | hend = min(data_height, max(0, hend + roi_start_h)) 33 | for pw in range(self.pooled_width): 34 | wstart = int(np.floor(pw * bin_size_w)) 35 | wend = int(np.ceil((pw + 1) * bin_size_w)) 36 | wstart = min(data_width, max(0, wstart + roi_start_w)) 37 | wend = min(data_width, max(0, wend + roi_start_w)) 38 | 39 | is_empty = (hend <= hstart) or(wend <= wstart) 40 | if is_empty: 41 | outputs[roi_ind, :, ph, pw] = 0 42 | else: 43 | data = features[batch_ind] 44 | outputs[roi_ind, :, ph, pw] = torch.max( 45 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1) 46 | 47 | return outputs 48 | 49 | 
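A usage sketch for the pure-Python RoIPool above (illustrative values; the import path is assumed from the repo layout). It uses the old torch.autograd.Variable API to match this file, and needs a CUDA device because the module allocates its output with .cuda(). Each ROI row is [batch_index, x1, y1, x2, y2] in input-image coordinates; spatial_scale maps those coordinates onto the feature map.

import torch
from torch.autograd import Variable
from layers.roi_pooling.roi_pool_py import RoIPool  # assumed import path

features = Variable(torch.randn(1, 4, 32, 32)).cuda()              # 1 image, 4 channels, 32x32 feature map
rois = Variable(torch.FloatTensor([[0, 16, 16, 144, 144]])).cuda() # [batch_idx, x1, y1, x2, y2]
pool = RoIPool(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 8)
out = pool(features, rois)   # the 128x128 image box becomes a 17x17 feature region, max-pooled to 7x7
print(out.size())            # torch.Size([1, 4, 7, 7])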
-------------------------------------------------------------------------------- /layers/roi_pooling/src/cuda/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /layers/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /layers/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /layers/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = 
THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | int batch_size = THCudaTensor_size(state, features, 0); 27 | if (batch_size != 1) 28 | { 29 | return 0; 30 | } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | if (batch_size != 1) 70 | { 71 | return 0; 72 | } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } -------------------------------------------------------------------------------- /layers/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #PYTHON=python 4 | PYTHON=python3 5 | 6 | NVCC=nvcc 7 | 8 | cd utils 9 | ${PYTHON} build.py build_ext --inplace 10 | cd ../ 11 | 12 | cd layers/reorg/src 13 | echo "Compiling reorg layer kernels by nvcc..." 
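# Flag notes: -x cu compiles the file as CUDA source, -Xcompiler -fPIC passes -fPIC to the
# host compiler so the objects can link into a Python extension, and -arch=sm_61 targets
# Pascal GPUs (e.g. GTX 10xx) -- adjust it to match your hardware.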
14 | ${NVCC} -c -o reorg_cuda_kernel.cu.o reorg_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 15 | cd ../ 16 | ${PYTHON} build.py 17 | cd ../ 18 | 19 | cd roi_pooling/src/cuda 20 | echo "Compiling roi_pooling kernels by nvcc..." 21 | ${NVCC} -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 22 | cd ../../ 23 | ${PYTHON} build.py 24 | cd ../ 25 | -------------------------------------------------------------------------------- /misc/kitti_detect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 5 | os.environ['DATASET'] = 'kitti' 6 | 7 | from cfgs.config_v2 import load_cfg_yamls 8 | import utils.network as net_utils 9 | import utils.yolo_v2 as yolo_utils 10 | from darknet_v3 import Darknet19 11 | from flow.flow_util import * 12 | from utils.timer import Timer 13 | 14 | dataset_yaml = '/home/cory/project/yolo2-pytorch/cfgs/config_kitti.yaml' 15 | exp_yaml = '/home/cory/project/yolo2-pytorch/cfgs/exps/kitti/kitti_baseline_v3.yaml' 16 | gpu_id = 0 17 | 18 | cfg = load_cfg_yamls([dataset_yaml, exp_yaml]) 19 | 20 | 21 | def preprocess(filename): 22 | image = cv2.imread(filename) 23 | im_data = np.expand_dims(yolo_utils.preprocess_test((image, None, cfg['inp_size']))[0], 0) 24 | return image, im_data 25 | 26 | 27 | def detection_objects(bboxes, scores, cls_inds): 28 | objects = list() 29 | for i in range(len(bboxes)): 30 | box = bboxes[i] 31 | score = scores[i] 32 | label = cfg['label_names'][cls_inds[i]] 33 | objects.append((box, score, label)) 34 | return objects 35 | 36 | 37 | def save_as_kitti_format(frame_id, det_obj, output_dir, src_label='voc'): 38 | # 'Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01' 39 | # 0 -1 car 0 0 0 1078 142 1126 164 0 0 0 0 0 0 0.415537 40 | with open(output_dir + '/{:06d}.txt'.format(frame_id), 'w') as file: 41 | for det in det_obj: 42 | bbox = det[0] 43 | score = det[1] 44 | label = det[2] 45 | if src_label == 'voc': 46 | if label != 'car' and label != 'person': 47 | continue 48 | label = label.replace('person', 'pedestrian') 49 | label = label.replace('Person', 'Person_sitting') 50 | line_str = '{:s} 0 0 0 {:d} {:d} {:d} {:d} 0 0 0 0 0 0 0 {:.4f}\n' \ 51 | .format(label, bbox[0], bbox[1], bbox[2], bbox[3], score) 52 | # print(line_str) 53 | file.write(line_str) 54 | 55 | 56 | def main(): 57 | 58 | output_dir = '../output' 59 | output_template_dir = '../output_template' 60 | kitti_output_dir = '../kitti_det_output' 61 | input_file_list = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt' 62 | # input_file_list = '/home/cory/project/yolo2-pytorch/flow/w01_imgs.txt' 63 | vis_enable = False 64 | thresh = 0.5 65 | 66 | trained_model = '/home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_half/' \ 67 | 'kitti_new_2_flow_center_ft_half_5.h5' 68 | 69 | shutil.rmtree(output_dir, ignore_errors=True) 70 | shutil.rmtree(kitti_output_dir, ignore_errors=True) 71 | shutil.copytree(output_template_dir, output_dir) 72 | os.makedirs(kitti_output_dir) 73 | 74 | net = Darknet19(cfg) 75 | net_utils.load_net(trained_model, net) 76 | net.eval() 77 | net.cuda() 78 | print(trained_model) 79 | print('load model successfully') 80 | 81 | img_files = open(input_file_list) 82 | image_abs_paths = img_files.readlines() 83 | image_abs_paths = [f.strip() for f in image_abs_paths] 84 | 85 | t_det = Timer() 86 | t_total = Timer() 87 | for i, image_path in 
enumerate(image_abs_paths): 88 | t_total.tic() 89 | image, im_data = preprocess(image_path) 90 | im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2) 91 | 92 | t_det.tic() 93 | bbox_pred, iou_pred, prob_pred = net.forward(im_data) 94 | det_time = t_det.toc() 95 | 96 | bbox_pred = bbox_pred.data.cpu().numpy() 97 | iou_pred = iou_pred.data.cpu().numpy() 98 | prob_pred = prob_pred.data.cpu().numpy() 99 | 100 | bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh) 101 | det_obj = detection_objects(bboxes, scores, cls_inds) 102 | save_as_kitti_format(i, det_obj, kitti_output_dir, src_label='kitti') 103 | 104 | total_time = t_total.toc() 105 | format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms) %s' 106 | print(format_str % ( 107 | i, 1. / det_time, det_time * 1000, 1. / total_time, total_time * 1000, image_path)) 108 | 109 | t_det.clear() 110 | t_total.clear() 111 | 112 | if vis_enable: 113 | im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) 114 | cv2.imshow('detection', im2show) 115 | cv2.imwrite(output_dir + '/detection/{:04d}.jpg'.format(i), im2show) 116 | key = cv2.waitKey(0) 117 | if key == ord('q'): 118 | break 119 | 120 | 121 | if __name__ == '__main__': 122 | main() 123 | -------------------------------------------------------------------------------- /misc/validate_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | 6 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 7 | 8 | 9 | from cfgs.config_v2 import add_cfg 10 | import utils.network as net_utils 11 | from darknet_v3 import Darknet19 12 | from datasets.ImageFileDataset_v2 import ImageFileDataset 13 | from utils.timer import Timer 14 | from train.train_util_v2 import * 15 | 16 | 17 | # dataset_yaml = '/home/cory/yolo2-pytorch/cfgs/config_kitti.yaml' 18 | # exp_yaml = '/home/cory/yolo2-pytorch/cfgs/exps/kitti_new_2.yaml' 19 | dataset_yaml = '/home/cory/yolo2-pytorch/cfgs/config_voc.yaml' 20 | exp_yaml = '/home/cory/yolo2-pytorch/cfgs/exps/voc0712_template.yaml' 21 | 22 | cfg = dict() 23 | # add_cfg(cfg, '/home/cory/yolo2-pytorch/cfgs/config_voc.yaml') 24 | add_cfg(cfg, dataset_yaml) 25 | add_cfg(cfg, exp_yaml) 26 | 27 | # data loader 28 | imdb = ImageFileDataset(cfg, ImageFileDataset.preprocess_train, 29 | processes=4, shuffle=False, dst_size=None, mode='val') 30 | 31 | print('imdb load data succeeded') 32 | net = Darknet19(cfg) 33 | 34 | # CUDA_VISIBLE_DEVICES=1 35 | # 20 0.68 36 | # 40 0.60 37 | # 45 0.56 38 | # 50 0.58 39 | # 55 0.55 40 | # 60 0.59 41 | 42 | os.makedirs(cfg['train_output_dir'], exist_ok=True) 43 | try: 44 | ckp = open(cfg['train_output_dir'] + '/check_point.txt') 45 | ckp_epoch = int(ckp.readlines()[0]) 46 | # ckp_epoch = 100 47 | # raise IOError 48 | use_model = os.path.join(cfg['train_output_dir'], cfg['exp_name'] + '_' + str(ckp_epoch) + '.h5') 49 | except IOError: 50 | ckp_epoch = 0 51 | use_model = cfg['pretrained_model'] 52 | 53 | net_utils.load_net(use_model, net) 54 | 55 | net.cuda() 56 | net.train() 57 | print('load net succeeded') 58 | 59 | start_epoch = ckp_epoch 60 | imdb.epoch = start_epoch 61 | 62 | # show training parameters 63 | print('-------------------------------') 64 | print('gpu_id', os.environ.get('CUDA_VISIBLE_DEVICES')) 65 | print('use_model', use_model) 66 | print('exp_name', cfg['exp_name']) 67 | print('dataset', cfg['dataset_name']) 68 | 
print('optimizer', cfg['optimizer']) 69 | print('opt_param', cfg['opt_param']) 70 | print('train_batch_size', cfg['train_batch_size']) 71 | print('start_epoch', start_epoch) 72 | print('lr', lookup_lr(cfg, start_epoch)) 73 | print('-------------------------------') 74 | 75 | 76 | train_loss = 0 77 | bbox_loss, iou_loss, cls_loss = 0., 0., 0. 78 | cnt = 0 79 | 80 | timer = Timer() 81 | 82 | # default input size 83 | network_size = np.array(cfg['inp_size'], dtype=np.int) 84 | 85 | for step in range(start_epoch * imdb.batch_per_epoch, (start_epoch + 5) * imdb.batch_per_epoch + 1): 86 | timer.tic() 87 | 88 | prev_epoch = imdb.epoch 89 | batch = imdb.next_batch(network_size) 90 | 91 | # when go to next epoch 92 | if imdb.epoch > prev_epoch: 93 | train_loss /= cnt 94 | bbox_loss /= cnt 95 | iou_loss /= cnt 96 | cls_loss /= cnt 97 | print() 98 | print('loss: %.3f, bbox_loss: %.3f, iou_loss: %.3f, cls_loss: %.3f' % 99 | (train_loss, bbox_loss, iou_loss, cls_loss)) 100 | 101 | train_loss = 0 102 | bbox_loss, iou_loss, cls_loss = 0., 0., 0. 103 | cnt = 0 104 | timer.clear() 105 | 106 | # forward 107 | im_data = net_utils.np_to_variable(batch['images'], is_cuda=True, volatile=False).permute(0, 3, 1, 2) 108 | x = net.forward(im_data, batch['gt_boxes'], batch['gt_classes'], network_size) 109 | 110 | # loss 111 | bbox_loss += net.bbox_loss.data.cpu().numpy()[0] 112 | iou_loss += net.iou_loss.data.cpu().numpy()[0] 113 | cls_loss += net.class_loss.data.cpu().numpy()[0] 114 | train_loss += net.loss.data.cpu().numpy()[0] 115 | cnt += 1 116 | 117 | if step % cfg['disp_interval'] == 0: 118 | progress_in_epoch = (step % imdb.batch_per_epoch) / imdb.batch_per_epoch 119 | print('%.2f%%' % (progress_in_epoch * 100), end=' ') 120 | sys.stdout.flush() 121 | 122 | imdb.close() 123 | -------------------------------------------------------------------------------- /misc/vis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/misc/vis.jpg -------------------------------------------------------------------------------- /misc/visualize_gt.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | def proc_label(img, values): 5 | if len(values) <= 1: 6 | return 7 | label = values[0] 8 | official_format = False 9 | label = label.replace('DontCare', '') 10 | if official_format: 11 | xmin = int(float(values[4])) 12 | ymin = int(float(values[5])) 13 | xmax = int(float(values[6])) 14 | ymax = int(float(values[7])) 15 | else: 16 | xmin = int(float(values[1])) 17 | ymin = int(float(values[2])) 18 | xmax = int(float(values[3])) 19 | ymax = int(float(values[4])) 20 | 21 | cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1) 22 | cv2.putText(img, label, (xmin, ymax), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA) 23 | print(values) 24 | 25 | 26 | def plot_vis(image_path, label_path): 27 | img = cv2.imread(image_path) 28 | print(img.shape) 29 | 30 | if isinstance(label_path, str): 31 | label_file = open(label_path) 32 | vv = [f.strip().split(' ') for f in label_file.readlines()] 33 | else: 34 | vv = label_path 35 | 36 | for values in vv: 37 | proc_label(img, values) 38 | 39 | cv2.imshow('img', img) 40 | cv2.imwrite('vis.jpg', img) 41 | key = cv2.waitKey(100) 42 | print(key) 43 | if key == ord('q'): 44 | return -1 45 | return 0 46 | 47 | 48 | def run_vis(): 49 | choice = 2 50 | if choice == 1: 51 | image_path = 
'/home/cory/cedl/dashcam/images/000900/000010.jpg' 52 | label_path = '/home/cory/cedl/dashcam/labels/000900/000010.txt' 53 | elif choice == 2: 54 | image_path = '/home/cory/KITTI_Dataset/data_tracking_image_2/training/image_02/0000/000000.png' 55 | label_path = '/home/cory/KITTI_Dataset/tracking_label/0000/000000.txt' 56 | elif choice == 3: 57 | image_path = '/home/cory/VOC/VOCdevkit/VOC2007/JPEGImages/000009.jpg' 58 | label_path = '/home/cory/VOC/VOCdevkit/VOC2007/labels/000009.txt' 59 | elif choice == 4: 60 | image_path = '/home/cory/GTAV/VOCdevkit/VOC2012/JPEGImages/3384645.jpg' 61 | label_path = '/home/cory/GTAV/VOCdevkit/VOC2012/labels/3384645.txt' 62 | 63 | plot_vis(image_path, label_path) 64 | 65 | 66 | def vis_list_file(): 67 | # image_path = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt' 68 | # label_path = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt' 69 | # image_path = '/media/cory/BackUp/ImageNet/vid_all_images.txt' 70 | # label_path = '/media/cory/BackUp/ImageNet/vid_all_labels.txt' 71 | image_path = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt' 72 | label_path = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt' 73 | 74 | # image_path = '/home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_images.txt' 75 | # label_path = '/home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_labels.txt' 76 | 77 | image_file = open(image_path) 78 | label_file = open(label_path) 79 | images = [p.strip() for p in image_file.readlines()] 80 | labels = [p.strip() for p in label_file.readlines()] 81 | for i in range(len(images)): 82 | if i < 500: 83 | continue 84 | print(images[i], labels[i]) 85 | r = plot_vis(images[i], labels[i]) 86 | if r == -1: 87 | break 88 | 89 | if __name__ == '__main__': 90 | vis_list_file() 91 | # run_vis() 92 | -------------------------------------------------------------------------------- /misc/voc_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | all_labels = 'VOC/voc0712_labels.txt' 4 | train_images = 'VOC/detrac_train_images.txt' 5 | train_labels = 'VOC/detrac_train_labels.txt' 6 | 7 | 8 | prefix = '/home/cory/VOC/VOCdevkit/VOC2007/labels/' 9 | 10 | 11 | def sort_file_line(filename): 12 | f = open(filename) 13 | sorted_line = sorted(f.readlines()) 14 | for line in sorted_line: 15 | print(line, end='') 16 | 17 | sort_file_line(train_images) 18 | 19 | 20 | def get_filename_id(fullpath): 21 | filename_begin_pos = fullpath.rfind('/') + 1 22 | filename_end_pos = fullpath.rfind('.') 23 | fname = fullpath[filename_begin_pos: filename_end_pos] 24 | return fname 25 | 26 | 27 | def convert_main(): 28 | all_id = [get_filename_id(f.strip()) for f in open(all_labels).readlines()] 29 | train_id = [get_filename_id(f.strip()) for f in open(train_images).readlines()] 30 | 31 | train_counter = 0 32 | test_counter = 0 33 | for id in all_id: 34 | if id in train_id: 35 | # print(train_counter, id) 36 | print(prefix + id + '.txt') 37 | train_counter += 1 38 | else: 39 | # print(test_counter, id, 'test') 40 | # print(prefix + id + '.txt') 41 | test_counter += 1 42 | 43 | print('total label:', len(all_id)) 44 | print('train', train_counter) 45 | print('test', test_counter) 46 | -------------------------------------------------------------------------------- /misc/yolo_video_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | from 
darknet import Darknet19 6 | import utils.yolo as yolo_utils 7 | import utils.network as net_utils 8 | from utils.timer import Timer 9 | import cfgs.config as cfg 10 | 11 | 12 | def preprocess(filename): 13 | image = cv2.imread(filename) 14 | im_data = np.expand_dims(yolo_utils.preprocess_test((image, None, cfg.inp_size))[0], 0) 15 | return image, im_data 16 | 17 | 18 | def main(): 19 | 20 | trained_model = cfg.trained_model 21 | thresh = 0.5 22 | image_dir = '/home/cory/cedl/vid/videos/vid04' 23 | 24 | net = Darknet19() 25 | net_utils.load_net(trained_model, net) 26 | net.eval() 27 | net.cuda() 28 | print('load model successfully') 29 | print(net) 30 | 31 | image_extensions = ['.jpg', '.JPG', '.png', '.PNG'] 32 | image_abs_paths = sorted([os.path.join(image_dir, name) 33 | for name in os.listdir(image_dir) 34 | if name[-4:] in image_extensions]) 35 | 36 | t_det = Timer() 37 | t_total = Timer() 38 | 39 | for i, image_path in enumerate(image_abs_paths): 40 | t_total.tic() 41 | image, im_data = preprocess(image_path) 42 | im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2) 43 | t_det.tic() 44 | bbox_pred, iou_pred, prob_pred = net.forward(im_data) 45 | det_time = t_det.toc() 46 | # to numpy 47 | bbox_pred = bbox_pred.data.cpu().numpy() 48 | iou_pred = iou_pred.data.cpu().numpy() 49 | prob_pred = prob_pred.data.cpu().numpy() 50 | 51 | # print bbox_pred.shape, iou_pred.shape, prob_pred.shape 52 | 53 | bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh) 54 | 55 | im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) 56 | 57 | if im2show.shape[0] > 1100: 58 | im2show = cv2.resize(im2show, (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000)) 59 | cv2.imshow('test', im2show) 60 | 61 | total_time = t_total.toc() 62 | format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)' 63 | print(format_str % ( 64 | i, 1. / det_time, det_time * 1000, 1. 
/ total_time, total_time * 1000)) 65 | 66 | t_det.clear() 67 | t_total.clear() 68 | 69 | key = cv2.waitKey(1) 70 | if key == ord('q'): 71 | break 72 | 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /train/train_util_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from train.yellowfin import YFOptimizer 3 | 4 | 5 | def lookup_lr(cfg, ep): 6 | lr_epochs = cfg['lr_epoch'] 7 | lr_vals = cfg['lr_val'] 8 | for i in range(len(lr_epochs) - 1): 9 | if lr_epochs[i] <= ep < lr_epochs[i + 1]: 10 | return lr_vals[i] 11 | return lr_vals[- 1] # last lr 12 | 13 | 14 | def get_optimizer_lr(optimizer): 15 | return optimizer.param_groups[0]['lr'] 16 | 17 | 18 | def get_optimizer(cfg, net, epoch): 19 | lr = lookup_lr(cfg, epoch) 20 | optimizer = None 21 | if cfg['optimizer'] == 'SGD': 22 | if cfg['opt_param'] == 'all': 23 | optimizer = torch.optim.SGD(params=net.parameters(), 24 | momentum=cfg['momentum'], 25 | weight_decay=cfg['weight_decay'], 26 | nesterov=True, 27 | lr=lr) 28 | elif cfg['opt_param'] == 'conv345': 29 | optimizer = torch.optim.SGD(params=[{'params': net.conv3.parameters()}, 30 | {'params': net.conv4.parameters()}, 31 | {'params': net.conv5.parameters()}], 32 | momentum=cfg['momentum'], 33 | weight_decay=cfg['weight_decay'], 34 | nesterov=True, 35 | lr=lr) 36 | elif cfg['optimizer'] == 'Adam': 37 | if cfg['opt_param'] == 'all': 38 | optimizer = torch.optim.Adam(params=net.parameters(), 39 | weight_decay=cfg['weight_decay'], 40 | lr=lr) 41 | elif cfg['opt_param'] == 'conv345': 42 | optimizer = torch.optim.Adam(params=[{'params': net.conv3.parameters()}, 43 | {'params': net.conv4.parameters()}, 44 | {'params': net.conv5.parameters()}], 45 | weight_decay=cfg['weight_decay'], 46 | lr=lr) 47 | elif cfg['optimizer'] == 'YF': 48 | if cfg['opt_param'] == 'all': 49 | optimizer = YFOptimizer(var_list=net.parameters()) 50 | elif cfg['opt_param'] == 'conv345': 51 | optimizer = YFOptimizer(var_list=[{'params': net.conv3.parameters()}, 52 | {'params': net.conv4.parameters()}, 53 | {'params': net.conv5.parameters()}]) 54 | 55 | assert optimizer is not None 56 | 57 | print('optimizer_lr =', get_optimizer_lr(optimizer)) 58 | return optimizer 59 | -------------------------------------------------------------------------------- /train_data/gen_dashcam_train_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | all_images_file = '/home/cory/cedl/dashcam/all_images.txt' 4 | all_labels_file = '/home/cory/cedl/dashcam/all_labels.txt' 5 | orig_label_dir = '/home/cory/cedl/dashcam/labels_video' 6 | output_label_dir = '/home/cory/cedl/dashcam/labels' 7 | 8 | 9 | def copy_exclude(filename, outfilename, patterns): 10 | with open(outfilename, 'w') as out: 11 | with open(filename) as f: 12 | for line in f.readlines(): 13 | pattern_found = False 14 | for p in patterns: 15 | if line.find(p) >= 0: 16 | pattern_found = True 17 | if not pattern_found: 18 | out.write(line) 19 | print(line.strip()) 20 | 21 | 22 | def copy_if(filename, outfilename, patterns): 23 | with open(outfilename, 'w') as out: 24 | with open(filename) as f: 25 | for line in f.readlines(): 26 | matched = True 27 | for p in patterns: 28 | if line.find(p) == -1: 29 | matched = False 30 | break 31 | if matched: 32 | out.write(line) 33 | print(line.strip()) 34 | 35 | 36 | # 37 | all_class = list() 38 | 39 | 40 | def gen_each_label(): 41 | for label_file in 
os.listdir(orig_label_dir): 42 | id_str = label_file[:label_file.rfind('.')] 43 | full_path = os.path.join(orig_label_dir, label_file) 44 | print(id_str, full_path) 45 | out_dir_video = os.path.join(output_label_dir, id_str) 46 | if not os.path.exists(out_dir_video): 47 | os.mkdir(out_dir_video) 48 | 49 | video_label_file = open(full_path) 50 | labels_per_frame = list() 51 | for i in range(100): 52 | labels_per_frame.append(list()) 53 | 54 | for line in video_label_file.readlines(): 55 | values = line.strip().split('\t') 56 | frame = int(values[0]) 57 | classs = values[2].replace('"', '') 58 | if classs not in all_class: 59 | all_class.append(classs) 60 | xmin = int(values[3]) 61 | ymin = int(values[4]) 62 | xmax = int(values[5]) 63 | ymax = int(values[6]) 64 | bundle = (classs, xmin, ymin, xmax, ymax) 65 | labels_per_frame[frame - 1].append(bundle) 66 | 67 | for frame_i, labels in enumerate(labels_per_frame): 68 | out_file_name = out_dir_video + '/{:06d}.txt'.format(frame_i + 1) 69 | print(out_file_name, labels) 70 | out_file = open(out_file_name, 'w') 71 | for label in labels: 72 | print(label) 73 | out_file.write(' '.join([str(s) for s in label]) + '\n') 74 | 75 | 76 | if __name__ == '__main__': 77 | # gen_each_label() 78 | 79 | # exclude 9xx series video 80 | copy_exclude(all_images_file, 'dashcam_train_images.txt', ['/0009', '1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.']) 81 | copy_exclude(all_labels_file, 'dashcam_train_labels.txt', ['/0009', '1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.']) 82 | copy_if(all_images_file, 'dashcam_val_images.txt', ['/0009', '0.']) 83 | copy_if(all_labels_file, 'dashcam_val_labels.txt', ['/0009', '0.']) 84 | print(all_class) 85 | -------------------------------------------------------------------------------- /train_data/gen_kitti_det_train_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | kitti_det_label_path = '/home/cory/KITTI_Dataset/data_object_image_2/training/label_2' 4 | out_label_path = '/home/cory/KITTI_Dataset/detection_label' 5 | 6 | all_images_file = '/media/cory/c_disk/Project/KITTI_Dataset/kitti_detection_images.txt' 7 | all_labels_file = '/media/cory/c_disk/Project/KITTI_Dataset/kitti_detection_labels.txt' 8 | 9 | 10 | def copy_exclude(filename, outfilename, patterns): 11 | with open(outfilename, 'w') as out: 12 | with open(filename) as f: 13 | for line in f.readlines(): 14 | pattern_found = False 15 | for p in patterns: 16 | if line.find(p) >= 0: 17 | pattern_found = True 18 | if not pattern_found: 19 | out.write(line) 20 | print(line.strip()) 21 | 22 | 23 | def copy_include(filename, outfilename, patterns): 24 | with open(outfilename, 'w') as out: 25 | with open(filename) as f: 26 | for line in f.readlines(): 27 | for p in patterns: 28 | if line.find(p) >= 0: 29 | print(line.strip()) 30 | out.write(line) 31 | break 32 | 33 | 34 | def convert_file(infile_path, outfile_path): 35 | # 'Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01' 36 | infile = open(infile_path) 37 | outfile = open(outfile_path, 'w') 38 | for line in infile.readlines(): 39 | v = line.strip().split(' ') 40 | bb = list(map(str, map(int, map(float, v[4:8])))) 41 | outfile.write(v[0] + ' ' + ' '.join(bb) + '\n') 42 | 43 | 44 | def convert_format(): 45 | file_list = os.listdir(kitti_det_label_path) 46 | file_list.sort() 47 | for f in file_list: 48 | infile_path = kitti_det_label_path + '/' + f 49 | outfile_path = out_label_path + '/' + f 50 | 
convert_file(infile_path, outfile_path) 51 | 52 | print(infile_path, outfile_path) 53 | 54 | print(len(file_list)) 55 | 56 | 57 | def main(): 58 | copy_exclude(all_images_file, 'kitti/kitti_det_train_images.txt', ['/006', '/007']) 59 | copy_exclude(all_labels_file, 'kitti/kitti_det_train_labels.txt', ['/006', '/007']) 60 | copy_include(all_images_file, 'kitti/kitti_det_val_images.txt', ['/006', '/007']) 61 | copy_include(all_labels_file, 'kitti/kitti_det_val_labels.txt', ['/006', '/007']) 62 | 63 | if __name__ == '__main__': 64 | # convert_format() 65 | main() 66 | -------------------------------------------------------------------------------- /train_data/gen_kitti_train_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | all_images_file = '/media/cory/c_disk/Project/KITTI_Dataset/kitti_tracking_images.txt' 5 | all_labels_file = '/media/cory/c_disk/Project/KITTI_Dataset/kitti_tracking_labels.txt' 6 | 7 | tracking_raw_dir = '/media/cory/c_disk/Project/KITTI_Dataset/data_tracking_label_2/training/label_02' 8 | tracking_label_output = '/media/cory/c_disk/Project/KITTI_Dataset/trk' 9 | 10 | 11 | tracklet_count = [154, 447, 233, 144, 314, 297, 270, 800, 390, 803, 12 | 294, 373, 78, 340, 106, 376, 209, 145, 339, 1059, 13 | 837] 14 | 15 | 16 | def convert_tracking_to_detection(): 17 | all_tracking_labels = os.listdir(tracking_raw_dir) 18 | if not os.path.exists(tracking_label_output): 19 | os.mkdir(tracking_label_output) 20 | for merged_file in all_tracking_labels: 21 | track_id = merged_file.replace('.txt', '') 22 | print(track_id) 23 | tk_out = tracking_label_output + '/' + track_id 24 | shutil.rmtree(tk_out, ignore_errors=True) 25 | os.mkdir(tk_out) 26 | f = open(tracking_raw_dir + '/' + merged_file) 27 | lines = f.readlines() 28 | num_image = tracklet_count[int(track_id)] 29 | for i in range(num_image): 30 | frame_id = '{:06d}'.format(i) 31 | open(tk_out + '/' + frame_id + '.txt', 'w') 32 | 33 | for line in lines: 34 | v = line.strip().split(' ') 35 | frame_id = '{:06d}'.format(int(v[0])) 36 | data = v[2] + ' ' + ' '.join(v[6:10]) + ' ' + ' '.join(v[3:6]) + '\n' 37 | of = open(tk_out + '/' + frame_id + '.txt', 'a') 38 | of.write(data) 39 | 40 | 41 | def copy_exclude(filename, outfilename, patterns): 42 | with open(outfilename, 'w') as out: 43 | with open(filename) as f: 44 | for line in f.readlines(): 45 | pattern_found = False 46 | for p in patterns: 47 | if line.find(p) >= 0: 48 | pattern_found = True 49 | if not pattern_found: 50 | out.write(line) 51 | print(line.strip()) 52 | 53 | 54 | def copy_include(filename, outfilename, patterns): 55 | with open(outfilename, 'w') as out: 56 | with open(filename) as f: 57 | for line in f.readlines(): 58 | for p in patterns: 59 | if line.find(p) >= 0: 60 | print(line.strip()) 61 | out.write(line) 62 | break 63 | 64 | 65 | def main(): 66 | test_set = ['/0001/', '/0005/', '/0013/', '/0017/'] 67 | copy_exclude(all_images_file, 'kitti/kitti_train_images.txt', test_set) 68 | copy_exclude(all_labels_file, 'kitti/kitti_train_labels.txt', test_set) 69 | copy_include(all_images_file, 'kitti/kitti_val_images.txt', test_set) 70 | copy_include(all_labels_file, 'kitti/kitti_val_labels.txt', test_set) 71 | 72 | if __name__ == '__main__': 73 | convert_tracking_to_detection() 74 | # main() 75 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/utils/__init__.py -------------------------------------------------------------------------------- /utils/barrier.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class Barrier(object): 5 | def __init__(self): 6 | self.t_list = [time.time()] 7 | self.idx_list = [0] 8 | pass 9 | 10 | def add(self, idx): 11 | self.t_list.append(time.time()) 12 | self.idx_list.append(idx) 13 | 14 | def print(self): 15 | sum = 0.0 16 | for i in range(len(self.t_list) - 1): 17 | diff = self.t_list[i + 1] - self.t_list[i] 18 | print(self.idx_list[i + 1], '{:.4f} seconds'.format(diff)) 19 | sum += diff 20 | print('--- sum {:.4f}'.format(sum)) 21 | -------------------------------------------------------------------------------- /utils/im_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def imcv2_recolor(im, a=.1): 6 | # t = [np.random.uniform()] 7 | # t += [np.random.uniform()] 8 | # t += [np.random.uniform()] 9 | # t = np.array(t) * 2. - 1. 10 | t = np.random.uniform(-1, 1, 3) 11 | 12 | # random amplify each channel 13 | im = im.astype(np.float) 14 | im *= (1 + t * a) 15 | mx = 255. * (1 + a) 16 | up = np.random.uniform(-1, 1) 17 | im = np.power(im / mx, 1. + up * .5) 18 | # return np.array(im * 255., np.uint8) 19 | return im 20 | 21 | 22 | def imcv2_affine_trans(im): 23 | # Scale and translate 24 | h, w, c = im.shape 25 | scale = np.random.uniform() / 10. + 1. 26 | max_offx = (scale - 1.) * w 27 | max_offy = (scale - 1.) * h 28 | offx = int(np.random.uniform() * max_offx) 29 | offy = int(np.random.uniform() * max_offy) 30 | 31 | im = cv2.resize(im, (0, 0), fx=scale, fy=scale) 32 | im = im[offy: (offy + h), offx: (offx + w)] 33 | flip = np.random.uniform() > 0.5 34 | if flip: 35 | im = cv2.flip(im, 1) 36 | 37 | return im, [scale, [offx, offy], flip] 38 | -------------------------------------------------------------------------------- /utils/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/utils/nms/__init__.py -------------------------------------------------------------------------------- /utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] 
y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /utils/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy 
as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /utils/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .nms.cpu_nms import cpu_nms 9 | from .nms.gpu_nms import gpu_nms 10 | 11 | 12 | # def nms(dets, thresh, force_cpu=False): 13 | # """Dispatch to either CPU or GPU NMS implementations.""" 14 | # 15 | # if dets.shape[0] == 0: 16 | # return [] 17 | # if cfg.USE_GPU_NMS and not force_cpu: 18 | # return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # else: 20 | # return cpu_nms(dets, thresh) 21 | 22 | 23 | def nms(dets, thresh, force_cpu=False): 24 | """Dispatch to either CPU or GPU NMS implementations.""" 25 | 26 | if dets.shape[0] == 0: 27 | return [] 28 | if force_cpu: 29 | return cpu_nms(dets, thresh) 30 | return gpu_nms(dets, thresh) 31 | -------------------------------------------------------------------------------- /utils/plot_loss.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | exp1 = 'detrac_baseline' 5 | exp2 = 'detrac_baseline' 6 | log_file1 = '/home/cory/project/yolo2-pytorch/models/training/' + exp1 + '/train.log' # red 7 | log_file2 = '/home/cory/project/yolo2-pytorch/models/training/' + exp2 + '/train.log' # blue 8 | log1 = np.genfromtxt(log_file1, delimiter=', ') 9 | log2 = np.genfromtxt(log_file2, delimiter=', ') 10 | 11 | 12 | def moving_avg(x, N): 13 | return np.convolve(x, np.ones((N,))/N, mode='valid') 14 | 15 | begin_index = min(0, log1.shape[0], log2.shape[0]) 16 | end_index = min(log1.shape[0], log2.shape[0]) 17 | N_avg = 5 18 | N_log_per_epoch = 55 19 | x = np.arange(begin_index, end_index - N_avg + 1, dtype=np.float32) 20 | x /= N_log_per_epoch 21 | print() 22 | s1 = moving_avg(log1[begin_index:end_index, 2], N_avg) 23 | s2 = moving_avg(log2[begin_index:end_index, 2], N_avg) 24 | 25 | log_scale = True 26 | if log_scale: 27 | s1 = np.log(s1) 28 | s2 = np.log(s2) 29 | 30 | if log_file1 != log_file2: 31 | plt.plot(x, s1, 'r-', x, s2, 'b-') 32 | else: 33 | plt.plot(x, s1, 'r-') 34 | 35 | axes = plt.gca() 36 | # plt.ylim([0, 1]) 37 | plt.show() 38 | -------------------------------------------------------------------------------- /utils/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | 
-------------------------------------------------------------------------------- /utils/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /utils/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /utils/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 
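# A concrete round-trip sketch (hypothetical 4x4 mask; encode/decode/area are the bindings
# assigned at the bottom of this file, and numpy is assumed imported as np):
#   m = np.zeros((4, 4, 1), dtype=np.uint8, order='F')   # h x w x n, column-major uint8
#   m[1:3, 1:3, 0] = 1
#   rles = encode(m)              # list with one RLE dict
#   area(rles)[0]                 # -> 4
#   (decode(rles) == m).all()     # -> True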
22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /utils/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 
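# Usage sketch (do_work is a hypothetical stand-in for the code being profiled):
#   t = Timer()
#   for _ in range(10):
#       t.tic()
#       do_work()
#       print('last %.1f ms' % (t.toc(average=False) * 1000))
#   print('avg %.1f ms' % (t.average_time * 1000))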
41 | -------------------------------------------------------------------------------- /utils/vis_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cfgs.config import * 3 | 4 | # for display 5 | ############################ 6 | def _to_color(indx, base): 7 | """ return (b, r, g) tuple""" 8 | base2 = base * base 9 | b = 2 - indx // base2 10 | r = 2 - (indx % base2) // base 11 | g = 2 - (indx % base2) % base 12 | return b * 127, r * 127, g * 127 13 | 14 | base = int(np.ceil(pow(num_classes, 1. / 3))) 15 | colors = [_to_color(x, base) for x in range(num_classes)] 16 | 17 | -------------------------------------------------------------------------------- /utils/yolo.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | DTYPE = np.float 6 | ctypedef np.float_t DTYPE_t 7 | 8 | cdef extern from "math.h": 9 | double fabs(double m) 10 | double log(double x) 11 | 12 | 13 | def yolo_to_bbox( 14 | np.ndarray[DTYPE_t, ndim=4] bbox_pred, 15 | np.ndarray[DTYPE_t, ndim=2] anchors, int H, int W): 16 | return yolo_to_bbox_c(bbox_pred, anchors, H, W) 17 | 18 | cdef yolo_to_bbox_c( 19 | np.ndarray[DTYPE_t, ndim=4] bbox_pred, 20 | np.ndarray[DTYPE_t, ndim=2] anchors, int H, int W): 21 | """ 22 | Parameters 23 | ---------- 24 | bbox_pred: (bsize, HxW, num_anchors, 4) ndarray of float (sig(tx), sig(ty), exp(tw), exp(th)) 25 | anchors: (num_anchors, 2) (pw, ph) 26 | Returns 27 | ------- 28 | bbox_out: (HxWxnum_anchors, 4) ndarray of bbox (x1, y1, x2, y2) rescaled to (0, 1) 29 | """ 30 | cdef unsigned int bsize = bbox_pred.shape[0] 31 | cdef unsigned int num_anchors = anchors.shape[0] 32 | cdef np.ndarray[DTYPE_t, ndim=4] bbox_out = np.zeros((bsize, H*W, num_anchors, 4), dtype=DTYPE) 33 | 34 | cdef DTYPE_t cx, cy, bw, bh 35 | cdef unsigned int row, col, a, ind 36 | for b in range(bsize): 37 | for row in range(H): 38 | for col in range(W): 39 | ind = row * W + col 40 | for a in range(num_anchors): 41 | cx = (bbox_pred[b, ind, a, 0] + col) / W 42 | cy = (bbox_pred[b, ind, a, 1] + row) / H 43 | bw = bbox_pred[b, ind, a, 2] * anchors[a][0] / W * 0.5 44 | bh = bbox_pred[b, ind, a, 3] * anchors[a][1] / H * 0.5 45 | 46 | bbox_out[b, ind, a, 0] = cx - bw 47 | bbox_out[b, ind, a, 1] = cy - bh 48 | bbox_out[b, ind, a, 2] = cx + bw 49 | bbox_out[b, ind, a, 3] = cy + bh 50 | 51 | return bbox_out -------------------------------------------------------------------------------- /yolo_detect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import time 5 | from darknet_v3 import Darknet19 6 | from cfgs.config_v2 import load_cfg_yamls 7 | import utils.network as net_utils 8 | import utils.yolo_v2 as yolo_utils 9 | 10 | base_dir = './' 11 | 12 | 13 | def init_network(): 14 | dataset_yaml = os.path.join(base_dir, 'cfgs/config_kitti_demo.yaml') 15 | cfg = load_cfg_yamls([dataset_yaml]) 16 | 17 | model = os.path.join(base_dir, 'models/training/kitti_baseline_v3/kitti_baseline_v3_100.h5') 18 | net = Darknet19(cfg) 19 | net_utils.load_net(model, net) 20 | net.eval() 21 | net.cuda() 22 | print('load model successfully') 23 | return net, cfg 24 | 25 | 26 | def load_image_paths(img_list_file): 27 | img_files = open(img_list_file) 28 | image_paths = [f.strip() for f in img_files.readlines()] 29 | return image_paths 30 | 31 | 32 | def preprocess(filename, inp_size): 33 | image = 
cv2.imread(filename) 34 | im_data = np.expand_dims(yolo_utils.preprocess_test((image, None, inp_size))[0], 0) 35 | return image, im_data 36 | 37 | 38 | def detect_image(cfg, image_path, net, thresh): 39 | image, im_data = preprocess(image_path, cfg['inp_size']) 40 | im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2) 41 | bbox_pred, iou_pred, prob_pred = net.forward(im_data) 42 | bbox_pred = bbox_pred.data.cpu().numpy() 43 | iou_pred = iou_pred.data.cpu().numpy() 44 | prob_pred = prob_pred.data.cpu().numpy() 45 | bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh) 46 | return bboxes, cls_inds, image, scores 47 | 48 | 49 | def run(): 50 | net, cfg = init_network() 51 | image_paths = load_image_paths(os.path.join('./demo/', 'demo_images_list.txt')) 52 | 53 | thresh = 0.6 54 | imshow = True 55 | time_rec = list() 56 | for i, image_path in enumerate(image_paths): 57 | begin_time = time.time() 58 | bboxes, cls_inds, image, scores = detect_image(cfg, image_path, net, thresh) 59 | end_time = time.time() 60 | time_rec.append(end_time - begin_time) 61 | im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) 62 | 63 | cv2.imwrite('output/detection_{:04d}.jpg'.format(i), im2show) 64 | if imshow: 65 | cv2.imshow('detection', im2show) 66 | key = cv2.waitKey(30) 67 | if key == ord('q'): 68 | break 69 | 70 | avg = sum(time_rec) / len(time_rec) 71 | print('processed {:d} images in {:.3f} seconds'.format(len(time_rec), sum(time_rec))) 72 | print('{:.3f} sec/image'.format(avg)) 73 | print('{:.2f} fps'.format(1/avg)) 74 | 75 | 76 | if __name__ == '__main__': 77 | run() 78 | --------------------------------------------------------------------------------
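To run the detector on a single image instead of the list in demo/demo_images_list.txt, the helpers in yolo_detect.py can be reused directly. A minimal sketch, assuming the same model and config that init_network() loads; the input image and output filename here are placeholders:

import cv2
import utils.yolo_v2 as yolo_utils
from yolo_detect import init_network, detect_image

net, cfg = init_network()
# detect_image returns boxes, class indices, the loaded image, and per-box scores
bboxes, cls_inds, image, scores = detect_image(cfg, 'demo/images/000040.jpg', net, 0.6)
im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
cv2.imwrite('detection_single.jpg', im2show)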