├── .gitignore
├── README.md
├── cfgs
│   ├── __init__.py
│   ├── config.py
│   ├── config_crowdai.yaml
│   ├── config_detrac.yaml
│   ├── config_kitti.py
│   ├── config_kitti.yaml
│   ├── config_kitti_demo.yaml
│   ├── config_v2.py
│   ├── config_voc.py
│   ├── config_voc.yaml
│   └── exps
│       ├── crowdai
│       │   └── crowdai_baseline.yaml
│       ├── detrac
│       │   ├── detrac_baseline.yaml
│       │   ├── detrac_baseline_2.yaml
│       │   └── detrac_flow_center_w01.yaml
│       ├── kitti
│       │   ├── kitti_baseline.yaml
│       │   ├── kitti_baseline_v3.yaml
│       │   ├── kitti_baseline_v3_yf.yaml
│       │   ├── kitti_joint_flow.yaml
│       │   ├── kitti_joint_flow_low_lr.yaml
│       │   ├── kitti_new_1.yaml
│       │   ├── kitti_new_2.yaml
│       │   ├── kitti_new_2_flow_center_ft.yaml
│       │   ├── kitti_new_2_flow_center_ft_bbox_merge.yaml
│       │   ├── kitti_new_2_flow_center_ft_cls.yaml
│       │   ├── kitti_new_2_flow_center_ft_cls_bbox_merge.yaml
│       │   ├── kitti_new_2_flow_center_ft_cls_flownet2.yaml
│       │   ├── kitti_new_2_flow_center_ft_cls_merge.yaml
│       │   ├── kitti_new_2_flow_center_ft_cls_merge_2x.yaml
│       │   ├── kitti_new_2_flow_center_ft_cls_merge_ped.yaml
│       │   ├── kitti_new_2_flow_center_ft_flownet2.yaml
│       │   ├── kitti_new_2_flow_center_ft_flownet2_joint.yaml
│       │   ├── kitti_new_2_flow_center_ft_half.yaml
│       │   ├── kitti_new_2_flow_center_ft_iou_merge.yaml
│       │   ├── kitti_new_2_flow_center_joint_ft.yaml
│       │   ├── kitti_new_2_flow_center_joint_ft_low_lr.yaml
│       │   ├── kitti_new_2_flow_center_joint_ft_w10.yaml
│       │   ├── kitti_new_2_flow_dis.yaml
│       │   ├── kitti_new_2_flow_ft.yaml
│       │   ├── kitti_new_2_flow_ft_cls_bbox_merge.yaml
│       │   ├── kitti_new_2_flow_ft_cls_merge.yaml
│       │   ├── kitti_new_2_flow_ft_std_2_5.yaml
│       │   ├── kitti_new_2_flow_ft_std_2_5_merge.yaml
│       │   └── kitti_new_2_flow_spy.yaml
│       ├── kitti_ft_exp3.py
│       ├── voc0712
│       │   ├── voc0712_anchor.yaml
│       │   ├── voc0712_baseline.yaml
│       │   ├── voc0712_baseline_v3.yaml
│       │   ├── voc0712_baseline_v3_rand.yaml
│       │   ├── voc0712_box_mask_0.yaml
│       │   ├── voc0712_low_lr.yaml
│       │   ├── voc0712_mask.yaml
│       │   ├── voc0712_multiple_anchors.yaml
│       │   ├── voc0712_new.py
│       │   ├── voc0712_obj_scale.yaml
│       │   ├── voc0712_obj_scale_ft.yaml
│       │   ├── voc0712_one_anchor.yaml
│       │   ├── voc0712_overfit.yaml
│       │   ├── voc0712_pred_raw.yaml
│       │   ├── voc0712_template.yaml
│       │   └── voc0712_trainval_ft_debug2.yaml
│       └── voc0712_new_2.py
├── darknet.py
├── darknet_training_v3.py
├── darknet_v3.py
├── datasets
│   ├── DataLoaderIterX.py
│   ├── DataLoaderX.py
│   ├── DetectionDataset.py
│   ├── DetectionDatasetHelper.py
│   ├── ImageFileDataset.py
│   ├── ImageFileDataset_v2.py
│   ├── KittiDataset.py
│   ├── __init__.py
│   ├── imdb.py
│   ├── pascal_voc.py
│   └── voc_eval.py
├── demo
│   ├── demo_images_list.txt
│   ├── detection_0030.jpg
│   └── images
│       ├── 000040.jpg
│       ├── 000041.jpg
│       ├── 000042.jpg
│       ├── 000043.jpg
│       ├── 000044.jpg
│       ├── 000045.jpg
│       ├── 000046.jpg
│       ├── 000047.jpg
│       ├── 000048.jpg
│       ├── 000049.jpg
│       ├── 000050.jpg
│       ├── 000051.jpg
│       ├── 000052.jpg
│       ├── 000053.jpg
│       ├── 000054.jpg
│       ├── 000055.jpg
│       ├── 000056.jpg
│       ├── 000057.jpg
│       ├── 000058.jpg
│       ├── 000059.jpg
│       ├── 000060.jpg
│       ├── 000061.jpg
│       ├── 000062.jpg
│       ├── 000063.jpg
│       ├── 000064.jpg
│       ├── 000065.jpg
│       ├── 000066.jpg
│       ├── 000067.jpg
│       ├── 000068.jpg
│       ├── 000069.jpg
│       ├── 000070.jpg
│       ├── 000071.jpg
│       ├── 000072.jpg
│       ├── 000073.jpg
│       ├── 000074.jpg
│       ├── 000075.jpg
│       ├── 000076.jpg
│       ├── 000077.jpg
│       ├── 000078.jpg
│       └── 000079.jpg
├── flow
│   ├── detection_analyzer.py
│   ├── flow_generator.py
│   ├── flow_util.py
│   ├── flow_vis.py
│   ├── flow_warper.py
│   ├── gen_flow_images.py
│   ├── gen_flow_images_cloudai.py
│   ├── gen_flow_images_detrac.py
│   ├── gen_val_from_all.sh
│   ├── gen_warp_images_by_flow.py
│   ├── img_diff.py
│   ├── img_pairs.txt
│   ├── kitti_train_labels.txt
│   ├── kitti_val_images_warp.txt
│   ├── plot_util.py
│   ├── run_flow.sh
│   ├── run_of.sh
│   ├── shift_gt_by_flow.py
│   ├── vis.jpg
│   └── yolo_flow.py
├── layers
│   ├── __init__.py
│   ├── reorg
│   │   ├── __init__.py
│   │   ├── _ext
│   │   │   ├── __init__.py
│   │   │   └── reorg_layer
│   │   │       └── __init__.py
│   │   ├── build.py
│   │   ├── reorg_layer.py
│   │   └── src
│   │       ├── reorg_cpu.c
│   │       ├── reorg_cpu.h
│   │       ├── reorg_cuda.c
│   │       ├── reorg_cuda.h
│   │       ├── reorg_cuda_kernel.cu
│   │       └── reorg_cuda_kernel.h
│   └── roi_pooling
│       ├── __init__.py
│       ├── _ext
│       │   ├── __init__.py
│       │   └── roi_pooling
│       │       └── __init__.py
│       ├── build.py
│       ├── roi_pool.py
│       ├── roi_pool_py.py
│       └── src
│           ├── cuda
│           │   ├── roi_pooling_kernel.cu
│           │   └── roi_pooling_kernel.h
│           ├── roi_pooling.c
│           ├── roi_pooling.h
│           ├── roi_pooling_cuda.c
│           └── roi_pooling_cuda.h
├── make.sh
├── misc
│   ├── kitti_detect.py
│   ├── validate_dataset.py
│   ├── vis.jpg
│   ├── visualize_gt.py
│   ├── voc_ap.py
│   ├── voc_ap_v2.py
│   ├── voc_data.py
│   └── yolo_video_test.py
├── train
│   ├── train_dataset_v3.py
│   ├── train_util_v2.py
│   └── yellowfin.py
├── train_data
│   ├── gen_dashcam_train_data.py
│   ├── gen_gtav_train_data.py
│   ├── gen_kitti_det_train_data.py
│   ├── gen_kitti_train_data.py
│   └── gen_voc_train_data.py
├── utils
│   ├── __init__.py
│   ├── barrier.py
│   ├── bbox.c
│   ├── bbox.pyx
│   ├── build.py
│   ├── im_transform.py
│   ├── network.py
│   ├── nms
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms_kernel.cu
│   │   └── py_cpu_nms.py
│   ├── nms_wrapper.py
│   ├── plot_loss.py
│   ├── pycocotools
│   │   ├── UPSTREAM_REV
│   │   ├── __init__.py
│   │   ├── _mask.c
│   │   ├── _mask.pyx
│   │   ├── coco.py
│   │   ├── cocoeval.py
│   │   ├── license.txt
│   │   ├── mask.py
│   │   ├── maskApi.c
│   │   └── maskApi.h
│   ├── timer.py
│   ├── vis_util.py
│   ├── yolo.c
│   ├── yolo.py
│   ├── yolo.pyx
│   └── yolo_v2.py
└── yolo_detect.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### Python template
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | env/
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *,cover
49 | .hypothesis/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # dotenv
85 | .env
86 |
87 | # virtualenv
88 | .venv
89 | venv/
90 | ENV/
91 |
92 | # Spyder project settings
93 | .spyderproject
94 |
95 | # Rope project settings
96 | .ropeproject
97 | .idea
98 |
99 | *.o
100 | *.pkl
101 | models/*
102 | data/*
103 | train_data/*
104 | output/
105 | flow/output/
106 | flow/warp_classifier/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv2 in PyTorch
2 | This is a [PyTorch](https://github.com/pytorch/pytorch)
3 | implementation of YOLOv2.
4 | This project is forked from [longcw/yolo2-pytorch](https://github.com/longcw/yolo2-pytorch), but it is not compatible with the original version.
5 |
6 | Currently, I train this model on the [KITTI dataset](http://www.cvlibs.net/datasets/kitti/) as a demo. It predicts cars, pedestrians, and cyclists. If you want a general-purpose detector, please refer to [this repository](https://github.com/longcw/yolo2-pytorch).
7 |
8 | You can also run the [original YOLOv2](https://github.com/pjreddie/darknet) COCO model on KITTI. Here is a [demo video](
9 | https://www.youtube.com/watch?v=mfB1C4QQJr4)
10 |
11 | For details about YOLO and YOLOv2, please refer to the [project page](https://pjreddie.com/darknet/yolo/)
12 | and the [paper](https://arxiv.org/abs/1612.08242):
13 | *YOLO9000: Better, Faster, Stronger* by Joseph Redmon and Ali Farhadi.
14 |
15 |
16 |
17 |
18 |
19 | ### System Environment
20 | + Ubuntu 16.04
21 | + CUDA 8.0 / cuDNN 5.1
22 | + Python 3.5
23 | + Numpy 1.12
24 | + PyTorch 0.1.12
25 | + OpenCV 3.2
26 |
27 | With a GTX 1080 Ti GPU, I get ~30 fps with this KITTI model (input size = 1216 × 352).
28 |
29 | ### Installation and demo
30 | 1. Clone this repository
31 | ```bash
32 | git clone git@github.com:cory8249/yolo2-pytorch.git
33 | ```
34 |
35 | 2. Build the reorg layer ([`tf.extract_image_patches`](https://www.tensorflow.org/api_docs/python/tf/extract_image_patches))
36 | ```bash
37 | cd yolo2-pytorch
38 | ./make.sh
39 | ```
40 | 3. Download the trained model [kitti_baseline_v3_100.h5](https://drive.google.com/file/d/0B3IzhcU-mEUsWnBIcW00aUsteTQ)
41 | and set the model path in `yolo_detect.py`
42 | 4. Run demo `python3 yolo_detect.py`.
43 |
44 | Install any missing packages manually via pip.
45 |
--------------------------------------------------------------------------------
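
The reorg layer built in step 2 above is a space-to-depth rearrangement, the same idea as `tf.extract_image_patches` with matching strides. A minimal NumPy sketch of that operation (illustrative only, assuming stride 2; the exact channel ordering of the repo's CUDA kernel may differ):

```python
import numpy as np

def reorg(x, stride=2):
    # Space-to-depth: fold each stride x stride spatial block into channels.
    n, c, h, w = x.shape
    x = x.reshape(n, c, h // stride, stride, w // stride, stride)
    x = x.transpose(0, 3, 5, 1, 2, 4)
    return x.reshape(n, c * stride * stride, h // stride, w // stride)

x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
print(reorg(x).shape)  # (1, 4, 2, 2): spatial resolution halved, channels x4
```
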
/cfgs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/cfgs/__init__.py
--------------------------------------------------------------------------------
/cfgs/config.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | if os.environ.get('DATASET') == 'kitti':
4 | from cfgs.config_kitti import *
5 | # from cfgs.config_voc import *
6 | from cfgs.exps.kitti_ft_exp3 import *
7 | else:
8 | from cfgs.config_voc import *
9 | from cfgs.exps.voc0712_new_2 import *
10 |
11 | # 10.5 ~ 11 ms yolo_flow detection only OpenBLAS
12 | # 0.75 s/batch
13 |
14 | # 16 ~ 17 ms anaconda
15 | # 1.55 s/batch
16 |
17 | label_names = label_names  # re-export the name pulled in by the wildcard import above
18 | num_classes = len(label_names)
19 |
20 |
21 | def mkdir(path, max_depth=3):
22 | parent, child = os.path.split(path)
23 | if not os.path.exists(parent) and max_depth > 1:
24 | mkdir(parent, max_depth-1)
25 |
26 | if not os.path.exists(path):
27 | os.mkdir(path)
28 |
29 |
30 | # detection config
31 | ############################
32 | thresh = 0.3
33 |
34 |
35 | # dir config
36 | ############################
37 | ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
38 | DATA_DIR = os.path.join(ROOT_DIR, 'data')
39 | MODEL_DIR = os.path.join(ROOT_DIR, 'models')
40 | TRAIN_DIR = os.path.join(MODEL_DIR, 'training')
41 | TEST_DIR = os.path.join(MODEL_DIR, 'testing')
42 |
43 | trained_model = os.path.join(MODEL_DIR, h5_fname)
44 | pretrained_model = os.path.join(MODEL_DIR, pretrained_fname)
45 | train_output_dir = os.path.join(TRAIN_DIR, exp_name)
46 | test_output_dir = os.path.join(TEST_DIR, imdb_test, h5_fname)
47 | log_file = os.path.join(train_output_dir, 'train.log')
48 | check_point_file = os.path.join(train_output_dir, 'check_point.txt')
49 | mkdir(train_output_dir, max_depth=3)
50 | mkdir(test_output_dir, max_depth=4)
51 |
52 | rand_seed = 1024
53 | use_tensorboard = False
54 |
55 | log_interval = 50
56 | disp_interval = 50
--------------------------------------------------------------------------------
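
A usage sketch (my illustration, not a file from the repo): `cfgs/config.py` picks its dataset/experiment module pair at import time from the `DATASET` environment variable, so the variable must be set before the first import.

```python
import os

# Select the KITTI config branch before cfgs.config is first imported;
# any other value falls through to the VOC defaults.
os.environ['DATASET'] = 'kitti'

from cfgs import config
print(config.num_classes, config.label_names)
```
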
/cfgs/config_crowdai.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | # trained model
4 | h5_fname: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
5 |
6 | # Udacity crowdai
7 | label_names: [Car, Truck, Pedestrian]
8 | num_classes: 3
9 |
10 | # YOLO priors
11 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]]
12 | num_anchors: 5
13 |
--------------------------------------------------------------------------------
/cfgs/config_detrac.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | # trained model
4 | h5_fname: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz
5 |
6 | # detrac
7 | label_names: [car, bus, van, others]
8 | num_classes: 4
9 |
10 | # YOLO priors
11 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]]
12 | num_anchors: 5
13 |
--------------------------------------------------------------------------------
/cfgs/config_kitti.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | # trained model
5 | h5_fname = 'yolo-voc.weights.h5'
6 |
7 | # KITTI
8 | label_names = ('Car', 'Van', 'Truck', 'Tram', 'Pedestrian', 'Person', 'Cyclist')
9 | num_classes = len(label_names)
10 |
11 | anchors = np.asarray([(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)], dtype=np.float)
12 | num_anchors = len(anchors)
13 |
14 |
--------------------------------------------------------------------------------
/cfgs/config_kitti.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | # trained model
4 | h5_fname: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
5 |
6 | # kitti
7 | label_names: [Car, Van, Truck, Tram, Pedestrian, Person, Cyclist]
8 | num_classes: 7
9 |
10 | # YOLO priors
11 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]]
12 | num_anchors: 5
13 |
--------------------------------------------------------------------------------
/cfgs/config_kitti_demo.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | # trained model
4 | inp_size: [1216, 352]
5 | out_size: [38, 11] # inp_size / 32
6 |
7 | # kitti
8 | label_names: [Car, Van, Truck, Tram, Pedestrian, Person, Cyclist]
9 | num_classes: 7
10 |
11 | # YOLO priors
12 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]]
13 | num_anchors: 5
14 |
15 |
--------------------------------------------------------------------------------
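
As the `# inp_size / 32` comments note, the darknet19 backbone downsamples by a factor of 32, so the demo's 1216 × 352 input yields the 38 × 11 output grid above. A one-line check for any candidate size:

```python
# out_size is simply inp_size integer-divided by the stride of 32.
inp_size = (1216, 352)
out_size = tuple(s // 32 for s in inp_size)
assert out_size == (38, 11)
```
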
/cfgs/config_v2.py:
--------------------------------------------------------------------------------
1 | import yaml
2 |
3 |
4 | def add_cfg(cfg, yaml_file):
5 |     # Read a YAML file and merge its keys into cfg
6 | try:
7 | cfg.update(yaml.load(open(yaml_file, 'r')))
8 | except Exception:
9 | print('Error: cannot parse cfg', yaml_file)
10 |         raise  # re-raise with the original traceback
11 |
12 |
13 | def load_cfg_yamls(yaml_files):
14 | cfg = dict()
15 | for yf in yaml_files:
16 | add_cfg(cfg, yf)
17 | return cfg
18 |
--------------------------------------------------------------------------------
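
A hypothetical call (the file paths are illustrative) showing how `load_cfg_yamls` merges a dataset config with an experiment config; keys from later files override earlier ones:

```python
from cfgs.config_v2 import load_cfg_yamls

# Base dataset config first, then the experiment overrides.
cfg = load_cfg_yamls(['cfgs/config_kitti.yaml',
                      'cfgs/exps/kitti/kitti_baseline.yaml'])
print(cfg['exp_name'], cfg['label_names'], cfg['inp_size'])
```
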
/cfgs/config_voc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | # trained model
5 | h5_fname = 'yolo-voc.weights.h5'
6 |
7 | # VOC
8 | label_names = ('aeroplane', 'bicycle', 'bird', 'boat',
9 | 'bottle', 'bus', 'car', 'cat', 'chair',
10 | 'cow', 'diningtable', 'dog', 'horse',
11 | 'motorbike', 'person', 'pottedplant',
12 | 'sheep', 'sofa', 'train', 'tvmonitor')
13 | num_classes = len(label_names)
14 |
15 | #anchors = np.asarray([(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)], dtype=np.float)
16 | # anchors = np.asarray([[1.3221, 1.73145], [3.19275, 4.00944], [5.05587, 8.09892], [9.47112, 4.84053], [11.2364, 10.0071]])
17 | #anchors = np.asarray([[1.08, 1.19], [1.32, 1.73], [3.19, 4.01], [3.42, 4.41], [5.05, 8.09],
18 | # [6.63, 11.38], [9.47, 4.84], [11.23, 10.00], [16.62, 10.52]])
19 | anchors = np.asarray([[1., 1.], [3., 3.], [5., 5.], [9., 9.], [13., 13.]])
20 | num_anchors = len(anchors)
21 |
22 |
--------------------------------------------------------------------------------
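
The anchor priors in these configs are (width, height) pairs in units of the 32-pixel output grid, as is standard for YOLOv2; multiplying by the stride gives box sizes in pixels. A quick sketch:

```python
import numpy as np

# The default YOLO priors from the configs above, in grid-cell units.
anchors = np.asarray([[1.08, 1.19], [3.42, 4.41], [6.63, 11.38],
                      [9.42, 5.11], [16.62, 10.52]])
print(anchors * 32)  # e.g. the smallest prior is roughly 35 x 38 px
```
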
/cfgs/config_voc.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | # trained model
4 | h5_fname: /home/cory/project/yolo2-pytorch/models/yolo-voc.weights.h5
5 |
6 | # VOC
7 | label_names: [aeroplane, bicycle, bird, boat,
8 | bottle, bus, car, cat, chair,
9 | cow, diningtable, dog, horse,
10 | motorbike, person, pottedplant,
11 | sheep, sofa, train, tvmonitor]
12 | num_classes: 20
13 |
14 | # YOLO priors
15 | anchors: [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]]
16 | num_anchors: 5
17 |
--------------------------------------------------------------------------------
/cfgs/exps/crowdai/crowdai_baseline.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: crowdai_baseline
4 | dataset_name: crowdai
5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/crowdai_baseline
7 | network_size_rand_period: 10
8 | inp_size: [992, 608] # img = 1920 x 1200
9 | out_size: [31, 19] # inp_size / 32
10 | inp_size_candidates: [[1056, 672], [992, 608], [960, 576], [928, 544]]
11 | optimizer: SGD # 'SGD, Adam'
12 | opt_param: all # 'all, conv345'
13 |
14 | start_step: 0
15 | lr_epoch: [0, 40, 80]
16 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
17 | max_epoch: 200
18 |
19 | # SGD only
20 | weight_decay: 0.0005
21 | momentum: 0.9
22 |
23 | # for training yolo2
24 | object_scale: 5.
25 | noobject_scale: 1.
26 | class_scale: 1.
27 | coord_scale: 1.
28 | iou_thresh: 0.6
29 |
30 | # dataset
31 | train_images: /home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_train_images.txt
32 | train_labels: /home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_train_labels.txt
33 | val_images: /home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_val_images.txt
34 | val_labels: /home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_val_labels.txt
35 | batch_size: 1
36 | train_batch_size: 8
37 | val_batch_size: 8
38 |
39 | # log & display
40 | disp_interval: 10
41 |
--------------------------------------------------------------------------------
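
A sketch of what `network_size_rand_period` together with `inp_size_candidates` presumably controls (an assumption based on the key names, not code from the repo): YOLOv2-style multi-scale training, where the input resolution is re-sampled from the candidate list every `period` batches.

```python
import random

def multi_scale_sizes(num_batches, period, candidates, seed=0):
    # Assumed semantics: re-sample the training resolution from
    # `candidates` every `period` batches and keep it in between.
    rng = random.Random(seed)
    size = candidates[0]
    for b in range(num_batches):
        if b % period == 0:
            size = rng.choice(candidates)
        yield size

sizes = list(multi_scale_sizes(30, 10, [[1056, 672], [992, 608], [960, 576]]))
print(sizes[0], sizes[10], sizes[20])  # resolution changes every 10 batches
```
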
/cfgs/exps/detrac/detrac_baseline.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: detrac_baseline
4 | dataset_name: detrac
5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/detrac_baseline
7 | network_size_rand_period: 10
8 | inp_size: [960, 512]
9 | out_size: [30, 16] # inp_size / 32
10 | inp_size_candidates: [[960, 512]]
11 | optimizer: SGD # 'SGD, Adam'
12 | opt_param: all # 'all, conv345'
13 |
14 | start_step: 0
15 | lr_epoch: [0, 40, 80]
16 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
17 | max_epoch: 200
18 |
19 | # SGD only
20 | weight_decay: 0.0005
21 | momentum: 0.9
22 |
23 | # for training yolo2
24 | object_scale: 5.
25 | noobject_scale: 1.
26 | class_scale: 1.
27 | coord_scale: 1.
28 | iou_thresh: 0.6
29 |
30 | # dataset
31 | train_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_images.txt
32 | train_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_labels.txt
33 | val_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_images.txt
34 | val_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_labels.txt
35 | batch_size: 1
36 | train_batch_size: 16
37 | val_batch_size: 8
38 |
39 | # log & display
40 | disp_interval: 10
41 |
--------------------------------------------------------------------------------
/cfgs/exps/detrac/detrac_baseline_2.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: detrac_baseline_2
4 | dataset_name: detrac
5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/detrac_baseline_2
7 | network_size_rand_period: 10
8 | inp_size: [960, 512]
9 | out_size: [30, 16] # inp_size / 32
10 | inp_size_candidates: [[960, 512]]
11 | optimizer: SGD # 'SGD, Adam'
12 | opt_param: all # 'all, conv345'
13 |
14 | start_step: 0
15 | lr_epoch: [0, 40, 80]
16 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
17 | max_epoch: 200
18 |
19 | # SGD only
20 | weight_decay: 0.0005
21 | momentum: 0.9
22 |
23 | # for training yolo2
24 | object_scale: 5.
25 | noobject_scale: 1.
26 | class_scale: 1.
27 | coord_scale: 1.
28 | iou_thresh: 0.6
29 |
30 | # dataset
31 | train_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_images_2.txt
32 | train_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_labels_2.txt
33 | val_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_images_2.txt
34 | val_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_labels_2.txt
35 | batch_size: 1
36 | train_batch_size: 16
37 | val_batch_size: 8
38 |
39 | # log & display
40 | disp_interval: 10
41 |
--------------------------------------------------------------------------------
/cfgs/exps/detrac/detrac_flow_center_w01.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: detrac_flow_center_w01
4 | dataset_name: detrac
5 | pretrained_model: /media/cory/data2/yolo2_models/detrac_baseline/detrac_baseline_20.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/detrac_flow_center_w01
7 | network_size_rand_period: 10
8 | inp_size: [960, 512]
9 | out_size: [30, 16] # inp_size / 32
10 | inp_size_candidates: [[960, 512]]
11 | optimizer: SGD # 'SGD, Adam'
12 | opt_param: all # 'all, conv345'
13 |
14 | start_step: 0
15 | lr_epoch: [0]
16 | lr_val: [!!float 1e-5]
17 | max_epoch: 100
18 |
19 | # SGD only
20 | weight_decay: 0.0005
21 | momentum: 0.9
22 |
23 | # for training yolo2
24 | object_scale: 5.
25 | noobject_scale: 1.
26 | class_scale: 1.
27 | coord_scale: 1.
28 | iou_thresh: 0.6
29 |
30 | # dataset
31 | train_images: /home/cory/project/yolo2-pytorch/flow/detrac_w01_images.txt
32 | train_labels: /home/cory/project/yolo2-pytorch/flow/detrac_w01_labels.txt
33 | val_images: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_images.txt
34 | val_labels: /home/cory/project/yolo2-pytorch/train_data/detrac/detrac_val_labels.txt
35 | batch_size: 1
36 | train_batch_size: 16
37 | val_batch_size: 8
38 |
39 | # log & display
40 | disp_interval: 10
41 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_baseline.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_baseline
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_baseline
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 40, 80]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 20
43 |
--------------------------------------------------------------------------------
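
The `lr_epoch` / `lr_val` pairs in this and the following experiment YAMLs describe a step schedule: the learning rate drops to the next value at each listed epoch boundary. A minimal sketch of how such a schedule is typically resolved (an assumption about the trainer, not code from the repo):

```python
def lr_at_epoch(epoch, lr_epoch, lr_val):
    # Step schedule: use the value of the last boundary already reached.
    lr = lr_val[0]
    for boundary, value in zip(lr_epoch, lr_val):
        if epoch >= boundary:
            lr = value
    return lr

assert lr_at_epoch(10, [0, 40, 80], [1e-3, 1e-4, 1e-5]) == 1e-3
assert lr_at_epoch(50, [0, 40, 80], [1e-3, 1e-4, 1e-5]) == 1e-4
assert lr_at_epoch(90, [0, 40, 80], [1e-3, 1e-4, 1e-5]) == 1e-5
```
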
/cfgs/exps/kitti/kitti_baseline_v3.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_baseline_v3
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_baseline_v3
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 | optimizer: SGD # 'SGD, Adam'
13 | opt_param: all # 'all, conv345'
14 |
15 | start_step: 0
16 | lr_epoch: [0, 40, 80]
17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
18 | max_epoch: 200
19 |
20 | # SGD only
21 | weight_decay: 0.0005
22 | momentum: 0.9
23 |
24 | # for training yolo2
25 | object_scale: 5.
26 | noobject_scale: 1.
27 | class_scale: 1.
28 | coord_scale: 1.
29 | iou_thresh: 0.6
30 |
31 | # dataset
32 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
33 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
34 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
35 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
36 | batch_size: 1
37 | train_batch_size: 12
38 | val_batch_size: 12
39 |
40 | # log & display
41 | disp_interval: 10
42 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_baseline_v3_yf.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_baseline_v3_yf
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_baseline_v3_yf
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 | optimizer: YF # 'SGD, Adam, YF'
13 | opt_param: all # 'all, conv345'
14 |
15 | start_step: 0
16 | lr_epoch: [0, 40, 80]
17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
18 | max_epoch: 200
19 |
20 | # SGD only
21 | weight_decay: 0.0005
22 | momentum: 0.9
23 |
24 | # for training yolo2
25 | object_scale: 5.
26 | noobject_scale: 1.
27 | class_scale: 1.
28 | coord_scale: 1.
29 | iou_thresh: 0.6
30 |
31 | # dataset
32 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
33 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
34 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
35 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
36 | batch_size: 1
37 | train_batch_size: 12
38 | val_batch_size: 12
39 |
40 | # log & display
41 | disp_interval: 10
42 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_joint_flow.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_joint_flow
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_joint_flow
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 | optimizer: SGD # 'SGD, Adam'
13 | opt_param: all # 'all, conv345'
14 |
15 | start_step: 0
16 | lr_epoch: [0, 40, 80]
17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
18 | max_epoch: 200
19 |
20 | # SGD only
21 | weight_decay: 0.0005
22 | momentum: 0.9
23 |
24 | # for training yolo2
25 | object_scale: 5.
26 | noobject_scale: 1.
27 | class_scale: 1.
28 | coord_scale: 1.
29 | iou_thresh: 0.6
30 |
31 | # dataset
32 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images.txt
33 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt
34 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
35 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
36 | batch_size: 1
37 | train_batch_size: 12
38 | val_batch_size: 12
39 |
40 | # log & display
41 | disp_interval: 10
42 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_joint_flow_low_lr.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_joint_flow_low_lr
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_joint_flow_low_lr
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 | optimizer: SGD # 'SGD, Adam'
13 | opt_param: all # 'all, conv345'
14 |
15 | start_step: 0
16 | lr_epoch: [0, 20, 40]
17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
18 | max_epoch: 200
19 |
20 | # SGD only
21 | weight_decay: 0.0005
22 | momentum: 0.9
23 |
24 | # for training yolo2
25 | object_scale: 5.
26 | noobject_scale: 1.
27 | class_scale: 1.
28 | coord_scale: 1.
29 | iou_thresh: 0.6
30 |
31 | # dataset
32 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images.txt
33 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt
34 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
35 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
36 | batch_size: 1
37 | train_batch_size: 12
38 | val_batch_size: 12
39 |
40 | # log & display
41 | disp_interval: 10
42 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_1.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_1
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_1
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1216, 352]]
11 |
12 | optimizer: SGD # 'SGD, Adam'
13 | opt_param: all # 'all, conv345'
14 |
15 | start_step: 0
16 | lr_epoch: [0, 60, 90]
17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
18 | max_epoch: 200
19 |
20 | # SGD only
21 | weight_decay: 0.0005
22 | momentum: 0.9
23 |
24 | # for training yolo2
25 | object_scale: 5.
26 | noobject_scale: 1.
27 | class_scale: 1.
28 | coord_scale: 1.
29 | iou_thresh: 0.6
30 |
31 | # dataset
32 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
33 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
34 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
35 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
36 | batch_size: 1
37 | train_batch_size: 16
38 |
39 | # log & display
40 | disp_interval: 10
41 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_fixed
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_2_fixed
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 60, 90]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 20
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_bbox_merge.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_bbox_merge
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_bbox_merge
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt
35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_cls
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_bbox_merge.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_cls_bbox_merge
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_bbox_merge
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt
35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_flownet2.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_cls_flownet2
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_flownet2
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_flownet2.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_merge.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_cls_merge
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_merge
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt
35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_merge_2x.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_cls_merge_2x
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_merge_2x
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt
35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_cls_merge_ped.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_cls_merge_ped
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_cls_merge_ped
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt
35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_flownet2.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_flownet2
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_flownet2
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_flownet2.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_flownet2_joint.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_flownet2_joint
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_flownet2_joint
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_flownet2_joint.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_half.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_half
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_half
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_half.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_ft_iou_merge.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_ft_iou_merge
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_iou_merge
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt
35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_joint_ft.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_joint_ft
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_joint_ft
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_joint_ft_low_lr.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_joint_ft_low_lr
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_joint_ft_low_lr
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 354], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-6]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_center_joint_ft_w10.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_center_joint_ft_w10
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_joint_ft_w10
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 352], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_images_w10.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_joint_center_labels_w10.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_dis.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_dis
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_2_flow_dis
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 352], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 20, 40]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_joint_dis_flow_images.txt
34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_joint_dis_flow_labels.txt
35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 20
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_ft.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_ft
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_2_flow_ft
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 352], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_flow_images.txt
34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_ft_cls_bbox_merge.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_ft_cls_bbox_merge
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_ft_cls_bbox_merge
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 352], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt
35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_ft_cls_merge.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_ft_cls_merge
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_ft_cls_merge
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 352], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/w01_center_labels_merge.txt
35 | # train_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt
36 | # train_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt
37 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
38 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_ft_std_2_5.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_ft_std_2_5
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_ft_std_2_5
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 352], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/flow_std_labels.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_ft_std_2_5_merge.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_ft_std_2_5_merge
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_ft_std_2_5_merge
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 352], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0]
18 | lr_val: [!!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/project/yolo2-pytorch/flow/w01_images_merge.txt
34 | train_labels: /home/cory/project/yolo2-pytorch/flow/flow_std_labels_merge.txt
35 | val_images: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 16
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 10
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti/kitti_new_2_flow_spy.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: kitti_new_2_flow_spy
4 | dataset_name: kitti
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/kitti_new_2_flow_spy
7 | network_size_rand_period: 10
8 | inp_size: [1216, 352]
9 | out_size: [38, 11] # inp_size / 32
10 | inp_size_candidates: [[1024, 320], [1024, 384], [1120, 352], [1120, 384],
11 | [1184, 320], [1216, 320], [1216, 352], [1248, 352]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 20, 40]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | train_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_joint_spy_flow_images.txt
34 | train_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_joint_dis_flow_labels.txt
35 | val_images: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt
36 | val_labels: /home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt
37 | batch_size: 1
38 | train_batch_size: 12
39 | val_batch_size: 8
40 |
41 | # log & display
42 | disp_interval: 20
43 |
--------------------------------------------------------------------------------
/cfgs/exps/kitti_ft_exp3.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | exp_name = 'kitti_ft_exp3_new'
4 | dataset_name = 'kitti'
5 | # pretrained_fname = '/home/cory/yolo2-pytorch/models/yolo-voc.weights.h5'
6 | # pretrained_fname = '/home/cory/yolo2-pytorch/models/training/voc0712_new_2/voc0712_new_2_160.h5'
7 | pretrained_fname = '/home/cory/yolo2-pytorch/data/darknet19.weights.npz'
8 |
9 | network_size_rand_period = 10
10 | inp_size_candidates = [(1216, 352)]
11 | inp_size = np.array([1216, 352], dtype=np.int) # w, h
12 | # inp_size = np.array([992, 544], dtype=np.int)
13 | out_size = inp_size // 32  # floor division: integer (w, h) grid at stride 32
14 |
15 |
16 | optimizer = 'SGD' # 'SGD, Adam'
17 | opt_param = 'all' # 'all, conv345'
18 |
19 | start_step = 0
20 | lr_epoch = (0, 60, 90)
21 | lr_val = (1e-3, 1e-4, 1e-5)
22 |
23 | max_epoch = 200
24 |
25 | # SGD only
26 | weight_decay = 0.0005
27 | momentum = 0.9
28 |
29 | # for training yolo2
30 | object_scale = 5.
31 | noobject_scale = 1.
32 | class_scale = 1.
33 | coord_scale = 1.
34 | iou_thresh = 0.6
35 |
36 | # dataset
37 | imdb_train = 'voc_2012_trainval'
38 | imdb_test = 'voc_2007_test'
39 | train_images = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_train_images.txt'
40 | train_labels = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt'
41 | val_images = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt'
42 | val_labels = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_val_labels.txt'
43 | batch_size = 1
44 | train_batch_size = 16
45 |
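The out_size values scattered through these configs are just the input resolution divided by the darknet stride of 32, which is why the floor division above keeps the grid size integral:

    inp_w, inp_h = 1216, 352
    out_w, out_h = inp_w // 32, inp_h // 32
    print(out_w, out_h)  # 38 11, matching 'out_size: [38, 11]' in the KITTI YAMLs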
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_anchor.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_anchor
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_anchor
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 20, 40]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 100
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.0
27 | noobject_scale: 1.0
28 | class_scale: 1.0
29 | coord_scale: 1.0
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 12
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 |
46 | anchors: [[1.3221, 1.73145], [3.19275, 4.00944], [5.05587, 8.09892], [9.47112, 4.84053], [11.2364, 10.0071]]
47 |
48 |
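The anchors key lists widths and heights in output-grid cells. Assuming the usual YOLOv2 convention, multiplying by the stride of 32 gives their size in input pixels:

    stride = 32
    anchors = [[1.3221, 1.73145], [3.19275, 4.00944], [5.05587, 8.09892],
               [9.47112, 4.84053], [11.2364, 10.0071]]
    for w, h in anchors:
        print('%.0f x %.0f px' % (w * stride, h * stride))  # 42 x 55 px, 102 x 128 px, ...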
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_baseline.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_baseline
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_baseline
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 20, 40]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 12
41 | val_batch_size: 12
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_baseline_v3.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_baseline_v3
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_baseline_v3
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 20, 40]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 12
41 | val_batch_size: 12
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_baseline_v3_rand.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_baseline_v3_rand
4 | dataset_name: voc
5 | pretrained_model: /home/cory/project/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/project/yolo2-pytorch/models/training/voc0712_baseline_v3_rand
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 | optimizer: SGD # 'SGD, Adam'
13 | opt_param: all # 'all, conv345'
14 |
15 | start_step: 0
16 | lr_epoch: [0, 20, 40]
17 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
18 | max_epoch: 200
19 |
20 | # SGD only
21 | weight_decay: 0.0005
22 | momentum: 0.9
23 |
24 | # for training yolo2
25 | object_scale: 5.
26 | noobject_scale: 1.
27 | class_scale: 1.
28 | coord_scale: 1.
29 | iou_thresh: 0.6
30 |
31 | # dataset
32 | imdb_train: voc0712_trainval
33 | imdb_test: voc07_test
34 | train_images: /home/cory/project/yolo2-pytorch/train_data/voc/voc_train_images.txt
35 | train_labels: /home/cory/project/yolo2-pytorch/train_data/voc/voc_train_labels.txt
36 | val_images: /home/cory/project/yolo2-pytorch/train_data/voc/voc_test_images.txt
37 | val_labels: /home/cory/project/yolo2-pytorch/train_data/voc/voc_test_labels.txt
38 | batch_size: 1
39 | train_batch_size: 16
40 | val_batch_size: 16
41 |
42 | # log & display
43 | disp_interval: 10
44 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_box_mask_0.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_box_mask_0
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_box_mask_0
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 60, 90]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 201
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 16
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_low_lr.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_low_lr
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_low_lr
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 20, 30, 40]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5, !!float 1e-6]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 12
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_mask.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_mask
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/models/training/voc0712_template/voc0712_template_100.h5
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_mask
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 100]
18 | lr_val: [!!float 1e-5, !!float 1e-6]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 12
41 |
42 | # log & display
43 | disp_interval: 10
44 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_multiple_anchors.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_multiple_anchors
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_multiple_anchors
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 20, 100]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 12
41 | val_batch_size: 12
42 |
43 | # log & display
44 | disp_interval: 10
45 |
46 | anchors: [[1.08, 1.19], [1.32, 1.73], [3.19, 4.01], [3.42, 4.41], [5.05, 8.09],
47 | [6.63, 11.38], [9.47, 4.84], [11.23, 10.00], [16.62, 10.52]]
48 | num_anchors: 9
49 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_new.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | exp_name = 'voc0712_new'
4 | dataset_name = 'voc'
5 | pretrained_fname = '/home/cory/yolo2-pytorch/data/darknet19.weights.npz'
6 |
7 | network_size_rand_period = 10
8 | inp_size_candidates = [(320, 320), (352, 352), (384, 384), (416, 416), (448, 448),
9 | (480, 480), (512, 512), (544, 544), (576, 576), (608, 608)]
10 | inp_size = np.array([416, 416], dtype=np.int)
11 | out_size = inp_size // 32  # floor division: integer (w, h) grid at stride 32
12 |
13 | optimizer = 'SGD' # 'SGD, Adam'
14 | opt_param = 'all' # 'all, conv345'
15 |
16 | start_step = 0
17 | lr_epoch = (0, 60, 90)
18 | lr_val = (1E-3, 1E-4, 1E-5)
19 |
20 | max_epoch = 200
21 |
22 | # SGD only
23 | weight_decay = 0.0005
24 | momentum = 0.9
25 |
26 | # for training yolo2
27 | object_scale = 5.
28 | noobject_scale = 1.
29 | class_scale = 1.
30 | coord_scale = 1.
31 | iou_thresh = 0.6
32 |
33 | # dataset
34 | imdb_train = 'voc_2012_trainval'
35 | imdb_test = 'voc_2007_test'
36 | train_images = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt'
37 | train_labels = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt'
38 | val_images = '/home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt'
39 | val_labels = '/home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt'
40 | batch_size = 1
41 | train_batch_size = 16
42 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_obj_scale.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_obj_scale
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/models/yolo-voc.weights.h5
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_obj_scale
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 60]
18 | lr_val: [!!float 1e-6, !!float 1e-7]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 1.
27 | noobject_scale: 0.5
28 | class_scale: 1.
29 | coord_scale: 5.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 | log_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/train.log
46 | check_point_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/check_point.txt
47 |
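Unlike the other configs in this directory (object_scale 5, coord_scale 1), this experiment adopts the original YOLO paper's weighting: lambda_coord = 5 and lambda_noobj = 0.5. Schematically, and only as a sketch of how such scales usually enter a YOLOv2-style loss (not this repo's exact training code):

    # placeholder loss terms; in training these come from the network output
    coord_loss = obj_conf_loss = noobj_conf_loss = cls_loss = 0.0
    coord_scale, object_scale, noobject_scale, class_scale = 5., 1., 0.5, 1.
    loss = (coord_scale * coord_loss            # box regression for matched anchors
            + object_scale * obj_conf_loss      # confidence at cells containing objects
            + noobject_scale * noobj_conf_loss  # confidence at empty cells
            + class_scale * cls_loss)           # per-class probabilities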
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_obj_scale_ft.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_obj_scale_ft
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/models/training/voc0712_new_2/voc0712_new_2_160.h5
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_obj_scale_ft
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 30]
18 | lr_val: [!!float 1e-5, !!float 1e-6]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 1.
27 | noobject_scale: 0.5
28 | class_scale: 1.
29 | coord_scale: 5.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 | val_batch_size: 8
42 |
43 | # log & display
44 | disp_interval: 10
45 | log_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/train.log
46 | check_point_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/check_point.txt
47 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_one_anchor.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_one_anchor
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_one_anchor
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 20, 40]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 12
41 | val_batch_size: 12
42 |
43 | # log & display
44 | disp_interval: 10
45 |
46 | anchors: [[1., 1.], [3., 3.], [5., 5.], [9., 9.], [13., 13.]]
47 | num_anchors: 5
48 |
49 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_overfit.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_overfit
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_overfit
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 100, 400]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 2000
20 |
21 | # SGD only
22 | weight_decay: 0.0
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_10_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_10_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 10
41 | val_batch_size: 10
42 |
43 | # log & display
44 | disp_interval: 1
45 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_pred_raw.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_pred_raw
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | # pretrained_model: /home/cory/yolo2-pytorch/models/training/voc0712_low_lr/voc0712_low_lr_30.h5
7 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_pred_raw
8 | network_size_rand_period: 10
9 | inp_size: [416, 416]
10 | out_size: [13, 13] # inp_size / 32
11 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
12 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
13 |
14 | optimizer: SGD # 'SGD, Adam'
15 | opt_param: all # 'all, conv345'
16 |
17 | start_step: 0
18 | lr_epoch: [0, 20, 40]
19 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
20 | max_epoch: 200
21 |
22 | # SGD only
23 | weight_decay: 0.0005
24 | momentum: 0.9
25 |
26 | # for training yolo2
27 | object_scale: 5.
28 | noobject_scale: 1.
29 | class_scale: 1.
30 | coord_scale: 1.
31 | iou_thresh: 0.6
32 |
33 | # dataset
34 | imdb_train: voc0712_trainval
35 | imdb_test: voc07_test
36 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
37 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
38 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
39 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
40 | batch_size: 1
41 | train_batch_size: 16
42 | val_batch_size: 12
43 |
44 | # log & display
45 | disp_interval: 10
46 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_template.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_template
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/data/darknet19.weights.npz
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_template
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 60, 90]
18 | lr_val: [!!float 1e-3, !!float 1e-4, !!float 1e-5]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 12
41 | val_batch_size: 12
42 |
43 | # log & display
44 | disp_interval: 10
45 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712/voc0712_trainval_ft_debug2.yaml:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | exp_name: voc0712_ft
4 | dataset_name: voc
5 | pretrained_model: /home/cory/yolo2-pytorch/models/yolo-voc.weights.h5
6 | train_output_dir: /home/cory/yolo2-pytorch/models/training/voc0712_ft
7 | network_size_rand_period: 10
8 | inp_size: [416, 416]
9 | out_size: [13, 13] # inp_size / 32
10 | inp_size_candidates: [[320, 320], [352, 352], [384, 384], [416, 416], [448, 448],
11 | [480, 480], [512, 512], [544, 544], [576, 576], [608, 608]]
12 |
13 | optimizer: SGD # 'SGD, Adam'
14 | opt_param: all # 'all, conv345'
15 |
16 | start_step: 0
17 | lr_epoch: [0, 60]
18 | lr_val: [!!float 1e-6, !!float 1e-7]
19 | max_epoch: 200
20 |
21 | # SGD only
22 | weight_decay: 0.0005
23 | momentum: 0.9
24 |
25 | # for training yolo2
26 | object_scale: 5.
27 | noobject_scale: 1.
28 | class_scale: 1.
29 | coord_scale: 1.
30 | iou_thresh: 0.6
31 |
32 | # dataset
33 | imdb_train: voc0712_trainval
34 | imdb_test: voc07_test
35 | train_images: /home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt
36 | train_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt
37 | val_images: /home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt
38 | val_labels: /home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt
39 | batch_size: 1
40 | train_batch_size: 16
41 |
42 | # log & display
43 | disp_interval: 10
44 | log_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/train.log
45 | check_point_file: /home/cory/yolo2-pytorch/models/training/voc0712_ft/check_point.txt
46 |
--------------------------------------------------------------------------------
/cfgs/exps/voc0712_new_2.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | exp_name = 'voc0712_new_2'
4 | dataset_name = 'voc'
5 | pretrained_fname = '/home/cory/yolo2-pytorch/data/darknet19.weights.npz'
6 |
7 | network_size_rand_period = 10
8 | # inp_size_candidates = [(320, 320), (352, 352), (384, 384), (416, 416), (448, 448)]
9 | inp_size_candidates = [(320, 320), (352, 352), (384, 384), (416, 416), (448, 448),
10 | (480, 480), (512, 512), (544, 544), (576, 576), (608, 608)]
11 | inp_size = np.array([416, 416], dtype=np.int)
12 | out_size = inp_size // 32  # floor division: integer (w, h) grid at stride 32
13 |
14 | optimizer = 'SGD' # 'SGD, Adam'
15 | opt_param = 'all' # 'all, conv345'
16 |
17 | start_step = 0
18 | lr_epoch = (0, 60, 90)
19 | lr_val = (1e-3, 1e-4, 1e-5)
20 |
21 | max_epoch = 300
22 |
23 | # SGD only
24 | weight_decay = 0.0005
25 | momentum = 0.9
26 |
27 | # for training yolo2
28 | object_scale = 5.
29 | noobject_scale = 1.
30 | class_scale = 1.
31 | coord_scale = 1.
32 | iou_thresh = 0.6
33 |
34 | # dataset
35 | imdb_train = 'voc_2012_trainval'
36 | imdb_test = 'voc_2007_test'
37 | train_images = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt'
38 | train_labels = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt'
39 | val_images = '/home/cory/yolo2-pytorch/train_data/voc/voc_test_images.txt'
40 | val_labels = '/home/cory/yolo2-pytorch/train_data/voc/voc_test_labels.txt'
41 | batch_size = 1
42 | train_batch_size = 16 # epoch 1~200 batch_size 32
43 |
--------------------------------------------------------------------------------
/datasets/DataLoaderX.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import torch
4 | import torch.utils.data as data
5 | from torch.autograd import Variable
6 |
7 | from datasets.DetectionDataset import DetectionDataset
8 | from datasets.DataLoaderIterX import DataLoaderIterX
9 |
10 |
11 | # workaround: modify /usr/local/lib/python3.5/dist-packages/torch/utils/data/__init__.py
12 | # and add this line: from .dataloader import DataLoaderIter
13 | # so that data.DataLoaderIter becomes publicly available to inherit from:
14 | # class DataLoaderIterX(data.DataLoaderIter):
15 | #     pass
16 |
17 |
18 | class DataLoaderX(data.DataLoader):
19 | def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, num_workers=1,
20 | pin_memory=False, drop_last=False):
21 | super(DataLoaderX, self).__init__(dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler,
22 | num_workers=num_workers, pin_memory=pin_memory, drop_last=drop_last)
23 |
24 | def __iter__(self):
25 | return DataLoaderIterX(self)
26 |
27 |
28 | def test_detection_dataset():
29 | from cfgs.config_v2 import add_cfg
30 | dataset_yaml = '/home/cory/project/yolo2-pytorch/cfgs/config_detrac.yaml'
31 | exp_yaml = '/home/cory/project/yolo2-pytorch/cfgs/exps/detrac/detrac_baseline.yaml'
32 | cfg = dict()
33 | add_cfg(cfg, dataset_yaml)
34 | add_cfg(cfg, exp_yaml)
35 | dataset = DetectionDataset(cfg)
36 | num_workers = 4
37 | batch_size = 16
38 | dataloader = DataLoaderX(dataset, batch_size=batch_size,
39 | shuffle=True, num_workers=num_workers)
40 |
41 | t0 = time.time()
42 | for i, data in enumerate(dataloader):
43 | if i > 100:
44 | break
45 |
46 | # get the inputs
47 | inputs, labels = data
48 | print(i, inputs.size(), labels.size())
49 |
50 | # wrap them in Variable
51 | inputs, labels = Variable(inputs.cuda()), labels
52 | import numpy as np
53 | assert np.sum(inputs.data.cpu().numpy()) > 0
54 | t1 = time.time()
55 | print(t1 - t0)
56 |
57 |
58 | if __name__ == '__main__':
59 | test_detection_dataset()
60 |
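A less invasive alternative to patching torch/utils/data/__init__.py as described in the comment above, assuming a PyTorch release of that era where torch/utils/data/dataloader.py still defines DataLoaderIter, is to import the class straight from its defining module:

    # no edit of site-packages needed
    from torch.utils.data.dataloader import DataLoaderIter

    class DataLoaderIterX(DataLoaderIter):
        pass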
--------------------------------------------------------------------------------
/datasets/DetectionDataset.py:
--------------------------------------------------------------------------------
1 | import PIL.Image as Image
2 | import torch
3 | import torch.utils.data as data
4 | from torch.autograd import Variable
5 |
6 | from datasets.DetectionDatasetHelper import *
7 |
8 |
9 | class DetectionDataset(data.Dataset):
10 | def __init__(self, cfg, mode='train'):
11 | self.cfg = cfg
12 | self.mode = mode
13 | if self.mode == 'train':
14 | self.batch_size = cfg['train_batch_size']
15 | self.image_list_file = cfg['train_images']
16 | self.label_list_file = cfg['train_labels']
17 | else:
18 | self.batch_size = cfg['val_batch_size']
19 | self.image_list_file = cfg['val_images']
20 | self.label_list_file = cfg['val_labels']
21 |
22 | self.image_paths = list()
23 | self.annotations = list()
24 | self.image_indexes = list()
25 | self.classes_ = cfg['label_names']
26 | self.load_dataset(self.classes_)
27 |
28 | # use cfg for default input size, but it will change every 10 batch (refer to DataLoaderX)
29 | self.inp_size = cfg['inp_size']
30 |
31 | def __getitem__(self, index):
32 | raise NotImplementedError
33 |
34 | def __len__(self):
35 | return len(self.image_paths)
36 |
37 | def get_train_data(self, index, network_size):
38 | img = Image.open(self.image_paths[index]).convert('RGB')
39 | gt = self.annotations[index]
40 | gt.update({'img_size': img.size})
41 |
42 | # random transforms (scale, color, flip)
43 | im, boxes = affine_transform(img, gt['boxes'], network_size)
44 | gt.update({'boxes': boxes})
45 | target_np = encode_to_np(gt)
46 | im_tensor = torch.from_numpy(im.transpose((2, 0, 1))).float()
47 | return im_tensor, target_np
48 |
49 | def input_size(self):
50 | return self.inp_size
51 |
52 | def change_input_size_rand(self):
53 | # call this function to change input size randomly from cfg['inp_size_candidates']
54 | # random change network size
55 | rand_id = np.random.randint(0, len(self.cfg['inp_size_candidates']))
56 | rand_network_size = self.cfg['inp_size_candidates'][rand_id]
57 | self.inp_size = rand_network_size
58 | # print('change_input_size_rand', rand_network_size)
59 |
60 | def load_dataset(self, label_map):
61 | remove_id_list = list()
62 | try:
63 | img_file = open(self.image_list_file)
64 | self.image_paths = [line.strip() for line in img_file.readlines()]
65 | gt_file = open(self.label_list_file)
66 | for fi, label_file_name in enumerate(gt_file.readlines()):
67 | label_file_name = label_file_name.strip()
68 | label_dict = parse_label_file(label_file_name, label_map)
69 | if not label_dict['has_label']:
70 | remove_id_list.append(fi)
71 | self.annotations.append(label_dict)
72 | except Exception as e:
73 | raise e
74 |
75 | self.image_paths = np.delete(self.image_paths, remove_id_list)
76 | self.annotations = np.delete(self.annotations, remove_id_list)
77 | print('dataset size =', len(self.image_paths), ' (delete', len(remove_id_list), ')')
78 | assert len(self.image_paths) == len(self.annotations)
79 | self.image_indexes = range(len(self.image_paths))
80 |
81 |
82 | def test_detection_dataset():
83 | from cfgs.config_v2 import add_cfg
84 | dataset_yaml = '/home/cory/project/yolo2-pytorch/cfgs/config_kitti.yaml'
85 | exp_yaml = '/home/cory/project/yolo2-pytorch/cfgs/exps/kitti/kitti_baseline_v3.yaml'
86 | cfg = dict()
87 | add_cfg(cfg, dataset_yaml)
88 | add_cfg(cfg, exp_yaml)
89 | dataset = DetectionDataset(cfg)
90 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=16,
91 | shuffle=True, num_workers=4)
92 | for i, data in enumerate(dataloader):
93 | # get the inputs
94 | print(i)
95 | inputs, labels = data
96 | print(inputs.size(), labels.size())
97 |
98 | # wrap them in Variable
99 | inputs, labels = Variable(inputs), labels
100 |
101 |
102 | if __name__ == '__main__':
103 | test_detection_dataset()
104 |
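DetectionDataset pairs with DataLoaderIterX for multi-scale training: every network_size_rand_period batches the input resolution is re-rolled from cfg['inp_size_candidates']. A sketch of the driving side, assuming DataLoaderIterX hooks the dataset in roughly this way:

    period = cfg['network_size_rand_period']  # 10 in the configs above
    for i, (inputs, labels) in enumerate(dataloader):
        if i % period == 0:
            dataset.change_input_size_rand()  # later batches are built at the new size
        # ... training step ...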
--------------------------------------------------------------------------------
/datasets/DetectionDatasetHelper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from utils.im_transform import imcv2_affine_trans, imcv2_recolor
4 |
5 |
6 | def parse_label_file(label_file_path, label_map):
7 | gt_classes = list()
8 | boxes = list()
9 | has_label = False
10 | with open(label_file_path) as label_file:
11 | for line in label_file.readlines():
12 | if line == '\n':
13 | continue
14 | values = line.strip().split(' ')
15 | label = values[0]
16 |
17 | try:
18 | label_id = label_map.index(label)
19 | except ValueError:
20 | # label not in label_map; keep the box with class id -1
21 | label_id = -1
22 | gt_classes.append(label_id)
23 | bbox = [int(float(v)) for v in values[1:5]]
24 | boxes.append(bbox)
25 | has_label = True
26 | assert len(gt_classes) == len(boxes)
27 | return {'boxes': boxes, 'gt_classes': gt_classes, 'has_label': has_label}
28 |
29 |
30 | def clip_boxes(boxes, im_shape):
31 | """
32 | Clip boxes to image boundaries.
33 | """
34 | if boxes.shape[0] == 0:
35 | return boxes
36 |
37 | # x1 >= 0
38 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
39 | # y1 >= 0
40 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
41 | # x2 < im_shape[1]
42 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
43 | # y2 < im_shape[0]
44 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
45 | return boxes
46 |
47 |
48 | def offset_boxes(boxes, im_shape, scale, offs, flip):
49 | if len(boxes) == 0:
50 | return boxes
51 | boxes = np.asarray(boxes, dtype=np.float)
52 | boxes *= scale
53 | boxes[:, 0::2] -= offs[0]
54 | boxes[:, 1::2] -= offs[1]
55 | boxes = clip_boxes(boxes, im_shape)
56 |
57 | if flip:
58 | boxes_x = np.copy(boxes[:, 0])
59 | boxes[:, 0] = im_shape[1] - boxes[:, 2]
60 | boxes[:, 2] = im_shape[1] - boxes_x
61 |
62 | return boxes
63 |
64 |
65 | def affine_transform(img, boxes, net_inp_size):
66 | if len(boxes) == 0:
67 | return  # in practice unreachable: images without labels are removed in load_dataset
68 | im = np.asarray(img, dtype=np.uint8)
69 | w, h = net_inp_size
70 | im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
71 | im, trans_param = imcv2_affine_trans(im)
72 | scale, offs, flip = trans_param
73 | boxes = offset_boxes(boxes, im.shape, scale, offs, flip)
74 |
75 | boxes[:, 0::2] *= float(w) / im.shape[1]
76 | boxes[:, 1::2] *= float(h) / im.shape[0]
77 | np.clip(boxes[:, 0::2], 0, w - 1, out=boxes[:, 0::2])
78 | np.clip(boxes[:, 1::2], 0, h - 1, out=boxes[:, 1::2])
79 | im = cv2.resize(im, (w, h))
80 |
81 | im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
82 | im = imcv2_recolor(im)
83 | boxes = np.asarray(boxes, dtype=np.int)
84 |
85 | debug = False
86 | if debug:
87 | import matplotlib.pyplot as plt
88 | for idx, box in enumerate(boxes):
89 | # box = [xmin, ymin, xmax, ymax] with original pixel scale
90 | bb = [int(b) for b in box]
91 | im[bb[1]:bb[3], bb[0], :] = 1.
92 | im[bb[1]:bb[3], bb[2], :] = 1.
93 | im[bb[1], bb[0]:bb[2], :] = 1.
94 | im[bb[3], bb[0]:bb[2], :] = 1.
95 | plt.imshow(im)
96 | plt.show()
97 |
98 | # im (pixels range 0~1)
99 | # boxes (pos range 0~max_img_size)
100 | return im, boxes
101 |
102 |
103 | def encode_to_np(gt):
104 | labels = gt['gt_classes']
105 | bboxes = gt['boxes']
106 | img_size = gt['img_size']
107 | gt_size = len(labels)
108 |
109 | num_type = 8 # 1 + 1 + 2 + 4
110 | max_label_num_per_image = 50
111 |
112 | data_matrix = np.zeros([max_label_num_per_image, num_type], dtype=np.float32)
113 | data_matrix[0:gt_size, 0] = 1 # valid mask
114 | data_matrix[0:gt_size, 1] = labels
115 | data_matrix[0:gt_size, 2:4] = img_size
116 | data_matrix[0:gt_size, 4:8] = bboxes
117 | return data_matrix
118 |
119 |
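encode_to_np packs each image's ground truth into a fixed 50x8 matrix (valid flag, class id, image size, bbox) so that batches with varying box counts can still be stacked into one tensor. The consumer side can unpack it by masking on the valid column; a minimal sketch:

    import numpy as np

    def decode_from_np(data_matrix):
        # invert encode_to_np: keep only rows whose valid flag is set
        valid = data_matrix[:, 0] == 1
        labels = data_matrix[valid, 1].astype(np.int64)
        img_size = data_matrix[valid, 2:4]
        boxes = data_matrix[valid, 4:8]
        return labels, img_size, boxes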
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/datasets/__init__.py
--------------------------------------------------------------------------------
/datasets/imdb.py:
--------------------------------------------------------------------------------
1 | import os
2 | import PIL
3 | import numpy as np
4 | from multiprocessing import Pool
5 |
6 |
7 | def mkdir(path, max_depth=3):
8 | parent, child = os.path.split(path)
9 | if not os.path.exists(parent) and max_depth > 1:
10 | mkdir(parent, max_depth-1)
11 |
12 | if not os.path.exists(path):
13 | os.mkdir(path)
14 |
15 |
16 | class ImageDataset(object):
17 | def __init__(self, name, datadir, batch_size, im_processor, processes=2, shuffle=True, dst_size=None):
18 | self._name = name
19 | self._data_dir = datadir
20 | self._batch_size = batch_size
21 | self.dst_size = dst_size
22 |
23 | self._epoch = -1
24 | self._num_classes = 0
25 | self._classes = []
26 |
27 | # load by self.load_dataset()
28 | self._image_indexes = []
29 | self._image_names = []
30 | self._annotations = []
31 | # Use this dict for storing dataset specific config options
32 | self.config = {}
33 |
34 | # Pool
35 | self._shuffle = shuffle
36 | self._pool_processes = processes
37 | self.pool = Pool(self._pool_processes)
38 | self.gen = None
39 | self._im_processor = im_processor
40 |
41 | def next_batch(self):
42 | batch = {'images': [], 'gt_boxes': [], 'gt_classes': [], 'dontcare': [], 'origin_im': []}
43 | i = 0
44 | while i < self.batch_size:
45 | try:
46 | if self.gen is None:
47 | raise AttributeError
48 | images, gt_boxes, classes, dontcare, origin_im = next(self.gen)
49 | batch['images'].append(images)
50 | batch['gt_boxes'].append(gt_boxes)
51 | batch['gt_classes'].append(classes)
52 | batch['dontcare'].append(dontcare)
53 | batch['origin_im'].append(origin_im)
54 | i += 1
55 | except (StopIteration, AttributeError):
56 | indexes = np.arange(len(self.image_names), dtype=np.int)
57 | if self._shuffle:
58 | np.random.shuffle(indexes)
59 | self.gen = self.pool.imap(self._im_processor,
60 | ([self.image_names[i], self.get_annotation(i), self.dst_size] for i in indexes),
61 | chunksize=self.batch_size)
62 | self._epoch += 1
63 | print('epoch {} start...'.format(self._epoch))
64 | batch['images'] = np.asarray(batch['images'])
65 |
66 | return batch
67 |
68 | def close(self):
69 | self.pool.terminate()
70 | self.pool.join()
71 | self.gen = None
72 |
73 | def load_dataset(self):
74 | raise NotImplementedError
75 |
76 | def evaluate_detections(self, all_boxes, output_dir=None):
77 | """
78 | all_boxes is a list of length number-of-classes.
79 | Each list element is a list of length number-of-images.
80 | Each of those list elements is either an empty list []
81 | or a numpy array of detection.
82 |
83 | all_boxes[class][image] = [] or np.array of shape #dets x 5
84 | """
85 | raise NotImplementedError
86 |
87 | def get_annotation(self, i):
88 | if self.annotations is None:
89 | return None
90 | return self.annotations[i]
91 |
92 | @property
93 | def name(self):
94 | return self._name
95 |
96 | @property
97 | def num_classes(self):
98 | return len(self._classes)
99 |
100 | @property
101 | def classes(self):
102 | return self._classes
103 |
104 | @property
105 | def image_names(self):
106 | return self._image_names
107 |
108 | @property
109 | def image_indexes(self):
110 | return self._image_indexes
111 |
112 | @property
113 | def annotations(self):
114 | return self._annotations
115 |
116 | @property
117 | def cache_path(self):
118 | cache_path = os.path.join(self._data_dir, 'cache')
119 | mkdir(cache_path)
120 | return cache_path
121 |
122 | @property
123 | def num_images(self):
124 | return len(self.image_names)
125 |
126 | @property
127 | def epoch(self):
128 | return self._epoch
129 |
130 | @epoch.setter
131 | def epoch(self, value):
132 | self._epoch = value
133 |
134 | @property
135 | def batch_size(self):
136 | return self._batch_size
137 |
138 | @property
139 | def batch_per_epoch(self):
140 | return self.num_images // self.batch_size
141 |
142 |
143 |
--------------------------------------------------------------------------------
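A note on the ImageDataset base class above: next_batch() hides a restartable, multi-process prefetch pipeline. The AttributeError/StopIteration handler is the epoch boundary: whenever the Pool.imap generator is missing or exhausted, it is rebuilt over an (optionally shuffled) index permutation and _epoch is bumped. A minimal driver sketch, assuming the class is importable and using a hypothetical ToyDataset subclass and toy_processor (neither is in the repo):

    import numpy as np

    def toy_processor(args):
        # stand-in im_processor: must return the five fields next_batch() unpacks
        image_name, annotation, dst_size = args
        image = np.zeros((dst_size[1], dst_size[0], 3), np.float32)
        return image, np.zeros((0, 4)), np.zeros((0,), np.int32), [], image

    class ToyDataset(ImageDataset):
        def load_dataset(self):
            self._classes = ['car']
            self._image_names = ['frame_%03d.png' % i for i in range(32)]
            self._annotations = [None] * len(self._image_names)

    ds = ToyDataset('toy', '/tmp', batch_size=8, im_processor=toy_processor,
                    processes=2, dst_size=(416, 416))
    ds.load_dataset()
    for _ in range(ds.batch_per_epoch):
        batch = ds.next_batch()       # batch['images'].shape == (8, 416, 416, 3)
    ds.close()
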
/demo/demo_images_list.txt:
--------------------------------------------------------------------------------
1 | demo/images/000040.jpg
2 | demo/images/000041.jpg
3 | demo/images/000042.jpg
4 | demo/images/000043.jpg
5 | demo/images/000044.jpg
6 | demo/images/000045.jpg
7 | demo/images/000046.jpg
8 | demo/images/000047.jpg
9 | demo/images/000048.jpg
10 | demo/images/000049.jpg
11 | demo/images/000050.jpg
12 | demo/images/000051.jpg
13 | demo/images/000052.jpg
14 | demo/images/000053.jpg
15 | demo/images/000054.jpg
16 | demo/images/000055.jpg
17 | demo/images/000056.jpg
18 | demo/images/000057.jpg
19 | demo/images/000058.jpg
20 | demo/images/000059.jpg
21 | demo/images/000060.jpg
22 | demo/images/000061.jpg
23 | demo/images/000062.jpg
24 | demo/images/000063.jpg
25 | demo/images/000064.jpg
26 | demo/images/000065.jpg
27 | demo/images/000066.jpg
28 | demo/images/000067.jpg
29 | demo/images/000068.jpg
30 | demo/images/000069.jpg
31 | demo/images/000070.jpg
32 | demo/images/000071.jpg
33 | demo/images/000072.jpg
34 | demo/images/000073.jpg
35 | demo/images/000074.jpg
36 | demo/images/000075.jpg
37 | demo/images/000076.jpg
38 | demo/images/000077.jpg
39 | demo/images/000078.jpg
40 | demo/images/000079.jpg
41 |
--------------------------------------------------------------------------------
/demo/detection_0030.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/detection_0030.jpg
--------------------------------------------------------------------------------
/demo/images/000040.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000040.jpg
--------------------------------------------------------------------------------
/demo/images/000041.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000041.jpg
--------------------------------------------------------------------------------
/demo/images/000042.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000042.jpg
--------------------------------------------------------------------------------
/demo/images/000043.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000043.jpg
--------------------------------------------------------------------------------
/demo/images/000044.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000044.jpg
--------------------------------------------------------------------------------
/demo/images/000045.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000045.jpg
--------------------------------------------------------------------------------
/demo/images/000046.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000046.jpg
--------------------------------------------------------------------------------
/demo/images/000047.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000047.jpg
--------------------------------------------------------------------------------
/demo/images/000048.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000048.jpg
--------------------------------------------------------------------------------
/demo/images/000049.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000049.jpg
--------------------------------------------------------------------------------
/demo/images/000050.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000050.jpg
--------------------------------------------------------------------------------
/demo/images/000051.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000051.jpg
--------------------------------------------------------------------------------
/demo/images/000052.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000052.jpg
--------------------------------------------------------------------------------
/demo/images/000053.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000053.jpg
--------------------------------------------------------------------------------
/demo/images/000054.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000054.jpg
--------------------------------------------------------------------------------
/demo/images/000055.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000055.jpg
--------------------------------------------------------------------------------
/demo/images/000056.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000056.jpg
--------------------------------------------------------------------------------
/demo/images/000057.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000057.jpg
--------------------------------------------------------------------------------
/demo/images/000058.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000058.jpg
--------------------------------------------------------------------------------
/demo/images/000059.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000059.jpg
--------------------------------------------------------------------------------
/demo/images/000060.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000060.jpg
--------------------------------------------------------------------------------
/demo/images/000061.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000061.jpg
--------------------------------------------------------------------------------
/demo/images/000062.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000062.jpg
--------------------------------------------------------------------------------
/demo/images/000063.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000063.jpg
--------------------------------------------------------------------------------
/demo/images/000064.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000064.jpg
--------------------------------------------------------------------------------
/demo/images/000065.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000065.jpg
--------------------------------------------------------------------------------
/demo/images/000066.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000066.jpg
--------------------------------------------------------------------------------
/demo/images/000067.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000067.jpg
--------------------------------------------------------------------------------
/demo/images/000068.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000068.jpg
--------------------------------------------------------------------------------
/demo/images/000069.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000069.jpg
--------------------------------------------------------------------------------
/demo/images/000070.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000070.jpg
--------------------------------------------------------------------------------
/demo/images/000071.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000071.jpg
--------------------------------------------------------------------------------
/demo/images/000072.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000072.jpg
--------------------------------------------------------------------------------
/demo/images/000073.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000073.jpg
--------------------------------------------------------------------------------
/demo/images/000074.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000074.jpg
--------------------------------------------------------------------------------
/demo/images/000075.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000075.jpg
--------------------------------------------------------------------------------
/demo/images/000076.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000076.jpg
--------------------------------------------------------------------------------
/demo/images/000077.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000077.jpg
--------------------------------------------------------------------------------
/demo/images/000078.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000078.jpg
--------------------------------------------------------------------------------
/demo/images/000079.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/demo/images/000079.jpg
--------------------------------------------------------------------------------
/flow/detection_analyzer.py:
--------------------------------------------------------------------------------
1 | from yolo_detect import *
2 |
3 |
4 | def diff_detection(img1, img2, cfg, net, thresh):
5 | bboxes_1, cls_inds_1, image_1, scores_1 = detect_image(cfg, img1, net, thresh)
6 | bboxes_2, cls_inds_2, image_2, scores_2 = detect_image(cfg, img2, net, thresh)
7 | is_cls_equal = False
8 | if len(cls_inds_1) == len(cls_inds_2):
9 | is_cls_equal = np.all(np.equal(cls_inds_1, cls_inds_2))
10 |
11 | if not is_cls_equal:
12 | im2show = yolo_utils.draw_detection(image_1, bboxes_1, scores_1, cls_inds_1, cfg)
13 | cv2.imshow('detection_1', im2show)
14 | im2show = yolo_utils.draw_detection(image_2, bboxes_2, scores_2, cls_inds_2, cfg)
15 | cv2.imshow('detection_2', im2show)
16 | cv2.waitKey(0)
17 |
18 | return is_cls_equal
19 |
20 |
21 | def run():
22 | net, cfg = init_network()
23 | image_orig_paths = load_image_paths('/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt')
24 | image_warp_paths = load_image_paths('/home/cory/project/yolo2-pytorch/flow/kitti_val_images_warp.txt')
25 | n_img = len(image_orig_paths)
26 |
27 | thresh = 0.6
28 |
29 | for i in range(n_img - 1):
30 | img_orig = image_orig_paths[i]
31 | img_warp = image_warp_paths[i]
32 | is_equal = diff_detection(img_orig, img_warp, cfg, net, thresh)
33 |
34 |
35 | if __name__ == '__main__':
36 | run()
37 |
--------------------------------------------------------------------------------
/flow/flow_generator.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | class FlowGenerator:
4 | def __init__(self, input_images, output_dir, flow_warper):
5 | self.imgs = input_images
6 | self.output_dir = output_dir
7 | self.warper = flow_warper
8 |
9 | def gen(self):
10 |         for i in range(len(self.imgs) - 1):  # stop one short so imgs[i + 1] stays in range
11 | img1 = self.imgs[i]
12 | img2 = self.imgs[i + 1]
13 | print(img1, img2)
14 | img_w, flow_w = self.warper.warp(img1, img2)
15 |
--------------------------------------------------------------------------------
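FlowGenerator is pure plumbing: it walks consecutive image pairs and delegates to a warper object (see the FlowWarper stub in flow/flow_warper.py below, whose warp() is left unimplemented). A hypothetical wiring, assuming the DIS backend from flow/flow_util.py (which in turn shells out to run_of.sh and the external OF_DIS binary):

    import cv2
    from flow.flow_util import dis_flow, warp_flow
    from flow.flow_generator import FlowGenerator
    from flow.flow_warper import FlowWarper

    class DisFlowWarper(FlowWarper):
        def warp(self, img1, img2):
            # backward-warp frame img1 toward img2 with DIS optical flow
            flow = dis_flow(img2, img1)
            warped = warp_flow(cv2.imread(img1), flow)
            return warped, flow

    gen = FlowGenerator(input_images=['f0.png', 'f1.png', 'f2.png'],
                        output_dir='warp_out', flow_warper=DisFlowWarper())
    gen.gen()   # warps each consecutive pair; writing results out is left to the caller
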
/flow/flow_util.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import cv2
3 | import numpy as np
4 | from subprocess import check_output
5 | import sys
6 | import os
7 |
8 | sys.path.append('/home/cory/project/spynet')
9 | os.environ['TERM'] = 'xterm-256color'
10 | # from spynet import Spynet
11 |
12 |
13 | def shift_filter(feature, flow):
14 | # feature shape = (batch, filters, h, w)
15 | shifted_feature = list()
16 | for feat in feature:
17 | # print(feat.shape)
18 | for i in range(feat.shape[0]):
19 | act2d = feat[i, ...]
20 | act2d = act2d[:, :, np.newaxis]
21 | res = warp_flow(act2d, flow)
22 | shifted_feature.append(res)
23 |
24 | if False:
25 | print('act2d', act2d.shape, sum(act2d.ravel()))
26 | print('flow', flow.shape, sum(flow.ravel()))
27 | plt.figure(11)
28 | plt.imshow(act2d[:, :, 0], cmap='gray')
29 | plt.figure(12)
30 | plt.imshow(flow[..., 0], cmap='gray')
31 | plt.figure(13)
32 | plt.imshow(flow[..., 1], cmap='gray')
33 | plt.figure(14)
34 | plt.imshow(res, cmap='gray')
35 | plt.show()
36 | pass
37 |
38 |     return np.asarray([shifted_feature])  # shape (1, batch * filters, h, w); assumes batch size 1
39 |
40 |
41 | # spynet = Spynet()
42 |
43 |
44 | def spynet_flow(image_path1, image_path2):  # requires the commented-out Spynet import/instance above
45 | import time
46 | t1 = time.time()
47 | flow = spynet.compute_flow(image_path1, image_path2)
48 | t2 = time.time()
49 | # print(t2 -t1)
50 |     flow = np.transpose(flow[0], (1, 2, 0))  # 2 x h x w --> h x w x 2
51 | return flow
52 |
53 |
54 | def read_flo_file(filename):
55 | with open(filename, 'rb') as f:
56 | magic = np.fromfile(f, np.float32, count=1)
57 | if 202021.25 != magic:
58 | print('Magic number incorrect. Invalid .flo file')
59 | return None
60 | w = np.fromfile(f, np.int32, count=1)[0]
61 | h = np.fromfile(f, np.int32, count=1)[0]
62 | # print('Reading %d x %d flo file' % (w, h))
63 | data = np.fromfile(f, np.float32, count=2 * w * h)
64 | # Reshape data into 3D array (columns, rows, bands)
65 | data2D = np.reshape(data, (h, w, 2))
66 | return data2D
67 |
68 |
69 | def dis_flow(img_path1, img_path2):
70 | out = check_output(['./run_of.sh ' + img_path1 + ' ' + img_path2], shell=True)
71 | flow_val = read_flo_file('flow.flo')
72 | return flow_val
73 | # print(out)
74 |
75 |
76 | def flownet2_flow(img_path1, img_path2):
77 | out = check_output(['./run_of.sh ' + img_path1 + ' ' + img_path2], shell=True)
78 | flow_val = read_flo_file('flow.flo')
79 | return flow_val
80 | # print(out)
81 |
82 |
83 | def get_flow_for_filter(flow, feat_map_size):
84 | filter_map_height = feat_map_size[0]
85 | filter_map_width = feat_map_size[1]
86 | flow_ratio_y = flow.shape[0] / filter_map_height
87 | flow_ratio_x = flow.shape[1] / filter_map_width
88 |     flow_small = np.asarray([cv2.resize(src=flow[:, :, 0] / flow_ratio_x,  # channel 0 is x-flow, scale by width ratio
89 | dsize=(filter_map_width, filter_map_height),
90 | interpolation=cv2.INTER_CUBIC),
91 |                              cv2.resize(src=flow[:, :, 1] / flow_ratio_y,  # channel 1 is y-flow, scale by height ratio
92 | dsize=(filter_map_width, filter_map_height),
93 | interpolation=cv2.INTER_CUBIC)])
94 | flow_small = flow_small.transpose([1, 2, 0])
95 | # print('flow_small.shape', flow_small.shape)
96 | return flow_small
97 |
98 |
99 | def warp_flow(img, flow):
100 | h, w = flow.shape[:2]
101 | flow_map = flow.copy()
102 | flow_map[:, :, 0] += np.arange(w)
103 | flow_map[:, :, 1] += np.arange(h)[:, np.newaxis]
104 | res = cv2.remap(src=img, map1=flow_map, map2=None, interpolation=cv2.INTER_LINEAR)
105 | return res
106 |
--------------------------------------------------------------------------------
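Two conventions in flow/flow_util.py are worth pinning down: a Middlebury .flo file is the float32 magic 202021.25, then int32 width and height, then h x w x 2 float32 displacements (channel 0 = x, channel 1 = y), and warp_flow backward-samples an image at pixel + flow via cv2.remap. A self-contained roundtrip sketch, assuming only numpy/cv2 (toy.flo is a made-up file name):

    import numpy as np
    import cv2
    from flow.flow_util import read_flo_file, warp_flow

    h, w = 4, 6
    flow = np.zeros((h, w, 2), np.float32)
    flow[..., 0] = 1.0                      # sample one column to the right everywhere

    with open('toy.flo', 'wb') as f:
        np.float32(202021.25).tofile(f)     # magic number checked by read_flo_file
        np.int32(w).tofile(f)
        np.int32(h).tofile(f)
        flow.tofile(f)

    flow_back = read_flo_file('toy.flo')
    assert flow_back.shape == (h, w, 2)

    img = np.arange(h * w, dtype=np.uint8).reshape(h, w)
    warped = warp_flow(img, flow_back)      # content shifts one column to the left
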
/flow/flow_vis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from flow.plot_util import *
4 | from flow.flow_util import *
5 |
6 | pwd = '/home/cory/project/flownet2/output/'
7 | flos = sorted(os.listdir(pwd))
8 | for flo in flos:
9 | ff = read_flo_file(pwd + flo)
10 | flow_hsv = draw_hsv(ff, ratio=4)
11 | cv2.imshow('flow', flow_hsv)
12 | cv2.waitKey(10)
13 |
--------------------------------------------------------------------------------
/flow/flow_warper.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | class FlowWarper:
4 | def __init__(self):
5 | pass
6 |
7 | def warp(self, img1, img2):
8 | pass
--------------------------------------------------------------------------------
/flow/gen_flow_images.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import cv2
4 |
5 | from flow import flow_util
6 |
7 |
8 | def find_tracklet_id(img_path):
9 | str_offset = img_path.rfind('/')
10 | tracklet_id = img_path[str_offset - 4: str_offset]
11 | return tracklet_id
12 |
13 |
14 | def gen_warp(img_path_0, img_path_1):
15 | # img_flow = flow_util.spynet_flow(img_path_1, img_path_0)
16 | img_flow = flow_util.dis_flow(img_path_1, img_path_0)
17 | img_0 = cv2.imread(img_path_0)
18 | img_warp = flow_util.warp_flow(img_0, img_flow / 2)
19 | return img_warp
20 |
21 |
22 | def parse_label_file(label_file_path):
23 | label_file = open(label_file_path)
24 | vlist = list()
25 | for l in label_file.readlines():
26 | v = l.split(' ')[0:5]
27 | v[1:5] = list(map(float, v[1:5]))
28 | if v[1] < 50 or v[2] < 50 or v[3] > 1200 or v[4] > 350:
29 | v[0] = 'DontCare'
30 | vlist.append(v)
31 | # print(v)
32 | return vlist
33 |
34 |
35 | def write_to_file(labels, filename):
36 | curr_label = parse_label_file(labels)
37 | new_label_file = open(filename, 'w')
38 | for v in curr_label:
39 | line = ' '.join([str(x) for x in v])
40 | # print(line)
41 | new_label_file.write(line + '\n')
42 |
43 |
44 | def gen_images(gen_label_only=False):
45 |
46 | img_files = open('/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt')
47 | image_abs_paths = img_files.readlines()
48 | image_abs_paths = [f.strip() for f in image_abs_paths]
49 |
50 | label_files = open('/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt')
51 | label_abs_paths = label_files.readlines()
52 | label_abs_paths = [f.strip() for f in label_abs_paths]
53 |
54 | out_img_dir = 'images_flow_warp_half'
55 | if not os.path.exists(out_img_dir):
56 | os.mkdir(out_img_dir)
57 |
58 | img_pairs = open('img_pairs.txt', 'w')
59 |
60 | for i in range(0, len(image_abs_paths)):
61 | curr_img_path = image_abs_paths[i]
62 | prev_img_path = image_abs_paths[i - 1]
63 |
64 | curr_tracklet_id = find_tracklet_id(curr_img_path)
65 | prev_tracklet_id = find_tracklet_id(prev_img_path)
66 |
67 | print(i, curr_img_path, curr_tracklet_id)
68 |
69 | tracklet_out_path = out_img_dir + '/' + curr_tracklet_id
70 | if not os.path.exists(tracklet_out_path):
71 | os.mkdir(tracklet_out_path)
72 |
73 | if curr_tracklet_id != prev_tracklet_id:
74 | prev_img_path = curr_img_path
75 |
76 | if not gen_label_only:
77 | # w(0 -> 1) = frame(0) * flow(1 -> 0)
78 | w01 = gen_warp(prev_img_path, curr_img_path)
79 | out_path = curr_img_path.replace('.png', '')
80 | out_path = tracklet_out_path + '/' + out_path[out_path.rfind('/') + 1:]
81 | w01_path = out_path + '_w01.png'
82 |
83 | write_to_file(label_abs_paths[i], out_path + '_w01_label.txt')
84 |
85 | # for flownet 2.0
86 | flo_id = out_path[out_path.find('/') + 1:].replace('/', '_')
87 | img_pairs.write(curr_img_path + ' ' + prev_img_path + ' ../output/' + flo_id + '.flo\n')
88 |
89 | out_path = prev_img_path.replace('.png', '')
90 | out_path = tracklet_out_path + '/' + out_path[out_path.rfind('/') + 1:]
91 | w10_path = out_path + '_w10.png'
92 | write_to_file(label_abs_paths[i], out_path + '_w10_label.txt')
93 |
94 | if not gen_label_only:
95 | # w(1 -> 0) = frame(1) * flow(0 -> 1)
96 | # w10 = gen_warp(curr_img_path, prev_img_path)
97 | cv2.imshow('w01', w01)
98 | # cv2.imshow('w10', w10)
99 |
100 | os.makedirs(w01_path[0: w01_path.rfind('/')], exist_ok=True)
101 | cv2.imwrite(w01_path, w01)
102 | # cv2.imwrite(w10_path, w10)
103 | cv2.waitKey(30)
104 |
105 |
106 | if __name__ == '__main__':
107 | gen_images()
108 |
--------------------------------------------------------------------------------
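The crux of gen_flow_images.py is the DontCare rule in parse_label_file: any box whose x1 or y1 falls below 50, whose x2 exceeds 1200, or whose y2 exceeds 350 is relabelled, presumably because boxes near the image border survive warping poorly. A tiny demonstration with a made-up KITTI-style label file (paths and values are illustrative only):

    from flow.gen_flow_images import parse_label_file, write_to_file

    with open('/tmp/demo_label.txt', 'w') as f:
        f.write('Car 30.0 120.0 400.0 300.0 0 0 0\n')    # x1 = 30 < 50 -> DontCare
        f.write('Car 200.0 120.0 400.0 300.0 0 0 0\n')   # inside the kept region

    print(parse_label_file('/tmp/demo_label.txt'))
    # [['DontCare', 30.0, 120.0, 400.0, 300.0], ['Car', 200.0, 120.0, 400.0, 300.0]]

    write_to_file('/tmp/demo_label.txt', '/tmp/demo_label_filtered.txt')
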
/flow/gen_flow_images_cloudai.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import cv2
4 |
5 | from flow import flow_util
6 |
7 |
8 | def find_tracklet_id(img_path):
9 | str_offset = img_path.rfind('/')
10 | tracklet_id = img_path[str_offset - 4: str_offset]
11 | return tracklet_id
12 |
13 |
14 | def gen_warp(img_path_0, img_path_1):
15 | # img_flow = flow_util.spynet_flow(img_path_1, img_path_0)
16 | img_flow = flow_util.dis_flow(img_path_1, img_path_0)
17 | img_0 = cv2.imread(img_path_0)
18 | img_warp = flow_util.warp_flow(img_0, img_flow / 2)
19 | return img_warp
20 |
21 |
22 | def parse_label_file(label_file_path):
23 | label_file = open(label_file_path)
24 | vlist = list()
25 | for l in label_file.readlines():
26 | v = l.split(' ')[0:5]
27 | v[1:5] = list(map(float, v[1:5]))
28 | if v[1] < 50 or v[2] < 50 or v[3] > 1200 or v[4] > 350:
29 | v[0] = 'DontCare'
30 | vlist.append(v)
31 | # print(v)
32 | return vlist
33 |
34 |
35 | def write_to_file(labels, filename):
36 | curr_label = parse_label_file(labels)
37 | new_label_file = open(filename, 'w')
38 | for v in curr_label:
39 | line = ' '.join([str(x) for x in v])
40 | # print(line)
41 | new_label_file.write(line + '\n')
42 |
43 |
44 | def gen_images(gen_label_only=False):
45 |
46 | img_files = open('/home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_train_images.txt')
47 | image_abs_paths = img_files.readlines()
48 | image_abs_paths = [f.strip() for f in image_abs_paths]
49 |
50 | label_files = open('/home/cory/project/yolo2-pytorch/train_data/crowdai/crowdai_train_labels.txt')
51 | label_abs_paths = label_files.readlines()
52 | label_abs_paths = [f.strip() for f in label_abs_paths]
53 |
54 | out_img_dir = 'images_flow_warp_crowdai'
55 | if not os.path.exists(out_img_dir):
56 | os.mkdir(out_img_dir)
57 |
58 |     for i in range(1, len(image_abs_paths)):  # start at 1 so each frame pairs with its true predecessor
59 | curr_img_path = image_abs_paths[i]
60 | prev_img_path = image_abs_paths[i - 1]
61 |
62 | w01 = gen_warp(prev_img_path, curr_img_path)
63 |
64 | cv2.imshow('w01', w01)
65 | cv2.waitKey(1)
66 |
67 |
68 | if __name__ == '__main__':
69 | gen_images()
70 |
--------------------------------------------------------------------------------
/flow/gen_flow_images_detrac.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import cv2
4 |
5 | from flow import flow_util
6 |
7 |
8 | def find_tracklet_id(img_path):
9 | str_offset = img_path.rfind('/')
10 | tracklet_id = img_path[str_offset - 5: str_offset]
11 | return tracklet_id
12 |
13 |
14 | def gen_warp(img_path_0, img_path_1):
15 | # img_flow = flow_util.spynet_flow(img_path_1, img_path_0)
16 | img_flow = flow_util.dis_flow(img_path_1, img_path_0)
17 | img_0 = cv2.imread(img_path_0)
18 | img_warp = flow_util.warp_flow(img_0, img_flow)
19 | return img_warp
20 |
21 |
22 | def parse_label_file(label_file_path):
23 | label_file = open(label_file_path)
24 | vlist = list()
25 | for l in label_file.readlines():
26 | v = l.split(' ')[0:5]
27 | if len(v) <= 1:
28 | continue
29 | v[1:5] = list(map(float, v[1:5]))
30 | if v[1] < 50 or v[2] < 50 or v[3] > 900 or v[4] > 500:
31 | v[0] = 'DontCare'
32 | vlist.append(v)
33 | # print(v)
34 | return vlist
35 |
36 |
37 | def write_to_file(labels, filename):
38 | curr_label = parse_label_file(labels)
39 | new_label_file = open(filename, 'w')
40 | for v in curr_label:
41 | line = ' '.join([str(x) for x in v])
42 | # print(line)
43 | new_label_file.write(line + '\n')
44 |
45 |
46 | def gen_images(gen_label_only=False):
47 |
48 | img_files = open('/home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_images.txt')
49 | image_abs_paths = img_files.readlines()
50 | image_abs_paths = [f.strip() for f in image_abs_paths]
51 |
52 | label_files = open('/home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_labels.txt')
53 | label_abs_paths = label_files.readlines()
54 | label_abs_paths = [f.strip() for f in label_abs_paths]
55 |
56 | out_img_dir = 'images_flow_warp_detrac'
57 | if not os.path.exists(out_img_dir):
58 | os.mkdir(out_img_dir)
59 |
60 | for i in range(0, len(image_abs_paths)):
61 | curr_img_path = image_abs_paths[i]
62 | prev_img_path = image_abs_paths[i - 1]
63 |
64 | curr_tracklet_id = find_tracklet_id(curr_img_path)
65 | prev_tracklet_id = find_tracklet_id(prev_img_path)
66 |
67 | print(i, curr_img_path, curr_tracklet_id)
68 |
69 | tracklet_out_path = out_img_dir + '/' + curr_tracklet_id
70 | if not os.path.exists(tracklet_out_path):
71 | os.mkdir(tracklet_out_path)
72 |
73 | if curr_tracklet_id != prev_tracklet_id:
74 | prev_img_path = curr_img_path
75 |
76 | if not gen_label_only:
77 | # w(0 -> 1) = frame(0) * flow(1 -> 0)
78 | w01 = gen_warp(prev_img_path, curr_img_path)
79 | out_path = curr_img_path.replace('.png', '')
80 | out_path = tracklet_out_path + '/' + out_path[out_path.rfind('/') + 1:]
81 | w01_path = out_path + '_w01.png'
82 |
83 | write_to_file(label_abs_paths[i], out_path + '_w01_label.txt')
84 |
85 | out_path = prev_img_path.replace('.png', '')
86 | out_path = tracklet_out_path + '/' + out_path[out_path.rfind('/') + 1:]
87 | w10_path = out_path + '_w10.png'
88 | write_to_file(label_abs_paths[i], out_path + '_w10_label.txt')
89 |
90 | if not gen_label_only:
91 | # w(1 -> 0) = frame(1) * flow(0 -> 1)
92 | # w10 = gen_warp(curr_img_path, prev_img_path)
93 | cv2.imshow('w01', w01)
94 | # cv2.imshow('w10', w10)
95 |
96 | os.makedirs(w01_path[0: w01_path.rfind('/')], exist_ok=True)
97 | cv2.imwrite(w01_path, w01)
98 | # cv2.imwrite(w10_path, w10)
99 | cv2.waitKey(1)
100 |
101 |
102 | if __name__ == '__main__':
103 | gen_images()
104 |
--------------------------------------------------------------------------------
/flow/gen_val_from_all.sh:
--------------------------------------------------------------------------------
1 | realpath images_flow_warp/0001/*.png
2 | realpath images_flow_warp/0005/*.png
3 | realpath images_flow_warp/0013/*.png
4 | realpath images_flow_warp/0017/*.png
5 |
--------------------------------------------------------------------------------
/flow/gen_warp_images_by_flow.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from flow.plot_util import *
4 | from flow.flow_util import *
5 | from flow.gen_flow_images import find_tracklet_id
6 |
7 |
8 | def gen_images():
9 |
10 | out_img_dir = 'images_flow_warp_flownet2'
11 | if not os.path.exists(out_img_dir):
12 | os.mkdir(out_img_dir)
13 |
14 | img_files = open('/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt')
15 | image_abs_paths = img_files.readlines()
16 | image_abs_paths = [f.strip() for f in image_abs_paths]
17 |
18 | pwd = '/home/cory/project/flownet2/output/'
19 | flo_list = list()
20 | for flo in sorted(os.listdir(pwd)):
21 | ff = pwd + flo
22 | flo_list.append(ff)
23 |
24 | for i in range(0, len(image_abs_paths)):
25 | curr_img_path = image_abs_paths[i]
26 | prev_img_path = image_abs_paths[i - 1]
27 |
28 | curr_tracklet_id = find_tracklet_id(curr_img_path)
29 | prev_tracklet_id = find_tracklet_id(prev_img_path)
30 |
31 | print(i, curr_img_path, curr_tracklet_id)
32 |
33 | tracklet_out_path = out_img_dir + '/' + curr_tracklet_id
34 | if not os.path.exists(tracklet_out_path):
35 | os.mkdir(tracklet_out_path)
36 |
37 | if curr_tracklet_id != prev_tracklet_id:
38 | prev_img_path = curr_img_path
39 |
40 | # w(0 -> 1) = frame(0) * flow(1 -> 0)
41 | print(flo_list[i])
42 | flo = read_flo_file(flo_list[i])
43 | flow_hsv = draw_hsv(flo, ratio=2)
44 | cv2.imshow('flow', flow_hsv)
45 |
46 | w01 = warp_flow(cv2.imread(prev_img_path), flo)
47 | out_path = curr_img_path.replace('.png', '')
48 | out_path = tracklet_out_path + '/' + out_path[out_path.rfind('/') + 1:]
49 | w01_path = out_path + '_w01.png'
50 | cv2.imshow('w01', w01)
51 |
52 | cv2.imwrite(out_path + '_flow.png', flow_hsv)
53 | cv2.imwrite(w01_path, w01)
54 |
55 | cv2.waitKey(30)
56 |
57 |
58 | if __name__ == '__main__':
59 | gen_images()
60 |
--------------------------------------------------------------------------------
/flow/img_diff.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import random
3 |
4 |
5 | def diff(img1, img2, window_name=''):
6 | df = img1 - img2
7 | cv2.imshow(window_name, df)
8 | cv2.imwrite(window_name + '.jpg', df)
9 |
10 |
11 | def main():
12 | img0 = cv2.imread('/media/cory/c_disk/Project/KITTI_Dataset/data_tracking_image_2/training/image_02/0003/000035.png')
13 | img1 = cv2.imread('/home/cory/project/yolo2-pytorch/flow/images_flow_warp/0003/000035_w01.png')
14 | img2 = cv2.imread('/home/cory/project/yolo2-pytorch/flow/images_flow_warp/0003/000035_w10.png')
15 | cv2.imshow('0', img0)
16 | cv2.imshow('1', img1)
17 | cv2.imshow('2', img2)
18 | diff(img1, img0, '1-0')
19 | diff(img2, img0, '2-0')
20 | diff(img1, img2, '1-2')
21 | cv2.waitKey(0)
22 |
23 |
24 | if __name__ == '__main__':
25 | main()
26 |
--------------------------------------------------------------------------------
/flow/plot_util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import cv2
4 | import math
5 |
6 |
7 | def imshow_fig(img, title='', **kwargs):
8 | h = img.shape[0]
9 | w = img.shape[1]
10 | dpi = 96
11 | fig = plt.figure(num=0, figsize=(w / dpi, h / dpi))
12 | fig.add_axes([0., 0., 1., 1.])
13 | fig.canvas.set_window_title(title)
14 | plt.imshow(img, **kwargs)
15 | plt.axis('off')
16 | return fig
17 |
18 |
19 | def plot_feature_map(features, border=2, resize_ratio=2):
20 | num_channel = features.shape[1]
21 | feat_h = features.shape[2]
22 | feat_w = features.shape[3]
23 | map_border_num = int(math.ceil(math.sqrt(num_channel)))
24 | map_h = (feat_h + border) * map_border_num
25 | map_w = (feat_w + border) * map_border_num
26 | # print('create act map {:d} x {:d}'.format(map_h, map_w))
27 | feature_map_all = np.zeros((map_h, map_w))
28 |
29 | # print(features.shape)
30 | all_sum = 0
31 | idx = 0
32 | max_val = np.max(features.ravel())
33 | for i_y in range(0, map_h, feat_h+border):
34 | for i_x in range(0, map_w, feat_w+border):
35 | if idx >= num_channel:
36 | break
37 | act = features[0, idx, :, :]
38 | idx += 1
39 | if border != 0:
40 | act_pad = np.lib.pad(array=act,
41 | pad_width=((0, border), (0, border)),
42 | mode='constant',
43 | constant_values=max_val/6)
44 | else:
45 | act_pad = act
46 | feature_map_all[i_y: i_y + feat_h + border, i_x: i_x + feat_w + border] = act_pad
47 | act_sum = sum(act.ravel())
48 | all_sum += act_sum
49 | # print('filter-{:d} act_sum={:f}'.format(idx, act_sum))
50 |
51 | # print('all_sum = {:f}'.format(all_sum))
52 | # min max normalization
53 | feature_map_all /= feature_map_all.max()
54 | feature_map_all = cv2.resize(feature_map_all, (feature_map_all.shape[1] * resize_ratio,
55 | feature_map_all.shape[0] * resize_ratio))
56 | return feature_map_all
57 |
58 |
59 | def draw_hsv(flow, ratio=4):
60 | h, w = flow.shape[:2]
61 | fx, fy = flow[:, :, 0], flow[:, :, 1]
62 | ang = np.arctan2(fy, fx) + np.pi
63 | v = np.sqrt(fx * fx + fy * fy)
64 | hsv = np.zeros((h, w, 3), np.uint8)
65 | hsv[..., 0] = ang * (180 / np.pi / 2)
66 | hsv[..., 1] = v * ratio
67 | hsv[..., 2] = 255
68 | bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
69 | return bgr
70 |
--------------------------------------------------------------------------------
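draw_hsv is the usual optical-flow color wheel: hue encodes flow direction, saturation encodes magnitude scaled by ratio, and value is fixed at 255. A synthetic smoke test, assuming only numpy/cv2:

    import numpy as np
    import cv2
    from flow.plot_util import draw_hsv

    h, w = 64, 64
    flow = np.zeros((h, w, 2), np.float32)
    flow[:, :32, 0] = 10.0    # left half moves in x
    flow[:, 32:, 1] = 10.0    # right half moves in y
    vis = draw_hsv(flow, ratio=4)      # BGR uint8; the two halves get distinct hues
    cv2.imwrite('flow_vis_demo.png', vis)
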
/flow/run_flow.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | cd /home/cory/project/spynet
3 | th run_flow.lua $1 $2 && cp ./flow.npy ~/yolo2-pytorch
3 |
--------------------------------------------------------------------------------
/flow/run_of.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | echo $1 $2
3 | ~/project/OF_DIS/run_OF_RGB $1 $2 flow.flo
4 |
--------------------------------------------------------------------------------
/flow/shift_gt_by_flow.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from flow.flow_util import dis_flow
3 | from flow.gen_flow_images import parse_label_file, write_to_file
4 | from misc.visualize_gt import plot_vis
5 | import matplotlib.pyplot as plt
6 |
7 |
8 | def flow_avg_in_rectangle(flow, pos):
9 | pos = list(map(int, pos))
10 | crop = flow[pos[1]: pos[3], pos[0]: pos[2]]
11 | avg_x = np.average(crop[:, :, 1])
12 | avg_y = np.average(crop[:, :, 0])
13 | return avg_x, avg_y
14 |
15 |
16 | def flow_std_in_rectangle(flow, pos):
17 | pos = list(map(int, pos))
18 | crop = flow[pos[1]: pos[3], pos[0]: pos[2]]
19 | std_x = np.std(crop[:, :, 1])
20 | std_y = np.std(crop[:, :, 0])
21 | return std_x, std_y
22 |
23 |
24 | def gt_save_to_file(gt, filepath):
25 | print(filepath)
26 | out_file = open(filepath, 'w')
27 | for g in gt:
28 | gs = [str(int(x)) for x in g[1: 5]]
29 | wline = g[0] + ' ' + ' '.join(gs) + ' 0 0 0\n'
30 | out_file.write(wline)
31 |
32 |
33 | def shift_gt_by_flow():
34 | img_list_filename = 'w01_images.txt'
35 | # gt_list_filename = 'w01_center_labels.txt'
36 | gt_list_filename = 'kitti_train_labels.txt'
37 | img_list_file = open(img_list_filename)
38 | gt_list_file = open(gt_list_filename)
39 |
40 | img_paths = [f.strip() for f in img_list_file.readlines()]
41 | gt_paths = [f.strip() for f in gt_list_file.readlines()]
42 |
43 | total_num = len(img_paths)
44 | print(total_num)
45 |
46 | pt_x = list()
47 | pt_y = list()
48 | for i in range(total_num - 1):
49 |         # img_file = open(img_paths[i])  # unused; commented out to avoid leaking file handles
50 | out_gt_filepath = gt_paths[i].replace('.txt', '_shift.txt')
51 | gts = parse_label_file(gt_paths[i])
52 | # print(gts)
53 | print(i)
54 |
55 | flow = dis_flow(img_paths[i + 1], img_paths[i])
56 | for gt in gts:
57 | std_flow = flow_std_in_rectangle(flow, gt[1:5])
58 | pt_x.append(std_flow[0])
59 | pt_y.append(std_flow[1])
60 | if abs(std_flow[0]) > 2 or abs(std_flow[1]) > 5:
61 | print(gt[0], std_flow[0], std_flow[1])
62 | gt[0] = 'DontCare'
63 | print(gt)
64 |
65 | # r = plot_vis(img_paths[i], gts)
66 | gt_save_to_file(gts, out_gt_filepath)
67 |
68 | plt.plot(pt_x, pt_y, '*')
69 | plt.show()
70 |
71 | if __name__ == '__main__':
72 | shift_gt_by_flow()
73 |
--------------------------------------------------------------------------------
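The DontCare decision above hinges on per-box flow statistics: a ground-truth box whose flow standard deviation exceeds the (2, 5) thresholds is treated as inconsistent and dropped. A synthetic check of the two helpers, assuming only numpy; note that, as written, they return channel 1 first and channel 0 second:

    import numpy as np
    from flow.shift_gt_by_flow import flow_avg_in_rectangle, flow_std_in_rectangle

    flow = np.zeros((100, 200, 2), np.float32)
    flow[20:40, 50:90, 0] = 3.0           # constant channel-0 flow inside the box
    box = (50.0, 20.0, 90.0, 40.0)        # x1, y1, x2, y2

    print(flow_avg_in_rectangle(flow, box))   # (0.0, 3.0): channel 0 comes back second
    print(flow_std_in_rectangle(flow, box))   # (0.0, 0.0): uniform flow inside the box
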
/flow/vis.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/flow/vis.jpg
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/__init__.py
--------------------------------------------------------------------------------
/layers/reorg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/reorg/__init__.py
--------------------------------------------------------------------------------
/layers/reorg/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/reorg/_ext/__init__.py
--------------------------------------------------------------------------------
/layers/reorg/_ext/reorg_layer/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._reorg_layer import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | locals[symbol] = _wrap_function(fn, _ffi)
10 | __all__.append(symbol)
11 |
12 | _import_symbols(locals())
13 |
--------------------------------------------------------------------------------
/layers/reorg/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/reorg_cpu.c']
7 | headers = ['src/reorg_cpu.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | if torch.cuda.is_available():
12 | print('Including CUDA code.')
13 | sources += ['src/reorg_cuda.c']
14 | headers += ['src/reorg_cuda.h']
15 | defines += [('WITH_CUDA', None)]
16 | with_cuda = True
17 |
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | # print(this_file)
20 | extra_objects = ['src/reorg_cuda_kernel.cu.o'] if with_cuda else []
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 |
23 | ffi = create_extension(
24 | '_ext.reorg_layer',
25 | headers=headers,
26 | sources=sources,
27 | define_macros=defines,
28 | relative_to=__file__,
29 | with_cuda=with_cuda,
30 | extra_objects=extra_objects
31 | )
32 |
33 | if __name__ == '__main__':
34 | ffi.build()
35 |
--------------------------------------------------------------------------------
/layers/reorg/reorg_layer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from ._ext import reorg_layer
4 |
5 |
6 | class ReorgFunction(Function):
7 | def __init__(self, stride=2):
8 | super().__init__()
9 | self.stride = stride
10 |
11 | def forward(self, x):
12 | stride = self.stride
13 |
14 | bsize, c, h, w = x.size()
15 | out_w, out_h, out_c = int(w / stride), int(h / stride), c * (stride * stride)
16 | out = torch.FloatTensor(bsize, out_c, out_h, out_w)
17 |
18 | if x.is_cuda:
19 | out = out.cuda()
20 | reorg_layer.reorg_cuda(x, out_w, out_h, out_c, bsize, stride, 0, out)
21 | else:
22 | reorg_layer.reorg_cpu(x, out_w, out_h, out_c, bsize, stride, 0, out)
23 |
24 | return out
25 |
26 | def backward(self, grad_top):
27 | stride = self.stride
28 | bsize, c, h, w = grad_top.size()
29 |
30 | out_w, out_h, out_c = w * stride, h * stride, c // (stride * stride)
31 | grad_bottom = torch.FloatTensor(bsize, out_c, out_h, out_w)
32 |
33 | # rev_stride = 1. / stride # reverse
34 | if grad_top.is_cuda:
35 | grad_bottom = grad_bottom.cuda()
36 | reorg_layer.reorg_cuda(grad_top, w, h, c, bsize, stride, 1, grad_bottom)
37 | else:
38 | reorg_layer.reorg_cpu(grad_top, w, h, c, bsize, stride, 1, grad_bottom)
39 |
40 | return grad_bottom
41 |
42 |
43 | class ReorgLayer(torch.nn.Module):
44 | def __init__(self, stride):
45 | super(ReorgLayer, self).__init__()
46 | self.stride = stride
47 |
48 | def forward(self, x):
49 | x = ReorgFunction(self.stride)(x)
50 | return x
51 |
--------------------------------------------------------------------------------
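Note the argument order in ReorgFunction.forward: the kernel is called with the *output* dimensions and forward flag 0, so the `out[in_index] = x[out_index]` branch of reorg_cpu.c (below) is the one that runs on the forward pass. Unrolling that index math gives this NumPy reference, offered as a verification sketch only:

    import numpy as np

    def reorg_reference(x, stride=2):
        # (b, c, h, w) -> (b, c * stride**2, h // stride, w // stride),
        # with darknet's channel ordering from reorg_cpu.c
        b, c, h, w = x.shape
        out_c, out_h, out_w = c * stride * stride, h // stride, w // stride
        out = np.empty((b, out_c, out_h, out_w), dtype=x.dtype)
        for k in range(out_c):
            c2, offset = k % c, k // c
            for j in range(out_h):
                for i in range(out_w):
                    out[:, k, j, i] = x[:, c2,
                                        j * stride + offset // stride,
                                        i * stride + offset % stride]
        return out

    x = np.arange(2 * 8 * 4 * 4, dtype=np.float32).reshape(2, 8, 4, 4)
    assert reorg_reference(x).shape == (2, 32, 2, 2)
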
/layers/reorg/src/reorg_cpu.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 |
3 | int reorg_cpu(THFloatTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THFloatTensor *out_tensor)
4 | {
5 | // Grab the tensor
6 | float * x = THFloatTensor_data(x_tensor);
7 | float * out = THFloatTensor_data(out_tensor);
8 |
9 | // https://github.com/pjreddie/darknet/blob/master/src/blas.c
10 | int b,i,j,k;
11 | int out_c = c/(stride*stride);
12 |
13 | for(b = 0; b < batch; ++b){
14 | for(k = 0; k < c; ++k){
15 | for(j = 0; j < h; ++j){
16 | for(i = 0; i < w; ++i){
17 | int in_index = i + w*(j + h*(k + c*b));
18 | int c2 = k % out_c;
19 | int offset = k / out_c;
20 | int w2 = i*stride + offset % stride;
21 | int h2 = j*stride + offset / stride;
22 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));
23 | if(forward) out[out_index] = x[in_index];
24 | else out[in_index] = x[out_index];
25 | }
26 | }
27 | }
28 | }
29 |
30 | return 1;
31 | }
--------------------------------------------------------------------------------
/layers/reorg/src/reorg_cpu.h:
--------------------------------------------------------------------------------
1 | int reorg_cpu(THFloatTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THFloatTensor *out_tensor);
--------------------------------------------------------------------------------
/layers/reorg/src/reorg_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include "reorg_cuda_kernel.h"
3 |
4 | extern THCState *state;
5 |
6 | int reorg_cuda(THCudaTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THCudaTensor *out_tensor)
7 | {
8 | float * x = THCudaTensor_data(state, x_tensor);
9 | float * out = THCudaTensor_data(state, out_tensor);
10 |
11 | cudaStream_t stream = THCState_getCurrentStream(state);
12 | reorg_ongpu(x, w, h, c, batch, stride, forward, out, stream);
13 |
14 | return 1;
15 | }
--------------------------------------------------------------------------------
/layers/reorg/src/reorg_cuda.h:
--------------------------------------------------------------------------------
1 | int reorg_cuda(THCudaTensor *x_tensor, int w, int h, int c, int batch, int stride, int forward, THCudaTensor *out_tensor);
--------------------------------------------------------------------------------
/layers/reorg/src/reorg_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #ifdef __cplusplus
2 | extern "C" {
3 | #endif
4 |
5 | #include <stdio.h>
6 | #include <math.h>
7 | #include <float.h>
8 | #include "reorg_cuda_kernel.h"
9 |
10 | #define BLOCK 512
11 |
12 | dim3 cuda_gridsize(int n)
13 | {
14 | int k = (n-1) / BLOCK + 1;
15 | int x = k;
16 | int y = 1;
17 | if(x > 65535){
18 | x = ceil(sqrt(k));
19 | y = (n-1)/(x*BLOCK) + 1;
20 | }
21 | dim3 d(x, y, 1);
22 | //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
23 | return d;
24 | }
25 |
26 | __global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
27 | {
28 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
29 | if(i >= N) return;
30 | int in_index = i;
31 | int in_w = i%w;
32 | i = i/w;
33 | int in_h = i%h;
34 | i = i/h;
35 | int in_c = i%c;
36 | i = i/c;
37 | int b = i%batch;
38 |
39 | int out_c = c/(stride*stride);
40 |
41 | int c2 = in_c % out_c;
42 | int offset = in_c / out_c;
43 | int w2 = in_w*stride + offset % stride;
44 | int h2 = in_h*stride + offset / stride;
45 | //printf("%d\n", offset);
46 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));
47 |
48 | // printf("%d %d %d\n", w2, h2, c2);
49 | //printf("%d %d\n", in_index, out_index);
50 | //if(out_index >= N || out_index < 0) printf("bad bad bad \n");
51 |
52 | if(forward) out[out_index] = x[in_index];
53 | else out[in_index] = x[out_index];
54 | //if(forward) out[1] = x[1];
55 | //else out[0] = x[0];
56 | }
57 |
58 | void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out, cudaStream_t stream)
59 | {
60 | int size = w*h*c*batch;
61 | cudaError_t err;
62 |
63 |     reorg_kernel<<<cuda_gridsize(size), BLOCK, 0, stream>>>(size, x, w, h, c, batch, stride, forward, out);
64 |
65 | err = cudaGetLastError();
66 | if(cudaSuccess != err)
67 | {
68 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
69 | exit( -1 );
70 | }
71 | }
72 |
73 |
74 |
75 | #ifdef __cplusplus
76 | }
77 | #endif
78 |
--------------------------------------------------------------------------------
/layers/reorg/src/reorg_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _REORG_CUDA_KERNEL
2 | #define _REORG_CUDA_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out, cudaStream_t stream);
9 |
10 |
11 | #ifdef __cplusplus
12 | }
13 | #endif
14 |
15 | #endif
16 |
--------------------------------------------------------------------------------
/layers/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/roi_pooling/__init__.py
--------------------------------------------------------------------------------
/layers/roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/layers/roi_pooling/_ext/__init__.py
--------------------------------------------------------------------------------
/layers/roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_pooling import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | locals[symbol] = _wrap_function(fn, _ffi)
10 | __all__.append(symbol)
11 |
12 | _import_symbols(locals())
13 |
--------------------------------------------------------------------------------
/layers/roi_pooling/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/roi_pooling.c']
7 | headers = ['src/roi_pooling.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | if torch.cuda.is_available():
12 | print('Including CUDA code.')
13 | sources += ['src/roi_pooling_cuda.c']
14 | headers += ['src/roi_pooling_cuda.h']
15 | defines += [('WITH_CUDA', None)]
16 | with_cuda = True
17 |
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/roi_pooling_kernel.cu.o'] if with_cuda else []
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 |
23 | ffi = create_extension(
24 | '_ext.roi_pooling',
25 | headers=headers,
26 | sources=sources,
27 | define_macros=defines,
28 | relative_to=__file__,
29 | with_cuda=with_cuda,
30 | extra_objects=extra_objects
31 | )
32 |
33 | if __name__ == '__main__':
34 | ffi.build()
35 |
--------------------------------------------------------------------------------
/layers/roi_pooling/roi_pool.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from ._ext import roi_pooling
4 |
5 |
6 | class RoIPoolFunction(Function):
7 | def __init__(self, pooled_height, pooled_width, spatial_scale):
8 | self.pooled_width = int(pooled_width)
9 | self.pooled_height = int(pooled_height)
10 | self.spatial_scale = float(spatial_scale)
11 | self.output = None
12 | self.argmax = None
13 | self.rois = None
14 | self.feature_size = None
15 |
16 | def forward(self, features, rois):
17 | batch_size, num_channels, data_height, data_width = features.size()
18 | num_rois = rois.size()[0]
19 | output = torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)
20 | argmax = torch.IntTensor(num_rois, num_channels, self.pooled_height, self.pooled_width).zero_()
21 |
22 | if not features.is_cuda:
23 | _features = features.permute(0, 2, 3, 1)
24 | roi_pooling.roi_pooling_forward(self.pooled_height, self.pooled_width, self.spatial_scale,
25 | _features, rois, output)
26 | # output = output.cuda()
27 | else:
28 | output = output.cuda()
29 | argmax = argmax.cuda()
30 | roi_pooling.roi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale,
31 | features, rois, output, argmax)
32 | self.output = output
33 | self.argmax = argmax
34 | self.rois = rois
35 | self.feature_size = features.size()
36 |
37 | return output
38 |
39 | def backward(self, grad_output):
40 | assert(self.feature_size is not None and grad_output.is_cuda)
41 |
42 | batch_size, num_channels, data_height, data_width = self.feature_size
43 |
44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda()
45 | roi_pooling.roi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale,
46 | grad_output, self.rois, grad_input, self.argmax)
47 |
48 | # print grad_input
49 |
50 | return grad_input, None
51 |
52 |
53 | class RoIPool(torch.nn.Module):
54 | def __init__(self, pooled_height, pooled_width, spatial_scale):
55 | super(RoIPool, self).__init__()
56 |
57 | self.pooled_width = int(pooled_width)
58 | self.pooled_height = int(pooled_height)
59 | self.spatial_scale = float(spatial_scale)
60 |
61 | def forward(self, features, rois):
62 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois)
63 |
--------------------------------------------------------------------------------
/layers/roi_pooling/roi_pool_py.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | import numpy as np
5 |
6 |
7 | class RoIPool(nn.Module):
8 | def __init__(self, pooled_height, pooled_width, spatial_scale):
9 | super(RoIPool, self).__init__()
10 | self.pooled_width = int(pooled_width)
11 | self.pooled_height = int(pooled_height)
12 | self.spatial_scale = float(spatial_scale)
13 |
14 | def forward(self, features, rois):
15 | batch_size, num_channels, data_height, data_width = features.size()
16 | num_rois = rois.size()[0]
17 | outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda()
18 |
19 | for roi_ind, roi in enumerate(rois):
20 | batch_ind = int(roi[0].data[0])
21 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round(
22 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int)
23 | roi_width = max(roi_end_w - roi_start_w + 1, 1)
24 | roi_height = max(roi_end_h - roi_start_h + 1, 1)
25 | bin_size_w = float(roi_width) / float(self.pooled_width)
26 | bin_size_h = float(roi_height) / float(self.pooled_height)
27 |
28 | for ph in range(self.pooled_height):
29 | hstart = int(np.floor(ph * bin_size_h))
30 | hend = int(np.ceil((ph + 1) * bin_size_h))
31 | hstart = min(data_height, max(0, hstart + roi_start_h))
32 | hend = min(data_height, max(0, hend + roi_start_h))
33 | for pw in range(self.pooled_width):
34 | wstart = int(np.floor(pw * bin_size_w))
35 | wend = int(np.ceil((pw + 1) * bin_size_w))
36 | wstart = min(data_width, max(0, wstart + roi_start_w))
37 | wend = min(data_width, max(0, wend + roi_start_w))
38 |
39 | is_empty = (hend <= hstart) or(wend <= wstart)
40 | if is_empty:
41 | outputs[roi_ind, :, ph, pw] = 0
42 | else:
43 | data = features[batch_ind]
44 | outputs[roi_ind, :, ph, pw] = torch.max(
45 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1)
46 |
47 | return outputs
48 |
49 |
--------------------------------------------------------------------------------
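Both ROI-pooling variants share one interface: features is a (batch, C, H, W) tensor and each ROI is a 5-vector [batch_index, x1, y1, x2, y2] in input-image coordinates, mapped onto the feature map by spatial_scale. A usage sketch against the pure-Python version above, kept in the repo's old-style Variable API; it assumes a CUDA device, since this implementation allocates its output on the GPU:

    import torch
    from torch.autograd import Variable
    from layers.roi_pooling.roi_pool_py import RoIPool

    pool = RoIPool(pooled_height=2, pooled_width=2, spatial_scale=1.0 / 16)
    features = Variable(torch.randn(1, 8, 32, 32)).cuda()   # e.g. from a 512x512 input
    rois = Variable(torch.FloatTensor([[0,  0,  0, 255, 255],
                                       [0, 64, 64, 191, 191]])).cuda()
    out = pool(features, rois)    # shape: (2, 8, 2, 2)
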
/layers/roi_pooling/src/cuda/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _ROI_POOLING_KERNEL
2 | #define _ROI_POOLING_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | int ROIPoolForwardLaucher(
9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
10 | const int width, const int channels, const int pooled_height,
11 | const int pooled_width, const float* bottom_rois,
12 | float* top_data, int* argmax_data, cudaStream_t stream);
13 |
14 |
15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
16 | const int height, const int width, const int channels, const int pooled_height,
17 | const int pooled_width, const float* bottom_rois,
18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |
24 | #endif
25 |
26 |
--------------------------------------------------------------------------------
/layers/roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 | #include <math.h>
3 |
4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
6 | {
7 | // Grab the input tensor
8 | float * data_flat = THFloatTensor_data(features);
9 | float * rois_flat = THFloatTensor_data(rois);
10 |
11 | float * output_flat = THFloatTensor_data(output);
12 |
13 | // Number of ROIs
14 | int num_rois = THFloatTensor_size(rois, 0);
15 | int size_rois = THFloatTensor_size(rois, 1);
16 | // batch size
17 | int batch_size = THFloatTensor_size(features, 0);
18 | if(batch_size != 1)
19 | {
20 | return 0;
21 | }
22 | // data height
23 | int data_height = THFloatTensor_size(features, 1);
24 | // data width
25 | int data_width = THFloatTensor_size(features, 2);
26 | // Number of channels
27 | int num_channels = THFloatTensor_size(features, 3);
28 |
29 | // Initialize all elements of the output tensor to -1 (stands in for -inf as the running max).
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1);
31 |
32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
33 | int index_roi = 0;
34 | int index_output = 0;
35 | int n;
36 | for (n = 0; n < num_rois; ++n)
37 | {
38 | int roi_batch_ind = rois_flat[index_roi + 0];
39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
43 | // CHECK_GE(roi_batch_ind, 0);
44 | // CHECK_LT(roi_batch_ind, batch_size);
45 |
46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height);
49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width);
50 |
51 | int index_data = roi_batch_ind * data_height * data_width * num_channels;
52 | const int output_area = pooled_width * pooled_height;
53 |
54 | int c, ph, pw;
55 | for (ph = 0; ph < pooled_height; ++ph)
56 | {
57 | for (pw = 0; pw < pooled_width; ++pw)
58 | {
59 | int hstart = (floor((float)(ph) * bin_size_h));
60 | int wstart = (floor((float)(pw) * bin_size_w));
61 | int hend = (ceil((float)(ph + 1) * bin_size_h));
62 | int wend = (ceil((float)(pw + 1) * bin_size_w));
63 |
64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);
68 |
69 | const int pool_index = index_output + (ph * pooled_width + pw);
70 | int is_empty = (hend <= hstart) || (wend <= wstart);
71 | if (is_empty)
72 | {
73 | for (c = 0; c < num_channels * output_area; c += output_area)
74 | {
75 | output_flat[pool_index + c] = 0;
76 | }
77 | }
78 | else
79 | {
80 | int h, w, c;
81 | for (h = hstart; h < hend; ++h)
82 | {
83 | for (w = wstart; w < wend; ++w)
84 | {
85 | for (c = 0; c < num_channels; ++c)
86 | {
87 | const int index = (h * data_width + w) * num_channels + c;
88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
89 | {
90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index];
91 | }
92 | }
93 | }
94 | }
95 | }
96 | }
97 | }
98 |
99 | // Increment ROI index
100 | index_roi += size_rois;
101 | index_output += pooled_height * pooled_width * num_channels;
102 | }
103 | return 1;
104 | }
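Note: this CPU path reads features in NHWC order (size 1 is height, size 3 is channels, flat index (h * W + w) * C + c), whereas the Python RoIPool above and the CUDA wrapper below assume NCHW, so a caller would have to permute before using it. A quick NumPy check of the flat-index convention (sizes are arbitrary):

    import numpy as np

    H, W, C = 4, 5, 3
    x = np.arange(H * W * C).reshape(H, W, C)   # one NHWC feature block
    h, w, c = 2, 3, 1
    assert x[h, w, c] == (h * W + w) * C + c    # matches the indexing in the C loop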
--------------------------------------------------------------------------------
/layers/roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);
--------------------------------------------------------------------------------
/layers/roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <math.h>
3 | #include "cuda/roi_pooling_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
9 | {
10 | // Grab the input tensor
11 | float * data_flat = THCudaTensor_data(state, features);
12 | float * rois_flat = THCudaTensor_data(state, rois);
13 |
14 | float * output_flat = THCudaTensor_data(state, output);
15 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
16 |
17 | // Number of ROIs
18 | int num_rois = THCudaTensor_size(state, rois, 0);
19 | int size_rois = THCudaTensor_size(state, rois, 1);
20 | if (size_rois != 5)
21 | {
22 | return 0;
23 | }
24 |
25 | // batch size
26 | int batch_size = THCudaTensor_size(state, features, 0);
27 | if (batch_size != 1)
28 | {
29 | return 0;
30 | }
31 | // data height
32 | int data_height = THCudaTensor_size(state, features, 2);
33 | // data width
34 | int data_width = THCudaTensor_size(state, features, 3);
35 | // Number of channels
36 | int num_channels = THCudaTensor_size(state, features, 1);
37 |
38 | cudaStream_t stream = THCState_getCurrentStream(state);
39 |
40 | ROIPoolForwardLaucher(
41 | data_flat, spatial_scale, num_rois, data_height,
42 | data_width, num_channels, pooled_height,
43 | pooled_width, rois_flat,
44 | output_flat, argmax_flat, stream);
45 |
46 | return 1;
47 | }
48 |
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 | // Grab the input tensor
53 | float * top_grad_flat = THCudaTensor_data(state, top_grad);
54 | float * rois_flat = THCudaTensor_data(state, rois);
55 |
56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
57 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
58 |
59 | // Number of ROIs
60 | int num_rois = THCudaTensor_size(state, rois, 0);
61 | int size_rois = THCudaTensor_size(state, rois, 1);
62 | if (size_rois != 5)
63 | {
64 | return 0;
65 | }
66 |
67 | // batch size
68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
69 | if (batch_size != 1)
70 | {
71 | return 0;
72 | }
73 | // data height
74 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
75 | // data width
76 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
77 | // Number of channels
78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
79 |
80 | cudaStream_t stream = THCState_getCurrentStream(state);
81 | ROIPoolBackwardLaucher(
82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
83 | data_width, num_channels, pooled_height,
84 | pooled_width, rois_flat,
85 | bottom_grad_flat, argmax_flat, stream);
86 |
87 | return 1;
88 | }
--------------------------------------------------------------------------------
/layers/roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);
3 |
4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);
--------------------------------------------------------------------------------
/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | #PYTHON=python
4 | PYTHON=python3
5 |
6 | NVCC=nvcc
7 |
8 | cd utils
9 | ${PYTHON} build.py build_ext --inplace
10 | cd ../
11 |
12 | cd layers/reorg/src
13 | echo "Compiling reorg layer kernels by nvcc..."
14 | ${NVCC} -c -o reorg_cuda_kernel.cu.o reorg_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61
15 | cd ../
16 | ${PYTHON} build.py
17 | cd ../
18 |
19 | cd roi_pooling/src/cuda
20 | echo "Compiling roi_pooling kernels by nvcc..."
21 | ${NVCC} -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61
22 | cd ../../
23 | ${PYTHON} build.py
24 | cd ../
25 |
--------------------------------------------------------------------------------
/misc/kitti_detect.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 |
4 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
5 | os.environ['DATASET'] = 'kitti'
6 |
7 | from cfgs.config_v2 import load_cfg_yamls
8 | import utils.network as net_utils
9 | import utils.yolo_v2 as yolo_utils
10 | from darknet_v3 import Darknet19
11 | from flow.flow_util import *
12 | from utils.timer import Timer
13 |
14 | dataset_yaml = '/home/cory/project/yolo2-pytorch/cfgs/config_kitti.yaml'
15 | exp_yaml = '/home/cory/project/yolo2-pytorch/cfgs/exps/kitti/kitti_baseline_v3.yaml'
16 | gpu_id = 0
17 |
18 | cfg = load_cfg_yamls([dataset_yaml, exp_yaml])
19 |
20 |
21 | def preprocess(filename):
22 | image = cv2.imread(filename)
23 | im_data = np.expand_dims(yolo_utils.preprocess_test((image, None, cfg['inp_size']))[0], 0)
24 | return image, im_data
25 |
26 |
27 | def detection_objects(bboxes, scores, cls_inds):
28 | objects = list()
29 | for i in range(len(bboxes)):
30 | box = bboxes[i]
31 | score = scores[i]
32 | label = cfg['label_names'][cls_inds[i]]
33 | objects.append((box, score, label))
34 | return objects
35 |
36 |
37 | def save_as_kitti_format(frame_id, det_obj, output_dir, src_label='voc'):
38 | # 'Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01'
39 | # 0 -1 car 0 0 0 1078 142 1126 164 0 0 0 0 0 0 0.415537
40 | with open(output_dir + '/{:06d}.txt'.format(frame_id), 'w') as file:
41 | for det in det_obj:
42 | bbox = det[0]
43 | score = det[1]
44 | label = det[2]
45 | if src_label == 'voc':
46 | if label != 'car' and label != 'person':
47 | continue
48 | label = label.replace('person', 'pedestrian')
49 | label = label.replace('Person', 'Person_sitting')
50 | line_str = '{:s} 0 0 0 {:d} {:d} {:d} {:d} 0 0 0 0 0 0 0 {:.4f}\n' \
51 | .format(label, bbox[0], bbox[1], bbox[2], bbox[3], score)
52 | # print(line_str)
53 | file.write(line_str)
54 |
55 |
56 | def main():
57 |
58 | output_dir = '../output'
59 | output_template_dir = '../output_template'
60 | kitti_output_dir = '../kitti_det_output'
61 | input_file_list = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt'
62 | # input_file_list = '/home/cory/project/yolo2-pytorch/flow/w01_imgs.txt'
63 | vis_enable = False
64 | thresh = 0.5
65 |
66 | trained_model = '/home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_half/' \
67 | 'kitti_new_2_flow_center_ft_half_5.h5'
68 |
69 | shutil.rmtree(output_dir, ignore_errors=True)
70 | shutil.rmtree(kitti_output_dir, ignore_errors=True)
71 | shutil.copytree(output_template_dir, output_dir)
72 | os.makedirs(kitti_output_dir)
73 |
74 | net = Darknet19(cfg)
75 | net_utils.load_net(trained_model, net)
76 | net.eval()
77 | net.cuda()
78 | print(trained_model)
79 | print('load model successfully')
80 |
81 | img_files = open(input_file_list)
82 | image_abs_paths = img_files.readlines()
83 | image_abs_paths = [f.strip() for f in image_abs_paths]
84 |
85 | t_det = Timer()
86 | t_total = Timer()
87 | for i, image_path in enumerate(image_abs_paths):
88 | t_total.tic()
89 | image, im_data = preprocess(image_path)
90 | im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)
91 |
92 | t_det.tic()
93 | bbox_pred, iou_pred, prob_pred = net.forward(im_data)
94 | det_time = t_det.toc()
95 |
96 | bbox_pred = bbox_pred.data.cpu().numpy()
97 | iou_pred = iou_pred.data.cpu().numpy()
98 | prob_pred = prob_pred.data.cpu().numpy()
99 |
100 | bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
101 | det_obj = detection_objects(bboxes, scores, cls_inds)
102 | save_as_kitti_format(i, det_obj, kitti_output_dir, src_label='kitti')
103 |
104 | total_time = t_total.toc()
105 | format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms) %s'
106 | print(format_str % (
107 | i, 1. / det_time, det_time * 1000, 1. / total_time, total_time * 1000, image_path))
108 |
109 | t_det.clear()
110 | t_total.clear()
111 |
112 | if vis_enable:
113 | im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
114 | cv2.imshow('detection', im2show)
115 | cv2.imwrite(output_dir + '/detection/{:04d}.jpg'.format(i), im2show)
116 | key = cv2.waitKey(0)
117 | if key == ord('q'):
118 | break
119 |
120 |
121 | if __name__ == '__main__':
122 | main()
123 |
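Note: save_as_kitti_format() emits one 16-field KITTI-style line per detection, with only the class, the 2D box, and the trailing score populated. For a hypothetical detection tuple (as built by detection_objects()) the output looks like this:

    det = ((712, 143, 810, 307), 0.9134, 'car')   # (bbox, score, label), made-up values
    bbox, score, label = det
    line = '{:s} 0 0 0 {:d} {:d} {:d} {:d} 0 0 0 0 0 0 0 {:.4f}'.format(
        label, bbox[0], bbox[1], bbox[2], bbox[3], score)
    print(line)  # car 0 0 0 712 143 810 307 0 0 0 0 0 0 0 0.9134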
--------------------------------------------------------------------------------
/misc/validate_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import numpy as np
5 |
6 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
7 |
8 |
9 | from cfgs.config_v2 import add_cfg
10 | import utils.network as net_utils
11 | from darknet_v3 import Darknet19
12 | from datasets.ImageFileDataset_v2 import ImageFileDataset
13 | from utils.timer import Timer
14 | from train.train_util_v2 import *
15 |
16 |
17 | # dataset_yaml = '/home/cory/yolo2-pytorch/cfgs/config_kitti.yaml'
18 | # exp_yaml = '/home/cory/yolo2-pytorch/cfgs/exps/kitti_new_2.yaml'
19 | dataset_yaml = '/home/cory/yolo2-pytorch/cfgs/config_voc.yaml'
20 | exp_yaml = '/home/cory/yolo2-pytorch/cfgs/exps/voc0712_template.yaml'
21 |
22 | cfg = dict()
23 | # add_cfg(cfg, '/home/cory/yolo2-pytorch/cfgs/config_voc.yaml')
24 | add_cfg(cfg, dataset_yaml)
25 | add_cfg(cfg, exp_yaml)
26 |
27 | # data loader
28 | imdb = ImageFileDataset(cfg, ImageFileDataset.preprocess_train,
29 | processes=4, shuffle=False, dst_size=None, mode='val')
30 |
31 | print('imdb load data succeeded')
32 | net = Darknet19(cfg)
33 |
34 | # CUDA_VISIBLE_DEVICES=1
35 | # 20 0.68
36 | # 40 0.60
37 | # 45 0.56
38 | # 50 0.58
39 | # 55 0.55
40 | # 60 0.59
41 |
42 | os.makedirs(cfg['train_output_dir'], exist_ok=True)
43 | try:
44 | ckp = open(cfg['train_output_dir'] + '/check_point.txt')
45 | ckp_epoch = int(ckp.readlines()[0])
46 | # ckp_epoch = 100
47 | # raise IOError
48 | use_model = os.path.join(cfg['train_output_dir'], cfg['exp_name'] + '_' + str(ckp_epoch) + '.h5')
49 | except IOError:
50 | ckp_epoch = 0
51 | use_model = cfg['pretrained_model']
52 |
53 | net_utils.load_net(use_model, net)
54 |
55 | net.cuda()
56 | net.train()
57 | print('load net succeeded')
58 |
59 | start_epoch = ckp_epoch
60 | imdb.epoch = start_epoch
61 |
62 | # show training parameters
63 | print('-------------------------------')
64 | print('gpu_id', os.environ.get('CUDA_VISIBLE_DEVICES'))
65 | print('use_model', use_model)
66 | print('exp_name', cfg['exp_name'])
67 | print('dataset', cfg['dataset_name'])
68 | print('optimizer', cfg['optimizer'])
69 | print('opt_param', cfg['opt_param'])
70 | print('train_batch_size', cfg['train_batch_size'])
71 | print('start_epoch', start_epoch)
72 | print('lr', lookup_lr(cfg, start_epoch))
73 | print('-------------------------------')
74 |
75 |
76 | train_loss = 0
77 | bbox_loss, iou_loss, cls_loss = 0., 0., 0.
78 | cnt = 0
79 |
80 | timer = Timer()
81 |
82 | # default input size
83 | network_size = np.array(cfg['inp_size'], dtype=np.int)
84 |
85 | for step in range(start_epoch * imdb.batch_per_epoch, (start_epoch + 5) * imdb.batch_per_epoch + 1):
86 | timer.tic()
87 |
88 | prev_epoch = imdb.epoch
89 | batch = imdb.next_batch(network_size)
90 |
91 | # when advancing to the next epoch
92 | if imdb.epoch > prev_epoch:
93 | train_loss /= cnt
94 | bbox_loss /= cnt
95 | iou_loss /= cnt
96 | cls_loss /= cnt
97 | print()
98 | print('loss: %.3f, bbox_loss: %.3f, iou_loss: %.3f, cls_loss: %.3f' %
99 | (train_loss, bbox_loss, iou_loss, cls_loss))
100 |
101 | train_loss = 0
102 | bbox_loss, iou_loss, cls_loss = 0., 0., 0.
103 | cnt = 0
104 | timer.clear()
105 |
106 | # forward
107 | im_data = net_utils.np_to_variable(batch['images'], is_cuda=True, volatile=False).permute(0, 3, 1, 2)
108 | x = net.forward(im_data, batch['gt_boxes'], batch['gt_classes'], network_size)
109 |
110 | # loss
111 | bbox_loss += net.bbox_loss.data.cpu().numpy()[0]
112 | iou_loss += net.iou_loss.data.cpu().numpy()[0]
113 | cls_loss += net.class_loss.data.cpu().numpy()[0]
114 | train_loss += net.loss.data.cpu().numpy()[0]
115 | cnt += 1
116 |
117 | if step % cfg['disp_interval'] == 0:
118 | progress_in_epoch = (step % imdb.batch_per_epoch) / imdb.batch_per_epoch
119 | print('%.2f%%' % (progress_in_epoch * 100), end=' ')
120 | sys.stdout.flush()
121 |
122 | imdb.close()
123 |
--------------------------------------------------------------------------------
/misc/vis.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/misc/vis.jpg
--------------------------------------------------------------------------------
/misc/visualize_gt.py:
--------------------------------------------------------------------------------
1 | import cv2
2 |
3 |
4 | def proc_label(img, values):
5 | if len(values) <= 1:
6 | return
7 | label = values[0]
8 | official_format = False
9 | label = label.replace('DontCare', '')
10 | if official_format:
11 | xmin = int(float(values[4]))
12 | ymin = int(float(values[5]))
13 | xmax = int(float(values[6]))
14 | ymax = int(float(values[7]))
15 | else:
16 | xmin = int(float(values[1]))
17 | ymin = int(float(values[2]))
18 | xmax = int(float(values[3]))
19 | ymax = int(float(values[4]))
20 |
21 | cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1)
22 | cv2.putText(img, label, (xmin, ymax), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)
23 | print(values)
24 |
25 |
26 | def plot_vis(image_path, label_path):
27 | img = cv2.imread(image_path)
28 | print(img.shape)
29 |
30 | if isinstance(label_path, str):
31 | label_file = open(label_path)
32 | vv = [f.strip().split(' ') for f in label_file.readlines()]
33 | else:
34 | vv = label_path
35 |
36 | for values in vv:
37 | proc_label(img, values)
38 |
39 | cv2.imshow('img', img)
40 | cv2.imwrite('vis.jpg', img)
41 | key = cv2.waitKey(100)
42 | print(key)
43 | if key == ord('q'):
44 | return -1
45 | return 0
46 |
47 |
48 | def run_vis():
49 | choice = 1  # must be one of the choices 1-4 below; any other value leaves image_path undefined
50 | if choice == 1:
51 | image_path = '/home/cory/cedl/dashcam/images/000900/000010.jpg'
52 | label_path = '/home/cory/cedl/dashcam/labels/000900/000010.txt'
53 | elif choice == 2:
54 | image_path = '/home/cory/KITTI_Dataset/data_tracking_image_2/training/image_02/0000/000000.png'
55 | label_path = '/home/cory/KITTI_Dataset/tracking_label/0000/000000.txt'
56 | elif choice == 3:
57 | image_path = '/home/cory/VOC/VOCdevkit/VOC2007/JPEGImages/000009.jpg'
58 | label_path = '/home/cory/VOC/VOCdevkit/VOC2007/labels/000009.txt'
59 | elif choice == 4:
60 | image_path = '/home/cory/GTAV/VOCdevkit/VOC2012/JPEGImages/3384645.jpg'
61 | label_path = '/home/cory/GTAV/VOCdevkit/VOC2012/labels/3384645.txt'
62 |
63 | plot_vis(image_path, label_path)
64 |
65 |
66 | def vis_list_file():
67 | # image_path = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_images.txt'
68 | # label_path = '/home/cory/yolo2-pytorch/train_data/voc/voc_train_labels.txt'
69 | # image_path = '/media/cory/BackUp/ImageNet/vid_all_images.txt'
70 | # label_path = '/media/cory/BackUp/ImageNet/vid_all_labels.txt'
71 | image_path = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_images.txt'
72 | label_path = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_train_labels.txt'
73 |
74 | # image_path = '/home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_images.txt'
75 | # label_path = '/home/cory/project/yolo2-pytorch/train_data/detrac/detrac_train_labels.txt'
76 |
77 | image_file = open(image_path)
78 | label_file = open(label_path)
79 | images = [p.strip() for p in image_file.readlines()]
80 | labels = [p.strip() for p in label_file.readlines()]
81 | for i in range(len(images)):
82 | if i < 500:
83 | continue
84 | print(images[i], labels[i])
85 | r = plot_vis(images[i], labels[i])
86 | if r == -1:
87 | break
88 |
89 | if __name__ == '__main__':
90 | vis_list_file()
91 | # run_vis()
92 |
--------------------------------------------------------------------------------
/misc/voc_data.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | all_labels = 'VOC/voc0712_labels.txt'
4 | train_images = 'VOC/detrac_train_images.txt'
5 | train_labels = 'VOC/detrac_train_labels.txt'
6 |
7 |
8 | prefix = '/home/cory/VOC/VOCdevkit/VOC2007/labels/'
9 |
10 |
11 | def sort_file_line(filename):
12 | f = open(filename)
13 | sorted_line = sorted(f.readlines())
14 | for line in sorted_line:
15 | print(line, end='')
16 |
17 | sort_file_line(train_images)
18 |
19 |
20 | def get_filename_id(fullpath):
21 | filename_begin_pos = fullpath.rfind('/') + 1
22 | filename_end_pos = fullpath.rfind('.')
23 | fname = fullpath[filename_begin_pos: filename_end_pos]
24 | return fname
25 |
26 |
27 | def convert_main():
28 | all_id = [get_filename_id(f.strip()) for f in open(all_labels).readlines()]
29 | train_id = [get_filename_id(f.strip()) for f in open(train_images).readlines()]
30 |
31 | train_counter = 0
32 | test_counter = 0
33 | for id in all_id:
34 | if id in train_id:
35 | # print(train_counter, id)
36 | print(prefix + id + '.txt')
37 | train_counter += 1
38 | else:
39 | # print(test_counter, id, 'test')
40 | # print(prefix + id + '.txt')
41 | test_counter += 1
42 |
43 | print('total label:', len(all_id))
44 | print('train', train_counter)
45 | print('test', test_counter)
--------------------------------------------------------------------------------
/misc/yolo_video_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import numpy as np
4 |
5 | from darknet import Darknet19
6 | import utils.yolo as yolo_utils
7 | import utils.network as net_utils
8 | from utils.timer import Timer
9 | import cfgs.config as cfg
10 |
11 |
12 | def preprocess(filename):
13 | image = cv2.imread(filename)
14 | im_data = np.expand_dims(yolo_utils.preprocess_test((image, None, cfg.inp_size))[0], 0)
15 | return image, im_data
16 |
17 |
18 | def main():
19 |
20 | trained_model = cfg.trained_model
21 | thresh = 0.5
22 | image_dir = '/home/cory/cedl/vid/videos/vid04'
23 |
24 | net = Darknet19()
25 | net_utils.load_net(trained_model, net)
26 | net.eval()
27 | net.cuda()
28 | print('load model successfully')
29 | print(net)
30 |
31 | image_extensions = ['.jpg', '.JPG', '.png', '.PNG']
32 | image_abs_paths = sorted([os.path.join(image_dir, name)
33 | for name in os.listdir(image_dir)
34 | if name[-4:] in image_extensions])
35 |
36 | t_det = Timer()
37 | t_total = Timer()
38 |
39 | for i, image_path in enumerate(image_abs_paths):
40 | t_total.tic()
41 | image, im_data = preprocess(image_path)
42 | im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)
43 | t_det.tic()
44 | bbox_pred, iou_pred, prob_pred = net.forward(im_data)
45 | det_time = t_det.toc()
46 | # to numpy
47 | bbox_pred = bbox_pred.data.cpu().numpy()
48 | iou_pred = iou_pred.data.cpu().numpy()
49 | prob_pred = prob_pred.data.cpu().numpy()
50 |
51 | # print bbox_pred.shape, iou_pred.shape, prob_pred.shape
52 |
53 | bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
54 |
55 | im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
56 |
57 | if im2show.shape[0] > 1100:
58 | im2show = cv2.resize(im2show, (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))
59 | cv2.imshow('test', im2show)
60 |
61 | total_time = t_total.toc()
62 | format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
63 | print(format_str % (
64 | i, 1. / det_time, det_time * 1000, 1. / total_time, total_time * 1000))
65 |
66 | t_det.clear()
67 | t_total.clear()
68 |
69 | key = cv2.waitKey(1)
70 | if key == ord('q'):
71 | break
72 |
73 |
74 | if __name__ == '__main__':
75 | main()
76 |
--------------------------------------------------------------------------------
/train/train_util_v2.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from train.yellowfin import YFOptimizer
3 |
4 |
5 | def lookup_lr(cfg, ep):
6 | lr_epochs = cfg['lr_epoch']
7 | lr_vals = cfg['lr_val']
8 | for i in range(len(lr_epochs) - 1):
9 | if lr_epochs[i] <= ep < lr_epochs[i + 1]:
10 | return lr_vals[i]
11 | return lr_vals[- 1] # last lr
12 |
13 |
14 | def get_optimizer_lr(optimizer):
15 | return optimizer.param_groups[0]['lr']
16 |
17 |
18 | def get_optimizer(cfg, net, epoch):
19 | lr = lookup_lr(cfg, epoch)
20 | optimizer = None
21 | if cfg['optimizer'] == 'SGD':
22 | if cfg['opt_param'] == 'all':
23 | optimizer = torch.optim.SGD(params=net.parameters(),
24 | momentum=cfg['momentum'],
25 | weight_decay=cfg['weight_decay'],
26 | nesterov=True,
27 | lr=lr)
28 | elif cfg['opt_param'] == 'conv345':
29 | optimizer = torch.optim.SGD(params=[{'params': net.conv3.parameters()},
30 | {'params': net.conv4.parameters()},
31 | {'params': net.conv5.parameters()}],
32 | momentum=cfg['momentum'],
33 | weight_decay=cfg['weight_decay'],
34 | nesterov=True,
35 | lr=lr)
36 | elif cfg['optimizer'] == 'Adam':
37 | if cfg['opt_param'] == 'all':
38 | optimizer = torch.optim.Adam(params=net.parameters(),
39 | weight_decay=cfg['weight_decay'],
40 | lr=lr)
41 | elif cfg['opt_param'] == 'conv345':
42 | optimizer = torch.optim.Adam(params=[{'params': net.conv3.parameters()},
43 | {'params': net.conv4.parameters()},
44 | {'params': net.conv5.parameters()}],
45 | weight_decay=cfg['weight_decay'],
46 | lr=lr)
47 | elif cfg['optimizer'] == 'YF':
48 | if cfg['opt_param'] == 'all':
49 | optimizer = YFOptimizer(var_list=net.parameters())
50 | elif cfg['opt_param'] == 'conv345':
51 | optimizer = YFOptimizer(var_list=[{'params': net.conv3.parameters()},
52 | {'params': net.conv4.parameters()},
53 | {'params': net.conv5.parameters()}])
54 |
55 | assert optimizer is not None
56 |
57 | print('optimizer_lr =', get_optimizer_lr(optimizer))
58 | return optimizer
59 |
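Note: lookup_lr() implements a step schedule over the lr_epoch boundaries; an epoch below lr_epoch[0] falls through to the last value, so schedules are expected to start at epoch 0. A quick check with a hypothetical config:

    cfg = {'lr_epoch': [0, 60, 90], 'lr_val': [1e-3, 1e-4, 1e-5]}  # made-up schedule
    print(lookup_lr(cfg, 30))   # 0.001   (0 <= 30 < 60)
    print(lookup_lr(cfg, 75))   # 0.0001  (60 <= 75 < 90)
    print(lookup_lr(cfg, 120))  # 1e-05   (past the last boundary)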
--------------------------------------------------------------------------------
/train_data/gen_dashcam_train_data.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | all_images_file = '/home/cory/cedl/dashcam/all_images.txt'
4 | all_labels_file = '/home/cory/cedl/dashcam/all_labels.txt'
5 | orig_label_dir = '/home/cory/cedl/dashcam/labels_video'
6 | output_label_dir = '/home/cory/cedl/dashcam/labels'
7 |
8 |
9 | def copy_exclude(filename, outfilename, patterns):
10 | with open(outfilename, 'w') as out:
11 | with open(filename) as f:
12 | for line in f.readlines():
13 | pattern_found = False
14 | for p in patterns:
15 | if line.find(p) >= 0:
16 | pattern_found = True
17 | if not pattern_found:
18 | out.write(line)
19 | print(line.strip())
20 |
21 |
22 | def copy_if(filename, outfilename, patterns):
23 | with open(outfilename, 'w') as out:
24 | with open(filename) as f:
25 | for line in f.readlines():
26 | matched = True
27 | for p in patterns:
28 | if line.find(p) == -1:
29 | matched = False
30 | break
31 | if matched:
32 | out.write(line)
33 | print(line.strip())
34 |
35 |
36 | #
37 | all_class = list()
38 |
39 |
40 | def gen_each_label():
41 | for label_file in os.listdir(orig_label_dir):
42 | id_str = label_file[:label_file.rfind('.')]
43 | full_path = os.path.join(orig_label_dir, label_file)
44 | print(id_str, full_path)
45 | out_dir_video = os.path.join(output_label_dir, id_str)
46 | if not os.path.exists(out_dir_video):
47 | os.mkdir(out_dir_video)
48 |
49 | video_label_file = open(full_path)
50 | labels_per_frame = list()
51 | for i in range(100):
52 | labels_per_frame.append(list())
53 |
54 | for line in video_label_file.readlines():
55 | values = line.strip().split('\t')
56 | frame = int(values[0])
57 | classs = values[2].replace('"', '')
58 | if classs not in all_class:
59 | all_class.append(classs)
60 | xmin = int(values[3])
61 | ymin = int(values[4])
62 | xmax = int(values[5])
63 | ymax = int(values[6])
64 | bundle = (classs, xmin, ymin, xmax, ymax)
65 | labels_per_frame[frame - 1].append(bundle)
66 |
67 | for frame_i, labels in enumerate(labels_per_frame):
68 | out_file_name = out_dir_video + '/{:06d}.txt'.format(frame_i + 1)
69 | print(out_file_name, labels)
70 | out_file = open(out_file_name, 'w')
71 | for label in labels:
72 | print(label)
73 | out_file.write(' '.join([str(s) for s in label]) + '\n')
74 |
75 |
76 | if __name__ == '__main__':
77 | # gen_each_label()
78 |
79 | # exclude 9xx series video
80 | copy_exclude(all_images_file, 'dashcam_train_images.txt', ['/0009', '1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.'])
81 | copy_exclude(all_labels_file, 'dashcam_train_labels.txt', ['/0009', '1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.'])
82 | copy_if(all_images_file, 'dashcam_val_images.txt', ['/0009', '0.'])
83 | copy_if(all_labels_file, 'dashcam_val_labels.txt', ['/0009', '0.'])
84 | print(all_class)
85 |
--------------------------------------------------------------------------------
/train_data/gen_kitti_det_train_data.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | kitti_det_label_path = '/home/cory/KITTI_Dataset/data_object_image_2/training/label_2'
4 | out_label_path = '/home/cory/KITTI_Dataset/detection_label'
5 |
6 | all_images_file = '/media/cory/c_disk/Project/KITTI_Dataset/kitti_detection_images.txt'
7 | all_labels_file = '/media/cory/c_disk/Project/KITTI_Dataset/kitti_detection_labels.txt'
8 |
9 |
10 | def copy_exclude(filename, outfilename, patterns):
11 | with open(outfilename, 'w') as out:
12 | with open(filename) as f:
13 | for line in f.readlines():
14 | pattern_found = False
15 | for p in patterns:
16 | if line.find(p) >= 0:
17 | pattern_found = True
18 | if not pattern_found:
19 | out.write(line)
20 | print(line.strip())
21 |
22 |
23 | def copy_include(filename, outfilename, patterns):
24 | with open(outfilename, 'w') as out:
25 | with open(filename) as f:
26 | for line in f.readlines():
27 | for p in patterns:
28 | if line.find(p) >= 0:
29 | print(line.strip())
30 | out.write(line)
31 | break
32 |
33 |
34 | def convert_file(infile_path, outfile_path):
35 | # 'Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01'
36 | infile = open(infile_path)
37 | outfile = open(outfile_path, 'w')
38 | for line in infile.readlines():
39 | v = line.strip().split(' ')
40 | bb = list(map(str, map(int, map(float, v[4:8]))))
41 | outfile.write(v[0] + ' ' + ' '.join(bb) + '\n')
42 |
43 |
44 | def convert_format():
45 | file_list = os.listdir(kitti_det_label_path)
46 | file_list.sort()
47 | for f in file_list:
48 | infile_path = kitti_det_label_path + '/' + f
49 | outfile_path = out_label_path + '/' + f
50 | convert_file(infile_path, outfile_path)
51 |
52 | print(infile_path, outfile_path)
53 |
54 | print(len(file_list))
55 |
56 |
57 | def main():
58 | copy_exclude(all_images_file, 'kitti/kitti_det_train_images.txt', ['/006', '/007'])
59 | copy_exclude(all_labels_file, 'kitti/kitti_det_train_labels.txt', ['/006', '/007'])
60 | copy_include(all_images_file, 'kitti/kitti_det_val_images.txt', ['/006', '/007'])
61 | copy_include(all_labels_file, 'kitti/kitti_det_val_labels.txt', ['/006', '/007'])
62 |
63 | if __name__ == '__main__':
64 | # convert_format()
65 | main()
66 |
--------------------------------------------------------------------------------
/train_data/gen_kitti_train_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 |
4 | all_images_file = '/media/cory/c_disk/Project/KITTI_Dataset/kitti_tracking_images.txt'
5 | all_labels_file = '/media/cory/c_disk/Project/KITTI_Dataset/kitti_tracking_labels.txt'
6 |
7 | tracking_raw_dir = '/media/cory/c_disk/Project/KITTI_Dataset/data_tracking_label_2/training/label_02'
8 | tracking_label_output = '/media/cory/c_disk/Project/KITTI_Dataset/trk'
9 |
10 |
11 | tracklet_count = [154, 447, 233, 144, 314, 297, 270, 800, 390, 803,
12 | 294, 373, 78, 340, 106, 376, 209, 145, 339, 1059,
13 | 837]
14 |
15 |
16 | def convert_tracking_to_detection():
17 | all_tracking_labels = os.listdir(tracking_raw_dir)
18 | if not os.path.exists(tracking_label_output):
19 | os.mkdir(tracking_label_output)
20 | for merged_file in all_tracking_labels:
21 | track_id = merged_file.replace('.txt', '')
22 | print(track_id)
23 | tk_out = tracking_label_output + '/' + track_id
24 | shutil.rmtree(tk_out, ignore_errors=True)
25 | os.mkdir(tk_out)
26 | f = open(tracking_raw_dir + '/' + merged_file)
27 | lines = f.readlines()
28 | num_image = tracklet_count[int(track_id)]
29 | for i in range(num_image):
30 | frame_id = '{:06d}'.format(i)
31 | open(tk_out + '/' + frame_id + '.txt', 'w')
32 |
33 | for line in lines:
34 | v = line.strip().split(' ')
35 | frame_id = '{:06d}'.format(int(v[0]))
36 | data = v[2] + ' ' + ' '.join(v[6:10]) + ' ' + ' '.join(v[3:6]) + '\n'
37 | of = open(tk_out + '/' + frame_id + '.txt', 'a')
38 | of.write(data)
39 |
40 |
41 | def copy_exclude(filename, outfilename, patterns):
42 | with open(outfilename, 'w') as out:
43 | with open(filename) as f:
44 | for line in f.readlines():
45 | pattern_found = False
46 | for p in patterns:
47 | if line.find(p) >= 0:
48 | pattern_found = True
49 | if not pattern_found:
50 | out.write(line)
51 | print(line.strip())
52 |
53 |
54 | def copy_include(filename, outfilename, patterns):
55 | with open(outfilename, 'w') as out:
56 | with open(filename) as f:
57 | for line in f.readlines():
58 | for p in patterns:
59 | if line.find(p) >= 0:
60 | print(line.strip())
61 | out.write(line)
62 | break
63 |
64 |
65 | def main():
66 | test_set = ['/0001/', '/0005/', '/0013/', '/0017/']
67 | copy_exclude(all_images_file, 'kitti/kitti_train_images.txt', test_set)
68 | copy_exclude(all_labels_file, 'kitti/kitti_train_labels.txt', test_set)
69 | copy_include(all_images_file, 'kitti/kitti_val_images.txt', test_set)
70 | copy_include(all_labels_file, 'kitti/kitti_val_labels.txt', test_set)
71 |
72 | if __name__ == '__main__':
73 | convert_tracking_to_detection()
74 | # main()
75 |
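Note: convert_tracking_to_detection() reshuffles KITTI tracking columns (frame, track_id, type, truncated, occluded, alpha, x1, y1, x2, y2, ...) into per-frame 'type x1 y1 x2 y2 truncated occluded alpha' lines. A toy line (values are made up) illustrates the mapping:

    line = '0 1 Car 0 0 -1.57 296.74 161.75 455.53 292.29 1.57 1.65 3.35 -5.0 1.8 13.2 -1.6'
    v = line.strip().split(' ')
    frame_id = '{:06d}'.format(int(v[0]))
    data = v[2] + ' ' + ' '.join(v[6:10]) + ' ' + ' '.join(v[3:6])
    print(frame_id, '->', data)  # 000000 -> Car 296.74 161.75 455.53 292.29 0 0 -1.57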
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/utils/__init__.py
--------------------------------------------------------------------------------
/utils/barrier.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 |
4 | class Barrier(object):
5 | def __init__(self):
6 | self.t_list = [time.time()]
7 | self.idx_list = [0]
8 | pass
9 |
10 | def add(self, idx):
11 | self.t_list.append(time.time())
12 | self.idx_list.append(idx)
13 |
14 | def print(self):
15 | sum = 0.0
16 | for i in range(len(self.t_list) - 1):
17 | diff = self.t_list[i + 1] - self.t_list[i]
18 | print(self.idx_list[i + 1], '{:.4f} seconds'.format(diff))
19 | sum += diff
20 | print('--- sum {:.4f}'.format(sum))
21 |
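Note: a short usage sketch for Barrier (sleeps stand in for real pipeline stages; assumes the class above is importable):

    import time

    b = Barrier()
    time.sleep(0.02)    # stage 1 stand-in
    b.add('load')
    time.sleep(0.01)    # stage 2 stand-in
    b.add('forward')
    b.print()           # one '<label> <gap> seconds' line per stage, then the sum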
--------------------------------------------------------------------------------
/utils/im_transform.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 |
5 | def imcv2_recolor(im, a=.1):
6 | # t = [np.random.uniform()]
7 | # t += [np.random.uniform()]
8 | # t += [np.random.uniform()]
9 | # t = np.array(t) * 2. - 1.
10 | t = np.random.uniform(-1, 1, 3)
11 |
12 | # random amplify each channel
13 | im = im.astype(np.float)
14 | im *= (1 + t * a)
15 | mx = 255. * (1 + a)
16 | up = np.random.uniform(-1, 1)
17 | im = np.power(im / mx, 1. + up * .5)
18 | # return np.array(im * 255., np.uint8)
19 | return im
20 |
21 |
22 | def imcv2_affine_trans(im):
23 | # Scale and translate
24 | h, w, c = im.shape
25 | scale = np.random.uniform() / 10. + 1.
26 | max_offx = (scale - 1.) * w
27 | max_offy = (scale - 1.) * h
28 | offx = int(np.random.uniform() * max_offx)
29 | offy = int(np.random.uniform() * max_offy)
30 |
31 | im = cv2.resize(im, (0, 0), fx=scale, fy=scale)
32 | im = im[offy: (offy + h), offx: (offx + w)]
33 | flip = np.random.uniform() > 0.5
34 | if flip:
35 | im = cv2.flip(im, 1)
36 |
37 | return im, [scale, [offx, offy], flip]
38 |
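Note: imcv2_affine_trans() returns the transform record [scale, [offx, offy], flip] precisely so ground-truth boxes can be kept in sync with the image. The helper below is an illustrative sketch (not part of this file) of the matching box adjustment:

    import numpy as np

    def offset_boxes(boxes, im_shape, scale, offs, flip):
        """Apply the same scale / crop / flip to (N, 4) [x1, y1, x2, y2] boxes."""
        boxes = np.asarray(boxes, dtype=np.float32) * scale
        boxes[:, 0::2] -= offs[0]               # x coordinates shift by the crop offset
        boxes[:, 1::2] -= offs[1]               # y coordinates likewise
        if flip:
            w = im_shape[1]
            x1 = boxes[:, 0].copy()
            boxes[:, 0] = w - 1 - boxes[:, 2]   # mirror horizontally
            boxes[:, 2] = w - 1 - x1
        return boxes

    print(offset_boxes([[10., 20., 50., 60.]], (240, 320, 3), 1.05, (8, 4), True))
    # roughly [[274.5, 17.0, 316.5, 59.0]]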
--------------------------------------------------------------------------------
/utils/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/utils/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cory8249/yolo2-pytorch/8ceb0e5beaaf0d83b751b361045fb23c0874a486/utils/nms/__init__.py
--------------------------------------------------------------------------------
/utils/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
--------------------------------------------------------------------------------
/utils/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 | int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/utils/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 | np.int32_t device_id=0):
18 | cdef int boxes_num = dets.shape[0]
19 | cdef int boxes_dim = dets.shape[1]
20 | cdef int num_out
21 | cdef np.ndarray[np.int32_t, ndim=1] \
22 | keep = np.zeros(boxes_num, dtype=np.int32)
23 | cdef np.ndarray[np.float32_t, ndim=1] \
24 | scores = dets[:, 4]
25 | cdef np.ndarray[np.int_t, ndim=1] \
26 | order = scores.argsort()[::-1]
27 | cdef np.ndarray[np.float32_t, ndim=2] \
28 | sorted_dets = dets[order, :]
29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 | keep = keep[:num_out]
31 | return list(order[keep])
32 |
--------------------------------------------------------------------------------
/utils/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
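Note: a quick sanity check of py_cpu_nms with toy boxes ([x1, y1, x2, y2, score]; assumes the function above is importable):

    import numpy as np

    dets = np.array([[0,  0,  10, 10, 0.9],    # highest score, kept
                     [1,  1,  11, 11, 0.8],    # IoU ~0.70 with box 0 -> suppressed
                     [20, 20, 30, 30, 0.7]],   # disjoint -> kept
                    dtype=np.float32)
    print(py_cpu_nms(dets, 0.5))  # [0, 2]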
--------------------------------------------------------------------------------
/utils/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from .nms.cpu_nms import cpu_nms
9 | from .nms.gpu_nms import gpu_nms
10 |
11 |
12 | # def nms(dets, thresh, force_cpu=False):
13 | # """Dispatch to either CPU or GPU NMS implementations."""
14 | #
15 | # if dets.shape[0] == 0:
16 | # return []
17 | # if cfg.USE_GPU_NMS and not force_cpu:
18 | # return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
19 | # else:
20 | # return cpu_nms(dets, thresh)
21 |
22 |
23 | def nms(dets, thresh, force_cpu=False):
24 | """Dispatch to either CPU or GPU NMS implementations."""
25 |
26 | if dets.shape[0] == 0:
27 | return []
28 | if force_cpu:
29 | return cpu_nms(dets, thresh)
30 | return gpu_nms(dets, thresh)
31 |
--------------------------------------------------------------------------------
/utils/plot_loss.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | exp1 = 'detrac_baseline'
5 | exp2 = 'detrac_baseline'
6 | log_file1 = '/home/cory/project/yolo2-pytorch/models/training/' + exp1 + '/train.log' # red
7 | log_file2 = '/home/cory/project/yolo2-pytorch/models/training/' + exp2 + '/train.log' # blue
8 | log1 = np.genfromtxt(log_file1, delimiter=', ')
9 | log2 = np.genfromtxt(log_file2, delimiter=', ')
10 |
11 |
12 | def moving_avg(x, N):
13 | return np.convolve(x, np.ones((N,))/N, mode='valid')
14 |
15 | begin_index = min(0, log1.shape[0], log2.shape[0])
16 | end_index = min(log1.shape[0], log2.shape[0])
17 | N_avg = 5
18 | N_log_per_epoch = 55
19 | x = np.arange(begin_index, end_index - N_avg + 1, dtype=np.float32)
20 | x /= N_log_per_epoch
21 | print()
22 | s1 = moving_avg(log1[begin_index:end_index, 2], N_avg)
23 | s2 = moving_avg(log2[begin_index:end_index, 2], N_avg)
24 |
25 | log_scale = True
26 | if log_scale:
27 | s1 = np.log(s1)
28 | s2 = np.log(s2)
29 |
30 | if log_file1 != log_file2:
31 | plt.plot(x, s1, 'r-', x, s2, 'b-')
32 | else:
33 | plt.plot(x, s1, 'r-')
34 |
35 | axes = plt.gca()
36 | # plt.ylim([0, 1])
37 | plt.show()
38 |
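Note: moving_avg() is a plain box-kernel convolution in 'valid' mode, so it returns len(x) - N + 1 points (restated standalone here):

    import numpy as np

    def moving_avg(x, N):
        return np.convolve(x, np.ones((N,)) / N, mode='valid')

    print(moving_avg([1, 2, 3, 4], 2))  # [1.5 2.5 3.5]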
--------------------------------------------------------------------------------
/utils/pycocotools/UPSTREAM_REV:
--------------------------------------------------------------------------------
1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574
2 |
--------------------------------------------------------------------------------
/utils/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/utils/pycocotools/license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 |
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 |
24 | The views and conclusions contained in the software and documentation are those
25 | of the authors and should not be interpreted as representing official policies,
26 | either expressed or implied, of the FreeBSD Project.
27 |
--------------------------------------------------------------------------------
/utils/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | from . import _mask
4 |
5 | # Interface for manipulating masks stored in RLE format.
6 | #
7 | # RLE is a simple yet efficient format for storing binary masks. RLE
8 | # first divides a vector (or vectorized image) into a series of piecewise
9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | # encode - Encode binary masks using RLE.
32 | # decode - Decode binary masks encoded via RLE.
33 | # merge - Compute union or intersection of encoded masks.
34 | # iou - Compute intersection over union between masks.
35 | # area - Compute area of encoded masks.
36 | # toBbox - Get bounding boxes surrounding encoded masks.
37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | # Rs = encode( masks )
41 | # masks = decode( Rs )
42 | # R = merge( Rs, intersect=false )
43 | # o = iou( dt, gt, iscrowd )
44 | # a = area( Rs )
45 | # bbs = toBbox( Rs )
46 | # Rs = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | # Rs - [dict] Run-length encoding of binary masks
50 | # R - dict Run-length encoding of binary mask
51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | # dt,gt - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criteria. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criteria above for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox. version 2.0
72 | # Data, paper, and tutorials available at: http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 |
76 | encode = _mask.encode
77 | decode = _mask.decode
78 | iou = _mask.iou
79 | merge = _mask.merge
80 | area = _mask.area
81 | toBbox = _mask.toBbox
82 | frPyObjects = _mask.frPyObjects
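Note: the counts convention described in the comment block is easy to verify with a few lines of pure Python. This sketch reproduces only the uncompressed counts, not the LEB128 string packing or anything from the compiled _mask module:

    def rle_counts(m):
        # Run lengths of a binary vector; the first count is always the
        # number of leading zeros, as the comment above specifies.
        counts, prev, run = [], 0, 0
        for v in m:
            if v == prev:
                run += 1
            else:
                counts.append(run)
                prev, run = v, 1
        counts.append(run)
        return counts

    print(rle_counts([0, 0, 1, 1, 1, 0, 1]))  # [2, 3, 1, 1]
    print(rle_counts([1, 1, 1, 1, 1, 1, 0]))  # [0, 6, 1]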
--------------------------------------------------------------------------------
/utils/pycocotools/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 | #include <stdbool.h>
9 |
10 | typedef unsigned int uint;
11 | typedef unsigned long siz;
12 | typedef unsigned char byte;
13 | typedef double* BB;
14 | typedef struct { siz h, w, m; uint *cnts; } RLE;
15 |
16 | // Initialize/destroy RLE.
17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
18 | void rleFree( RLE *R );
19 |
20 | // Initialize/destroy RLE array.
21 | void rlesInit( RLE **R, siz n );
22 | void rlesFree( RLE **R, siz n );
23 |
24 | // Encode binary masks using RLE.
25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
26 |
27 | // Decode binary masks encoded via RLE.
28 | void rleDecode( const RLE *R, byte *mask, siz n );
29 |
30 | // Compute union or intersection of encoded masks.
31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect );
32 |
33 | // Compute area of encoded masks.
34 | void rleArea( const RLE *R, siz n, uint *a );
35 |
36 | // Compute intersection over union between masks.
37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
38 |
39 | // Compute intersection over union between bounding boxes.
40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
41 |
42 | // Get bounding boxes surrounding encoded masks.
43 | void rleToBbox( const RLE *R, BB bb, siz n );
44 |
45 | // Convert bounding boxes to encoded masks.
46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
47 |
48 | // Convert polygon to encoded mask.
49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
50 |
51 | // Get compressed string representation of encoded mask.
52 | char* rleToString( const RLE *R );
53 |
54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w );
56 |
--------------------------------------------------------------------------------
/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer(object):
12 | """A simple timer."""
13 | def __init__(self):
14 | self.total_time = 0.
15 | self.calls = 0
16 | self.start_time = 0.
17 | self.diff = 0.
18 | self.average_time = 0.
19 |
20 | def tic(self):
21 | # using time.time instead of time.clock because time.clock
22 | # does not normalize for multithreading
23 | self.start_time = time.time()
24 |
25 | def toc(self, average=True):
26 | self.diff = time.time() - self.start_time
27 | self.total_time += self.diff
28 | self.calls += 1
29 | self.average_time = self.total_time / self.calls
30 | if average:
31 | return self.average_time
32 | else:
33 | return self.diff
34 |
35 | def clear(self):
36 | self.total_time = 0.
37 | self.calls = 0
38 | self.start_time = 0.
39 | self.diff = 0.
40 | self.average_time = 0.
41 |
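Note: typical tic/toc usage, mirroring the detection scripts above (assumes the class is importable; the sleep stands in for the timed call):

    import time

    t = Timer()
    for _ in range(3):
        t.tic()
        time.sleep(0.01)
        print('%.4f' % t.toc())  # running average; t.toc(average=False) gives the last diff
    t.clear()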
--------------------------------------------------------------------------------
/utils/vis_util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from cfgs.config import *
3 |
4 | # for display
5 | ############################
6 | def _to_color(indx, base):
7 | """ return (b, r, g) tuple"""
8 | base2 = base * base
9 | b = 2 - indx // base2
10 | r = 2 - (indx % base2) // base
11 | g = 2 - (indx % base2) % base
12 | return b * 127, r * 127, g * 127
13 |
14 | base = int(np.ceil(pow(num_classes, 1. / 3)))
15 | colors = [_to_color(x, base) for x in range(num_classes)]
16 |
17 |
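Note: _to_color() walks an evenly spaced color cube; with floor division each channel stays in {0, 127, 254}. A standalone check with a hypothetical num_classes = 20 (so base = 3):

    import numpy as np

    def _to_color(indx, base):
        base2 = base * base
        b = 2 - indx // base2
        r = 2 - (indx % base2) // base
        g = 2 - (indx % base2) % base
        return b * 127, r * 127, g * 127

    base = int(np.ceil(pow(20, 1. / 3)))   # 3
    print([_to_color(x, base) for x in range(3)])
    # [(254, 254, 254), (254, 254, 127), (254, 254, 0)]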
--------------------------------------------------------------------------------
/utils/yolo.pyx:
--------------------------------------------------------------------------------
1 | cimport cython
2 | import numpy as np
3 | cimport numpy as np
4 |
5 | DTYPE = np.float
6 | ctypedef np.float_t DTYPE_t
7 |
8 | cdef extern from "math.h":
9 | double fabs(double m)
10 | double log(double x)
11 |
12 |
13 | def yolo_to_bbox(
14 | np.ndarray[DTYPE_t, ndim=4] bbox_pred,
15 | np.ndarray[DTYPE_t, ndim=2] anchors, int H, int W):
16 | return yolo_to_bbox_c(bbox_pred, anchors, H, W)
17 |
18 | cdef yolo_to_bbox_c(
19 | np.ndarray[DTYPE_t, ndim=4] bbox_pred,
20 | np.ndarray[DTYPE_t, ndim=2] anchors, int H, int W):
21 | """
22 | Parameters
23 | ----------
24 | bbox_pred: (bsize, HxW, num_anchors, 4) ndarray of float (sig(tx), sig(ty), exp(tw), exp(th))
25 | anchors: (num_anchors, 2) (pw, ph)
26 | Returns
27 | -------
28 | bbox_out: (HxWxnum_anchors, 4) ndarray of bbox (x1, y1, x2, y2) rescaled to (0, 1)
29 | """
30 | cdef unsigned int bsize = bbox_pred.shape[0]
31 | cdef unsigned int num_anchors = anchors.shape[0]
32 | cdef np.ndarray[DTYPE_t, ndim=4] bbox_out = np.zeros((bsize, H*W, num_anchors, 4), dtype=DTYPE)
33 |
34 | cdef DTYPE_t cx, cy, bw, bh
35 |     cdef unsigned int b, row, col, a, ind
36 | for b in range(bsize):
37 | for row in range(H):
38 | for col in range(W):
39 | ind = row * W + col
40 | for a in range(num_anchors):
41 | cx = (bbox_pred[b, ind, a, 0] + col) / W
42 | cy = (bbox_pred[b, ind, a, 1] + row) / H
43 |                     bw = bbox_pred[b, ind, a, 2] * anchors[a, 0] / W * 0.5
44 |                     bh = bbox_pred[b, ind, a, 3] * anchors[a, 1] / H * 0.5
45 |
46 | bbox_out[b, ind, a, 0] = cx - bw
47 | bbox_out[b, ind, a, 1] = cy - bh
48 | bbox_out[b, ind, a, 2] = cx + bw
49 | bbox_out[b, ind, a, 3] = cy + bh
50 |
51 | return bbox_out
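For sanity-checking the compiled extension, here is a vectorized NumPy reference that mirrors yolo_to_bbox_c under the same conventions (activations already applied to bbox_pred; output corners normalized to (0, 1)). This is an editorial sketch, not part of the repo:

import numpy as np

def yolo_to_bbox_np(bbox_pred, anchors, H, W):
    # bbox_pred: (bsize, H*W, num_anchors, 4); anchors: (num_anchors, 2)
    bsize, HW, num_anchors, _ = bbox_pred.shape
    cols = np.tile(np.arange(W), H).reshape(1, HW, 1)      # col = ind % W
    rows = np.repeat(np.arange(H), W).reshape(1, HW, 1)    # row = ind // W
    cx = (bbox_pred[..., 0] + cols) / W
    cy = (bbox_pred[..., 1] + rows) / H
    bw = bbox_pred[..., 2] * anchors[:, 0] / W * 0.5
    bh = bbox_pred[..., 3] * anchors[:, 1] / H * 0.5
    return np.stack([cx - bw, cy - bh, cx + bw, cy + bh], axis=-1)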
--------------------------------------------------------------------------------
/yolo_detect.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import time
5 | from darknet_v3 import Darknet19
6 | from cfgs.config_v2 import load_cfg_yamls
7 | import utils.network as net_utils
8 | import utils.yolo_v2 as yolo_utils
9 |
10 | base_dir = './'
11 |
12 |
13 | def init_network():
14 | dataset_yaml = os.path.join(base_dir, 'cfgs/config_kitti_demo.yaml')
15 | cfg = load_cfg_yamls([dataset_yaml])
16 |
17 | model = os.path.join(base_dir, 'models/training/kitti_baseline_v3/kitti_baseline_v3_100.h5')
18 | net = Darknet19(cfg)
19 | net_utils.load_net(model, net)
20 | net.eval()
21 | net.cuda()
22 |     print('model loaded successfully')
23 | return net, cfg
24 |
25 |
26 | def load_image_paths(img_list_file):
27 |     with open(img_list_file) as img_files:
28 |         image_paths = [f.strip() for f in img_files.readlines()]
29 |     return image_paths
30 |
31 |
32 | def preprocess(filename, inp_size):
33 | image = cv2.imread(filename)
34 | im_data = np.expand_dims(yolo_utils.preprocess_test((image, None, inp_size))[0], 0)
35 | return image, im_data
36 |
37 |
38 | def detect_image(cfg, image_path, net, thresh):
39 | image, im_data = preprocess(image_path, cfg['inp_size'])
40 | im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)
41 | bbox_pred, iou_pred, prob_pred = net.forward(im_data)
42 | bbox_pred = bbox_pred.data.cpu().numpy()
43 | iou_pred = iou_pred.data.cpu().numpy()
44 | prob_pred = prob_pred.data.cpu().numpy()
45 | bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
46 | return bboxes, cls_inds, image, scores
47 |
48 |
49 | def run():
50 | net, cfg = init_network()
51 | image_paths = load_image_paths(os.path.join('./demo/', 'demo_images_list.txt'))
52 |     os.makedirs('output', exist_ok=True)  # cv2.imwrite fails silently if the dir is missing
53 | thresh = 0.6
54 | imshow = True
55 | time_rec = list()
56 | for i, image_path in enumerate(image_paths):
57 | begin_time = time.time()
58 | bboxes, cls_inds, image, scores = detect_image(cfg, image_path, net, thresh)
59 | end_time = time.time()
60 | time_rec.append(end_time - begin_time)
61 | im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
62 |
63 | cv2.imwrite('output/detection_{:04d}.jpg'.format(i), im2show)
64 | if imshow:
65 | cv2.imshow('detection', im2show)
66 | key = cv2.waitKey(30)
67 | if key == ord('q'):
68 | break
69 |
70 | avg = sum(time_rec) / len(time_rec)
71 | print('processed {:d} images in {:.3f} seconds'.format(len(time_rec), sum(time_rec)))
72 | print('{:.3f} sec/image'.format(avg))
73 | print('{:.2f} fps'.format(1/avg))
74 |
75 |
76 | if __name__ == '__main__':
77 | run()
78 |
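The script above batch-processes demo_images_list.txt; for a one-off image the same helpers can be called directly. A minimal sketch (the image path and threshold are placeholders):

import cv2
import utils.yolo_v2 as yolo_utils
from yolo_detect import init_network, detect_image

net, cfg = init_network()
bboxes, cls_inds, image, scores = detect_image(cfg, 'demo/images/000040.jpg', net, thresh=0.6)
cv2.imwrite('single_detection.jpg',
            yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg))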
--------------------------------------------------------------------------------