├── README.md ├── article_pic ├── 01.jpg └── 02.jpg ├── data ├── Argoverse.yaml ├── GlobalWheat2020.yaml ├── Objects365.yaml ├── SKU-110K.yaml ├── VOC.yaml ├── VisDrone.yaml ├── coco.yaml ├── coco128.yaml ├── hyps │ ├── hyp.finetune.yaml │ ├── hyp.finetune_objects365.yaml │ ├── hyp.scratch-p6.yaml │ └── hyp.scratch.yaml ├── images │ ├── Ref_bus.jpg │ ├── Ref_car.jpg │ ├── Ref_motorcycle.jpg │ ├── Ref_person.png │ ├── bus.jpg │ └── zidane.jpg ├── scripts │ ├── download_weights.sh │ ├── get_coco.sh │ └── get_coco128.sh ├── video │ └── testVideo.mp4 └── xView.yaml ├── deep_sort_pytorch ├── LICENSE ├── README.md ├── configs │ └── deep_sort.yaml ├── deep_sort │ ├── README.md │ ├── __init__.py │ ├── deep │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ └── evaluate.py │ │ ├── evaluate.py │ │ ├── feature_extractor.py │ │ ├── model.py │ │ ├── original_model.py │ │ ├── test.py │ │ ├── train.jpg │ │ └── train.py │ ├── deep_sort.py │ └── sort │ │ ├── __init__.py │ │ ├── detection.py │ │ ├── iou_matching.py │ │ ├── kalman_filter.py │ │ ├── linear_assignment.py │ │ ├── nn_matching.py │ │ ├── preprocessing.py │ │ ├── track.py │ │ └── tracker.py └── utils │ ├── __init__.py │ ├── asserts.py │ ├── draw.py │ ├── evaluation.py │ ├── io.py │ ├── json_logger.py │ ├── log.py │ ├── parser.py │ └── tools.py ├── detect.py ├── distance.py ├── estimateDistanceUtil.py ├── export.py ├── hubconf.py ├── models ├── __init__.py ├── common.py ├── experimental.py ├── hub │ ├── anchors.yaml │ ├── yolov3-spp.yaml │ ├── yolov3-tiny.yaml │ ├── yolov3.yaml │ ├── yolov5-bifpn.yaml │ ├── yolov5-fpn.yaml │ ├── yolov5-p2.yaml │ ├── yolov5-p6.yaml │ ├── yolov5-p7.yaml │ ├── yolov5-panet.yaml │ ├── yolov5l6.yaml │ ├── yolov5m6.yaml │ ├── yolov5s-transformer.yaml │ ├── yolov5s6.yaml │ └── yolov5x6.yaml ├── yolo.py ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5s.yaml └── yolov5x.yaml ├── requirements.txt ├── track.py ├── train.py ├── tutorial.ipynb ├── utils ├── __init__.py ├── activations.py ├── augmentations.py ├── 
autoanchor.py ├── aws │ ├── __init__.py │ ├── mime.sh │ ├── resume.py │ └── userdata.sh ├── callbacks.py ├── datasets.py ├── downloads.py ├── flask_rest_api │ ├── README.md │ ├── example_request.py │ └── restapi.py ├── general.py ├── google_app_engine │ ├── Dockerfile │ ├── additional_requirements.txt │ └── app.yaml ├── loggers │ ├── __init__.py │ └── wandb │ │ ├── README.md │ │ ├── __init__.py │ │ ├── log_dataset.py │ │ ├── sweep.py │ │ ├── sweep.yaml │ │ └── wandb_utils.py ├── loss.py ├── metrics.py ├── plots.py └── torch_utils.py ├── val.py ├── video.py └── yolov5s.pt /README.md: -------------------------------------------------------------------------------- 1 | # yolov5_monocular_camera_ranging 2 | This repository is a monocular camera ranging project whose object detection framework is yolov5. 3 | This project deals with real-time video. It directly shows you a video annotated with the type of each object, its detection confidence, and the distance from the network camera to the object.
4 | ![result picture](https://github.com/xiaol-arch/yolov5_monocular_camera_ranging/blob/main/article_pic/02.jpg) 5 | # yolov5 6 | I use the pretrained yolov5 model directly. Details of yolov5 are available here [YOLOv5 🚀 Vision AI ⭐](https://github.com/ultralytics/yolov5) 7 | The referenced project version is v5.0. The exact version of the project doesn't matter. 8 | # quick start examples 9 | ## install 10 | python>=3.6 and pytorch>=1.7: 11 | `$ git clone https://github.com/xiaol-arch/yolov5_monocular_camera_ranging`
12 | `$ cd yolov5_monocular_camera_ranging`
13 | `$ pip install -r requirements.txt` 14 | ## inference 15 | Run the following instructions to get a resulting video.
16 | This instruction is for video
17 | `$ python video.py`
18 | This instruction is for webcam
19 | `$ python distance.py`
20 | This instruction is for webcam input with object tracking
21 | `$ python track.py` 22 | # to do 23 | + Modify issues of accuracy of ranging in program 24 | + Improve the README documentation 25 | + Add speed measurement to the program 26 | -------------------------------------------------------------------------------- /article_pic/01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolei00/yolov5_monocular_camera_ranging/b78f77f914a6e1f1c7f983f05717209429cce00b/article_pic/01.jpg -------------------------------------------------------------------------------- /article_pic/02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolei00/yolov5_monocular_camera_ranging/b78f77f914a6e1f1c7f983f05717209429cce00b/article_pic/02.jpg -------------------------------------------------------------------------------- /data/Argoverse.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0 2 | # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ 3 | # Example usage: python train.py --data Argoverse.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── Argoverse ← downloads here 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/Argoverse # dataset root dir 12 | train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images 13 | val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images 14 | test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview 15 | 16 | # Classes 17 | nc: 8 # number of classes 18 | names: ['person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign'] # class names 19 | 20 | 21 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 22 | download: | 23 | import json 24 | 25 | from tqdm import tqdm 26 | from utils.general import download, Path 27 | 28 | 29 | def argoverse2yolo(set): 30 | labels = {} 31 | a = json.load(open(set, "rb")) 32 | for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."): 33 | img_id = annot['image_id'] 34 | img_name = a['images'][img_id]['name'] 35 | img_label_name = img_name[:-3] + "txt" 36 | 37 | cls = annot['category_id'] # instance class id 38 | x_center, y_center, width, height = annot['bbox'] 39 | x_center = (x_center + width / 2) / 1920.0 # offset and scale 40 | y_center = (y_center + height / 2) / 1200.0 # offset and scale 41 | width /= 1920.0 # scale 42 | height /= 1200.0 # scale 43 | 44 | img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']] 45 | if not img_dir.exists(): 46 | img_dir.mkdir(parents=True, exist_ok=True) 47 | 48 | k = str(img_dir / img_label_name) 49 | if k not in labels: 50 | labels[k] = [] 51 | labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n") 52 | 53 | for k in labels: 54 | with open(k, "w") as f: 55 | f.writelines(labels[k]) 56 | 57 | 58 | # Download 59 | dir = Path('../datasets/Argoverse') # dataset root dir 60 | urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip'] 61 | 
download(urls, dir=dir, delete=False) 62 | 63 | # Convert 64 | annotations_dir = 'Argoverse-HD/annotations/' 65 | (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images' 66 | for d in "train.json", "val.json": 67 | argoverse2yolo(dir / annotations_dir / d) # convert Argoverse annotations to YOLO labels 68 | -------------------------------------------------------------------------------- /data/GlobalWheat2020.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0 2 | # Global Wheat 2020 dataset http://www.global-wheat.com/ 3 | # Example usage: python train.py --data GlobalWheat2020.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── GlobalWheat2020 ← downloads here 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/GlobalWheat2020 # dataset root dir 12 | train: # train images (relative to 'path') 3422 images 13 | - images/arvalis_1 14 | - images/arvalis_2 15 | - images/arvalis_3 16 | - images/ethz_1 17 | - images/rres_1 18 | - images/inrae_1 19 | - images/usask_1 20 | val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1) 21 | - images/ethz_1 22 | test: # test images (optional) 1276 images 23 | - images/utokyo_1 24 | - images/utokyo_2 25 | - images/nau_1 26 | - images/uq_1 27 | 28 | # Classes 29 | nc: 1 # number of classes 30 | names: ['wheat_head'] # class names 31 | 32 | 33 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 34 | download: | 35 | from utils.general import download, Path 36 | 37 | # Download 38 | dir = Path(yaml['path']) # dataset root dir 39 | urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip', 40 | 
'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip'] 41 | download(urls, dir=dir) 42 | 43 | # Make Directories 44 | for p in 'annotations', 'images', 'labels': 45 | (dir / p).mkdir(parents=True, exist_ok=True) 46 | 47 | # Move 48 | for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \ 49 | 'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1': 50 | (dir / p).rename(dir / 'images' / p) # move to /images 51 | f = (dir / p).with_suffix('.json') # json file 52 | if f.exists(): 53 | f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations 54 | -------------------------------------------------------------------------------- /data/Objects365.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0 2 | # Objects365 dataset https://www.objects365.org/ 3 | # Example usage: python train.py --data Objects365.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── Objects365 ← downloads here 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/Objects365 # dataset root dir 12 | train: images/train # train images (relative to 'path') 1742289 images 13 | val: images/val # val images (relative to 'path') 5570 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | nc: 365 # number of classes 18 | names: ['Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup', 19 | 'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book', 20 | 'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag', 21 | 'Pillow', 'Boots', 'Vase', 'Microphone', 'Necklace', 'Ring', 'SUV', 'Wine Glass', 'Belt', 'Monitor/TV', 22 | 'Backpack', 'Umbrella', 'Traffic Light', 'Speaker', 'Watch', 'Tie', 'Trash bin Can', 'Slippers', 'Bicycle', 23 | 'Stool', 'Barrel/bucket', 'Van', 'Couch', 'Sandals', 'Basket', 'Drum', 'Pen/Pencil', 'Bus', 'Wild Bird', 24 | 'High Heels', 'Motorcycle', 'Guitar', 'Carpet', 'Cell Phone', 'Bread', 'Camera', 'Canned', 'Truck', 25 | 'Traffic cone', 'Cymbal', 'Lifesaver', 'Towel', 'Stuffed Toy', 'Candle', 'Sailboat', 'Laptop', 'Awning', 26 | 'Bed', 'Faucet', 'Tent', 'Horse', 'Mirror', 'Power outlet', 'Sink', 'Apple', 'Air Conditioner', 'Knife', 27 | 'Hockey Stick', 'Paddle', 'Pickup Truck', 'Fork', 'Traffic Sign', 'Balloon', 'Tripod', 'Dog', 'Spoon', 'Clock', 28 | 'Pot', 'Cow', 'Cake', 'Dinning Table', 'Sheep', 'Hanger', 'Blackboard/Whiteboard', 'Napkin', 'Other Fish', 29 | 'Orange/Tangerine', 'Toiletry', 'Keyboard', 'Tomato', 'Lantern', 'Machinery Vehicle', 'Fan', 30 | 'Green Vegetables', 'Banana', 'Baseball Glove', 'Airplane', 'Mouse', 'Train', 'Pumpkin', 'Soccer', 'Skiboard', 31 | 'Luggage', 'Nightstand', 'Tea pot', 'Telephone', 'Trolley', 'Head Phone', 'Sports Car', 'Stop Sign', 32 | 'Dessert', 'Scooter', 'Stroller', 'Crane', 'Remote', 'Refrigerator', 'Oven', 'Lemon', 'Duck', 'Baseball Bat', 33 | 'Surveillance Camera', 'Cat', 'Jug', 
'Broccoli', 'Piano', 'Pizza', 'Elephant', 'Skateboard', 'Surfboard', 34 | 'Gun', 'Skating and Skiing shoes', 'Gas stove', 'Donut', 'Bow Tie', 'Carrot', 'Toilet', 'Kite', 'Strawberry', 35 | 'Other Balls', 'Shovel', 'Pepper', 'Computer Box', 'Toilet Paper', 'Cleaning Products', 'Chopsticks', 36 | 'Microwave', 'Pigeon', 'Baseball', 'Cutting/chopping Board', 'Coffee Table', 'Side Table', 'Scissors', 37 | 'Marker', 'Pie', 'Ladder', 'Snowboard', 'Cookies', 'Radiator', 'Fire Hydrant', 'Basketball', 'Zebra', 'Grape', 38 | 'Giraffe', 'Potato', 'Sausage', 'Tricycle', 'Violin', 'Egg', 'Fire Extinguisher', 'Candy', 'Fire Truck', 39 | 'Billiards', 'Converter', 'Bathtub', 'Wheelchair', 'Golf Club', 'Briefcase', 'Cucumber', 'Cigar/Cigarette', 40 | 'Paint Brush', 'Pear', 'Heavy Truck', 'Hamburger', 'Extractor', 'Extension Cord', 'Tong', 'Tennis Racket', 41 | 'Folder', 'American Football', 'earphone', 'Mask', 'Kettle', 'Tennis', 'Ship', 'Swing', 'Coffee Machine', 42 | 'Slide', 'Carriage', 'Onion', 'Green beans', 'Projector', 'Frisbee', 'Washing Machine/Drying Machine', 43 | 'Chicken', 'Printer', 'Watermelon', 'Saxophone', 'Tissue', 'Toothbrush', 'Ice cream', 'Hot-air balloon', 44 | 'Cello', 'French Fries', 'Scale', 'Trophy', 'Cabbage', 'Hot dog', 'Blender', 'Peach', 'Rice', 'Wallet/Purse', 45 | 'Volleyball', 'Deer', 'Goose', 'Tape', 'Tablet', 'Cosmetics', 'Trumpet', 'Pineapple', 'Golf Ball', 46 | 'Ambulance', 'Parking meter', 'Mango', 'Key', 'Hurdle', 'Fishing Rod', 'Medal', 'Flute', 'Brush', 'Penguin', 47 | 'Megaphone', 'Corn', 'Lettuce', 'Garlic', 'Swan', 'Helicopter', 'Green Onion', 'Sandwich', 'Nuts', 48 | 'Speed Limit Sign', 'Induction Cooker', 'Broom', 'Trombone', 'Plum', 'Rickshaw', 'Goldfish', 'Kiwi fruit', 49 | 'Router/modem', 'Poker Card', 'Toaster', 'Shrimp', 'Sushi', 'Cheese', 'Notepaper', 'Cherry', 'Pliers', 'CD', 50 | 'Pasta', 'Hammer', 'Cue', 'Avocado', 'Hamimelon', 'Flask', 'Mushroom', 'Screwdriver', 'Soap', 'Recorder', 51 | 'Bear', 'Eggplant', 'Board Eraser', 
'Coconut', 'Tape Measure/Ruler', 'Pig', 'Showerhead', 'Globe', 'Chips', 52 | 'Steak', 'Crosswalk Sign', 'Stapler', 'Camel', 'Formula 1', 'Pomegranate', 'Dishwasher', 'Crab', 53 | 'Hoverboard', 'Meat ball', 'Rice Cooker', 'Tuba', 'Calculator', 'Papaya', 'Antelope', 'Parrot', 'Seal', 54 | 'Butterfly', 'Dumbbell', 'Donkey', 'Lion', 'Urinal', 'Dolphin', 'Electric Drill', 'Hair Dryer', 'Egg tart', 55 | 'Jellyfish', 'Treadmill', 'Lighter', 'Grapefruit', 'Game board', 'Mop', 'Radish', 'Baozi', 'Target', 'French', 56 | 'Spring Rolls', 'Monkey', 'Rabbit', 'Pencil Case', 'Yak', 'Red Cabbage', 'Binoculars', 'Asparagus', 'Barbell', 57 | 'Scallop', 'Noddles', 'Comb', 'Dumpling', 'Oyster', 'Table Tennis paddle', 'Cosmetics Brush/Eyeliner Pencil', 58 | 'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis'] 59 | 60 | 61 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 62 | download: | 63 | from pycocotools.coco import COCO 64 | from tqdm import tqdm 65 | 66 | from utils.general import download, Path 67 | 68 | # Make Directories 69 | dir = Path(yaml['path']) # dataset root dir 70 | for p in 'images', 'labels': 71 | (dir / p).mkdir(parents=True, exist_ok=True) 72 | for q in 'train', 'val': 73 | (dir / p / q).mkdir(parents=True, exist_ok=True) 74 | 75 | # Download 76 | url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/" 77 | download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json 78 | download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', 79 | curl=True, delete=False, threads=8) 80 | 81 | # Move 82 | train = dir / 'images' / 'train' 83 | for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'): 84 | f.rename(train / f.name) # move to /images/train 85 | 86 | # Labels 87 | coco = COCO(dir / 'zhiyuan_objv2_train.json') 88 | names = 
[x["name"] for x in coco.loadCats(coco.getCatIds())] 89 | for cid, cat in enumerate(names): 90 | catIds = coco.getCatIds(catNms=[cat]) 91 | imgIds = coco.getImgIds(catIds=catIds) 92 | for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'): 93 | width, height = im["width"], im["height"] 94 | path = Path(im["file_name"]) # image filename 95 | try: 96 | with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file: 97 | annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None) 98 | for a in coco.loadAnns(annIds): 99 | x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner) 100 | x, y = x + w / 2, y + h / 2 # xy to center 101 | file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n") 102 | 103 | except Exception as e: 104 | print(e) 105 | -------------------------------------------------------------------------------- /data/SKU-110K.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0 2 | # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 3 | # Example usage: python train.py --data SKU-110K.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── SKU-110K ← downloads here 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/SKU-110K # dataset root dir 12 | train: train.txt # train images (relative to 'path') 8219 images 13 | val: val.txt # val images (relative to 'path') 588 images 14 | test: test.txt # test images (optional) 2936 images 15 | 16 | # Classes 17 | nc: 1 # number of classes 18 | names: ['object'] # class names 19 | 20 | 21 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 22 | download: | 23 | import shutil 24 | from tqdm import tqdm 25 | from utils.general import np, pd, Path, download, xyxy2xywh 26 | 27 | # Download 28 | dir = Path(yaml['path']) # dataset root dir 29 | parent = Path(dir.parent) # download dir 30 | urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz'] 31 | download(urls, dir=parent, delete=False) 32 | 33 | # Rename directories 34 | if dir.exists(): 35 | shutil.rmtree(dir) 36 | (parent / 'SKU110K_fixed').rename(dir) # rename dir 37 | (dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir 38 | 39 | # Convert labels 40 | names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names 41 | for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv': 42 | x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations 43 | images, unique_images = x[:, 0], np.unique(x[:, 0]) 44 | with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f: 45 | f.writelines(f'./images/{s}\n' for s in unique_images) 46 | for im in tqdm(unique_images, desc=f'Converting {dir / d}'): 47 | cls = 0 # single-class dataset 48 | with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f: 49 | for r in x[images == im]: 50 | w, h = r[6], r[7] # image width, height 51 | xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance 52 | f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label 53 | 
-------------------------------------------------------------------------------- /data/VOC.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0 2 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC 3 | # Example usage: python train.py --data VOC.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── VOC ← downloads here 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/VOC 12 | train: # train images (relative to 'path') 16551 images 13 | - images/train2012 14 | - images/train2007 15 | - images/val2012 16 | - images/val2007 17 | val: # val images (relative to 'path') 4952 images 18 | - images/test2007 19 | test: # test images (optional) 20 | - images/test2007 21 | 22 | # Classes 23 | nc: 20 # number of classes 24 | names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 25 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] # class names 26 | 27 | 28 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 29 | download: | 30 | import xml.etree.ElementTree as ET 31 | 32 | from tqdm import tqdm 33 | from utils.general import download, Path 34 | 35 | 36 | def convert_label(path, lb_path, year, image_id): 37 | def convert_box(size, box): 38 | dw, dh = 1. / size[0], 1. 
/ size[1] 39 | x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2] 40 | return x * dw, y * dh, w * dw, h * dh 41 | 42 | in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml') 43 | out_file = open(lb_path, 'w') 44 | tree = ET.parse(in_file) 45 | root = tree.getroot() 46 | size = root.find('size') 47 | w = int(size.find('width').text) 48 | h = int(size.find('height').text) 49 | 50 | for obj in root.iter('object'): 51 | cls = obj.find('name').text 52 | if cls in yaml['names'] and not int(obj.find('difficult').text) == 1: 53 | xmlbox = obj.find('bndbox') 54 | bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')]) 55 | cls_id = yaml['names'].index(cls) # class id 56 | out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n') 57 | 58 | 59 | # Download 60 | dir = Path(yaml['path']) # dataset root dir 61 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 62 | urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images 63 | url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images 64 | url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images 65 | download(urls, dir=dir / 'images', delete=False) 66 | 67 | # Convert 68 | path = dir / f'images/VOCdevkit' 69 | for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'): 70 | imgs_path = dir / 'images' / f'{image_set}{year}' 71 | lbs_path = dir / 'labels' / f'{image_set}{year}' 72 | imgs_path.mkdir(exist_ok=True, parents=True) 73 | lbs_path.mkdir(exist_ok=True, parents=True) 74 | 75 | image_ids = open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read().strip().split() 76 | for id in tqdm(image_ids, desc=f'{image_set}{year}'): 77 | f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path 78 | lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path 79 | f.rename(imgs_path / f.name) # move image 80 | convert_label(path, lb_path, 
year, id) # convert labels to YOLO format 81 | -------------------------------------------------------------------------------- /data/VisDrone.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0 2 | # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset 3 | # Example usage: python train.py --data VisDrone.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── VisDrone ← downloads here 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/VisDrone # dataset root dir 12 | train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images 13 | val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images 14 | test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images 15 | 16 | # Classes 17 | nc: 10 # number of classes 18 | names: ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'] 19 | 20 | 21 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 22 | download: | 23 | from utils.general import download, os, Path 24 | 25 | def visdrone2yolo(dir): 26 | from PIL import Image 27 | from tqdm import tqdm 28 | 29 | def convert_box(size, box): 30 | # Convert VisDrone box to YOLO xywh box 31 | dw = 1. / size[0] 32 | dh = 1. 
/ size[1] 33 | return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh 34 | 35 | (dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory 36 | pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}') 37 | for f in pbar: 38 | img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size 39 | lines = [] 40 | with open(f, 'r') as file: # read annotation.txt 41 | for row in [x.split(',') for x in file.read().strip().splitlines()]: 42 | if row[4] == '0': # VisDrone 'ignored regions' class 0 43 | continue 44 | cls = int(row[5]) - 1 45 | box = convert_box(img_size, tuple(map(int, row[:4]))) 46 | lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n") 47 | with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl: 48 | fl.writelines(lines) # write label.txt 49 | 50 | 51 | # Download 52 | dir = Path(yaml['path']) # dataset root dir 53 | urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip', 54 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip', 55 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip', 56 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip'] 57 | download(urls, dir=dir) 58 | 59 | # Convert 60 | for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev': 61 | visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels 62 | -------------------------------------------------------------------------------- /data/coco.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0 2 | # COCO 2017 dataset http://cocodataset.org 3 | # Example usage: python train.py --data coco.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | 
# └── coco ← downloads here 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/coco # dataset root dir 12 | train: train2017.txt # train images (relative to 'path') 118287 images 13 | val: val2017.txt # val images (relative to 'path') 5000 images 14 | test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 15 | 16 | # Classes 17 | nc: 80 # number of classes 18 | names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 19 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 20 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 21 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 22 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 23 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 24 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 25 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 26 | 'hair drier', 'toothbrush'] # class names 27 | 28 | 29 | # Download script/URL (optional) 30 | download: | 31 | from utils.general import download, Path 32 | 33 | # Download labels 34 | segments = False # segment or box labels 35 | dir = Path(yaml['path']) # dataset root dir 36 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 37 | urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels 38 | download(urls, dir=dir.parent) 39 | 40 | # Download data 41 | urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k 
images 42 | 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images 43 | 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) 44 | download(urls, dir=dir / 'images', threads=3) 45 | -------------------------------------------------------------------------------- /data/coco128.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0 2 | # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) 3 | # Example usage: python train.py --data coco128.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── coco128 ← downloads here 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/coco128 # dataset root dir 12 | train: images/train2017 # train images (relative to 'path') 128 images 13 | val: images/train2017 # val images (relative to 'path') 128 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | nc: 80 # number of classes 18 | names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 19 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 20 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 21 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 22 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 23 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 24 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 25 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 
'vase', 'scissors', 'teddy bear', 26 | 'hair drier', 'toothbrush'] # class names 27 | 28 | 29 | # Download script/URL (optional) 30 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip -------------------------------------------------------------------------------- /data/hyps/hyp.finetune.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for VOC finetuning 2 | # python train.py --batch 64 --weights yolov5m.pt --data VOC.yaml --img 512 --epochs 50 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | # Hyperparameter Evolution Results 7 | # Generations: 306 8 | # P R mAP.5 mAP.5:.95 box obj cls 9 | # Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146 10 | 11 | lr0: 0.0032 12 | lrf: 0.12 13 | momentum: 0.843 14 | weight_decay: 0.00036 15 | warmup_epochs: 2.0 16 | warmup_momentum: 0.5 17 | warmup_bias_lr: 0.05 18 | box: 0.0296 19 | cls: 0.243 20 | cls_pw: 0.631 21 | obj: 0.301 22 | obj_pw: 0.911 23 | iou_t: 0.2 24 | anchor_t: 2.91 25 | # anchors: 3.63 26 | fl_gamma: 0.0 27 | hsv_h: 0.0138 28 | hsv_s: 0.664 29 | hsv_v: 0.464 30 | degrees: 0.373 31 | translate: 0.245 32 | scale: 0.898 33 | shear: 0.602 34 | perspective: 0.0 35 | flipud: 0.00856 36 | fliplr: 0.5 37 | mosaic: 1.0 38 | mixup: 0.243 39 | copy_paste: 0.0 40 | -------------------------------------------------------------------------------- /data/hyps/hyp.finetune_objects365.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.00258 2 | lrf: 0.17 3 | momentum: 0.779 4 | weight_decay: 0.00058 5 | warmup_epochs: 1.33 6 | warmup_momentum: 0.86 7 | warmup_bias_lr: 0.0711 8 | box: 0.0539 9 | cls: 0.299 10 | cls_pw: 0.825 11 | obj: 0.632 12 | obj_pw: 1.0 13 | iou_t: 0.2 14 | anchor_t: 3.44 15 | anchors: 3.2 16 | fl_gamma: 0.0 17 | hsv_h: 0.0188 18 | hsv_s: 0.704 19 | hsv_v: 0.36 20 | degrees: 0.0 21 | translate: 0.0902 22 | 
scale: 0.491 23 | shear: 0.0 24 | perspective: 0.0 25 | flipud: 0.0 26 | fliplr: 0.5 27 | mosaic: 1.0 28 | mixup: 0.0 29 | copy_paste: 0.0 30 | -------------------------------------------------------------------------------- /data/hyps/hyp.scratch-p6.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for COCO training from scratch 2 | # python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.3 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 0.7 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.9 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 
0.0 # image mixup (probability) 34 | copy_paste: 0.0 # segment copy-paste (probability) 35 | -------------------------------------------------------------------------------- /data/hyps/hyp.scratch.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for COCO training from scratch 2 | # python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.5 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 1.0 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.5 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.0 # image mixup (probability) 34 | copy_paste: 0.0 # 
segment copy-paste (probability) 35 | -------------------------------------------------------------------------------- /data/images/Ref_bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolei00/yolov5_monocular_camera_ranging/b78f77f914a6e1f1c7f983f05717209429cce00b/data/images/Ref_bus.jpg -------------------------------------------------------------------------------- /data/images/Ref_car.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolei00/yolov5_monocular_camera_ranging/b78f77f914a6e1f1c7f983f05717209429cce00b/data/images/Ref_car.jpg -------------------------------------------------------------------------------- /data/images/Ref_motorcycle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolei00/yolov5_monocular_camera_ranging/b78f77f914a6e1f1c7f983f05717209429cce00b/data/images/Ref_motorcycle.jpg -------------------------------------------------------------------------------- /data/images/Ref_person.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolei00/yolov5_monocular_camera_ranging/b78f77f914a6e1f1c7f983f05717209429cce00b/data/images/Ref_person.png -------------------------------------------------------------------------------- /data/images/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolei00/yolov5_monocular_camera_ranging/b78f77f914a6e1f1c7f983f05717209429cce00b/data/images/bus.jpg -------------------------------------------------------------------------------- /data/images/zidane.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiaolei00/yolov5_monocular_camera_ranging/b78f77f914a6e1f1c7f983f05717209429cce00b/data/images/zidane.jpg -------------------------------------------------------------------------------- /data/scripts/download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # YOLOv5 🚀 by Ultralytics https://ultralytics.com, licensed under GNU GPL v3.0 3 | # Download latest models from https://github.com/ultralytics/yolov5/releases 4 | # Example usage: bash path/to/download_weights.sh 5 | # parent 6 | # └── yolov5 7 | # ├── yolov5s.pt ← downloads here 8 | # ├── yolov5m.pt 9 | # └── ... 10 | 11 | python - <= cls >= 0, f'incorrect class index {cls}' 74 | 75 | # Write YOLO label 76 | if id not in shapes: 77 | shapes[id] = Image.open(file).size 78 | box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True) 79 | with open((labels / id).with_suffix('.txt'), 'a') as f: 80 | f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt 81 | except Exception as e: 82 | print(f'WARNING: skipping one label for {file}: {e}') 83 | 84 | 85 | # Download manually from https://challenge.xviewdataset.org 86 | dir = Path(yaml['path']) # dataset root dir 87 | # urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels 88 | # 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images 89 | # 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels) 90 | # download(urls, dir=dir, delete=False) 91 | 92 | # Convert labels 93 | convert_labels(dir / 'xView_train.geojson') 94 | 95 | # Move images 96 | images = Path(dir / 'images') 97 | images.mkdir(parents=True, exist_ok=True) 98 | Path(dir / 'train_images').rename(dir / 'images' / 'train') 99 | Path(dir / 'val_images').rename(dir / 'images' / 'val') 100 | 101 | # Split 102 | autosplit(dir / 'images' / 'train') 103 | 
-------------------------------------------------------------------------------- /deep_sort_pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ziqiang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /deep_sort_pytorch/README.md: -------------------------------------------------------------------------------- 1 | # Deep Sort with PyTorch 2 | 3 | ![](demo/demo.gif) 4 | 5 | ## Update(1-1-2020) 6 | Changes 7 | - fix bugs 8 | - refactor code 9 | - accelerate detection by adding nms on gpu 10 | 11 | ## Latest Update(07-22) 12 | Changes 13 | - bug fix (Thanks @JieChen91 and @yingsen1 for bug reporting). 14 | - using batch for feature extracting for each frame, which leads to a small speed-up. 15 | - code improvement.
16 | 17 | Further improvement directions 18 | - Train detector on specific dataset rather than the official one. 19 | - Retrain REID model on pedestrian dataset for better performance. 20 | - Replace YOLOv3 detector with advanced ones. 21 | 22 | **Any contributions to this repository are welcome!** 23 | 24 | 25 | ## Introduction 26 | This is an implementation of the MOT tracking algorithm deep sort. Deep sort is basically the same as sort, but adds a CNN model to extract features from the image region of each person bounded by a detector. This CNN model is in fact a RE-ID model; the detector used in the [PAPER](https://arxiv.org/abs/1703.07402) is FasterRCNN, and the original source code is [HERE](https://github.com/nwojke/deep_sort). 27 | However, in the original code the CNN model is implemented with tensorflow, which I'm not familiar with. So I re-implemented the CNN feature extraction model with PyTorch, and changed the CNN model a little bit. Also, I use **YOLOv3** to generate bboxes instead of FasterRCNN. 28 | 29 | ## Dependencies 30 | - python 3 (python2 not sure) 31 | - numpy 32 | - scipy 33 | - opencv-python 34 | - sklearn 35 | - torch >= 0.4 36 | - torchvision >= 0.1 37 | - pillow 38 | - vizer 39 | - edict 40 | 41 | ## Quick Start 42 | 0. Check all dependencies installed 43 | ```bash 44 | pip install -r requirements.txt 45 | ``` 46 | For users in China, you can specify a PyPI mirror to accelerate the install, like: 47 | ```bash 48 | pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 49 | ``` 50 | 51 | 1. Clone this repository 52 | ``` 53 | git clone git@github.com:ZQPei/deep_sort_pytorch.git 54 | ``` 55 | 56 | 2. Download YOLOv3 parameters 57 | ``` 58 | cd detector/YOLOv3/weight/ 59 | wget https://pjreddie.com/media/files/yolov3.weights 60 | wget https://pjreddie.com/media/files/yolov3-tiny.weights 61 | cd ../../../ 62 | ``` 63 | 64 | 3.
Download deepsort parameters ckpt.t7 65 | ``` 66 | cd deep_sort/deep/checkpoint 67 | # download ckpt.t7 from 68 | https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6 to this folder 69 | cd ../../../ 70 | ``` 71 | 72 | 4. Compile nms module 73 | ```bash 74 | cd detector/YOLOv3/nms 75 | sh build.sh 76 | cd ../../.. 77 | ``` 78 | 79 | Notice: 80 | If compiling fails, the simplest way is to **upgrade your pytorch to >= 1.1 and torchvision to >= 0.3**, and you can avoid the troublesome compiling problems which are most likely caused by either `gcc version too low` or `libraries missing`. 81 | 82 | 5. Run demo 83 | ``` 84 | usage: python yolov3_deepsort.py VIDEO_PATH 85 | [--help] 86 | [--frame_interval FRAME_INTERVAL] 87 | [--config_detection CONFIG_DETECTION] 88 | [--config_deepsort CONFIG_DEEPSORT] 89 | [--display] 90 | [--display_width DISPLAY_WIDTH] 91 | [--display_height DISPLAY_HEIGHT] 92 | [--save_path SAVE_PATH] 93 | [--cpu] 94 | 95 | # yolov3 + deepsort 96 | python yolov3_deepsort.py [VIDEO_PATH] 97 | 98 | # yolov3_tiny + deepsort 99 | python yolov3_deepsort.py [VIDEO_PATH] --config_detection ./configs/yolov3_tiny.yaml 100 | 101 | # yolov3 + deepsort on webcam 102 | python3 yolov3_deepsort.py /dev/video0 --camera 0 103 | 104 | # yolov3_tiny + deepsort on webcam 105 | python3 yolov3_deepsort.py /dev/video0 --config_detection ./configs/yolov3_tiny.yaml --camera 0 106 | ``` 107 | Use `--display` to enable display. 108 | Results will be saved to `./output/results.avi` and `./output/results.txt`. 109 | 110 | All files above can also be accessed from BaiduDisk! 111 | link: [BaiduDisk](https://pan.baidu.com/s/1YJ1iPpdFTlUyLFoonYvozg) 112 | passwd:fbuw 113 | 114 | ## Training the RE-ID model 115 | The original model used in the paper is in original_model.py, and its parameters are here: [original_ckpt.t7](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6).
116 | 117 | To train the model, you first need to download the [Market1501](http://www.liangzheng.com.cn/Project/project_reid.html) dataset or the [Mars](http://www.liangzheng.com.cn/Project/project_mars.html) dataset. 118 | 119 | Then you can try [train.py](deep_sort/deep/train.py) to train your own parameters and evaluate them using [test.py](deep_sort/deep/test.py) and [evaluate.py](deep_sort/deep/evaluate.py). 120 | ![train.jpg](deep_sort/deep/train.jpg) 121 | 122 | ## Demo videos and images 123 | [demo.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6) 124 | [demo2.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6) 125 | 126 | ![1.jpg](demo/1.jpg) 127 | ![2.jpg](demo/2.jpg) 128 | 129 | 130 | ## References 131 | - paper: [Simple Online and Realtime Tracking with a Deep Association Metric](https://arxiv.org/abs/1703.07402) 132 | 133 | - code: [nwojke/deep_sort](https://github.com/nwojke/deep_sort) 134 | 135 | - paper: [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf) 136 | 137 | - code: [Joseph Redmon/yolov3](https://pjreddie.com/darknet/yolo/) 138 | -------------------------------------------------------------------------------- /deep_sort_pytorch/configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7" 3 | MAX_DIST: 0.2 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | 11 | -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/README.md: -------------------------------------------------------------------------------- 1 | # Deep Sort 2 | 3 | This is the implementation of deep sort with pytorch.
-------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/deep/checkpoint/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = scores.topk(5, dim=1)[1][:, 0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct / ql.size(0))) 14 | -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/deep/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = scores.topk(5, dim=1)[1][:, 0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct / ql.size(0))) 14 | -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | import logging 6 | 7 | from .model import Net 8 | 9 | 10 | class Extractor(object): 11 | def __init__(self, 
model_path, use_cuda=True): 12 | self.net = Net(reid=True) 13 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 14 | state_dict = torch.load(model_path, map_location=torch.device(self.device))[ 15 | 'net_dict'] 16 | self.net.load_state_dict(state_dict) 17 | logger = logging.getLogger("root.tracker") 18 | logger.info("Loading weights from {}... Done!".format(model_path)) 19 | self.net.to(self.device) 20 | self.size = (64, 128) 21 | self.norm = transforms.Compose([ 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 24 | ]) 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32)/255., size) 37 | 38 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze( 39 | 0) for im in im_crops], dim=0).float() 40 | return im_batch 41 | 42 | def __call__(self, im_crops): 43 | im_batch = self._preprocess(im_crops) 44 | with torch.no_grad(): 45 | im_batch = im_batch.to(self.device) 46 | features = self.net(im_batch) 47 | return features.cpu().numpy() 48 | 49 | 50 | if __name__ == '__main__': 51 | img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)] 52 | extr = Extractor("checkpoint/ckpt.t7") 53 | feature = extr(img) 54 | print(feature.shape) 55 | -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = 
nn.Conv2d( 12 | c_in, c_out, 3, stride=2, padding=1, bias=False) 13 | else: 14 | self.conv1 = nn.Conv2d( 15 | c_in, c_out, 3, stride=1, padding=1, bias=False) 16 | self.bn1 = nn.BatchNorm2d(c_out) 17 | self.relu = nn.ReLU(True) 18 | self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, 19 | padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(c_out) 21 | if is_downsample: 22 | self.downsample = nn.Sequential( 23 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 24 | nn.BatchNorm2d(c_out) 25 | ) 26 | elif c_in != c_out: 27 | self.downsample = nn.Sequential( 28 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 29 | nn.BatchNorm2d(c_out) 30 | ) 31 | self.is_downsample = True 32 | 33 | def forward(self, x): 34 | y = self.conv1(x) 35 | y = self.bn1(y) 36 | y = self.relu(y) 37 | y = self.conv2(y) 38 | y = self.bn2(y) 39 | if self.is_downsample: 40 | x = self.downsample(x) 41 | return F.relu(x.add(y), True) 42 | 43 | 44 | def make_layers(c_in, c_out, repeat_times, is_downsample=False): 45 | blocks = [] 46 | for i in range(repeat_times): 47 | if i == 0: 48 | blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] 49 | else: 50 | blocks += [BasicBlock(c_out, c_out), ] 51 | return nn.Sequential(*blocks) 52 | 53 | 54 | class Net(nn.Module): 55 | def __init__(self, num_classes=751, reid=False): 56 | super(Net, self).__init__() 57 | # 3 128 64 58 | self.conv = nn.Sequential( 59 | nn.Conv2d(3, 64, 3, stride=1, padding=1), 60 | nn.BatchNorm2d(64), 61 | nn.ReLU(inplace=True), 62 | # nn.Conv2d(32,32,3,stride=1,padding=1), 63 | # nn.BatchNorm2d(32), 64 | # nn.ReLU(inplace=True), 65 | nn.MaxPool2d(3, 2, padding=1), 66 | ) 67 | # 32 64 32 68 | self.layer1 = make_layers(64, 64, 2, False) 69 | # 32 64 32 70 | self.layer2 = make_layers(64, 128, 2, True) 71 | # 64 32 16 72 | self.layer3 = make_layers(128, 256, 2, True) 73 | # 128 16 8 74 | self.layer4 = make_layers(256, 512, 2, True) 75 | # 256 8 4 76 | self.avgpool = nn.AvgPool2d((8, 4), 1) 77 | # 256 1 1 78 | self.reid = reid 
79 | self.classifier = nn.Sequential( 80 | nn.Linear(512, 256), 81 | nn.BatchNorm1d(256), 82 | nn.ReLU(inplace=True), 83 | nn.Dropout(), 84 | nn.Linear(256, num_classes), 85 | ) 86 | 87 | def forward(self, x): 88 | x = self.conv(x) 89 | x = self.layer1(x) 90 | x = self.layer2(x) 91 | x = self.layer3(x) 92 | x = self.layer4(x) 93 | x = self.avgpool(x) 94 | x = x.view(x.size(0), -1) 95 | # B x 128 96 | if self.reid: 97 | x = x.div(x.norm(p=2, dim=1, keepdim=True)) 98 | return x 99 | # classifier 100 | x = self.classifier(x) 101 | return x 102 | 103 | 104 | if __name__ == '__main__': 105 | net = Net() 106 | x = torch.randn(4, 3, 128, 64) 107 | y = net(x) 108 | import ipdb 109 | ipdb.set_trace() 110 | -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/deep/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d( 12 | c_in, c_out, 3, stride=2, padding=1, bias=False) 13 | else: 14 | self.conv1 = nn.Conv2d( 15 | c_in, c_out, 3, stride=1, padding=1, bias=False) 16 | self.bn1 = nn.BatchNorm2d(c_out) 17 | self.relu = nn.ReLU(True) 18 | self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, 19 | padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(c_out) 21 | if is_downsample: 22 | self.downsample = nn.Sequential( 23 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 24 | nn.BatchNorm2d(c_out) 25 | ) 26 | elif c_in != c_out: 27 | self.downsample = nn.Sequential( 28 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 29 | nn.BatchNorm2d(c_out) 30 | ) 31 | self.is_downsample = True 32 | 33 | def forward(self, x): 34 | y = self.conv1(x) 35 | y = self.bn1(y) 36 | y = self.relu(y) 37 
| y = self.conv2(y) 38 | y = self.bn2(y) 39 | if self.is_downsample: 40 | x = self.downsample(x) 41 | return F.relu(x.add(y), True) 42 | 43 | 44 | def make_layers(c_in, c_out, repeat_times, is_downsample=False): 45 | blocks = [] 46 | for i in range(repeat_times): 47 | if i == 0: 48 | blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] 49 | else: 50 | blocks += [BasicBlock(c_out, c_out), ] 51 | return nn.Sequential(*blocks) 52 | 53 | 54 | class Net(nn.Module): 55 | def __init__(self, num_classes=625, reid=False): 56 | super(Net, self).__init__() 57 | # 3 128 64 58 | self.conv = nn.Sequential( 59 | nn.Conv2d(3, 32, 3, stride=1, padding=1), 60 | nn.BatchNorm2d(32), 61 | nn.ELU(inplace=True), 62 | nn.Conv2d(32, 32, 3, stride=1, padding=1), 63 | nn.BatchNorm2d(32), 64 | nn.ELU(inplace=True), 65 | nn.MaxPool2d(3, 2, padding=1), 66 | ) 67 | # 32 64 32 68 | self.layer1 = make_layers(32, 32, 2, False) 69 | # 32 64 32 70 | self.layer2 = make_layers(32, 64, 2, True) 71 | # 64 32 16 72 | self.layer3 = make_layers(64, 128, 2, True) 73 | # 128 16 8 74 | self.dense = nn.Sequential( 75 | nn.Dropout(p=0.6), 76 | nn.Linear(128*16*8, 128), 77 | nn.BatchNorm1d(128), 78 | nn.ELU(inplace=True) 79 | ) 80 | # 256 1 1 81 | self.reid = reid 82 | self.batch_norm = nn.BatchNorm1d(128) 83 | self.classifier = nn.Sequential( 84 | nn.Linear(128, num_classes), 85 | ) 86 | 87 | def forward(self, x): 88 | x = self.conv(x) 89 | x = self.layer1(x) 90 | x = self.layer2(x) 91 | x = self.layer3(x) 92 | 93 | x = x.view(x.size(0), -1) 94 | if self.reid: 95 | x = self.dense[0](x) 96 | x = self.dense[1](x) 97 | x = x.div(x.norm(p=2, dim=1, keepdim=True)) 98 | return x 99 | x = self.dense(x) 100 | # B x 128 101 | # classifier 102 | x = self.classifier(x) 103 | return x 104 | 105 | 106 | if __name__ == '__main__': 107 | net = Net(reid=True) 108 | x = torch.randn(4, 3, 128, 64) 109 | y = net(x) 110 | import ipdb 111 | ipdb.set_trace() 112 | 
-------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/deep/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import torchvision 4 | 5 | import argparse 6 | import os 7 | 8 | from model import Net 9 | 10 | parser = argparse.ArgumentParser(description="Train on market1501") 11 | parser.add_argument("--data-dir", default='data', type=str) 12 | parser.add_argument("--no-cuda", action="store_true") 13 | parser.add_argument("--gpu-id", default=0, type=int) 14 | args = parser.parse_args() 15 | 16 | # device 17 | device = "cuda:{}".format( 18 | args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 19 | if torch.cuda.is_available() and not args.no_cuda: 20 | cudnn.benchmark = True 21 | 22 | # data loader 23 | root = args.data_dir 24 | query_dir = os.path.join(root, "query") 25 | gallery_dir = os.path.join(root, "gallery") 26 | transform = torchvision.transforms.Compose([ 27 | torchvision.transforms.Resize((128, 64)), 28 | torchvision.transforms.ToTensor(), 29 | torchvision.transforms.Normalize( 30 | [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 31 | ]) 32 | queryloader = torch.utils.data.DataLoader( 33 | torchvision.datasets.ImageFolder(query_dir, transform=transform), 34 | batch_size=64, shuffle=False 35 | ) 36 | galleryloader = torch.utils.data.DataLoader( 37 | torchvision.datasets.ImageFolder(gallery_dir, transform=transform), 38 | batch_size=64, shuffle=False 39 | ) 40 | 41 | # net definition 42 | net = Net(reid=True) 43 | assert os.path.isfile( 44 | "./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 
45 | print('Loading from checkpoint/ckpt.t7') 46 | checkpoint = torch.load("./checkpoint/ckpt.t7") 47 | net_dict = checkpoint['net_dict'] 48 | net.load_state_dict(net_dict, strict=False) 49 | net.eval() 50 | net.to(device) 51 | 52 | # compute features 53 | query_features = torch.tensor([]).float() 54 | query_labels = torch.tensor([]).long() 55 | gallery_features = torch.tensor([]).float() 56 | gallery_labels = torch.tensor([]).long() 57 | 58 | with torch.no_grad(): 59 | for idx, (inputs, labels) in enumerate(queryloader): 60 | inputs = inputs.to(device) 61 | features = net(inputs).cpu() 62 | query_features = torch.cat((query_features, features), dim=0) 63 | query_labels = torch.cat((query_labels, labels)) 64 | 65 | for idx, (inputs, labels) in enumerate(galleryloader): 66 | inputs = inputs.to(device) 67 | features = net(inputs).cpu() 68 | gallery_features = torch.cat((gallery_features, features), dim=0) 69 | gallery_labels = torch.cat((gallery_labels, labels)) 70 | 71 | gallery_labels -= 2 72 | 73 | # save features 74 | features = { 75 | "qf": query_features, 76 | "ql": query_labels, 77 | "gf": gallery_features, 78 | "gl": gallery_labels 79 | } 80 | torch.save(features, "features.pth") 81 | -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/deep/train.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaolei00/yolov5_monocular_camera_ranging/b78f77f914a6e1f1c7f983f05717209429cce00b/deep_sort_pytorch/deep_sort/deep/train.jpg -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/deep/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | import torchvision 10 | 11 | from model 
CHECKPOINT_DIR = "checkpoint"
CHECKPOINT_PATH = "./checkpoint/ckpt.t7"

parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir", default='data', type=str)
parser.add_argument("--no-cuda", action="store_true")
parser.add_argument("--gpu-id", default=0, type=int)
parser.add_argument("--lr", default=0.1, type=float)
parser.add_argument("--interval", '-i', default=20, type=int)
parser.add_argument('--resume', '-r', action='store_true')
args = parser.parse_args()

# device
use_cuda = torch.cuda.is_available() and not args.no_cuda
device = "cuda:{}".format(args.gpu_id) if use_cuda else "cpu"
if use_cuda:
    cudnn.benchmark = True

# data loading
root = args.data_dir
train_dir = os.path.join(root, "train")
test_dir = os.path.join(root, "test")
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop((128, 64), padding=4),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.Resize((128, 64)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
trainloader = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(train_dir, transform=transform_train),
    batch_size=64, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(test_dir, transform=transform_test),
    batch_size=64, shuffle=True
)
num_classes = max(len(trainloader.dataset.classes),
                  len(testloader.dataset.classes))

# net definition
start_epoch = 0
# Initialised BEFORE the resume branch: it used to be reset to 0 afterwards,
# which discarded the accuracy restored from the checkpoint and let a worse
# model overwrite the best one.
best_acc = 0.
net = Net(num_classes=num_classes)
if args.resume:
    # Raise explicitly instead of `assert`, which is stripped under `python -O`.
    if not os.path.isfile(CHECKPOINT_PATH):
        raise FileNotFoundError("Error: no checkpoint file found!")
    print('Loading from checkpoint/ckpt.t7')
    # map_location lets a GPU-saved checkpoint be resumed on CPU / another GPU.
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)
    net.load_state_dict(checkpoint['net_dict'])
    best_acc = checkpoint['acc']
    # The checkpoint is written after `epoch` has finished training, so
    # training continues with the following epoch.
    start_epoch = checkpoint['epoch'] + 1
net.to(device)

# loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4)


def train(epoch):
    """Train ``net`` for one epoch.

    Prints running statistics every ``args.interval`` batches and returns
    ``(mean loss per batch, top-1 error)`` over the whole epoch.
    """
    print("\nEpoch : %d" % (epoch+1))
    net.train()
    training_loss = 0.  # loss since the last progress print
    train_loss = 0.     # loss over the whole epoch
    correct = 0
    total = 0
    interval = args.interval
    start = time.time()
    for idx, (inputs, labels) in enumerate(trainloader):
        # forward
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accumulating
        batch_loss = loss.item()
        training_loss += batch_loss
        train_loss += batch_loss
        correct += outputs.max(dim=1)[1].eq(labels).sum().item()
        total += labels.size(0)

        # print
        if (idx+1) % interval == 0:
            end = time.time()
            print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
                100.*(idx+1)/len(trainloader), end-start, training_loss /
                interval, correct, total, 100.*correct/total
            ))
            training_loss = 0.
            start = time.time()

    return train_loss/len(trainloader), 1. - correct/total


def test(epoch):
    """Evaluate ``net`` on the test loader and checkpoint it on improvement.

    Returns ``(mean loss per batch, top-1 error)``. When the accuracy beats
    the module-level ``best_acc``, the weights, accuracy and epoch are saved
    to ``./checkpoint/ckpt.t7``.
    """
    global best_acc
    net.eval()
    test_loss = 0.
    correct = 0
    total = 0
    start = time.time()
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            correct += outputs.max(dim=1)[1].eq(labels).sum().item()
            total += labels.size(0)

    print("Testing ...")
    end = time.time()
    # The whole loader has been consumed at this point, so progress is 100%.
    print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
        100., end-start, test_loss /
        len(testloader), correct, total, 100.*correct/total
    ))

    # saving checkpoint
    acc = 100.*correct/total
    if acc > best_acc:
        best_acc = acc
        print("Saving parameters to checkpoint/ckpt.t7")
        checkpoint = {
            'net_dict': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        os.makedirs(CHECKPOINT_DIR, exist_ok=True)
        torch.save(checkpoint, CHECKPOINT_PATH)

    return test_loss/len(testloader), 1. - correct/total


# plot figure
x_epoch = []
record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []}
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1err")


def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
    """Append this epoch's metrics to ``record`` and re-save ``train.jpg``."""
    record['train_loss'].append(train_loss)
    record['train_err'].append(train_err)
    record['test_loss'].append(test_loss)
    record['test_err'].append(test_err)

    x_epoch.append(epoch)
    ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
    ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
    ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
    ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
    # Draw the legend on the first plotted point rather than on epoch 0,
    # which never occurs when training is resumed from a checkpoint.
    if len(x_epoch) == 1:
        ax0.legend()
        ax1.legend()
    fig.savefig("train.jpg")


def lr_decay():
    """Multiply the learning rate of every optimizer param group by 0.1."""
    for group in optimizer.param_groups:
        group['lr'] *= 0.1
        print("Learning rate adjusted to {}".format(group['lr']))


def main():
    """Run 40 epochs of train/eval, decaying the learning rate every 20th epoch."""
    for epoch in range(start_epoch, start_epoch+40):
        train_loss, train_err = train(epoch)
        test_loss, test_err = test(epoch)
        draw_curve(epoch, train_loss, train_err, test_loss, test_err)
        if (epoch+1) % 20 == 0:
            lr_decay()


if __name__ == '__main__':
    main()
class DeepSort(object):
    """Glue between a detector and the SORT tracker with appearance features.

    Builds an appearance ``Extractor``, a cosine nearest-neighbour metric and
    a ``Tracker``; ``update`` feeds one frame of detections through them.
    """

    def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True):
        self.min_confidence = min_confidence
        # Stored for callers; this class does not apply NMS itself.
        self.nms_max_overlap = nms_max_overlap

        self.extractor = Extractor(model_path, use_cuda=use_cuda)

        max_cosine_distance = max_dist
        metric = NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(
            metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init)

    def update(self, bbox_xywh, confidences, classes, ori_img):
        """Advance the tracker by one frame.

        Parameters
        ----------
        bbox_xywh : array_like
            One `(center x, center y, width, height)` row per detection.
        confidences : array_like
            Detector score per detection.
        classes : array_like
            Class id per detection; each element must support ``.item()``,
            which the tracker calls on it.
        ori_img : ndarray
            The frame the boxes refer to, indexed `[y, x]`.

        Returns
        -------
        ndarray | list
            An integer array with rows `(x1, y1, x2, y2, track_id, class_id)`
            for confirmed, recently updated tracks, or an empty list when
            there are none.
        """
        self.height, self.width = ori_img.shape[:2]
        # generate detections
        features = self._get_features(bbox_xywh, ori_img)
        bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)
        detections = []
        kept_classes = []
        for i, conf in enumerate(confidences):
            if conf > self.min_confidence:
                detections.append(Detection(bbox_tlwh[i], conf, features[i]))
                # Filter classes together with detections: the tracker indexes
                # `classes` by position in `detections`, so an unfiltered array
                # would assign the wrong class after any detection is dropped.
                kept_classes.append(classes[i])

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections, kept_classes)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            x1, y1, x2, y2 = self._tlwh_to_xyxy(track.to_tlwh())
            # Builtin `int`: the `np.int` alias was removed from NumPy.
            outputs.append(np.array(
                [x1, y1, x2, y2, track.track_id, track.class_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)
        return outputs

    @staticmethod
    def _xywh_to_tlwh(bbox_xywh):
        """Convert rows from `(xc, yc, w, h)` to `(x top-left, y top-left, w, h)`.

        ndarray and torch.Tensor inputs are copied and keep their type; any
        other array_like is converted to a float ndarray instead of failing
        with an unbound local.
        """
        if isinstance(bbox_xywh, np.ndarray):
            bbox_tlwh = bbox_xywh.copy()
        elif isinstance(bbox_xywh, torch.Tensor):
            bbox_tlwh = bbox_xywh.clone()
        else:
            bbox_tlwh = np.array(bbox_xywh, dtype=float)
        # Columns 2 and 3 are untouched, so reading them from the copy is
        # equivalent to reading them from the input.
        bbox_tlwh[:, 0] = bbox_tlwh[:, 0] - bbox_tlwh[:, 2] / 2.
        bbox_tlwh[:, 1] = bbox_tlwh[:, 1] - bbox_tlwh[:, 3] / 2.
        return bbox_tlwh

    def _xywh_to_xyxy(self, bbox_xywh):
        """Convert one `(xc, yc, w, h)` box to integer corners clipped to the frame."""
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _tlwh_to_xyxy(self, bbox_tlwh):
        """Convert one `(x top-left, y top-left, w, h)` box to integer corners clipped to the frame."""
        x, y, w, h = bbox_tlwh
        x1 = max(int(x), 0)
        x2 = min(int(x+w), self.width - 1)
        y1 = max(int(y), 0)
        y2 = min(int(y+h), self.height - 1)
        return x1, y1, x2, y2

    def increment_ages(self):
        """Age every track without a measurement update (delegates to the tracker)."""
        self.tracker.increment_ages()

    def _xyxy_to_tlwh(self, bbox_xyxy):
        """Convert corners `(x1, y1, x2, y2)` to `(x1, y1, int width, int height)`."""
        x1, y1, x2, y2 = bbox_xyxy
        return x1, y1, int(x2 - x1), int(y2 - y1)

    def _get_features(self, bbox_xywh, ori_img):
        """Crop every box out of ``ori_img`` and run the extractor on the crops.

        Returns the extractor output, or an empty array when there are no boxes.
        """
        im_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im_crops.append(ori_img[y1:y2, x1:x2])
        if im_crops:
            return self.extractor(im_crops)
        return np.array([])
class Detection(object):
    """
    This class represents a bounding box detection in a single image.

    Parameters
    ----------
    tlwh : array_like
        Bounding box in format `(top left x, top left y, width, height)`.
    confidence : float
        Detector confidence score.
    feature : array_like
        A feature vector that describes the object contained in this image.

    Attributes
    ----------
    tlwh : ndarray
        Bounding box in format `(top left x, top left y, width, height)`,
        stored as float64.
    confidence : float
        Detector confidence score.
    feature : ndarray
        The feature vector, stored as float32.

    """

    def __init__(self, tlwh, confidence, feature):
        # Builtin `float` (float64): the `np.float` alias was removed from
        # NumPy and raises AttributeError on current releases.
        self.tlwh = np.asarray(tlwh, dtype=float)
        self.confidence = float(confidence)
        self.feature = np.asarray(feature, dtype=np.float32)

    def to_tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret
def iou(bbox, candidates):
    """Compute intersection over union.

    Parameters
    ----------
    bbox : ndarray
        A bounding box in format `(top left x, top left y, width, height)`.
    candidates : ndarray
        A matrix of candidate bounding boxes (one per row) in the same format
        as `bbox`.

    Returns
    -------
    ndarray
        The intersection over union in [0, 1] between the `bbox` and each
        candidate. A higher score means a larger fraction of the `bbox` is
        occluded by the candidate.

    """
    bbox = np.asarray(bbox)
    candidates = np.asarray(candidates)

    # Corners of the overlap rectangle, one row per candidate (broadcast).
    tl = np.maximum(bbox[:2], candidates[:, :2])
    br = np.minimum(bbox[:2] + bbox[2:], candidates[:, :2] + candidates[:, 2:])
    # Negative extents mean no overlap; clamp them to zero area.
    wh = np.maximum(0., br - tl)

    area_intersection = wh.prod(axis=1)
    area_bbox = bbox[2:].prod()
    area_candidates = candidates[:, 2:].prod(axis=1)
    return area_intersection / (area_bbox + area_candidates - area_intersection)


def iou_cost(tracks, detections, track_indices=None,
             detection_indices=None):
    """An intersection over union distance metric.

    Parameters
    ----------
    tracks : List[deep_sort.track.Track]
        A list of tracks.
    detections : List[deep_sort.detection.Detection]
        A list of detections.
    track_indices : Optional[List[int]]
        A list of indices to tracks that should be matched. Defaults to
        all `tracks`.
    detection_indices : Optional[List[int]]
        A list of indices to detections that should be matched. Defaults
        to all `detections`.

    Returns
    -------
    ndarray
        Returns a cost matrix of shape
        len(track_indices), len(detection_indices) where entry (i, j) is
        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
        Rows of tracks with `time_since_update > 1` are filled with
        `linear_assignment.INFTY_COST` instead.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
    # Built once: the candidate boxes do not depend on the track row.
    # reshape(-1, 4) keeps the array 2-D when there are no detections, so
    # `iou` returns an empty row instead of raising IndexError.
    candidates = np.asarray(
        [detections[i].tlwh for i in detection_indices]).reshape(-1, 4)
    for row, track_idx in enumerate(track_indices):
        track = tracks[track_idx]
        if track.time_since_update > 1:
            cost_matrix[row, :] = linear_assignment.INFTY_COST
            continue
        cost_matrix[row, :] = 1. - iou(track.to_tlwh(), candidates)
    return cost_matrix
40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 
108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | if metric == "euclidean": 126 | self._metric = _nn_euclidean_distance 127 | elif metric == "cosine": 128 | self._metric = _nn_cosine_distance 129 | else: 130 | raise ValueError( 131 | "Invalid metric; must be either 'euclidean' or 'cosine'") 132 | self.matching_threshold = matching_threshold 133 | self.budget = budget 134 | self.samples = {} 135 | 136 | def partial_fit(self, features, targets, active_targets): 137 | """Update the distance metric with new data. 138 | 139 | Parameters 140 | ---------- 141 | features : ndarray 142 | An NxM matrix of N features of dimensionality M. 143 | targets : ndarray 144 | An integer array of associated target identities. 145 | active_targets : List[int] 146 | A list of targets that are currently present in the scene. 147 | 148 | """ 149 | for feature, target in zip(features, targets): 150 | self.samples.setdefault(target, []).append(feature) 151 | if self.budget is not None: 152 | self.samples[target] = self.samples[target][-self.budget:] 153 | self.samples = {k: self.samples[k] for k in active_targets} 154 | 155 | def distance(self, features, targets): 156 | """Compute distance between features and targets. 157 | 158 | Parameters 159 | ---------- 160 | features : ndarray 161 | An NxM matrix of N features of dimensionality M. 162 | targets : List[int] 163 | A list of targets to match the given `features` against. 
164 | 165 | Returns 166 | ------- 167 | ndarray 168 | Returns a cost matrix of shape len(targets), len(features), where 169 | element (i, j) contains the closest squared distance between 170 | `targets[i]` and `features[j]`. 171 | 172 | """ 173 | cost_matrix = np.zeros((len(targets), len(features))) 174 | for i, target in enumerate(targets): 175 | cost_matrix[i, :] = self._metric(self.samples[target], features) 176 | return cost_matrix 177 | -------------------------------------------------------------------------------- /deep_sort_pytorch/deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
def non_max_suppression(boxes, max_bbox_overlap, scores=None):
    """Suppress overlapping detections.

    Original code from [1]_ has been adapted to include confidence score.

    .. [1] http://www.pyimagesearch.com/2015/02/16/
           faster-non-maximum-suppression-python/

    Examples
    --------

        >>> boxes = [d.roi for d in detections]
        >>> scores = [d.confidence for d in detections]
        >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
        >>> detections = [detections[i] for i in indices]

    Parameters
    ----------
    boxes : array_like
        Array of ROIs (x, y, width, height), one per row.
    max_bbox_overlap : float
        ROIs that overlap more than this value are suppressed.
    scores : Optional[array_like]
        Detector confidence score. When omitted, boxes are ranked by their
        bottom edge instead.

    Returns
    -------
    List[int]
        Returns indices of detections that have survived non-maxima suppression.

    """
    if len(boxes) == 0:
        return []

    # np.asarray accepts the list input shown in the example above; builtin
    # `float` replaces the `np.float` alias that was removed from NumPy.
    boxes = np.asarray(boxes, dtype=float)
    pick = []

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2] + boxes[:, 0]
    y2 = boxes[:, 3] + boxes[:, 1]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Ascending order: the best remaining box is always the last index.
    idxs = np.argsort(scores) if scores is not None else np.argsort(y2)

    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # Intersection of the picked box with every remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # Overlap is measured relative to each remaining box's own area.
        overlap = (w * h) / area[rest]

        # Drop the picked box and everything it suppresses.
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > max_bbox_overlap)[0])))

    return pick
class TrackState:
    """
    Enumeration of the states a single target track can be in. A new track is
    `Tentative` until enough evidence has been collected, then becomes
    `Confirmed`. A track that is no longer alive is marked `Deleted` so it
    can be removed from the set of active tracks.

    """

    Tentative = 1
    Confirmed = 2
    Deleted = 3


class Track:
    """
    A single target track with state space `(x, y, a, h)` and associated
    velocities, where `(x, y)` is the center of the bounding box, `a` is the
    aspect ratio and `h` is the height.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    class_id : int
        Class identifier stored on the track.
    n_init : int
        Number of measurement updates (`hits`) required before a tentative
        track becomes confirmed. A tentative track that is marked as missed
        is deleted.
    max_age : int
        A track is deleted when it is marked as missed while
        `time_since_update` exceeds this value.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from. If not
        None, this feature is added to the `features` cache.

    Attributes
    ----------
    mean : ndarray
        Mean vector of the current state distribution.
    covariance : ndarray
        Covariance matrix of the current state distribution.
    track_id : int
        A unique track identifier.
    class_id : int
        Class identifier stored on the track.
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurrence.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features : List[ndarray]
        A cache of features. On each measurement update, the associated
        feature vector is added to this list.

    """

    def __init__(self, mean, covariance, track_id, class_id, n_init, max_age,
                 feature=None):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.class_id = class_id

        # Counters start as if the originating detection were the first update.
        self.hits = 1
        self.age = 1
        self.time_since_update = 0

        self.state = TrackState.Tentative
        self.features = [] if feature is None else [feature]

        self._n_init = n_init
        self._max_age = max_age

    def to_tlwh(self):
        """Return the current box as `(top left x, top left y, width, height)`.

        Returns
        -------
        ndarray
            The bounding box.

        """
        box = self.mean[:4].copy()
        # aspect ratio * height -> width
        box[2] *= box[3]
        # center -> top-left corner
        box[:2] -= box[2:] / 2
        return box

    def to_tlbr(self):
        """Return the current box as `(min x, min y, max x, max y)`.

        Returns
        -------
        ndarray
            The bounding box.

        """
        box = self.to_tlwh()
        box[2:] = box[:2] + box[2:]
        return box

    def increment_age(self):
        """Advance `age` and `time_since_update` by one frame."""
        self.age += 1
        self.time_since_update += 1

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.

        """
        predicted = kf.predict(self.mean, self.covariance)
        self.mean, self.covariance = predicted
        self.increment_age()

    def update(self, kf, detection):
        """Perform Kalman filter measurement update step and update the
        feature cache.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.
        detection : Detection
            The associated detection.

        """
        measurement = detection.to_xyah()
        self.mean, self.covariance = kf.update(
            self.mean, self.covariance, measurement)
        self.features.append(detection.feature)

        self.hits += 1
        self.time_since_update = 0
        # Promote once enough updates have been collected.
        if self.is_tentative() and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).
        """
        too_old = self.time_since_update > self._max_age
        if self.state == TrackState.Tentative or too_old:
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed).
        """
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted
class Tracker:
    """
    This is the multi-target tracker.

    Parameters
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        A distance metric for measurement-to-track association.
    max_iou_distance : float
        Maximum cost accepted by the IOU association stage.
    max_age : int
        Maximum number of misses before a track is deleted.
    n_init : int
        Number of measurement updates before a track is confirmed.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement to track association.
    max_age : int
        Maximum number of misses before a track is deleted.
    n_init : int
        Number of frames that a track remains in initialization phase.
    kf : kalman_filter.KalmanFilter
        A Kalman filter to filter target trajectories in image space.
    tracks : List[Track]
        The list of active tracks at the current time step.

    """

    def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3):
        self.metric = metric
        self.max_iou_distance = max_iou_distance
        self.max_age = max_age
        self.n_init = n_init

        self.kf = kalman_filter.KalmanFilter()
        self.tracks = []
        # Identifier handed to the next track that gets created.
        self._next_id = 1

    def predict(self):
        """Propagate track state distributions one time step forward.

        This function should be called once every time step, before `update`.
        """
        for trk in self.tracks:
            trk.predict(self.kf)

    def increment_ages(self):
        """Age every track by one frame and mark each of them as missed."""
        for trk in self.tracks:
            trk.increment_age()
            trk.mark_missed()

    def update(self, detections, classes):
        """Perform measurement update and track management.

        Parameters
        ----------
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.
        classes : sequence
            Class per detection, indexed like `detections`; each element must
            support `.item()`.

        """
        # Run matching cascade.
        matched, lost_tracks, new_detections = self._match(detections)

        # Update track set.
        for trk_i, det_i in matched:
            self.tracks[trk_i].update(self.kf, detections[det_i])
        for trk_i in lost_tracks:
            self.tracks[trk_i].mark_missed()
        for det_i in new_detections:
            self._initiate_track(detections[det_i], classes[det_i].item())
        self.tracks = [trk for trk in self.tracks if not trk.is_deleted()]

        # Update distance metric with the features collected by confirmed
        # tracks, then clear those per-track caches.
        confirmed = [trk for trk in self.tracks if trk.is_confirmed()]
        active_targets = [trk.track_id for trk in confirmed]
        features, targets = [], []
        for trk in confirmed:
            features.extend(trk.features)
            targets.extend([trk.track_id] * len(trk.features))
            trk.features = []
        self.metric.partial_fit(
            np.asarray(features), np.asarray(targets), active_targets)

    def _match(self, detections):
        """Associate tracks with detections.

        Confirmed tracks go through the appearance matching cascade first;
        unconfirmed tracks, plus confirmed tracks that were left unmatched
        with `time_since_update == 1`, are then matched by IOU.

        Returns `(matches, unmatched_tracks, unmatched_detections)`.
        """

        def gated_metric(tracks, dets, track_indices, detection_indices):
            # Appearance cost, gated through linear_assignment.gate_cost_matrix.
            features = np.array([dets[i].feature for i in detection_indices])
            targets = np.array([tracks[i].track_id for i in track_indices])
            cost_matrix = self.metric.distance(features, targets)
            return linear_assignment.gate_cost_matrix(
                self.kf, cost_matrix, tracks, dets, track_indices,
                detection_indices)

        # Split track set into confirmed and unconfirmed tracks.
        confirmed_tracks, unconfirmed_tracks = [], []
        for i, trk in enumerate(self.tracks):
            if trk.is_confirmed():
                confirmed_tracks.append(i)
            else:
                unconfirmed_tracks.append(i)

        # Associate confirmed tracks using appearance features.
        matches_a, unmatched_tracks_a, unmatched_detections = \
            linear_assignment.matching_cascade(
                gated_metric, self.metric.matching_threshold, self.max_age,
                self.tracks, detections, confirmed_tracks)

        # Associate remaining tracks together with unconfirmed tracks using IOU.
        recent, stale = [], []
        for k in unmatched_tracks_a:
            if self.tracks[k].time_since_update == 1:
                recent.append(k)
            else:
                stale.append(k)
        iou_track_candidates = unconfirmed_tracks + recent
        matches_b, unmatched_tracks_b, unmatched_detections = \
            linear_assignment.min_cost_matching(
                iou_matching.iou_cost, self.max_iou_distance, self.tracks,
                detections, iou_track_candidates, unmatched_detections)

        matches = matches_a + matches_b
        unmatched_tracks = list(set(stale + unmatched_tracks_b))
        return matches, unmatched_tracks, unmatched_detections

    def _initiate_track(self, detection, class_id):
        """Create a new track from `detection` and append it to `tracks`."""
        mean, covariance = self.kf.initiate(detection.to_xyah())
        new_track = Track(
            mean, covariance, self._next_id, class_id, self.n_init,
            self.max_age, detection.feature)
        self.tracks.append(new_track)
        self._next_id += 1
123 | iou_track_candidates = unconfirmed_tracks + [ 124 | k for k in unmatched_tracks_a if 125 | self.tracks[k].time_since_update == 1] 126 | unmatched_tracks_a = [ 127 | k for k in unmatched_tracks_a if 128 | self.tracks[k].time_since_update != 1] 129 | matches_b, unmatched_tracks_b, unmatched_detections = \ 130 | linear_assignment.min_cost_matching( 131 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 132 | detections, iou_track_candidates, unmatched_detections) 133 | 134 | matches = matches_a + matches_b 135 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 136 | return matches, unmatched_tracks, unmatched_detections 137 | 138 | def _initiate_track(self, detection, class_id): 139 | mean, covariance = self.kf.initiate(detection.to_xyah()) 140 | self.tracks.append(Track( 141 | mean, covariance, self._next_id, class_id, self.n_init, self.max_age, 142 | detection.feature)) 143 | self._next_id += 1 144 | -------------------------------------------------------------------------------- /deep_sort_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /deep_sort_pytorch/utils/asserts.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | 4 | def assert_in(file, files_to_check): 5 | if file not in files_to_check: 6 | raise AssertionError("{} does not exist in the list".format(str(file))) 7 | return True 8 | 9 | 10 | def assert_in_env(check_list: list): 11 | for item in check_list: 12 | assert_in(item, environ.keys()) 13 | return True 14 | -------------------------------------------------------------------------------- /deep_sort_pytorch/utils/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 5 | 6 | 7 
def compute_color_for_labels(label):
    """Return a fixed colour triple for a class/identity number.

    Every channel is ``palette[k] * (label ** 2 - label + 1) % 255``, so equal
    labels always map to the same colour.
    """
    seed = label ** 2 - label + 1
    return tuple(int((channel * seed) % 255) for channel in palette)


def draw_boxes(img, bbox, identities=None, offset=(0, 0)):
    """Draw labelled boxes onto ``img`` in place and return it.

    Args:
        img: Image handed to the ``cv2`` drawing calls.
        bbox: Iterable of ``(x1, y1, x2, y2)`` boxes; values are cast to int.
        identities: Optional per-box ids, indexed like ``bbox``. When omitted
            every box is labelled (and coloured) as id 0.
        offset: ``(dx, dy)`` added to every corner before drawing.
    """
    font = cv2.FONT_HERSHEY_PLAIN
    for idx, box in enumerate(bbox):
        left, top, right, bottom = (int(v) for v in box)
        left, right = left + offset[0], right + offset[0]
        top, bottom = top + offset[1], bottom + offset[1]

        # Box outline, filled caption bar, then the caption text on top of it.
        track_id = 0 if identities is None else int(identities[idx])
        color = compute_color_for_labels(track_id)
        label = '{}{:d}'.format("", track_id)
        text_w, text_h = cv2.getTextSize(label, font, 2, 2)[0][:2]
        cv2.rectangle(img, (left, top), (right, bottom), color, 3)
        cv2.rectangle(img, (left, top), (left + text_w + 3, top + text_h + 4), color, -1)
        cv2.putText(img, label, (left, top + text_h + 4), font, 2, [255, 255, 255], 2)
    return img


if __name__ == '__main__':
    for i in range(82):
        print(compute_color_for_labels(i))

# --------------------------------------------------------------------------------
# /deep_sort_pytorch/utils/evaluation.py:
# --------------------------------------------------------------------------------
import os
import numpy as np
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'
from utils.io import read_results, unzip_objs


class Evaluator(object):
    # Accumulates motmetrics events for one sequence against its ground truth.

    def __init__(self, data_root, seq_name, data_type):
        """Store the sequence location, load its annotations and reset the accumulator."""
        self.data_root = data_root
        self.seq_name = seq_name
        self.data_type = data_type

        self.load_annotations()
        self.reset_accumulator()

    def load_annotations(self):
        """Read ``<data_root>/<seq_name>/gt/gt.txt`` twice: once as ground truth, once as ignore regions."""
        assert self.data_type == 'mot'

        gt_path = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
        self.gt_frame_dict = read_results(gt_path, self.data_type, is_gt=True)
        self.gt_ignore_frame_dict = read_results(gt_path, self.data_type, is_ignore=True)

| def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 
| 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = mm.metrics.motchallenge_metrics 86 | metrics = copy.deepcopy(metrics) 87 | 88 | mh = mm.metrics.create() 89 | summary = mh.compute_many( 90 | accs, 91 | metrics=metrics, 92 | names=names, 93 | generate_overall=True 94 | ) 95 | 96 | return summary 97 | 98 | @staticmethod 99 | def save_summary(summary, filename): 100 | import pandas as pd 101 | writer = pd.ExcelWriter(filename) 102 | summary.to_excel(writer) 103 | writer.save() 104 | -------------------------------------------------------------------------------- /deep_sort_pytorch/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | # from utils.log import get_logger 6 | 7 | 8 | def write_results(filename, results, data_type): 9 | if data_type == 'mot': 10 | save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' 11 | elif data_type == 'kitti': 12 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 13 | else: 14 | raise ValueError(data_type) 15 | 16 | with open(filename, 'w') as f: 17 | for frame_id, tlwhs, track_ids in results: 18 | if data_type == 'kitti': 19 | frame_id -= 1 20 | for tlwh, track_id in zip(tlwhs, track_ids): 21 | if track_id < 0: 22 | continue 23 | x1, y1, w, h = tlwh 24 | x2, y2 = x1 + w, y1 + h 25 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 26 | f.write(line) 27 | 28 | 29 | # def write_results(filename, results_dict: Dict, data_type: str): 30 | # if not filename: 31 | # return 32 | # path = os.path.dirname(filename) 33 | # if not os.path.exists(path): 34 | # os.makedirs(path) 35 | 36 | # if data_type in ('mot', 'mcmot', 'lab'): 37 | # save_format = 
'{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 38 | # elif data_type == 'kitti': 39 | # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 40 | # else: 41 | # raise ValueError(data_type) 42 | 43 | # with open(filename, 'w') as f: 44 | # for frame_id, frame_data in results_dict.items(): 45 | # if data_type == 'kitti': 46 | # frame_id -= 1 47 | # for tlwh, track_id in frame_data: 48 | # if track_id < 0: 49 | # continue 50 | # x1, y1, w, h = tlwh 51 | # x2, y2 = x1 + w, y1 + h 52 | # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 53 | # f.write(line) 54 | # logger.info('Save results to {}'.format(filename)) 55 | 56 | 57 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 58 | if data_type in ('mot', 'lab'): 59 | read_fun = read_mot_results 60 | else: 61 | raise ValueError('Unknown data type: {}'.format(data_type)) 62 | 63 | return read_fun(filename, is_gt, is_ignore) 64 | 65 | 66 | """ 67 | labels={'ped', ... % 1 68 | 'person_on_vhcl', ... % 2 69 | 'car', ... % 3 70 | 'bicycle', ... % 4 71 | 'mbike', ... % 5 72 | 'non_mot_vhcl', ... % 6 73 | 'static_person', ... % 7 74 | 'distractor', ... % 8 75 | 'occluder', ... % 9 76 | 'occluder_on_grnd', ... %10 77 | 'occluder_full', ... % 11 78 | 'reflection', ... % 12 79 | 'crowd' ... 
% 13 80 | }; 81 | """ 82 | 83 | 84 | def read_mot_results(filename, is_gt, is_ignore): 85 | valid_labels = {1} 86 | ignore_labels = {2, 7, 8, 12} 87 | results_dict = dict() 88 | if os.path.isfile(filename): 89 | with open(filename, 'r') as f: 90 | for line in f.readlines(): 91 | linelist = line.split(',') 92 | if len(linelist) < 7: 93 | continue 94 | fid = int(linelist[0]) 95 | if fid < 1: 96 | continue 97 | results_dict.setdefault(fid, list()) 98 | 99 | if is_gt: 100 | if 'MOT16-' in filename or 'MOT17-' in filename: 101 | label = int(float(linelist[7])) 102 | mark = int(float(linelist[6])) 103 | if mark == 0 or label not in valid_labels: 104 | continue 105 | score = 1 106 | elif is_ignore: 107 | if 'MOT16-' in filename or 'MOT17-' in filename: 108 | label = int(float(linelist[7])) 109 | vis_ratio = float(linelist[8]) 110 | if label not in ignore_labels and vis_ratio >= 0: 111 | continue 112 | else: 113 | continue 114 | score = 1 115 | else: 116 | score = float(linelist[6]) 117 | 118 | tlwh = tuple(map(float, linelist[2:6])) 119 | target_id = int(linelist[1]) 120 | 121 | results_dict[fid].append((tlwh, target_id, score)) 122 | 123 | return results_dict 124 | 125 | 126 | def unzip_objs(objs): 127 | if len(objs) > 0: 128 | tlwhs, ids, scores = zip(*objs) 129 | else: 130 | tlwhs, ids, scores = [], [], [] 131 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 132 | 133 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /deep_sort_pytorch/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = 
logging.getLogger(name) 13 | logger.setLevel(logging.INFO) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | -------------------------------------------------------------------------------- /deep_sort_pytorch/utils/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | 6 | class YamlParser(edict): 7 | """ 8 | This is yaml parser based on EasyDict. 9 | """ 10 | 11 | def __init__(self, cfg_dict=None, config_file=None): 12 | if cfg_dict is None: 13 | cfg_dict = {} 14 | 15 | if config_file is not None: 16 | assert(os.path.isfile(config_file)) 17 | with open(config_file, 'r') as fo: 18 | yaml_ = yaml.load(fo.read(), Loader=yaml.FullLoader) 19 | cfg_dict.update(yaml_) 20 | 21 | super(YamlParser, self).__init__(cfg_dict) 22 | 23 | def merge_from_file(self, config_file): 24 | with open(config_file, 'r') as fo: 25 | yaml_ = yaml.load(fo.read(), Loader=yaml.FullLoader) 26 | self.update(yaml_) 27 | 28 | def merge_from_dict(self, config_dict): 29 | self.update(config_dict) 30 | 31 | 32 | def get_config(config_file=None): 33 | return YamlParser(config_file=config_file) 34 | 35 | 36 | if __name__ == "__main__": 37 | cfg = YamlParser(config_file="../configs/yolov3.yaml") 38 | cfg.merge_from_file("../configs/deep_sort.yaml") 39 | 40 | import ipdb 41 | ipdb.set_trace() 42 | -------------------------------------------------------------------------------- /deep_sort_pytorch/utils/tools.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from time import time 3 | 4 | 5 | def is_video(ext: str): 6 | """ 7 | Returns true if ext exists in 8 | allowed_exts for video files. 
9 | 10 | Args: 11 | ext: 12 | 13 | Returns: 14 | 15 | """ 16 | 17 | allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') 18 | return any((ext.endswith(x) for x in allowed_exts)) 19 | 20 | 21 | def tik_tok(func): 22 | """ 23 | keep track of time for each process. 24 | Args: 25 | func: 26 | 27 | Returns: 28 | 29 | """ 30 | @wraps(func) 31 | def _time_it(*args, **kwargs): 32 | start = time() 33 | try: 34 | return func(*args, **kwargs) 35 | finally: 36 | end_ = time() 37 | print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) 38 | 39 | return _time_it 40 | -------------------------------------------------------------------------------- /estimateDistanceUtil.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # distance from camera to object measured(meter) 4 | KNOWN_PRESON_DISTANCE = 0.762 5 | KNOWN_BUS_DISTANCE = 3.33 6 | KNOWN_CAR_DISTANCE = 1.18 7 | KNOWN_MOTORCYCLE_DISTANCE = 1.67 8 | # width of object in the real world or object plane(meter) 9 | KNOWN_PERSON_WIDTH = 0.6 10 | KNOWN_BUS_WIDTH = 2.63 11 | KNOWN_CAR_WIDTH = 1.86 12 | KNOWN_MOTORCYCLE_WIDTH = 0.56 13 | # colors 14 | GREEN = (0, 255, 0) 15 | RED = (0, 0, 255) 16 | WHITE = (255, 255, 255) 17 | fonts = cv2.FONT_HERSHEY_COMPLEX 18 | 19 | # focal length finder function 20 | def focal_length(width_in_rf_image, measured_distance, real_width): 21 | focal_length_value = (width_in_rf_image*measured_distance)/real_width 22 | # return focal_length_value 23 | return focal_length_value 24 | 25 | # distance estimation function 26 | def distance_finder(focal_length, object_width_in_frame, real_object_width): 27 | distance = (real_object_width*focal_length)/object_width_in_frame 28 | return distance 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /export.py: -------------------------------------------------------------------------------- 1 | """Export a 
YOLOv5 *.pt model to TorchScript, ONNX, CoreML formats 2 | 3 | Usage: 4 | $ python path/to/export.py --weights yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | from pathlib import Path 11 | 12 | import torch 13 | import torch.nn as nn 14 | from torch.utils.mobile_optimizer import optimize_for_mobile 15 | 16 | FILE = Path(__file__).absolute() 17 | sys.path.append(FILE.parents[0].as_posix()) # add yolov5/ to path 18 | 19 | from models.common import Conv 20 | from models.yolo import Detect 21 | from models.experimental import attempt_load 22 | from utils.activations import Hardswish, SiLU 23 | from utils.general import colorstr, check_img_size, check_requirements, file_size, set_logging 24 | from utils.torch_utils import select_device 25 | 26 | 27 | def export_torchscript(model, img, file, optimize): 28 | # TorchScript model export 29 | prefix = colorstr('TorchScript:') 30 | try: 31 | print(f'\n{prefix} starting export with torch {torch.__version__}...') 32 | f = file.with_suffix('.torchscript.pt') 33 | ts = torch.jit.trace(model, img, strict=False) 34 | (optimize_for_mobile(ts) if optimize else ts).save(f) 35 | print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') 36 | return ts 37 | except Exception as e: 38 | print(f'{prefix} export failure: {e}') 39 | 40 | 41 | def export_onnx(model, img, file, opset, train, dynamic, simplify): 42 | # ONNX model export 43 | prefix = colorstr('ONNX:') 44 | try: 45 | check_requirements(('onnx', 'onnx-simplifier')) 46 | import onnx 47 | 48 | print(f'\n{prefix} starting export with onnx {onnx.__version__}...') 49 | f = file.with_suffix('.onnx') 50 | torch.onnx.export(model, img, f, verbose=False, opset_version=opset, 51 | training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL, 52 | do_constant_folding=not train, 53 | input_names=['images'], 54 | output_names=['output'], 55 | dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # 
shape(1,3,640,640) 56 | 'output': {0: 'batch', 1: 'anchors'} # shape(1,25200,85) 57 | } if dynamic else None) 58 | 59 | # Checks 60 | model_onnx = onnx.load(f) # load onnx model 61 | onnx.checker.check_model(model_onnx) # check onnx model 62 | # print(onnx.helper.printable_graph(model_onnx.graph)) # print 63 | 64 | # Simplify 65 | if simplify: 66 | try: 67 | import onnxsim 68 | 69 | print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...') 70 | model_onnx, check = onnxsim.simplify( 71 | model_onnx, 72 | dynamic_input_shape=dynamic, 73 | input_shapes={'images': list(img.shape)} if dynamic else None) 74 | assert check, 'assert check failed' 75 | onnx.save(model_onnx, f) 76 | except Exception as e: 77 | print(f'{prefix} simplifier failure: {e}') 78 | print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') 79 | print(f"{prefix} run --dynamic ONNX model inference with detect.py: 'python detect.py --weights {f}'") 80 | except Exception as e: 81 | print(f'{prefix} export failure: {e}') 82 | 83 | 84 | def export_coreml(model, img, file): 85 | # CoreML model export 86 | prefix = colorstr('CoreML:') 87 | try: 88 | import coremltools as ct 89 | 90 | print(f'\n{prefix} starting export with coremltools {ct.__version__}...') 91 | f = file.with_suffix('.mlmodel') 92 | model.train() # CoreML exports should be placed in model.train() mode 93 | ts = torch.jit.trace(model, img, strict=False) # TorchScript model 94 | model = ct.convert(ts, inputs=[ct.ImageType('image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 95 | model.save(f) 96 | print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') 97 | except Exception as e: 98 | print(f'\n{prefix} export failure: {e}') 99 | 100 | 101 | def run(weights='./yolov5s.pt', # weights path 102 | img_size=(640, 640), # image (height, width) 103 | batch_size=1, # batch size 104 | device='cpu', # cuda device, i.e. 
0 or 0,1,2,3 or cpu 105 | include=('torchscript', 'onnx', 'coreml'), # include formats 106 | half=False, # FP16 half-precision export 107 | inplace=False, # set YOLOv5 Detect() inplace=True 108 | train=False, # model.train() mode 109 | optimize=False, # TorchScript: optimize for mobile 110 | dynamic=False, # ONNX: dynamic axes 111 | simplify=False, # ONNX: simplify model 112 | opset=12, # ONNX: opset version 113 | ): 114 | t = time.time() 115 | include = [x.lower() for x in include] 116 | img_size *= 2 if len(img_size) == 1 else 1 # expand 117 | file = Path(weights) 118 | 119 | # Load PyTorch model 120 | device = select_device(device) 121 | assert not (device.type == 'cpu' and half), '--half only compatible with GPU export, i.e. use --device 0' 122 | model = attempt_load(weights, map_location=device) # load FP32 model 123 | names = model.names 124 | 125 | # Input 126 | gs = int(max(model.stride)) # grid size (max stride) 127 | img_size = [check_img_size(x, gs) for x in img_size] # verify img_size are gs-multiples 128 | img = torch.zeros(batch_size, 3, *img_size).to(device) # image size(1,3,320,192) iDetection 129 | 130 | # Update model 131 | if half: 132 | img, model = img.half(), model.half() # to FP16 133 | model.train() if train else model.eval() # training mode = no Detect() layer grid construction 134 | for k, m in model.named_modules(): 135 | if isinstance(m, Conv): # assign export-friendly activations 136 | if isinstance(m.act, nn.Hardswish): 137 | m.act = Hardswish() 138 | elif isinstance(m.act, nn.SiLU): 139 | m.act = SiLU() 140 | elif isinstance(m, Detect): 141 | m.inplace = inplace 142 | m.onnx_dynamic = dynamic 143 | # m.forward = m.forward_export # assign forward (optional) 144 | 145 | for _ in range(2): 146 | y = model(img) # dry runs 147 | print(f"\n{colorstr('PyTorch:')} starting from {weights} ({file_size(weights):.1f} MB)") 148 | 149 | # Exports 150 | if 'torchscript' in include: 151 | export_torchscript(model, img, file, optimize) 152 | if 'onnx' 
in include: 153 | export_onnx(model, img, file, opset, train, dynamic, simplify) 154 | if 'coreml' in include: 155 | export_coreml(model, img, file) 156 | 157 | # Finish 158 | print(f'\nExport complete ({time.time() - t:.2f}s)' 159 | f"\nResults saved to {colorstr('bold', file.parent.resolve())}" 160 | f'\nVisualize with https://netron.app') 161 | 162 | 163 | def parse_opt(): 164 | parser = argparse.ArgumentParser() 165 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') 166 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image (height, width)') 167 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 168 | parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 169 | parser.add_argument('--include', nargs='+', default=['torchscript', 'onnx', 'coreml'], help='include formats') 170 | parser.add_argument('--half', action='store_true', help='FP16 half-precision export') 171 | parser.add_argument('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True') 172 | parser.add_argument('--train', action='store_true', help='model.train() mode') 173 | parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile') 174 | parser.add_argument('--dynamic', action='store_true', help='ONNX: dynamic axes') 175 | parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model') 176 | parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version') 177 | opt = parser.parse_args() 178 | return opt 179 | 180 | 181 | def main(opt): 182 | set_logging() 183 | print(colorstr('export: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items())) 184 | run(**vars(opt)) 185 | 186 | 187 | if __name__ == "__main__": 188 | opt = parse_opt() 189 | main(opt) 190 | -------------------------------------------------------------------------------- /hubconf.py: 
# --------------------------------------------------------------------------------
"""YOLOv5 PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5/

Usage:
    import torch
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
"""

import torch


def _matching_state_dict(checkpoint_sd, model_sd):
    """Keep the checkpoint entries whose key exists in the model with an identical shape."""
    return {k: v for k, v in checkpoint_sd.items() if k in model_sd and model_sd[k].shape == v.shape}


def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    """Creates a specified YOLOv5 model

    Arguments:
        name (str): name of model, i.e. 'yolov5s'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes
        autoshape (bool): apply YOLOv5 .autoshape() wrapper to model
        verbose (bool): print all information to screen
        device (str, torch.device, None): device to use for model parameters

    Returns:
        YOLOv5 pytorch model

    Raises:
        Exception: any failure while building or loading the model is
            re-raised with a hint to retry with ``force_reload=True``.
    """
    from pathlib import Path

    from models.yolo import Model, attempt_load
    from utils.general import check_requirements, set_logging
    from utils.downloads import attempt_download
    from utils.torch_utils import select_device

    file = Path(__file__).absolute()
    check_requirements(requirements=file.parent / 'requirements.txt', exclude=('tensorboard', 'thop', 'opencv-python'))
    set_logging(verbose=verbose)

    save_dir = Path('') if str(name).endswith('.pt') else file.parent
    path = (save_dir / name).with_suffix('.pt')  # checkpoint path
    try:
        device = select_device(('0' if torch.cuda.is_available() else 'cpu') if device is None else device)

        if pretrained and channels == 3 and classes == 80:
            model = attempt_load(path, map_location=device)  # download/load FP32 model
        else:
            cfg = list((Path(__file__).parent / 'models').rglob(f'{name}.yaml'))[0]  # model.yaml path
            model = Model(cfg, channels, classes)  # create model
            if pretrained:
                ckpt = torch.load(attempt_download(path), map_location=device)  # load
                msd = model.state_dict()  # model state_dict
                csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
                # Checkpoint keys missing from the new model are skipped instead of
                # raising KeyError (which surfaced as a bogus "cache out of date" error).
                csd = _matching_state_dict(csd, msd)  # filter
                model.load_state_dict(csd, strict=False)  # load
                if len(ckpt['model'].names) == classes:
                    model.names = ckpt['model'].names  # set class names attribute
        if autoshape:
            model = model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
        return model.to(device)

    except Exception as e:
        help_url = 'https://github.com/ultralytics/yolov5/issues/36'
        s = 'Cache may be out of date, try `force_reload=True`. See %s for help.' % help_url
        raise Exception(s) from e


def custom(path='path/to/model.pt', autoshape=True, verbose=True, device=None):
    # YOLOv5 custom or local model
    return _create(path, autoshape=autoshape, verbose=verbose, device=device)


def yolov5s(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    # YOLOv5-small model https://github.com/ultralytics/yolov5
    return _create('yolov5s', pretrained, channels, classes, autoshape, verbose, device)


def yolov5m(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    # YOLOv5-medium model https://github.com/ultralytics/yolov5
    return _create('yolov5m', pretrained, channels, classes, autoshape, verbose, device)


def yolov5l(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    # YOLOv5-large model https://github.com/ultralytics/yolov5
    return _create('yolov5l', pretrained, channels, classes, autoshape, verbose, device)


def yolov5x(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    # YOLOv5-xlarge model https://github.com/ultralytics/yolov5
    return _create('yolov5x', pretrained, channels, classes, autoshape, verbose, device)



def yolov5s6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    """YOLOv5-small-P6 model https://github.com/ultralytics/yolov5"""
    return _create('yolov5s6', pretrained=pretrained, channels=channels, classes=classes,
                   autoshape=autoshape, verbose=verbose, device=device)


def yolov5m6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    """YOLOv5-medium-P6 model https://github.com/ultralytics/yolov5"""
    return _create('yolov5m6', pretrained=pretrained, channels=channels, classes=classes,
                   autoshape=autoshape, verbose=verbose, device=device)


def yolov5l6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    """YOLOv5-large-P6 model https://github.com/ultralytics/yolov5"""
    return _create('yolov5l6', pretrained=pretrained, channels=channels, classes=classes,
                   autoshape=autoshape, verbose=verbose, device=device)


def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    """YOLOv5-xlarge-P6 model https://github.com/ultralytics/yolov5"""
    return _create('yolov5x6', pretrained=pretrained, channels=channels, classes=classes,
                   autoshape=autoshape, verbose=verbose, device=device)


if __name__ == '__main__':
    model = _create(name='yolov5s', pretrained=True, channels=3, classes=80, autoshape=True, verbose=True)  # pretrained
    # model = custom(path='path/to/model.pt')  # custom

    # Verify inference
    import cv2
    import numpy as np
    from PIL import Image
    from pathlib import Path

    # One sample per input kind the autoshape wrapper is exercised with.
    imgs = [
        'data/images/zidane.jpg',  # filename
        Path('data/images/zidane.jpg'),  # Path
        'https://ultralytics.com/images/zidane.jpg',  # URI
        cv2.imread('data/images/bus.jpg')[:, :, ::-1],  # OpenCV
        Image.open('data/images/bus.jpg'),  # PIL
        np.zeros((320, 640, 3)),  # numpy
    ]

    results = model(imgs)  # batched inference
    results.print()
    results.save()

# --------------------------------------------------------------------------------
# /models/__init__.py:
# --------------------------------------------------------------------------------


# --------------------------------------------------------------------------------
# /models/experimental.py:
# --------------------------------------------------------------------------------
# YOLOv5 experimental modules

import numpy as np
import torch
import torch.nn as nn

from models.common import Conv, DWConv
from utils.downloads import attempt_download


class CrossConv(nn.Module):
    # Cross Convolution Downsample
    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        """Args are ch_in, ch_out, kernel, stride, groups, expansion, shortcut."""
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        # The residual is only used when requested and the channel counts match.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply the 1xk then kx1 convolutions, adding the input back when ``self.add`` is set."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out


class Sum(nn.Module):
    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
    def __init__(self, n, weight=False):  # n: number of inputs
        super().__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        """Sum the inputs; with weights on, inputs 1..n-1 are scaled by ``2 * sigmoid(w)``."""
        total = x[0]  # no weight
        if not self.weight:
            for i in self.iter:
                total = total + x[i + 1]
            return total

        scale = torch.sigmoid(self.w) * 2
        for i in self.iter:
            total = total + x[i + 1] * scale[i]
        return total


class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super().__init__()
        half = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, half, k, s, None, g, act)
        self.cv2 = Conv(half, half, 5, 1, None, half, act)
53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride 62 | super().__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super().__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super().__init__() 102 | 103 | def forward(self, x, augment=False, profile=False, visualize=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment, profile, 
visualize)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.stack(y).mean(0) # mean ensemble 109 | y = torch.cat(y, 1) # nms ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None, inplace=True): 114 | from models.yolo import Detect, Model 115 | 116 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 117 | model = Ensemble() 118 | for w in weights if isinstance(weights, list) else [weights]: 119 | ckpt = torch.load(attempt_download(w), map_location=map_location) # load 120 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model 121 | 122 | # Compatibility updates 123 | for m in model.modules(): 124 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]: 125 | m.inplace = inplace # pytorch 1.7.0 compatibility 126 | elif type(m) is Conv: 127 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 128 | 129 | if len(model) == 1: 130 | return model[-1] # return model 131 | else: 132 | print(f'Ensemble created with {weights}\n') 133 | for k in ['names']: 134 | setattr(model, k, getattr(model[-1], k)) 135 | model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride 136 | return model # return ensemble 137 | -------------------------------------------------------------------------------- /models/hub/anchors.yaml: -------------------------------------------------------------------------------- 1 | # Default YOLOv5 anchors for COCO data 2 | 3 | 4 | # P5 ------------------------------------------------------------------------------------------------------------------- 5 | # P5-640: 6 | anchors_p5_640: 7 | - [10,13, 16,30, 33,23] # P3/8 8 | - [30,61, 62,45, 59,119] # P4/16 9 | - [116,90, 156,198, 373,326] # P5/32 10 | 11 | 12 | # P6 
------------------------------------------------------------------------------------------------------------------- 13 | # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 14 | anchors_p6_640: 15 | - [9,11, 21,19, 17,41] # P3/8 16 | - [43,32, 39,70, 86,64] # P4/16 17 | - [65,131, 134,130, 120,265] # P5/32 18 | - [282,180, 247,354, 512,387] # P6/64 19 | 20 | # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 21 | anchors_p6_1280: 22 | - [19,27, 44,40, 38,94] # P3/8 23 | - [96,68, 86,152, 180,137] # P4/16 24 | - [140,301, 303,264, 238,542] # P5/32 25 | - [436,615, 739,380, 925,792] # P6/64 26 | 27 | # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 28 | anchors_p6_1920: 29 | - [28,41, 67,59, 57,141] # P3/8 30 | - [144,103, 129,227, 270,205] # P4/16 31 | - [209,452, 455,396, 358,812] # P5/32 32 | - [653,922, 1109,570, 1387,1187] # P6/64 33 | 34 | 35 | # P7 ------------------------------------------------------------------------------------------------------------------- 36 | # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 37 | anchors_p7_640: 38 | - [11,11, 13,30, 29,20] # P3/8 39 | - [30,46, 61,38, 39,92] # P4/16 40 | - [78,80, 146,66, 79,163] # P5/32 41 | - [149,150, 321,143, 157,303] # P6/64 42 | - [257,402, 359,290, 524,372] # P7/128 43 | 
44 | # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 45 | anchors_p7_1280: 46 | - [19,22, 54,36, 32,77] # P3/8 47 | - [70,83, 138,71, 75,173] # P4/16 48 | - [165,159, 148,334, 375,151] # P5/32 49 | - [334,317, 251,626, 499,474] # P6/64 50 | - [750,326, 534,814, 1079,818] # P7/128 51 | 52 | # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 53 | anchors_p7_1920: 54 | - [29,34, 81,55, 47,115] # P3/8 55 | - [105,124, 207,107, 113,259] # P4/16 56 | - [247,238, 222,500, 563,227] # P5/32 57 | - [501,476, 376,939, 749,711] # P6/64 58 | - [1126,489, 801,1222, 1618,1227] # P7/128 59 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # darknet53 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Conv, [32, 3, 1]], # 0 14 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 15 | [-1, 1, Bottleneck, [64]], 16 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 17 | [-1, 2, Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 19 | [-1, 8, Bottleneck, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 21 | [-1, 8, Bottleneck, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 23 | [-1, 4, Bottleneck, [1024]], # 10 24 | ] 25 | 26 | # YOLOv3-SPP 
head 27 | head: 28 | [[-1, 1, Bottleneck, [1024, False]], 29 | [-1, 1, SPP, [512, [5, 9, 13]]], 30 | [-1, 1, Conv, [1024, 3, 1]], 31 | [-1, 1, Conv, [512, 1, 1]], 32 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 33 | 34 | [-2, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 37 | [-1, 1, Bottleneck, [512, False]], 38 | [-1, 1, Bottleneck, [512, False]], 39 | [-1, 1, Conv, [256, 1, 1]], 40 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 41 | 42 | [-2, 1, Conv, [128, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 45 | [-1, 1, Bottleneck, [256, False]], 46 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 47 | 48 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | -------------------------------------------------------------------------------- /models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 6 | - [10,14, 23,27, 37,58] # P4/16 7 | - [81,82, 135,169, 344,319] # P5/32 8 | 9 | # YOLOv3-tiny backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [16, 3, 1]], # 0 13 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 14 | [-1, 1, Conv, [32, 3, 1]], 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 16 | [-1, 1, Conv, [64, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 18 | [-1, 1, Conv, [128, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 20 | [-1, 1, Conv, [256, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 22 | [-1, 1, Conv, [512, 3, 1]], 23 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 24 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 25 | ] 26 | 27 | # YOLOv3-tiny head 28 | head: 29 | [[-1, 1, Conv, [1024, 3, 1]], 30 | [-1, 1, Conv, [256, 1, 1]], 31 
| [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 32 | 33 | [-2, 1, Conv, [128, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 36 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 37 | 38 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 39 | ] 40 | -------------------------------------------------------------------------------- /models/hub/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # darknet53 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Conv, [32, 3, 1]], # 0 14 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 15 | [-1, 1, Bottleneck, [64]], 16 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 17 | [-1, 2, Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 19 | [-1, 8, Bottleneck, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 21 | [-1, 8, Bottleneck, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 23 | [-1, 4, Bottleneck, [1024]], # 10 24 | ] 25 | 26 | # YOLOv3 head 27 | head: 28 | [[-1, 1, Bottleneck, [1024, False]], 29 | [-1, 1, Conv, [512, [1, 1]]], 30 | [-1, 1, Conv, [1024, 3, 1]], 31 | [-1, 1, Conv, [512, 1, 1]], 32 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 33 | 34 | [-2, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 37 | [-1, 1, Bottleneck, [512, False]], 38 | [-1, 1, Bottleneck, [512, False]], 39 | [-1, 1, Conv, [256, 1, 1]], 40 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 41 | 42 | [-2, 1, Conv, [128, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 45 | [-1, 1, Bottleneck, [256, False]], 46 | 
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 47 | 48 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | -------------------------------------------------------------------------------- /models/hub/yolov5-bifpn.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, C3, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 9, C3, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, C3, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 1, SPP, [1024, [5, 9, 13]]], 22 | [-1, 3, C3, [1024, False]], # 9 23 | ] 24 | 25 | # YOLOv5 BiFPN head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 30 | [-1, 3, C3, [512, False]], # 13 31 | 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 35 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 36 | 37 | [-1, 1, Conv, [256, 3, 2]], 38 | [[-1, 14, 6], 1, Concat, [1]], # cat P4 39 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], 42 | [[-1, 10], 1, Concat, [1]], # cat head P5 43 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 44 | 45 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | #
Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, Bottleneck, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 9, BottleneckCSP, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, BottleneckCSP, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 1, SPP, [1024, [5, 9, 13]]], 22 | [-1, 6, BottleneckCSP, [1024]], # 9 23 | ] 24 | 25 | # YOLOv5 FPN head 26 | head: 27 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 28 | 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 31 | [-1, 1, Conv, [512, 1, 1]], 32 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 33 | 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 36 | [-1, 1, Conv, [256, 1, 1]], 37 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 38 | 39 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 40 | ] 41 | -------------------------------------------------------------------------------- /models/hub/yolov5-p2.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 3 6 | 7 | # YOLOv5 backbone 8 | backbone: 9 | # [from, number, module, args] 10 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 11 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 12 | [-1, 3, C3, [128]], 13 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 14 | [-1, 9, C3, [256]], 15 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 16 | [-1, 9, C3, [512]], 17 
| [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 18 | [-1, 1, SPP, [1024, [5, 9, 13]]], 19 | [-1, 3, C3, [1024, False]], # 9 20 | ] 21 | 22 | # YOLOv5 head 23 | head: 24 | [[-1, 1, Conv, [512, 1, 1]], 25 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 26 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 27 | [-1, 3, C3, [512, False]], # 13 28 | 29 | [-1, 1, Conv, [256, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 32 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 33 | 34 | [-1, 1, Conv, [128, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 2], 1, Concat, [1]], # cat backbone P2 37 | [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall) 38 | 39 | [-1, 1, Conv, [128, 3, 2]], 40 | [[-1, 18], 1, Concat, [1]], # cat head P3 41 | [-1, 3, C3, [256, False]], # 24 (P3/8-small) 42 | 43 | [-1, 1, Conv, [256, 3, 2]], 44 | [[-1, 14], 1, Concat, [1]], # cat head P4 45 | [-1, 3, C3, [512, False]], # 27 (P4/16-medium) 46 | 47 | [-1, 1, Conv, [512, 3, 2]], 48 | [[-1, 10], 1, Concat, [1]], # cat head P5 49 | [-1, 3, C3, [1024, False]], # 30 (P5/32-large) 50 | 51 | [[24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 52 | ] 53 | -------------------------------------------------------------------------------- /models/hub/yolov5-p6.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 3 6 | 7 | # YOLOv5 backbone 8 | backbone: 9 | # [from, number, module, args] 10 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 11 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 12 | [-1, 3, C3, [128]], 13 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 14 | [-1, 9, C3, [256]], 15 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 16 | [-1, 9, C3, [512]], 17 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 18 | [-1, 3, C3, [768]], 19 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 20 | [-1, 1, SPP, 
[1024, [3, 5, 7]]], 21 | [-1, 3, C3, [1024, False]], # 11 22 | ] 23 | 24 | # YOLOv5 head 25 | head: 26 | [[-1, 1, Conv, [768, 1, 1]], 27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 28 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 29 | [-1, 3, C3, [768, False]], # 15 30 | 31 | [-1, 1, Conv, [512, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 34 | [-1, 3, C3, [512, False]], # 19 35 | 36 | [-1, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 39 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 40 | 41 | [-1, 1, Conv, [256, 3, 2]], 42 | [[-1, 20], 1, Concat, [1]], # cat head P4 43 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 44 | 45 | [-1, 1, Conv, [512, 3, 2]], 46 | [[-1, 16], 1, Concat, [1]], # cat head P5 47 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 48 | 49 | [-1, 1, Conv, [768, 3, 2]], 50 | [[-1, 12], 1, Concat, [1]], # cat head P6 51 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 52 | 53 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 54 | ] 55 | -------------------------------------------------------------------------------- /models/hub/yolov5-p7.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 3 6 | 7 | # YOLOv5 backbone 8 | backbone: 9 | # [from, number, module, args] 10 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 11 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 12 | [-1, 3, C3, [128]], 13 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 14 | [-1, 9, C3, [256]], 15 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 16 | [-1, 9, C3, [512]], 17 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 18 | [-1, 3, C3, [768]], 19 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 20 | [-1, 3, C3, [1024]], 21 | [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128 22 | [-1, 1,
SPP, [1280, [3, 5]]], 23 | [-1, 3, C3, [1280, False]], # 13 24 | ] 25 | 26 | # YOLOv5 head 27 | head: 28 | [[-1, 1, Conv, [1024, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 10], 1, Concat, [1]], # cat backbone P6 31 | [-1, 3, C3, [1024, False]], # 17 32 | 33 | [-1, 1, Conv, [768, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 36 | [-1, 3, C3, [768, False]], # 21 37 | 38 | [-1, 1, Conv, [512, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 41 | [-1, 3, C3, [512, False]], # 25 42 | 43 | [-1, 1, Conv, [256, 1, 1]], 44 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 45 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 46 | [-1, 3, C3, [256, False]], # 29 (P3/8-small) 47 | 48 | [-1, 1, Conv, [256, 3, 2]], 49 | [[-1, 26], 1, Concat, [1]], # cat head P4 50 | [-1, 3, C3, [512, False]], # 32 (P4/16-medium) 51 | 52 | [-1, 1, Conv, [512, 3, 2]], 53 | [[-1, 22], 1, Concat, [1]], # cat head P5 54 | [-1, 3, C3, [768, False]], # 35 (P5/32-large) 55 | 56 | [-1, 1, Conv, [768, 3, 2]], 57 | [[-1, 18], 1, Concat, [1]], # cat head P6 58 | [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge) 59 | 60 | [-1, 1, Conv, [1024, 3, 2]], 61 | [[-1, 14], 1, Concat, [1]], # cat head P7 62 | [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge) 63 | 64 | [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7) 65 | ] 66 | -------------------------------------------------------------------------------- /models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, 
args] 13 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, BottleneckCSP, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 9, BottleneckCSP, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, BottleneckCSP, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 1, SPP, [1024, [5, 9, 13]]], 22 | [-1, 3, BottleneckCSP, [1024, False]], # 9 23 | ] 24 | 25 | # YOLOv5 PANet head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 30 | [-1, 3, BottleneckCSP, [512, False]], # 13 31 | 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 35 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 36 | 37 | [-1, 1, Conv, [256, 3, 2]], 38 | [[-1, 14], 1, Concat, [1]], # cat head P4 39 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], 42 | [[-1, 10], 1, Concat, [1]], # cat head P5 43 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 44 | 45 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /models/hub/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 6 | - [19,27, 44,40, 38,94] # P3/8 7 | - [96,68, 86,152, 180,137] # P4/16 8 | - [140,301, 303,264, 238,542] # P5/32 9 | - [436,615, 739,380, 925,792] # P6/64 10 | 11 | # YOLOv5 backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 15 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 16 | [-1, 3, C3, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, 
[512, 3, 2]], # 5-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 22 | [-1, 3, C3, [768]], 23 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 24 | [-1, 1, SPP, [1024, [3, 5, 7]]], 25 | [-1, 3, C3, [1024, False]], # 11 26 | ] 27 | 28 | # YOLOv5 head 29 | head: 30 | [[-1, 1, Conv, [768, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 33 | [-1, 3, C3, [768, False]], # 15 34 | 35 | [-1, 1, Conv, [512, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 38 | [-1, 3, C3, [512, False]], # 19 39 | 40 | [-1, 1, Conv, [256, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 43 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 44 | 45 | [-1, 1, Conv, [256, 3, 2]], 46 | [[-1, 20], 1, Concat, [1]], # cat head P4 47 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 48 | 49 | [-1, 1, Conv, [512, 3, 2]], 50 | [[-1, 16], 1, Concat, [1]], # cat head P5 51 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 52 | 53 | [-1, 1, Conv, [768, 3, 2]], 54 | [[-1, 12], 1, Concat, [1]], # cat head P6 55 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 56 | 57 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 58 | ] 59 | -------------------------------------------------------------------------------- /models/hub/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | anchors: 6 | - [19,27, 44,40, 38,94] # P3/8 7 | - [96,68, 86,152, 180,137] # P4/16 8 | - [140,301, 303,264, 238,542] # P5/32 9 | - [436,615, 739,380, 925,792] # P6/64 10 | 11 | # YOLOv5 backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 15 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 16 | [-1, 3, C3, 
[128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 22 | [-1, 3, C3, [768]], 23 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 24 | [-1, 1, SPP, [1024, [3, 5, 7]]], 25 | [-1, 3, C3, [1024, False]], # 11 26 | ] 27 | 28 | # YOLOv5 head 29 | head: 30 | [[-1, 1, Conv, [768, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 33 | [-1, 3, C3, [768, False]], # 15 34 | 35 | [-1, 1, Conv, [512, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 38 | [-1, 3, C3, [512, False]], # 19 39 | 40 | [-1, 1, Conv, [256, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 43 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 44 | 45 | [-1, 1, Conv, [256, 3, 2]], 46 | [[-1, 20], 1, Concat, [1]], # cat head P4 47 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 48 | 49 | [-1, 1, Conv, [512, 3, 2]], 50 | [[-1, 16], 1, Concat, [1]], # cat head P5 51 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 52 | 53 | [-1, 1, Conv, [768, 3, 2]], 54 | [[-1, 12], 1, Concat, [1]], # cat head P6 55 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 56 | 57 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 58 | ] 59 | -------------------------------------------------------------------------------- /models/hub/yolov5s-transformer.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 
14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, C3, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 9, C3, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, C3, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 1, SPP, [1024, [5, 9, 13]]], 22 | [-1, 3, C3TR, [1024, False]], # 9 <-------- C3TR() Transformer module 23 | ] 24 | 25 | # YOLOv5 head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 30 | [-1, 3, C3, [512, False]], # 13 31 | 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 35 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 36 | 37 | [-1, 1, Conv, [256, 3, 2]], 38 | [[-1, 14], 1, Concat, [1]], # cat head P4 39 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], 42 | [[-1, 10], 1, Concat, [1]], # cat head P5 43 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 44 | 45 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /models/hub/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | anchors: 6 | - [19,27, 44,40, 38,94] # P3/8 7 | - [96,68, 86,152, 180,137] # P4/16 8 | - [140,301, 303,264, 238,542] # P5/32 9 | - [436,615, 739,380, 925,792] # P6/64 10 | 11 | # YOLOv5 backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 15 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 16 | [-1, 3, C3, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 22 | [-1, 
3, C3, [768]], 23 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 24 | [-1, 1, SPP, [1024, [3, 5, 7]]], 25 | [-1, 3, C3, [1024, False]], # 11 26 | ] 27 | 28 | # YOLOv5 head 29 | head: 30 | [[-1, 1, Conv, [768, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 33 | [-1, 3, C3, [768, False]], # 15 34 | 35 | [-1, 1, Conv, [512, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 38 | [-1, 3, C3, [512, False]], # 19 39 | 40 | [-1, 1, Conv, [256, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 43 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 44 | 45 | [-1, 1, Conv, [256, 3, 2]], 46 | [[-1, 20], 1, Concat, [1]], # cat head P4 47 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 48 | 49 | [-1, 1, Conv, [512, 3, 2]], 50 | [[-1, 16], 1, Concat, [1]], # cat head P5 51 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 52 | 53 | [-1, 1, Conv, [768, 3, 2]], 54 | [[-1, 12], 1, Concat, [1]], # cat head P6 55 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 56 | 57 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 58 | ] 59 | -------------------------------------------------------------------------------- /models/hub/yolov5x6.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | anchors: 6 | - [19,27, 44,40, 38,94] # P3/8 7 | - [96,68, 86,152, 180,137] # P4/16 8 | - [140,301, 303,264, 238,542] # P5/32 9 | - [436,615, 739,380, 925,792] # P6/64 10 | 11 | # YOLOv5 backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 15 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 16 | [-1, 3, C3, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, [512, 3, 
2]], # 5-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 22 | [-1, 3, C3, [768]], 23 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 24 | [-1, 1, SPP, [1024, [3, 5, 7]]], 25 | [-1, 3, C3, [1024, False]], # 11 26 | ] 27 | 28 | # YOLOv5 head 29 | head: 30 | [[-1, 1, Conv, [768, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 33 | [-1, 3, C3, [768, False]], # 15 34 | 35 | [-1, 1, Conv, [512, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 38 | [-1, 3, C3, [512, False]], # 19 39 | 40 | [-1, 1, Conv, [256, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 43 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 44 | 45 | [-1, 1, Conv, [256, 3, 2]], 46 | [[-1, 20], 1, Concat, [1]], # cat head P4 47 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 48 | 49 | [-1, 1, Conv, [512, 3, 2]], 50 | [[-1, 16], 1, Concat, [1]], # cat head P5 51 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 52 | 53 | [-1, 1, Conv, [768, 3, 2]], 54 | [[-1, 12], 1, Concat, [1]], # cat head P6 55 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 56 | 57 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 58 | ] 59 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, C3, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 9, 
C3, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, C3, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 1, SPP, [1024, [5, 9, 13]]], 22 | [-1, 3, C3, [1024, False]], # 9 23 | ] 24 | 25 | # YOLOv5 head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 30 | [-1, 3, C3, [512, False]], # 13 31 | 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 35 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 36 | 37 | [-1, 1, Conv, [256, 3, 2]], 38 | [[-1, 14], 1, Concat, [1]], # cat head P4 39 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], 42 | [[-1, 10], 1, Concat, [1]], # cat head P5 43 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 44 | 45 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, C3, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 9, C3, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, C3, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 1, SPP, [1024, [5, 9, 13]]], 22 | [-1, 3, C3, [1024, False]], # 9 23 | ] 24 | 25 | # YOLOv5 head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 6], 1, 
Concat, [1]], # cat backbone P4 30 | [-1, 3, C3, [512, False]], # 13 31 | 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 35 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 36 | 37 | [-1, 1, Conv, [256, 3, 2]], 38 | [[-1, 14], 1, Concat, [1]], # cat head P4 39 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], 42 | [[-1, 10], 1, Concat, [1]], # cat head P5 43 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 44 | 45 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, C3, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 9, C3, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, C3, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 1, SPP, [1024, [5, 9, 13]]], 22 | [-1, 3, C3, [1024, False]], # 9 23 | ] 24 | 25 | # YOLOv5 head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 30 | [-1, 3, C3, [512, False]], # 13 31 | 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 35 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 36 | 37 | [-1, 1, Conv, [256, 3, 2]], 38 | [[-1, 14], 1, Concat, [1]], # cat head P4 39 | 
[-1, 3, C3, [512, False]], # 20 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], 42 | [[-1, 10], 1, Concat, [1]], # cat head P5 43 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 44 | 45 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, C3, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 9, C3, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, C3, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 1, SPP, [1024, [5, 9, 13]]], 22 | [-1, 3, C3, [1024, False]], # 9 23 | ] 24 | 25 | # YOLOv5 head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 30 | [-1, 3, C3, [512, False]], # 13 31 | 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 35 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 36 | 37 | [-1, 1, Conv, [256, 3, 2]], 38 | [[-1, 14], 1, Concat, [1]], # cat head P4 39 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], 42 | [[-1, 10], 1, Concat, [1]], # cat head P5 43 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 44 | 45 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | 
# SiLU https://arxiv.org/pdf/1606.08415.pdf ----------------------------------------------------------------------------
class SiLU(nn.Module):  # export-friendly version of nn.SiLU()
    """Sigmoid-weighted linear unit implemented with plain tensor ops."""

    @staticmethod
    def forward(x):
        """Return x multiplied element-wise by sigmoid(x)."""
        gate = torch.sigmoid(x)
        return x * gate
# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
class FReLU(nn.Module):
    """Funnel activation: element-wise max(x, T(x)).

    T(x) is a depthwise k x k convolution (one filter per channel, no bias)
    followed by batch normalisation.

    Args:
        c1: number of input channels (the output has the same number).
        k: kernel size of the depthwise convolution. Use an odd value so the
            padding of k // 2 keeps the spatial size identical to the input.
    """

    def __init__(self, c1, k=3):  # ch_in, kernel
        super().__init__()
        # Padding has to follow the kernel size: with a fixed padding of 1 any k != 3 changes the
        # spatial size of the conv output and torch.max() in forward() fails on mismatched shapes.
        self.conv = nn.Conv2d(c1, c1, k, 1, k // 2, groups=c1, bias=False)
        self.bn = nn.BatchNorm2d(c1)

    def forward(self, x):
        """Return the element-wise maximum of x and its normalised depthwise convolution."""
        return torch.max(x, self.bn(self.conv(x)))
def check_anchor_order(m):
    # Compare the direction of anchor areas with the direction of strides for YOLOv5 Detect() module m;
    # when they disagree, flip m.anchors and m.anchor_grid in place along dim 0
    areas = m.anchor_grid.prod(-1).view(-1)  # w * h of every anchor, flattened
    area_delta = areas[-1] - areas[0]  # last anchor area minus first
    stride_delta = m.stride[-1] - m.stride[0]  # last stride minus first
    if area_delta.sign() == stride_delta.sign():
        return  # anchors already follow the stride order
    print('Reversing anchor order')
    m.anchors[:] = m.anchors.flip(0)
    m.anchor_grid[:] = m.anchor_grid.flip(0)
def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        Arguments:
            dataset: path to data.yaml, or a loaded dataset
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print all results

        Return:
            k: kmeans evolved anchors, sorted by area from small to large

        Raises:
            AssertionError: if scipy's kmeans returns fewer than n cluster centres

        Usage:
            from utils.autoanchor import *; _ = kmean_anchors()
    """
    from scipy.cluster.vq import kmeans

    thr = 1. / thr  # from here on thr is the inverse ratio that the metric is compared against
    prefix = colorstr('autoanchor: ')

    def metric(k, wh):  # compute metrics
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        # x = wh_iou(wh, torch.tensor(k))  # iou metric
        return x, x.max(1)[0]  # x, best_x

    def anchor_fitness(k):  # mutation fitness
        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
        return (best * (best > thr).float()).mean()  # fitness

    def print_results(k):
        k = k[np.argsort(k.prod(1))]  # sort small to large
        x, best = metric(k, wh0)
        bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
        print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr')
        print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, '
              f'past_thr={x[x > thr].mean():.3f}-mean: ', end='')
        for i, anchor in enumerate(k):  # separate name so the metric tensor x above is not shadowed
            print('%i,%i' % (round(anchor[0]), round(anchor[1])), end=', ' if i < len(k) - 1 else '\n')  # use in *.cfg
        return k

    if isinstance(dataset, str):  # *.yaml file
        with open(dataset, encoding='ascii', errors='ignore') as f:
            data_dict = yaml.safe_load(f)  # model dict
        from utils.datasets import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)

    # Get label wh
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
    wh = wh0[(wh0 >= 2.0).any(1)]  # filter > 2 pixels
    # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1

    # Kmeans calculation
    print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...')
    s = wh.std(0)  # sigmas for whitening
    k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance
    # The message must be a string: passing print(...) here would attach None to the AssertionError,
    # leaving callers that report the exception with nothing to show.
    assert len(k) == n, f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}'
    k *= s  # undo the whitening
    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
    k = print_results(k)

    # Plot
    # k, d = [None] * 20, [None] * 20
    # for i in tqdm(range(1, 21)):
    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
    # ax = ax.ravel()
    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
    # ax[0].hist(wh[wh[:, 0]<100, 0],400)
    # ax[1].hist(wh[wh[:, 1]<100, 1],400)
    # fig.savefig('wh.png', dpi=200)

    # Evolve
    npr = np.random
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, anchor array shape, mutation prob, sigma
    pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:')  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:  # keep the mutation only when it improves fitness
            f, k = fg, kg.copy()
            pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
            if verbose:
                print_results(k)

    return print_results(k)
# Resume all interrupted trainings in yolov5/ dir including DDP trainings
# Usage: $ python utils/aws/resume.py

import os
import sys
from pathlib import Path

import torch
import yaml

sys.path.append('./')  # to run '$ python *.py' files in subdirectories

port = 0  # --master_port, incremented before every multi-GPU launch
root = Path('').resolve()
for last in root.rglob('*/**/last.pt'):
    checkpoint = torch.load(last)
    if checkpoint['optimizer'] is None:
        continue  # skip checkpoints whose 'optimizer' entry is None

    # Load opt.yaml from two directories above the checkpoint
    opt_file = last.parent.parent / 'opt.yaml'
    with open(opt_file) as f:
        opt = yaml.safe_load(f)

    # Get device count
    devices = opt['device'].split(',')
    nd = len(devices)  # number of devices
    # NOTE(review): str.split() always yields at least one item, so the nd == 0 branch looks unreachable - confirm intent
    ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1)  # distributed data parallel

    if not ddp:  # single-GPU
        cmd = f'python train.py --resume {last}'
    else:  # multi-GPU
        port += 1
        cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}'

    cmd += ' > /dev/null 2>&1 &'  # discard output and leave the command running in the background
    print(cmd)
    os.system(cmd)
class Callbacks:
    """
    Handles all registered callbacks for YOLOv5 Hooks
    """

    def __init__(self):
        # The registry is created per instance. As a class attribute the dict (and its lists) would be
        # shared, so an action registered on one Callbacks object would also fire on every other one.
        self._callbacks = {
            'on_pretrain_routine_start': [],
            'on_pretrain_routine_end': [],

            'on_train_start': [],
            'on_train_epoch_start': [],
            'on_train_batch_start': [],
            'optimizer_step': [],
            'on_before_zero_grad': [],
            'on_train_batch_end': [],
            'on_train_epoch_end': [],

            'on_val_start': [],
            'on_val_batch_start': [],
            'on_val_image_end': [],
            'on_val_batch_end': [],
            'on_val_end': [],

            'on_fit_epoch_end': [],  # fit = train + val
            'on_model_save': [],
            'on_train_end': [],

            'teardown': [],
        }

    def register_action(self, hook, name='', callback=None):
        """
        Register a new action to a callback hook

        Args:
            hook        The callback hook name to register the action to
            name        The name of the action
            callback    The callback to fire
        """
        assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
        assert callable(callback), f"callback '{callback}' is not callable"
        self._callbacks[hook].append({'name': name, 'callback': callback})

    def get_registered_actions(self, hook=None):
        """
        Returns all the registered actions by callback hook

        Args:
            hook The name of the hook to check, defaults to all

        Returns:
            The list of actions for `hook`, or the whole hook -> actions dict when no hook is given
        """
        if hook:
            return self._callbacks[hook]
        else:
            return self._callbacks

    def run_callbacks(self, hook, *args, **kwargs):
        """
        Loop through the registered actions and fire all callbacks

        Args:
            hook The name of the hook whose actions are fired; an unknown name raises KeyError
            args, kwargs Passed unchanged to every callback
        """
        for logger in self._callbacks[hook]:
            # print(f"Running callbacks.{logger['callback'].__name__}()")
            logger['callback'](*args, **kwargs)

    def on_pretrain_routine_start(self, *args, **kwargs):
        """
        Fires all registered callbacks at the start of each pretraining routine
        """
        self.run_callbacks('on_pretrain_routine_start', *args, **kwargs)

    def on_pretrain_routine_end(self, *args, **kwargs):
        """
        Fires all registered callbacks at the end of each pretraining routine
        """
        self.run_callbacks('on_pretrain_routine_end', *args, **kwargs)

    def on_train_start(self, *args, **kwargs):
        """
        Fires all registered callbacks at the start of each training
        """
        self.run_callbacks('on_train_start', *args, **kwargs)

    def on_train_epoch_start(self, *args, **kwargs):
        """
        Fires all registered callbacks at the start of each training epoch
        """
        self.run_callbacks('on_train_epoch_start', *args, **kwargs)

    def on_train_batch_start(self, *args, **kwargs):
        """
        Fires all registered callbacks at the start of each training batch
        """
        self.run_callbacks('on_train_batch_start', *args, **kwargs)

    def optimizer_step(self, *args, **kwargs):
        """
        Fires all registered callbacks on each optimizer step
        """
        self.run_callbacks('optimizer_step', *args, **kwargs)

    def on_before_zero_grad(self, *args, **kwargs):
        """
        Fires all registered callbacks before zero grad
        """
        self.run_callbacks('on_before_zero_grad', *args, **kwargs)

    def on_train_batch_end(self, *args, **kwargs):
        """
        Fires all registered callbacks at the end of each training batch
        """
        self.run_callbacks('on_train_batch_end', *args, **kwargs)

    def on_train_epoch_end(self, *args, **kwargs):
        """
        Fires all registered callbacks at the end of each training epoch
        """
        self.run_callbacks('on_train_epoch_end', *args, **kwargs)

    def on_val_start(self, *args, **kwargs):
        """
        Fires all registered callbacks at the start of the validation
        """
        self.run_callbacks('on_val_start', *args, **kwargs)

    def on_val_batch_start(self, *args, **kwargs):
        """
        Fires all registered callbacks at the start of each validation batch
        """
        self.run_callbacks('on_val_batch_start', *args, **kwargs)

    def on_val_image_end(self, *args, **kwargs):
        """
        Fires all registered callbacks at the end of each val image
        """
        self.run_callbacks('on_val_image_end', *args, **kwargs)

    def on_val_batch_end(self, *args, **kwargs):
        """
        Fires all registered callbacks at the end of each validation batch
        """
        self.run_callbacks('on_val_batch_end', *args, **kwargs)

    def on_val_end(self, *args, **kwargs):
        """
        Fires all registered callbacks at the end of the validation
        """
        self.run_callbacks('on_val_end', *args, **kwargs)

    def on_fit_epoch_end(self, *args, **kwargs):
        """
        Fires all registered callbacks at the end of each fit (train+val) epoch
        """
        self.run_callbacks('on_fit_epoch_end', *args, **kwargs)

    def on_model_save(self, *args, **kwargs):
        """
        Fires all registered callbacks after each model save
        """
        self.run_callbacks('on_model_save', *args, **kwargs)

    def on_train_end(self, *args, **kwargs):
        """
        Fires all registered callbacks at the end of training
        """
        self.run_callbacks('on_train_end', *args, **kwargs)

    def teardown(self, *args, **kwargs):
        """
        Fires all registered callbacks before teardown
        """
        self.run_callbacks('teardown', *args, **kwargs)
Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. 46 | if str(file).startswith(('http:/', 'https:/')): # download 47 | url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ 48 | name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... 49 | safe_download(file=name, url=url, min_bytes=1E5) 50 | return name 51 | 52 | # GitHub assets 53 | file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required) 54 | try: 55 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 56 | assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] 57 | tag = response['tag_name'] # i.e. 'v1.0' 58 | except: # fallback plan 59 | assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 60 | 'yolov5s6.pt', 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt'] 61 | try: 62 | tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1] 63 | except: 64 | tag = 'v5.0' # current release 65 | 66 | if name in assets: 67 | safe_download(file, 68 | url=f'https://github.com/{repo}/releases/download/{tag}/{name}', 69 | # url2=f'https://storage.googleapis.com/{repo}/ckpt/{name}', # backup url (optional) 70 | min_bytes=1E5, 71 | error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/') 72 | 73 | return str(file) 74 | 75 | 76 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): 77 | # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download() 78 | t = time.time() 79 | file = Path(file) 80 | cookie = Path('cookie') # gdrive cookie 81 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... 
def get_token(cookie="./cookie"):
    # Return the last whitespace-separated field of the first line in the cookie file that contains "download",
    # or an empty string when no line matches
    with open(cookie) as f:
        match = next((row for row in f if "download" in row), None)
    return "" if match is None else match.split()[-1]
135 | # 136 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 137 | # # Uploads a blob from a bucket 138 | # storage_client = storage.Client() 139 | # bucket = storage_client.get_bucket(bucket_name) 140 | # blob = bucket.blob(source_blob_name) 141 | # 142 | # blob.download_to_filename(destination_file_name) 143 | # 144 | # print('Blob {} downloaded to {}.'.format( 145 | # source_blob_name, 146 | # destination_file_name)) 147 | -------------------------------------------------------------------------------- /utils/flask_rest_api/README.md: -------------------------------------------------------------------------------- 1 | # Flask REST API 2 | [REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/). 3 | 4 | ## Requirements 5 | 6 | [Flask](https://palletsprojects.com/p/flask/) is required. 
Install with: 7 | ```shell 8 | $ pip install Flask 9 | ``` 10 | 11 | ## Run 12 | 13 | After Flask installation run: 14 | 15 | ```shell 16 | $ python3 restapi.py --port 5000 17 | ``` 18 | 19 | Then use [curl](https://curl.se/) to perform a request: 20 | 21 | ```shell 22 | $ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s' 23 | ``` 24 | 25 | The model inference results are returned as a JSON response: 26 | 27 | ```json 28 | [ 29 | { 30 | "class": 0, 31 | "confidence": 0.8900438547, 32 | "height": 0.9318675399, 33 | "name": "person", 34 | "width": 0.3264600933, 35 | "xcenter": 0.7438579798, 36 | "ycenter": 0.5207948685 37 | }, 38 | { 39 | "class": 0, 40 | "confidence": 0.8440024257, 41 | "height": 0.7155083418, 42 | "name": "person", 43 | "width": 0.6546785235, 44 | "xcenter": 0.427829951, 45 | "ycenter": 0.6334488392 46 | }, 47 | { 48 | "class": 27, 49 | "confidence": 0.3771208823, 50 | "height": 0.3902671337, 51 | "name": "tie", 52 | "width": 0.0696444362, 53 | "xcenter": 0.3675483763, 54 | "ycenter": 0.7991207838 55 | }, 56 | { 57 | "class": 27, 58 | "confidence": 0.3527112305, 59 | "height": 0.1540903747, 60 | "name": "tie", 61 | "width": 0.0336618312, 62 | "xcenter": 0.7814827561, 63 | "ycenter": 0.5065554976 64 | } 65 | ] 66 | ``` 67 | 68 | An example python script to perform inference using [requests](https://docs.python-requests.org/en/master/) is given in `example_request.py` 69 | -------------------------------------------------------------------------------- /utils/flask_rest_api/example_request.py: -------------------------------------------------------------------------------- 1 | """Perform test request""" 2 | import pprint 3 | 4 | import requests 5 | 6 | DETECTION_URL = "http://localhost:5000/v1/object-detection/yolov5s" 7 | TEST_IMAGE = "zidane.jpg" 8 | 9 | image_data = open(TEST_IMAGE, "rb").read() 10 | 11 | response = requests.post(DETECTION_URL, files={"image": image_data}).json() 12 | 13 | 
@app.route(DETECTION_URL, methods=["POST"])
def predict():
    """Run the YOLOv5 model on an uploaded image and return the detections.

    Expects a multipart/form-data POST carrying the image in the ``image``
    form field.

    Returns:
        On success, the JSON string produced by
        ``results.pandas().xyxy[0].to_json(orient="records")``.
        On a missing or undecodable upload, a ``(message, 400)`` tuple so the
        client gets a "bad request" instead of a server error.
    """
    # The route only accepts POST, so no explicit request.method check is needed.
    image_file = request.files.get("image")
    if not image_file:
        # Previously the view fell through and returned None, which Flask
        # rejects as an invalid response (HTTP 500).
        return "Missing image: upload a file in the 'image' form field", 400

    try:
        img = Image.open(io.BytesIO(image_file.read()))
    except OSError:  # PIL raises an OSError subclass for unreadable image data
        return "Uploaded file is not a readable image", 400

    results = model(img, size=640)  # reduce size=320 for faster inference
    return results.pandas().xyxy[0].to_json(orient="records")
10 | ENV VIRTUAL_ENV /env 11 | ENV PATH /env/bin:$PATH 12 | 13 | RUN apt-get update && apt-get install -y python-opencv 14 | 15 | # Copy the application's requirements.txt and run pip to install all 16 | # dependencies into the virtualenv. 17 | ADD requirements.txt /app/requirements.txt 18 | RUN pip install -r /app/requirements.txt 19 | 20 | # Add the application source code. 21 | ADD . /app 22 | 23 | # Run a WSGI server to serve the application. gunicorn must be declared as 24 | # a dependency in requirements.txt. 25 | CMD gunicorn -b :$PORT main:app 26 | -------------------------------------------------------------------------------- /utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==19.2 3 | Flask==1.0.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /utils/google_app_engine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | 4 | service: yolov5app 5 | 6 | liveness_check: 7 | initial_delay_sec: 600 8 | 9 | manual_scaling: 10 | instances: 1 11 | resources: 12 | cpu: 1 13 | memory_gb: 4 14 | disk_size_gb: 20 -------------------------------------------------------------------------------- /utils/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /utils/loggers/wandb/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /utils/loggers/wandb/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from wandb_utils import WandbLogger 4 | 5 | 
def sweep():
    """Run one training job using the values supplied by the W&B sweep agent.

    Starts a W&B run, reads the ``_items`` entry from the config object's
    attribute dict, copies the batch size, epoch count and data path from it
    onto the parsed training options, and calls ``train`` with that same dict
    as the hyperparameters.
    """
    wandb.init()
    sweep_values = vars(wandb.config).get("_items")  # hyp dict from the sweep agent

    # Workaround: build the remaining required options from the CLI defaults
    opt = parse_opt(known=True)
    run_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)
    opt.save_dir = str(run_dir)
    opt.nosave = True
    for key in ("batch_size", "epochs", "data"):
        setattr(opt, key, sweep_values.get(key))

    train(sweep_values, opt, select_device(opt.device, batch_size=opt.batch_size))
__name__ == "__main__": 33 | sweep() 34 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for training 2 | # To set range- 3 | # Provide min and max values as: 4 | # parameter: 5 | # 6 | # min: scalar 7 | # max: scalar 8 | # OR 9 | # 10 | # Set a specific list of search space- 11 | # parameter: 12 | # values: [scalar1, scalar2, scalar3...] 13 | # 14 | # You can use grid, bayesian and hyperopt search strategy 15 | # For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration 16 | 17 | program: utils/loggers/wandb/sweep.py 18 | method: random 19 | metric: 20 | name: metrics/mAP_0.5 21 | goal: maximize 22 | 23 | parameters: 24 | # hyperparameters: set either min, max range or values list 25 | data: 26 | value: "data/coco128.yaml" 27 | batch_size: 28 | values: [64] 29 | epochs: 30 | values: [10] 31 | 32 | lr0: 33 | distribution: uniform 34 | min: 1e-5 35 | max: 1e-1 36 | lrf: 37 | distribution: uniform 38 | min: 0.01 39 | max: 1.0 40 | momentum: 41 | distribution: uniform 42 | min: 0.6 43 | max: 0.98 44 | weight_decay: 45 | distribution: uniform 46 | min: 0.0 47 | max: 0.001 48 | warmup_epochs: 49 | distribution: uniform 50 | min: 0.0 51 | max: 5.0 52 | warmup_momentum: 53 | distribution: uniform 54 | min: 0.0 55 | max: 0.95 56 | warmup_bias_lr: 57 | distribution: uniform 58 | min: 0.0 59 | max: 0.2 60 | box: 61 | distribution: uniform 62 | min: 0.02 63 | max: 0.2 64 | cls: 65 | distribution: uniform 66 | min: 0.2 67 | max: 4.0 68 | cls_pw: 69 | distribution: uniform 70 | min: 0.5 71 | max: 2.0 72 | obj: 73 | distribution: uniform 74 | min: 0.2 75 | max: 4.0 76 | obj_pw: 77 | distribution: uniform 78 | min: 0.5 79 | max: 2.0 80 | iou_t: 81 | distribution: uniform 82 | min: 0.1 83 | max: 0.7 84 | anchor_t: 85 | distribution: uniform 86 | min: 2.0 87 | max: 8.0 88 | 
import cv2
import sys
import time
from pathlib import Path
import torch
import torch.backends.cudnn as cudnn

from utils.augmentations import letterbox
from utils.general import check_img_size, non_max_suppression, scale_coords
from utils.plots import plot_one_box
import random
import numpy as np
from models.experimental import attempt_load
from estimateDistanceUtil import *

# Real-world width constant used for each class that gets a distance estimate.
# Classes not listed here are drawn with a distance of 0.
_KNOWN_WIDTHS = {
    'person': KNOWN_PERSON_WIDTH,
    'bus': KNOWN_BUS_WIDTH,
    'car': KNOWN_CAR_WIDTH,
    'motorcycle': KNOWN_MOTORCYCLE_WIDTH,
}


def init():
    """Load the YOLOv5s model and prepare everything needed for inference.

    Returns:
        Tuple ``(device, half, model, names, colors)``: the torch device, whether
        FP16 is used, the loaded model, its class names, and one random colour
        (list of three ints in 0-255) per class.
    """
    FILE = Path(__file__).absolute()
    sys.path.append(FILE.parents[0].as_posix())  # add yolov5/ to path

    # Fall back to CPU instead of failing when no CUDA device is present
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # Half precision only supported on CUDA. The previous test
    # `device.type != True` compared a string with a bool and was always True.
    half = device.type != 'cpu'

    model = attempt_load('yolov5s.pt', map_location=device)  # load FP32 model
    imgsz = check_img_size(640, s=model.stride.max())  # check img_size

    if half:
        model.half()  # to FP16
    cudnn.benchmark = True  # set True to speed up constant image size inference

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    if device.type != 'cpu':
        warmup = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        model(warmup.half() if half else warmup)  # run once
    return device, half, model, names, colors


def predict_img(imgs, device, half, model):
    """Run detection on a list of images as read by OpenCV.

    Args:
        imgs: List of image arrays (channel order is reversed below, BGR to RGB).
        device: Torch device the batch is moved to.
        half: If true the batch is converted to FP16, otherwise FP32.
        model: Loaded detection model.

    Returns:
        Tuple ``(img, pred)``: the preprocessed batch tensor and the per-image
        detections returned by ``non_max_suppression``.
    """
    batch = np.stack([letterbox(x, new_shape=640, auto=True)[0] for x in imgs], 0)
    batch = batch[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bs x 3 x H x W
    batch = np.ascontiguousarray(batch)

    img = torch.from_numpy(batch).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    pred = model(img, augment=False)[0]

    # Apply NMS, keeping only the listed class indices
    pred = non_max_suppression(pred, 0.25, 0.45, classes=[0, 1, 2, 3, 5, 6, 7], agnostic=False)
    return img, pred


def ref_img_information(img_path, device, half, model, names=None):
    """Compute a focal length from a reference image of a known object.

    Args:
        img_path: Path of the reference image.
        device, half, model: Passed through to ``predict_img``.
        names: Class names indexed by class id. Defaults to the model's own
            names, so the function no longer depends on a script-level global.

    Returns:
        The focal length from the last matching detection, or 0 if none matched.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    if names is None:
        names = model.module.names if hasattr(model, 'module') else model.names

    ref = cv2.imread(img_path)
    if ref is None:  # cv2.imread signals failure by returning None
        raise FileNotFoundError(f'Failed to read reference image {img_path}')
    imgs = [ref]

    img, pred = predict_img(imgs, device, half, model)
    focal_length_found = 0
    for i, det in enumerate(pred):  # detections per image
        im0 = imgs[i]
        if not len(det):
            continue
        # Rescale boxes from img_size to im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
        for *xyxy, conf, cls in reversed(det):
            name = names[int(cls)]
            score = round(float(conf), 2)
            width = int(xyxy[2]) - int(xyxy[0])
            # NOTE(review): the exact-confidence tests and pixel offsets below
            # presumably single out one detection in the bundled reference
            # images - confirm before using other reference images.
            if name == 'person':
                # KNOWN_PRESON_DISTANCE: spelling kept as used by the original call
                focal_length_found = focal_length(width, KNOWN_PRESON_DISTANCE, KNOWN_PERSON_WIDTH)
            elif name == 'bus' and score == 0.61:
                focal_length_found = focal_length(width - 400, KNOWN_BUS_DISTANCE, KNOWN_BUS_WIDTH)
            elif name == 'car' and score == 0.47:
                focal_length_found = focal_length(width - 30, KNOWN_CAR_DISTANCE, KNOWN_CAR_WIDTH)
            elif name == 'motorcycle':
                focal_length_found = focal_length(width, KNOWN_MOTORCYCLE_DISTANCE, KNOWN_MOTORCYCLE_WIDTH)
    return focal_length_found


def _estimate_distance(name, xyxy, focal_lengths):
    """Return the estimated distance for one detection, or 0 if not applicable."""
    real_width = _KNOWN_WIDTHS.get(name)
    width_in_frame = int(xyxy[2]) - int(xyxy[0])
    if real_width is None or width_in_frame <= 0:
        # Unsupported class, or a degenerate box that cannot give a distance
        return 0
    # Use the focal length calibrated for this class. The person value is the
    # fallback when the class reference image produced no match (focal length 0);
    # it was previously used for every class.
    focal = focal_lengths.get(name) or focal_lengths['person']
    return distance_finder(focal, width_in_frame, real_width)


def main():
    """Annotate the test video with detections and distances, and save the result."""
    device, half, model, names, colors = init()
    reference_images = {
        'bus': 'data/images/Ref_bus.jpg',
        'car': 'data/images/Ref_car.jpg',
        'motorcycle': 'data/images/Ref_motorcycle.jpg',
        'person': 'data/images/Ref_person.png',
    }
    focal_lengths = {name: ref_img_information(path, device, half, model, names)
                     for name, path in reference_images.items()}

    video_path = 'data/video/testVideo.mp4'
    save_path = 'data/video/videoResult.mp4'
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():  # explicit raise: asserts are stripped under -O
        raise OSError(f'Failed to open {video_path}')
    # get video information
    fps = cap.get(cv2.CAP_PROP_FPS)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
    try:
        ret, frame = cap.read()
        while ret:
            img, pred = predict_img([frame], device, half, model)
            im0 = frame.copy()
            for det in pred:  # one entry per input image; a single frame is passed
                if not len(det):
                    continue
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    name = names[int(cls)]
                    distance = _estimate_distance(name, xyxy, focal_lengths)
                    label = f'{name} {conf:.2f} {distance:.3f}m'
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
            cv2.imshow('a', im0)
            cv2.waitKey(20)
            # transform frame to video
            vid_writer.write(im0)
            ret, frame = cap.read()
    finally:
        # Release the writer as well so the output file is finalized
        cap.release()
        vid_writer.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()