├── CONTRIBUTING.md ├── License.txt ├── README.md ├── brambox ├── __init__.py ├── boxes │ ├── __init__.py │ ├── annotations │ │ ├── __init__.py │ │ ├── annotation.py │ │ ├── cvc.py │ │ ├── darknet.py │ │ ├── dollar.py │ │ ├── formats.py │ │ ├── kitti.py │ │ ├── pascalvoc.py │ │ ├── pickle.py │ │ ├── vatic.py │ │ └── yaml.py │ ├── box.py │ ├── detections │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── detection.py │ │ ├── dollar.py │ │ ├── formats.py │ │ ├── pascalvoc.py │ │ ├── pickle.py │ │ └── yaml.py │ ├── formats.py │ ├── statistics │ │ ├── __init__.py │ │ ├── mr_fppi.py │ │ ├── pr.py │ │ └── util.py │ └── util │ │ ├── __init__.py │ │ ├── convert.py │ │ ├── filters.py │ │ ├── modifiers.py │ │ ├── path.py │ │ └── visual.py └── transforms │ ├── __init__.py │ └── channelMixer.py ├── cfgs ├── README.md ├── main.yml ├── region_light_xception.yml ├── region_mobilenet.yml ├── region_mobilenetv2.yml ├── region_shufflenet.yml ├── region_shufflenetv2.yml ├── region_squeezenext.yml ├── region_xception.yml ├── tiny_yolov2.yml ├── tiny_yolov3.yml ├── yolov2.yml └── yolov3.yml ├── examples ├── labels.py ├── simple_speed.py ├── speed.py ├── test.py └── train.py ├── figures └── OSD_logo.PNG ├── outputs └── README.md ├── results └── README.md ├── utils ├── __init__.py ├── cfg_parser.py ├── envs.py ├── fileproc.py └── test │ ├── Makefile │ ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── person_eval.py │ ├── person_head.py │ └── voc_eval.py │ ├── fast_rcnn │ ├── __init__.py │ ├── config.py │ ├── nms_wrapper.py │ └── test.py │ ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py │ ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h │ ├── setup.py │ ├── utils │ ├── .gitignore │ ├── __init__.py │ └── bbox.pyx │ └── voc_wrapper.py ├── vedanet ├── __init__.py ├── data │ ├── __init__.py │ ├── _dataloading.py │ ├── _dataset_brambox.py │ ├── _dataset_darknet.py │ └── transform │ │ ├── __init__.py │ │ ├── _postprocess.py │ │ ├── _preprocess.py │ │ └── util.py ├── engine │ ├── __init__.py │ ├── _speed.py │ ├── _voc_test.py │ ├── _voc_train.py │ └── engine.py ├── hyperparams.py ├── loss │ ├── __init__.py │ ├── _regionloss.py │ ├── _yololoss.py │ └── util.py ├── models │ ├── __init__.py │ ├── _darknet.py │ ├── _lightnet.py │ ├── _region_light_xception.py │ ├── _region_mobilenet.py │ ├── _region_mobilenetv2.py │ ├── _region_shufflenet.py │ ├── _region_shufflenetv2.py │ ├── _region_squeezenext.py │ ├── _region_xception.py │ ├── _tiny_yolov2.py │ ├── _tiny_yolov3.py │ ├── _yolov2.py │ ├── _yolov3.py │ └── yolo_abc.py └── network │ ├── __init__.py │ ├── backbone │ ├── __init__.py │ ├── _darknet19.py │ ├── _darknet53.py │ ├── _light_xception.py │ ├── _mobilenet.py │ ├── _mobilenetv2.py │ ├── _shufflenet.py │ ├── _shufflenetv2.py │ ├── _squeezenext.py │ ├── _tiny_yolov2.py │ ├── _tiny_yolov3.py │ ├── _xception.py │ └── brick │ │ ├── __init__.py │ │ ├── darknet53.py │ │ ├── light_xception.py │ │ ├── mobilenet.py │ │ ├── mobilenetv2.py │ │ ├── shufflenet.py │ │ ├── shufflenetv2.py │ │ ├── squeezenext.py │ │ └── xception.py │ ├── head │ ├── __init__.py │ ├── _region_light_xception.py │ ├── _region_mobilenet.py │ ├── _region_mobilenetv2.py │ ├── _region_shufflenet.py │ ├── _region_shufflenetv2.py │ ├── _region_squeezenext.py │ ├── _region_xception.py │ 
├── _tiny_yolov2.py │ ├── _tiny_yolov3.py │ ├── _yolov2.py │ ├── _yolov3.py │ └── brick │ │ ├── __init__.py │ │ └── yolov3.py │ └── layer │ ├── __init__.py │ └── _darknet.py └── weights └── README.md /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Request for contributions 2 | 3 | Please contribute to this repository if any of the following is true: 4 | - You have expertise in community development, communication, or education 5 | - You want open source communities to be more collaborative and inclusive 6 | - You want to help lower the burden for first-time contributors 7 | 8 | # How to contribute 9 | 10 | Prerequisites: 11 | 12 | - Familiarity with [pull requests](https://help.github.com/articles/using-pull-requests) and [issues](https://guides.github.com/features/issues/). 13 | - Knowledge of [Markdown](https://help.github.com/articles/markdown-basics/) for editing `.md` documents. 14 | 15 | Please make sure the following conditions are met before raising an issue: 16 | 17 | - It must be a bug report or a feature request 18 | - It must be an OSD-related issue. 19 | - You have searched the existing issues, and no similar issue or solution has been found. -------------------------------------------------------------------------------- /brambox/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # BRAMBOX: Basic Recipes for Annotations and Modeling Toolbox 3 | # Copyright EAVISE 4 | # 5 | 6 | #from .version import __version__ 7 | 8 | from . import boxes 9 | from . import transforms 10 | 11 | __all__ = ['boxes', 'transforms'] 12 | -------------------------------------------------------------------------------- /brambox/boxes/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Brambox boxes module |br| 3 | This package contains parsers for various annotation and detection formats. 4 | You can use this package to convert formats, visualize image annotations and compute statistics on your detections. 5 | """ 6 | 7 | from .box import Box 8 | from .formats import * 9 | from . import annotations 10 | from . import detections 11 | 12 | from .statistics import * 13 | from .util import * 14 | -------------------------------------------------------------------------------- /brambox/boxes/annotations/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Brambox boxes annotations module |br| 3 | This package contains the actual annotation parsers. These parsers can be used to parse and generate annotation files.
4 | """ 5 | 6 | # Formats 7 | from .cvc import * 8 | from .darknet import * 9 | from .dollar import * 10 | from .kitti import * 11 | from .pascalvoc import * 12 | from .pickle import * 13 | from .vatic import * 14 | from .yaml import * 15 | 16 | # Extra 17 | from .annotation import Annotation 18 | from .formats import * 19 | -------------------------------------------------------------------------------- /brambox/boxes/annotations/cvc.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Maarten Vandersteegen 4 | # 5 | """ 6 | CVC 7 | --- 8 | """ 9 | 10 | from .annotation import * 11 | 12 | __all__ = ["CvcAnnotation", "CvcParser"] 13 | 14 | 15 | class CvcAnnotation(Annotation): 16 | """ Cvc image annotation """ 17 | 18 | def serialize(self): 19 | """ generate a cvc annotation string 20 | 21 | Note that this format does not support a class label 22 | """ 23 | string = "{} {} {} {} 1 0 0 0 0 {} 0" \ 24 | .format(round(self.x_top_left + self.width / 2), 25 | round(self.y_top_left + self.height / 2), 26 | round(self.width), 27 | round(self.height), 28 | int(self.object_id)) 29 | 30 | return string 31 | 32 | def deserialize(self, string): 33 | """ parse a cvc annotation string 34 | 35 | x,y are the center of a box 36 | """ 37 | elements = string.split() 38 | self.width = float(elements[2]) 39 | self.height = float(elements[3]) 40 | self.x_top_left = float(elements[0]) - self.width / 2 41 | self.y_top_left = float(elements[1]) - self.height / 2 42 | self.object_id = int(elements[9]) 43 | 44 | self.lost = False 45 | self.occluded = False 46 | 47 | 48 | class CvcParser(Parser): 49 | """ 50 | This parser is designed to parse the CVC_ pedestrian dataset collection. 51 | The CVC format has one .txt file for every image of the dataset where each line within a file represents a bounding box. 52 | Each line is a space separated list of values structured as follows: 53 | 54 | 55 | 56 | ========= =========== 57 | Name Description 58 | ========= =========== 59 | x center x coordinate of the bounding box in pixels (integer) 60 | y center y coordinate of the bounding box in pixels (integer) 61 | w width of the bounding box in pixels (integer) 62 | h height of the bounding box in pixels (integer) 63 | mandatory 1 if the pedestrian is mandatory for training and testing, 0 for optional 64 | track_id identifier of the track this object is following (integer) 65 | ========= =========== 66 | 67 | Example: 68 | >>> image_000.txt 69 | 97 101 18 52 1 0 0 0 0 1 0 70 | 121 105 15 46 1 0 0 0 0 2 0 71 | 505 99 14 41 1 0 0 0 0 3 0 72 | 73 | Warning: 74 | This parser is only tested on the CVC-14 dataset 75 | 76 | .. 
_CVC: http://adas.cvc.uab.es/elektra/datasets/pedestrian-detection/ 77 | """ 78 | parser_type = ParserType.MULTI_FILE 79 | box_type = CvcAnnotation 80 | -------------------------------------------------------------------------------- /brambox/boxes/annotations/formats.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # 4 | 5 | from .cvc import CvcParser 6 | from .darknet import DarknetParser 7 | from .dollar import DollarParser 8 | from .kitti import KittiParser 9 | from .pascalvoc import PascalVocParser 10 | from .pickle import PickleParser 11 | from .vatic import VaticParser 12 | from .yaml import YamlParser 13 | 14 | __all__ = ['annotation_formats'] 15 | 16 | 17 | annotation_formats = { 18 | 'cvc': CvcParser, 19 | 'darknet': DarknetParser, 20 | 'dollar': DollarParser, 21 | 'kitti': KittiParser, 22 | 'pickle': PickleParser, 23 | 'pascalvoc': PascalVocParser, 24 | 'vatic': VaticParser, 25 | 'yaml': YamlParser, 26 | } 27 | -------------------------------------------------------------------------------- /brambox/boxes/annotations/pascalvoc.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # By Tanguy Ophoff 4 | # 5 | """ 6 | Pascal VOC 7 | ---------- 8 | """ 9 | 10 | import xml.etree.ElementTree as ET 11 | 12 | from .annotation import * 13 | 14 | __all__ = ['PascalVocAnnotation', 'PascalVocParser'] 15 | 16 | 17 | class PascalVocAnnotation(Annotation): 18 | """ Pascal Voc image annotation """ 19 | def serialize(self): 20 | """ generate a Pascal Voc object xml string """ 21 | string = '<object>\n' 22 | string += f'\t<name>{self.class_label}</name>\n' 23 | string += '\t<pose>Unspecified</pose>\n' 24 | string += f'\t<truncated>{int(self.occluded)}</truncated>\n' 25 | string += f'\t<difficult>{int(self.difficult)}</difficult>\n' 26 | string += '\t<bndbox>\n' 27 | string += f'\t\t<xmin>{self.x_top_left}</xmin>\n' 28 | string += f'\t\t<ymin>{self.y_top_left}</ymin>\n' 29 | string += f'\t\t<xmax>{self.x_top_left + self.width - 1}</xmax>\n' 30 | string += f'\t\t<ymax>{self.y_top_left + self.height - 1}</ymax>\n' 31 | string += '\t</bndbox>\n' 32 | string += '</object>\n' 33 | 34 | return string 35 | 36 | def deserialize(self, xml_obj): 37 | """ parse a Pascal Voc xml annotation string """ 38 | self.class_label = xml_obj.find('name').text 39 | self.occluded = xml_obj.find('truncated').text == '1' 40 | self.difficult = xml_obj.find('difficult').text == '1' 41 | 42 | box = xml_obj.find('bndbox') 43 | self.x_top_left = float(box.find('xmin').text) 44 | self.y_top_left = float(box.find('ymin').text) 45 | self.width = float(int(box.find('xmax').text) - self.x_top_left + 1) 46 | self.height = float(int(box.find('ymax').text) - self.y_top_left + 1) 47 | 48 | self.object_id = 0 49 | self.lost = None 50 | 51 | return self 52 | 53 | 54 | class PascalVocParser(Parser): 55 | """ 56 | This parser can parse annotations in the `pascal voc`_ format. 57 | This format consists of one xml file for every image. 58 | 59 | Example: 60 | >>> image_000.xml 61 | <annotation> 62 | <object> 63 | <name>horse</name> 64 | <truncated>1</truncated> 65 | <difficult>0</difficult> 66 | <bndbox> 67 | <xmin>100</xmin> 68 | <ymin>200</ymin> 69 | <xmax>300</xmax> 70 | <ymax>400</ymax> 71 | </bndbox> 72 | </object> 73 | <object> 74 | <name>person</name> 75 | <truncated>0</truncated> 76 | <difficult>1</difficult> 77 | <bndbox> 78 | <xmin>110</xmin> 79 | <ymin>20</ymin> 80 | <xmax>200</xmax> 81 | <ymax>350</ymax> 82 | </bndbox> 83 | </object> 84 | </annotation> 85 | 86 | .. _pascal voc: http://host.robots.ox.ac.uk/pascal/VOC/ 87 | """ 88 | parser_type = ParserType.MULTI_FILE 89 | box_type = PascalVocAnnotation 90 | extension = '.xml' 91 | 92 | def serialize(self, annotations): 93 | """ Serialize a list of annotations into one string """ 94 | result = '<annotation>\n' 95 | 96 | for anno in annotations: 97 | new_anno = self.box_type.create(anno) 98 | result += new_anno.serialize() 99 | 100 | return result + '</annotation>\n' 101 | 102 | def deserialize(self, string): 103 | """ Deserialize an annotation string into a list of annotations """ 104 | result = [] 105 | 106 | root = ET.fromstring(string) 107 | for obj in root.iter('object'): 108 | anno = self.box_type() 109 | result += [anno.deserialize(obj)] 110 | 111 | return result 112 | -------------------------------------------------------------------------------- /brambox/boxes/annotations/pickle.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Tanguy Ophoff 4 | # 5 | """ 6 | Pickle 7 | ------ 8 | """ 9 | import logging as log 10 | import pickle 11 | from .annotation import * 12 | 13 | __all__ = ['PickleAnnotation', 'PickleParser'] 14 | #log = logging.getLogger(__name__) 15 | 16 | 17 | class PickleAnnotation(Annotation): 18 | """ Pickle annotation """ 19 | def __getstate__(self): 20 | state = self.__dict__.copy() 21 | if hasattr(self, 'keep_ignore') and not self.keep_ignore: 22 | del state['ignore'] 23 | if hasattr(self, 'keep_ignore'): 24 | del state['keep_ignore'] 25 | if self.visible_x_top_left == 0: 26 | del state['visible_x_top_left'] 27 | if self.visible_y_top_left == 0: 28 | del state['visible_y_top_left'] 29 | if self.visible_width == 0: 30 | del state['visible_width'] 31 | if self.visible_height == 0: 32 | del state['visible_height'] 33 | 34 | return state 35 | 36 | def __setstate__(self, state): 37 | if 'occluded_fraction' not in state: # Backward compatible with older versions 38 | log.deprecated('You are using an old pickle format that will be deprecated in newer versions. Consider saving your annotations in the new format.') 39 | state['occluded_fraction'] = float(state['occluded']) 40 | del state['occluded'] 41 | if 'truncated_fraction' not in state: # Backward compatible with older versions 42 | log.deprecated('You are using an old pickle format that will be deprecated in newer versions. Consider saving your annotations in the new format.') 43 | state['truncated_fraction'] = 0.0 44 | 45 | self.__dict__.update(state) 46 | if not hasattr(self, 'ignore'): 47 | self.ignore = False 48 | if not hasattr(self, 'visible_x_top_left'): 49 | self.visible_x_top_left = 0.0 50 | if not hasattr(self, 'visible_y_top_left'): 51 | self.visible_y_top_left = 0.0 52 | if not hasattr(self, 'visible_width'): 53 | self.visible_width = 0.0 54 | if not hasattr(self, 'visible_height'): 55 | self.visible_height = 0.0 56 | 57 | 58 | class PickleParser(Parser): 59 | """ 60 | This parser generates a binary file of your annotations that can be parsed really fast. 61 | If you are using a python library for training your network, you can use this format to quickly read your annotations.
62 | 63 | Args: 64 | keep_ignore (boolean, optional): Whether are not to save the ignore flag value of the annotations; Default **False** 65 | """ 66 | parser_type = ParserType.SINGLE_FILE 67 | box_type = PickleAnnotation 68 | extension = '.pkl' 69 | read_mode = 'rb' 70 | write_mode = 'wb' 71 | 72 | def __init__(self, **kwargs): 73 | try: 74 | self.keep_ignore = kwargs['keep_ignore'] 75 | except KeyError: 76 | log.info("No 'keep_ignore' kwarg found, defaulting to False.") 77 | self.keep_ignore = False 78 | 79 | def serialize(self, annotations): 80 | """ Serialize input dictionary of annotations into one bytestream """ 81 | result = {} 82 | for img_id in annotations: 83 | img_res = [] 84 | for anno in annotations[img_id]: 85 | box = self.box_type.create(anno) 86 | box.keep_ignore = self.keep_ignore 87 | img_res.append(box) 88 | result[img_id] = img_res 89 | 90 | return pickle.dumps(result) 91 | 92 | def deserialize(self, bytestream): 93 | """ Deserialize an annotation file into a dictionary of annotations """ 94 | return pickle.loads(bytestream) 95 | -------------------------------------------------------------------------------- /brambox/boxes/detections/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Brambox boxes detections module |br| 3 | This package contains the actual detection parsers. These parsers can be used to parse detection files. 4 | """ 5 | 6 | # Formats 7 | from .coco import * 8 | from .dollar import * 9 | from .pascalvoc import * 10 | from .pickle import * 11 | from .yaml import * 12 | 13 | # Extra 14 | from .detection import Detection 15 | from .formats import * 16 | -------------------------------------------------------------------------------- /brambox/boxes/detections/coco.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Maarten Vandersteegen 4 | # 5 | """ 6 | Coco 7 | ---- 8 | """ 9 | 10 | import json 11 | from .detection import * 12 | 13 | __all__ = ["CocoDetection", "CocoParser"] 14 | 15 | 16 | class CocoDetection(Detection): 17 | """ Json based detection format from darknet framework """ 18 | 19 | def serialize(self): 20 | """ generate a json detection object """ 21 | 22 | raise NotImplementedError 23 | 24 | def deserialize(self, json_obj, class_label_map): 25 | """ parse a json detection object """ 26 | 27 | if class_label_map is not None: 28 | self.class_label = class_label_map[json_obj['category_id'] - 1] 29 | else: 30 | self.class_label = str(json_obj['category_id']) 31 | 32 | self.x_top_left = float(json_obj['bbox'][0]) 33 | self.y_top_left = float(json_obj['bbox'][1]) 34 | self.width = float(json_obj['bbox'][2]) 35 | self.height = float(json_obj['bbox'][3]) 36 | self.confidence = json_obj['score'] 37 | 38 | self.object_id = 0 39 | 40 | 41 | class CocoParser(Parser): 42 | """ 43 | COCO detection format parser to parse the coco detection output of the darknet_ DL framework. 44 | 45 | Keyword Args: 46 | class_label_map (list): list of class label strings where the ``category_id`` in the json file \ 47 | is used as an index minus one on this list to get the class labels 48 | 49 | A text file contains multiple detections formated using json. 50 | The file contains one json list where each element represents one bounding box. 
51 | The fields within the elements are: 52 | 53 | =========== =========== 54 | Name Description 55 | =========== =========== 56 | image_id identifier of the image (integer) 57 | category_id class label index (where 1 is the first class label i.s.o. 0) (integer) 58 | bbox json list containing bounding box coordinates [top left x, top left y, width, height] (float values) 59 | score confidence score between 0 and 1 (float) 60 | =========== =========== 61 | 62 | Example: 63 | >>> detection_results.json 64 | [ 65 | {"image_id":0, "category_id":1, "bbox":[501.484039, 209.805313, 28.525848, 50.727005], "score":0.189649}, 66 | {"image_id":1, "category_id":1, "bbox":[526.957703, 219.587631, 25.830444, 55.723373], "score":0.477851} 67 | ] 68 | 69 | .. _darknet: https://pjreddie.com/darknet/ 70 | """ 71 | parser_type = ParserType.SINGLE_FILE 72 | box_type = CocoDetection 73 | extension = '.json' 74 | 75 | def __init__(self, **kwargs): 76 | try: 77 | self.class_label_map = kwargs['class_label_map'] 78 | except KeyError: 79 | raise ValueError("Coco detection format requires a 'class_label_map' kwarg") 80 | 81 | def serialize(self, detections): 82 | """ Serialize input detection to a json string """ 83 | 84 | raise NotImplementedError 85 | 86 | def deserialize(self, string): 87 | """ Parse a json string into a dictionary of detections """ 88 | json_obj = json.loads(string) 89 | 90 | result = {} 91 | for json_det in json_obj: 92 | img_id = json_det['image_id'] 93 | if img_id not in result: 94 | result[img_id] = [] 95 | det = self.box_type() 96 | det.deserialize(json_det, self.class_label_map) 97 | result[img_id] += [det] 98 | 99 | return result 100 | -------------------------------------------------------------------------------- /brambox/boxes/detections/detection.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # 4 | 5 | from enum import Enum 6 | 7 | from .. import box as b 8 | from ..annotations import annotation as anno 9 | 10 | __all__ = ['Detection', 'ParserType', 'Parser'] 11 | 12 | 13 | class Detection(b.Box): 14 | """ This is a generic detection class that provides some base functionality all detections need. 15 | It builds upon :class:`~brambox.boxes.box.Box`. 16 | 17 | Attributes: 18 | confidence (Number): confidence score between 0-1 for that detection; Default **0.0** 19 | """ 20 | def __init__(self): 21 | """ x_top_left,y_top_left,width,height are in pixel coordinates """ 22 | super(Detection, self).__init__() 23 | self.confidence = 0.0 # Confidence score between 0-1 24 | 25 | @classmethod 26 | def create(cls, obj=None): 27 | """ Create a detection from a string or other box object. 28 | 29 | Args: 30 | obj (Box or string, optional): Bounding box object to copy attributes from or string to deserialize 31 | 32 | Note: 33 | The obj can be both an :class:`~brambox.boxes.annotations.Annotation` or a :class:`~brambox.boxes.detections.Detection`. 34 | For Detections the confidence score is copied over, for Annotations it is set to 1. 
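            Example:
                A minimal usage sketch, not part of the original docstring; ``some_annotation`` is an assumed,
                already-filled :class:`~brambox.boxes.annotations.Annotation` instance:

                >>> det = Detection.create(some_annotation)   # geometry and label copied from the annotation
                >>> det.confidence
                1.0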
35 | """ 36 | instance = super(Detection, cls).create(obj) 37 | 38 | if obj is None: 39 | return instance 40 | 41 | if isinstance(obj, Detection): 42 | instance.confidence = obj.confidence 43 | elif isinstance(obj, anno.Annotation): 44 | instance.confidence = 1.0 45 | 46 | return instance 47 | 48 | def __repr__(self): 49 | """ Unambiguous representation """ 50 | string = f'{self.__class__.__name__} ' + '{' 51 | string += f'class_label = {self.class_label}, ' 52 | string += f'object_id = {self.object_id}, ' 53 | string += f'x = {self.x_top_left}, ' 54 | string += f'y = {self.y_top_left}, ' 55 | string += f'w = {self.width}, ' 56 | string += f'h = {self.height}, ' 57 | string += f'confidence = {self.confidence}' 58 | return string + '}' 59 | 60 | def __str__(self): 61 | """ Pretty print """ 62 | string = 'Detection {' 63 | string += f'\'{self.class_label}\' {self.object_id}, ' 64 | string += f'[{int(self.x_top_left)}, {int(self.y_top_left)}, {int(self.width)}, {int(self.height)}]' 65 | string += f', {round(self.confidence*100, 2)} %' 66 | return string + '}' 67 | 68 | 69 | ParserType = b.ParserType 70 | 71 | 72 | class Parser(b.Parser): 73 | """ Generic parser class """ 74 | box_type = Detection # Derived classes should set the correct box_type 75 | -------------------------------------------------------------------------------- /brambox/boxes/detections/dollar.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Maarten Vandersteegen 4 | # 5 | """ 6 | Dollar 7 | ------ 8 | """ 9 | 10 | from .detection import * 11 | 12 | __all__ = ["DollarDetection", "DollarParser"] 13 | 14 | 15 | class DollarDetection(Detection): 16 | """ Dollar image detection """ 17 | 18 | def serialize(self): 19 | """ generate a dollar detection string """ 20 | 21 | raise NotImplementedError 22 | 23 | def deserialize(self, string, class_label_map): 24 | """ parse a dollar detection string """ 25 | elements = string.split(',') 26 | self.class_label = class_label_map[0] 27 | frame_nr = int(elements[0]) - 1 28 | self.x_top_left = float(elements[1]) 29 | self.y_top_left = float(elements[2]) 30 | self.width = float(elements[3]) 31 | self.height = float(elements[4]) 32 | self.confidence = float(elements[5]) 33 | 34 | self.object_id = 0 35 | 36 | 37 | class DollarParser(Parser): 38 | """ 39 | This parser is designed to parse the text based dollar detections generated by Piotr Dollar's toolbox_ 40 | and the EAVISE PeopleDetect framework. 41 | 42 | Keyword Args: 43 | class_label_map (list): a list containing one string element with the name of the class the detector \ 44 | is trained for (this format only supports single class detections) 45 | 46 | A text file contains multiple detections over multiple images where each line in the file represents one 47 | detection bounding box. 
48 | Each line is a comma-separated list of values structured as follows: 49 | 50 | <image_id>,<x>,<y>,<w>,<h>,<score> 51 | 52 | ========= =========== 53 | Name Description 54 | ========= =========== 55 | image_id image identifier that this detection belongs to (integer) 56 | x top left x coordinate of the bounding box in pixels (integer) 57 | y top left y coordinate of the bounding box in pixels (integer) 58 | w width of the bounding box in pixels (integer) 59 | h height of the bounding box in pixels (integer) 60 | score relative detection score not limited between boundaries (float) 61 | ========= =========== 62 | 63 | Example: 64 | >>> video_000.txt 65 | 20,503.75,213,20.5,50,74.8391 66 | 20,540.8,166.4,37.4857,91.4286,56.4761 67 | 20,519.034,186.602,31.6574,77.2131,51.2428 68 | 69 | Note: 70 | The image identifier is decreased by 1 during parsing, so a parsed image id of 1 refers to 71 | image 0. This is a legacy of MATLAB's one-based indexing. 72 | 73 | Note: 74 | For calculating PR or MR vs FPPI curves, the score does not need to be normalized between 0 and 1 75 | since the scores are only compared relative to each other 76 | 77 | Warning: 78 | Serialize is currently not implemented 79 | 80 | .. _toolbox: https://github.com/pdollar/toolbox/blob/master/detector/bbGt.m 81 | """ 82 | parser_type = ParserType.SINGLE_FILE 83 | box_type = DollarDetection 84 | 85 | def __init__(self, **kwargs): 86 | """ Only the first element of the class label map is used since this format 87 | does not support class labels 88 | """ 89 | try: 90 | self.class_label_map = kwargs['class_label_map'] 91 | except KeyError: 92 | raise ValueError("Dollar detection format requires a 'class_label_map' kwarg") 93 | 94 | def serialize(self, detections): 95 | """ Serialize input detection to dollar detection strings """ 96 | 97 | raise NotImplementedError 98 | 99 | def deserialize(self, string): 100 | """ Parse dollar detection strings into a dictionary of detections """ 101 | result = {} 102 | for line in string.splitlines(): 103 | img_id = str(int(line.split(',')[0]) - 1) 104 | if img_id not in result: 105 | result[img_id] = [] 106 | det = self.box_type() 107 | det.deserialize(line, self.class_label_map) 108 | result[img_id] += [det] 109 | 110 | return result 111 | -------------------------------------------------------------------------------- /brambox/boxes/detections/formats.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # 4 | 5 | from .coco import CocoParser 6 | from .dollar import DollarParser 7 | from .pascalvoc import PascalVocParser 8 | from .pickle import PickleParser 9 | from .yaml import YamlParser 10 | 11 | __all__ = ['detection_formats'] 12 | 13 | 14 | detection_formats = { 15 | 'coco': CocoParser, 16 | 'dollar': DollarParser, 17 | 'pascalvoc': PascalVocParser, 18 | 'pickle': PickleParser, 19 | 'yaml': YamlParser, 20 | } 21 | -------------------------------------------------------------------------------- /brambox/boxes/detections/pascalvoc.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Tanguy Ophoff 4 | # 5 | """ 6 | Pascal VOC 7 | ---------- 8 | """ 9 | import logging as log 10 | from .detection import * 11 | 12 | __all__ = ["PascalVocDetection", "PascalVocParser"] 13 | #log = logging.getLogger(__name__) 14 | 15 | 16 | class PascalVocDetection(Detection): 17 | """ Pascal VOC image detection """ 18 | def serialize(self): 19 | """ generate a Pascal VOC detection string """ 20 | raise NotImplementedError 21 | 22 | def 
deserialize(self, det_string, class_label): 23 | """ parse a Pascal VOC detection string """ 24 | self.class_label = class_label 25 | 26 | elements = det_string.split() 27 | self.confidence = float(elements[1]) 28 | self.x_top_left = float(elements[2]) 29 | self.y_top_left = float(elements[3]) 30 | self.width = float(elements[4]) - self.x_top_left + 1 31 | self.height = float(elements[5]) - self.y_top_left + 1 32 | 33 | self.object_id = 0 34 | 35 | return elements[0] 36 | 37 | 38 | class PascalVocParser(Parser): 39 | """ 40 | This parser can parse detections in the `pascal voc`_ format. 41 | This format consists of one file per class of detection. |br| 42 | confidence_scores are saved as a number between 0-1, coordinates are saved as pixel values. 43 | 44 | Keyword Args: 45 | class_label (string, optional): This keyword argument contains the ``class_label`` \ 46 | for the current file that is being parsed. 47 | 48 | Example: 49 | >>> person.txt 50 | 51 | 52 | 53 | >>> cat.txt 54 | 55 | 56 | .. _pascal voc: http://host.robots.ox.ac.uk/pascal/VOC/ 57 | """ 58 | parser_type = ParserType.SINGLE_FILE 59 | box_type = PascalVocDetection 60 | extension = '.txt' 61 | 62 | def __init__(self, **kwargs): 63 | try: 64 | self.class_label = kwargs['class_label'] 65 | except KeyError: 66 | log.info("No 'class_label' kwarg found, parser will use '' as class_label.") 67 | self.class_label = '' 68 | 69 | def serialize(self, detections): 70 | """ Serialize input dictionary of detections into one string """ 71 | raise NotImplementedError 72 | 73 | def deserialize(self, string): 74 | """ Deserialize a detection file into a dictionary of detections """ 75 | result = {} 76 | 77 | for line in string.splitlines(): 78 | if line[0] != '#': 79 | anno = self.box_type() 80 | img_id = anno.deserialize(line, self.class_label) 81 | if img_id in result: 82 | result[img_id].append(anno) 83 | else: 84 | result[img_id] = [anno] 85 | 86 | return result 87 | -------------------------------------------------------------------------------- /brambox/boxes/detections/pickle.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Tanguy Ophoff 4 | # 5 | """ 6 | Pickle 7 | ------ 8 | """ 9 | 10 | import pickle 11 | from .detection import * 12 | 13 | __all__ = ["PickleParser"] 14 | 15 | 16 | class PickleParser(Parser): 17 | """ 18 | This parser generates a binary file of your detections that can be parsed really fast. 19 | If you are using a python library for testing your network, you can use this format to quickly save your detections. 
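        Example:
            A minimal save/load sketch, not from the original docstring; ``dets`` is assumed to be a dictionary
            mapping image identifiers to lists of :class:`Detection` objects, as produced by the other parsers:

            >>> data = PickleParser().serialize(dets)      # bytes, to be written with write_mode 'wb'
            >>> restored = PickleParser().deserialize(data)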
20 | """ 21 | parser_type = ParserType.SINGLE_FILE 22 | box_type = Detection 23 | extension = '.pkl' 24 | read_mode = 'rb' 25 | write_mode = 'wb' 26 | 27 | def serialize(self, annotations): 28 | """ Serialize input dictionary of annotations into one bytestream """ 29 | result = {} 30 | for img_id in annotations: 31 | img_res = [] 32 | for anno in annotations[img_id]: 33 | img_res.append(self.box_type.create(anno)) 34 | result[img_id] = img_res 35 | 36 | return pickle.dumps(result) 37 | 38 | def deserialize(self, bytestream): 39 | """ Deserialize an annotation file into a dictionary of annotations """ 40 | return pickle.loads(bytestream) 41 | -------------------------------------------------------------------------------- /brambox/boxes/detections/yaml.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Tanguy Ophoff 4 | # 5 | """ 6 | YAML 7 | ---- 8 | """ 9 | 10 | import yaml 11 | from .detection import * 12 | 13 | __all__ = ["YamlDetection", "YamlParser"] 14 | 15 | 16 | class YamlDetection(Detection): 17 | """ YAML image detection """ 18 | 19 | def serialize(self): 20 | """ generate a yaml detection object """ 21 | class_label = '?' if self.class_label == '' else self.class_label 22 | return (class_label, 23 | { 24 | 'coords': [round(self.x_top_left), round(self.y_top_left), round(self.width), round(self.height)], 25 | 'score': self.confidence*100, 26 | } 27 | ) 28 | 29 | def deserialize(self, yaml_obj, class_label): 30 | """ parse a yaml detection object """ 31 | self.class_label = '' if class_label == '?' else class_label 32 | self.x_top_left = float(yaml_obj['coords'][0]) 33 | self.y_top_left = float(yaml_obj['coords'][1]) 34 | self.width = float(yaml_obj['coords'][2]) 35 | self.height = float(yaml_obj['coords'][3]) 36 | self.confidence = yaml_obj['score'] / 100 37 | 38 | self.object_id = 0 39 | 40 | 41 | class YamlParser(Parser): 42 | """ 43 | This parser generates a lightweight human readable detection format. 44 | With only one file for the entire dataset, this format will save you precious HDD space and will also be parsed faster. 
45 | 46 | Example: 47 | >>> detections.yaml 48 | img1: 49 | car: 50 | - coords: [x,y,w,h] 51 | score: 56.76 52 | person: 53 | - coords: [x,y,w,h] 54 | score: 90.1294132 55 | - coords: [x,y,w,h] 56 | score: 12.120 57 | img2: 58 | car: 59 | - coords: [x,y,w,h] 60 | score: 50 61 | """ 62 | parser_type = ParserType.SINGLE_FILE 63 | box_type = YamlDetection 64 | extension = '.yaml' 65 | 66 | def serialize(self, detections): 67 | """ Serialize input dictionary of detections into one string """ 68 | result = {} 69 | for img_id in detections: 70 | img_res = {} 71 | for det in detections[img_id]: 72 | new_det = self.box_type.create(det) 73 | key, val = new_det.serialize() 74 | if key not in img_res: 75 | img_res[key] = [val] 76 | else: 77 | img_res[key] += [val] 78 | result[img_id] = img_res 79 | 80 | return yaml.dump(result) 81 | 82 | def deserialize(self, string): 83 | """ Deserialize a detection file into a dictionary of detections """ 84 | yml_obj = yaml.load(string) 85 | 86 | result = {} 87 | for img_id in yml_obj: 88 | det_res = [] 89 | for class_label, detections in yml_obj[img_id].items(): 90 | for det_yml in detections: 91 | det = self.box_type() 92 | det.deserialize(det_yml, class_label) 93 | det_res += [det] 94 | result[img_id] = det_res 95 | 96 | return result 97 | -------------------------------------------------------------------------------- /brambox/boxes/formats.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # 4 | 5 | from .annotations import annotation_formats 6 | from .detections import detection_formats 7 | 8 | __all__ = ['formats', 'annotation_formats', 'detection_formats'] 9 | 10 | formats = {} 11 | for key in annotation_formats: 12 | formats['anno_'+key] = annotation_formats[key] 13 | for key in detection_formats: 14 | formats['det_'+key] = detection_formats[key] 15 | -------------------------------------------------------------------------------- /brambox/boxes/statistics/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Brambox boxes statistics module |br| 3 | This package contains functions to perform statistical analysis of your detections and annotations. 4 | """ 5 | 6 | from .mr_fppi import * 7 | from .pr import * 8 | from .util import * 9 | -------------------------------------------------------------------------------- /brambox/boxes/statistics/mr_fppi.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Maarten Vandersteegen 4 | # Author: Tanguy Ophoff 5 | # 6 | # Functions for generating miss-rate vs FPPI curves (False Positives Per Image) axis 7 | # and calculating log average miss-rate 8 | # 9 | import numpy as np 10 | import scipy.interpolate 11 | 12 | from .util import * 13 | 14 | __all__ = ['mr_fppi', 'lamr'] 15 | 16 | 17 | def mr_fppi(detections, ground_truth, overlap_threshold=0.5): 18 | """ Compute a list of miss-rate FPPI values that can be plotted into a graph. 
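        The returned lists hold one value per point on the curve, computed from the matched detections as
        ``miss_rate = 1 - TP / num_annotations`` and ``fppi = FP / num_images`` (see the loop below).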
19 | 20 | Args: 21 | detections (dict): Detection objects per image 22 | ground_truth (dict): Annotation objects per image 23 | overlap_threshold (Number, optional): Minimum iou threshold for true positive; Default **0.5** 24 | 25 | Returns: 26 | tuple: **[miss-rate_values]**, **[fppi_values]** 27 | """ 28 | num_images = len(ground_truth) 29 | tps, fps, num_annotations = match_detections(detections, ground_truth, overlap_threshold) 30 | 31 | miss_rate = [] 32 | fppi = [] 33 | for tp, fp in zip(tps, fps): 34 | miss_rate.append(1 - (tp / num_annotations)) 35 | fppi.append(fp / num_images) 36 | 37 | return miss_rate, fppi 38 | 39 | 40 | # TODO ? maarten -> why 9 41 | def lamr(miss_rate, fppi, num_of_samples=9): 42 | """ Compute the log average miss-rate from a given MR-FPPI curve. 43 | The log average miss-rate is defined as the average of a number of evenly spaced log miss-rate samples 44 | on the :math:`{log}(FPPI)` axis within the range :math:`[10^{-2}, 10^{0}]` 45 | 46 | Args: 47 | miss_rate (list): miss-rate values 48 | fppi (list): FPPI values 49 | num_of_samples (int, optional): Number of samples to take from the curve to measure the average precision; Default **9** 50 | 51 | Returns: 52 | Number: log average miss-rate 53 | """ 54 | samples = np.logspace(-2., 0., num_of_samples) 55 | m = np.array(miss_rate) 56 | f = np.array(fppi) 57 | interpolated = scipy.interpolate.interp1d(f, m, fill_value=(1., 0.), bounds_error=False)(samples) 58 | log_interpolated = np.log(interpolated) 59 | avg = sum(log_interpolated) / len(log_interpolated) 60 | return np.exp(avg) 61 | -------------------------------------------------------------------------------- /brambox/boxes/statistics/pr.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Maarten Vandersteegen 4 | # Author: Tanguy Ophoff 5 | # 6 | # Functions for generating PR-curve values and calculating average precision 7 | # 8 | 9 | import math 10 | from statistics import mean 11 | import numpy as np 12 | import scipy.interpolate 13 | 14 | from .util import * 15 | 16 | __all__ = ['pr', 'ap', 'voc_ap'] 17 | 18 | 19 | def pr(detections, ground_truth, overlap_threshold=0.5): 20 | """ Compute a list of precision recall values that can be plotted into a graph. 21 | 22 | Args: 23 | detections (dict): Detection objects per image 24 | ground_truth (dict): Annotation objects per image 25 | overlap_threshold (Number, optional): Minimum iou threshold for true positive; Default **0.5** 26 | 27 | Returns: 28 | tuple: **[precision_values]**, **[recall_values]** 29 | """ 30 | tps, fps, num_annotations = match_detections(detections, ground_truth, overlap_threshold) 31 | 32 | precision = [] 33 | recall = [] 34 | for tp, fp in zip(tps, fps): 35 | recall.append(tp / num_annotations) 36 | precision.append(tp / (fp + tp)) 37 | 38 | return precision, recall 39 | 40 | 41 | def ap(precision, recall, num_of_samples=100): 42 | """ Compute the average precision from a given pr-curve. 43 | The average precision is defined as the area under the curve. 
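        Concretely, for curves with more than one point the recall axis is resampled at ``num_of_samples``
        evenly spaced points and the interpolated precision values are averaged,
        i.e. :math:`AP \approx \frac{1}{N} \sum_{i} p(r_i)`.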
44 | 45 | Args: 46 | precision (list): Precision values 47 | recall (list): Recall values 48 | num_of_samples (int, optional): Number of samples to take from the curve to measure the average precision; Default **100** 49 | 50 | Returns: 51 | Number: average precision 52 | """ 53 | if len(precision) > 1 and len(recall) > 1: 54 | p = np.array(precision) 55 | r = np.array(recall) 56 | p_start = p[np.argmin(r)] 57 | samples = np.arange(0., 1., 1.0/num_of_samples) 58 | interpolated = scipy.interpolate.interp1d(r, p, fill_value=(p_start, 0.), bounds_error=False)(samples) 59 | avg = sum(interpolated) / len(interpolated) 60 | elif len(precision) > 0 and len(recall) > 0: 61 | # 1 point on PR: AP is box between (0,0) and (p,r) 62 | avg = precision[0] * recall[0] 63 | else: 64 | avg = float('nan') 65 | 66 | return avg 67 | 68 | 69 | def voc_ap(prec, rec, use_07_metric=True): 70 | """ ap = voc_ap(rec, prec, [use_07_metric]) 71 | Compute VOC AP given precision and recall. 72 | If use_07_metric is true, uses the 73 | VOC 07 11 point method (default:False). 74 | """ 75 | prec = np.array(prec) 76 | rec = np.array(rec) 77 | if use_07_metric: 78 | # 11 point metric 79 | ap = 0. 80 | for t in np.arange(0., 1.1, 0.1): 81 | if np.sum(rec >= t) == 0: 82 | p = 0 83 | else: 84 | p = np.max(prec[rec >= t]) 85 | ap = ap + p / 11. 86 | else: 87 | # correct AP calculation 88 | # first append sentinel values at the end 89 | mrec = np.concatenate(([0.], rec, [1.])) 90 | mpre = np.concatenate(([0.], prec, [0.])) 91 | 92 | # compute the precision envelope 93 | for i in range(mpre.size - 1, 0, -1): 94 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 95 | 96 | # to calculate area under PR curve, look for points 97 | # where X axis (recall) changes value 98 | i = np.where(mrec[1:] != mrec[:-1])[0] 99 | 100 | # and sum (\Delta recall) * prec 101 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 102 | return ap 103 | -------------------------------------------------------------------------------- /brambox/boxes/util/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Brambox boxes util module |br| 3 | This package contains utilitary function to use with brambox boxes. 4 | """ 5 | 6 | #from .visual import * 7 | from .convert import * 8 | from .path import * 9 | from .modifiers import * 10 | from .filters import * 11 | -------------------------------------------------------------------------------- /brambox/boxes/util/path.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # Author: Maarten Vandersteegen 4 | # 5 | 6 | import os 7 | import glob 8 | 9 | __all__ = ["expand"] 10 | 11 | 12 | def files(path): 13 | """ List all files in a directory omitting directories. """ 14 | for file in os.listdir(path): 15 | if os.path.isfile(os.path.join(path, file)): 16 | yield os.path.join(path, file) 17 | 18 | 19 | def strider(elements, stride, offset): 20 | """ Yield input elements with given stride and offset. """ 21 | next_element = offset 22 | 23 | # support negative offsets 24 | while next_element < 0: 25 | next_element += stride 26 | 27 | for i, elem in enumerate(elements): 28 | if i == next_element: 29 | next_element += stride 30 | yield elem 31 | 32 | 33 | def modulo_expand(expr, stride, offset): 34 | """ Expands a path with a **%d** to files with different numbers. 
""" 35 | # Support negative offset 36 | number = offset 37 | while (number < 0): 38 | number += stride 39 | 40 | while True: 41 | filename = expr % number 42 | if not os.path.isfile(filename): 43 | break 44 | yield filename 45 | number += stride 46 | 47 | 48 | def expand(expr, stride=1, offset=0): 49 | """ Expand a file selection expression into multiple filenames. 50 | 51 | Args: 52 | expr (str): File sequence expression 53 | stride (int, optional): Sample every n'th file where n is this parameter; Default **1** 54 | offset (int, optional): Start with the m'th file where m is this parameter; Default **0** 55 | 56 | Returns: 57 | generator: Iterable object that produces full filenames 58 | 59 | Note: 60 | The ``expr`` parameter can be one of the following expressions: 61 | 62 | - a file itself -> return filename 63 | - a directory -> return files from directory 64 | - path with **'*'** wildcard -> return globbed files 65 | - path with **'%d'** wildcard -> return incremental files 66 | """ 67 | if os.path.isdir(expr): 68 | return strider(sorted(files(expr)), stride, offset) 69 | elif os.path.isfile(expr): 70 | return [expr] 71 | elif '*' in expr: 72 | return strider(sorted(glob.glob(expr)), stride, offset) 73 | elif '%' in expr: 74 | return modulo_expand(expr, stride, offset) 75 | else: 76 | raise TypeError("File selection expression invalid") 77 | -------------------------------------------------------------------------------- /brambox/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Brambox transforms module |br| 3 | This package contains various image transformation functions, we feel are worth sharing. 4 | If this package ever grows big, we might need to refactor this entirely, 5 | but for now it can stay as a package with random bits and bops. 6 | """ 7 | 8 | from .channelMixer import ChannelMixer 9 | -------------------------------------------------------------------------------- /brambox/transforms/channelMixer.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright EAVISE 3 | # By Tanguy Ophoff 4 | # 5 | 6 | import logging as log 7 | #log = logging.getLogger(__name__) # noqa 8 | 9 | from PIL import Image 10 | import numpy as np 11 | try: 12 | import cv2 13 | except ModuleNotFoundError: 14 | log.debug('OpenCV not installed, always using PIL') 15 | cv2 = None 16 | 17 | __all__ = ['ChannelMixer'] 18 | 19 | 20 | class ChannelMixer: 21 | """ Mix channels of multiple inputs in a single output image. 22 | This class works with opencv_ images (np.ndarray), and will mix the channels of multiple images into one new image. 23 | 24 | Args: 25 | num_channels (int, optional): The number of channels the output image will have; Default **3** 26 | 27 | Example: 28 | >>> # Replace the 3th channel of an image with a channel from another image 29 | >>> mixer = brambox.transforms.ChannelMixer() 30 | >>> mixer.set_channels([(0,0), (0,1), (1,0)]) 31 | >>> out = mixer(img1, img2) 32 | >>> # out => opencv image with channels: [img0_channel0, img0_channel1, img1_channel0] 33 | """ 34 | def __init__(self, num_channels=3): 35 | self.num_channels = num_channels 36 | self.channels = [(0, i) for i in range(num_channels)] 37 | 38 | def set_channels(self, channels): 39 | """ Set from which channels the output image should be created. 40 | The channels list should have the same length as the number of output channels. 
41 | 42 | Args: 43 | channels (list): List of tuples containing (img_number, channel_number) 44 | """ 45 | if len(channels) != self.num_channels: 46 | raise ValueError('You should have one [image,channel] per output channel') 47 | self.channels = [(c[0], c[1]) for c in channels] 48 | 49 | def __call__(self, *imgs): 50 | """ Create and return output image. 51 | 52 | Args: 53 | *imgs: Argument list with all the images needed for the mix 54 | 55 | Warning: 56 | Make sure the images all have the same width and height before mixing them. 57 | """ 58 | m = max(self.channels, key=lambda c: c[0])[0] 59 | if m >= len(imgs): 60 | raise ValueError(f'{m} images are needed to perform the mix') 61 | 62 | if isinstance(imgs[0], Image.Image): 63 | pil_image = True 64 | imgs = [np.array(img) for img in imgs] 65 | else: 66 | pil_image = False 67 | 68 | res = np.zeros([imgs[0].shape[0], imgs[0].shape[1], self.num_channels], 'uint8') 69 | for i in range(self.num_channels): 70 | if imgs[self.channels[i][0]].ndim >= 3: 71 | res[..., i] = imgs[self.channels[i][0]][..., self.channels[i][1]] 72 | else: 73 | res[..., i] = imgs[self.channels[i][0]] 74 | res = np.squeeze(res) 75 | 76 | if pil_image: 77 | return Image.fromarray(res) 78 | else: 79 | return res 80 | -------------------------------------------------------------------------------- /cfgs/README.md: -------------------------------------------------------------------------------- 1 | GPU: Tesla P100 2 | 3 | Deep learning library: PyTorch 0.4.0 4 | 5 | Input size: 544x544 6 | 7 | Batch size: 1 8 | 9 | Test iterations: 200 10 | 11 | Time: 2018-12-29 16:16 12 | 13 | ################################## 14 | 15 | tiny yolov3 < tiny yolov2 < RegionMobilenetv2 < RegionMobilenet < RegionShufflenet & RegionShufflenetv2 16 | < RegionLightXception < yolov2 < RegionSqueezenext < RegionXception < Yolov3 17 | 18 | Yolov2: 11.5 ms/iter 19 | 20 | Yolov3: 23.1 ms/iter 21 | 22 | RegionMobilenet: 6.3 ms/iter 23 | 24 | RegionMobilenetv2: 5.9 ms/iter 25 | 26 | RegionShufflenet: 7.2 ms/iter 27 | 28 | RegionShufflenetv2: 7.4 ms/iter 29 | 30 | RegionXception: 22.7 ms/iter 31 | 32 | RegionLightXception: 11.0 ms/iter 33 | 34 | RegionSqueezenext: 13.8 ms/iter 35 | 36 | TinyYolov2: 3.7 ms/iter 37 | 38 | TinyYolov3: 3.4 ms/iter 39 | -------------------------------------------------------------------------------- /cfgs/main.yml: -------------------------------------------------------------------------------- 1 | cfg_dict: 2 | Yolov3: yolov3.yml 3 | Yolov2: yolov2.yml 4 | RegionMobilenet: region_mobilenet.yml 5 | RegionMobilenetv2: region_mobilenetv2.yml 6 | TinyYolov2: tiny_yolov2.yml 7 | TinyYolov3: tiny_yolov3.yml 8 | RegionShufflenet: region_shufflenet.yml 9 | RegionShufflenetv2: region_shufflenetv2.yml 10 | RegionSqueezenext: region_squeezenext.yml 11 | RegionXception: region_xception.yml 12 | RegionLightXception: region_light_xception.yml 13 | -------------------------------------------------------------------------------- /cfgs/region_light_xception.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: 
"6" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000,80000, 120000,140000] 24 | lr_rates: [0.0005,0.001,0.002,0.02, 0.002,0.0002,0.001, 0.0001, 0.00001] 25 | max_batches: 160200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000] 30 | backup_rates: [10000] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: ~ 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "7" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [544, 544] 45 | batch_size: 16 46 | weights: "outputs/RegionLightXception/baseline/weights/backup.pt" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/region_mobilenet.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "1" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000,80000, 120000,140000] 24 | lr_rates: [0.0005,0.001,0.002,0.02, 0.002,0.0002,0.001, 0.0001, 0.00001] 25 | max_batches: 160200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000] 30 | backup_rates: [10000] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: ~ 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "7" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [544, 544] 45 | batch_size: 16 46 | weights: "outputs/RegionMobilenet/baseline/weights/backup.pt" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/region_mobilenetv2.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "0" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000,80000, 120000,140000] 24 | lr_rates: [0.0005,0.001,0.002,0.02, 0.002,0.0002,0.001, 0.0001, 0.00001] 25 | max_batches: 160200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000] 30 | backup_rates: 
[10000] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: ~ 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "7" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [544, 544] 45 | batch_size: 16 46 | weights: "outputs/RegionMobilenetv2/baseline/weights/backup.pt" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "5" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/region_shufflenet.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "3" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000,80000, 120000,140000] 24 | lr_rates: [0.0005,0.001,0.002,0.02, 0.002,0.0002,0.001, 0.0001, 0.00001] 25 | max_batches: 160200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000, 60000] 30 | backup_rates: [10000, 200] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: ~ 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "7" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [544, 544] 45 | batch_size: 16 46 | weights: "outputs/RegionShufflenet/baseline/weights/backup.pt" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/region_shufflenetv2.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "5" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000,80000, 120000,140000] 24 | lr_rates: [0.0005,0.001,0.002,0.02, 0.002,0.0002,0.001, 0.0001, 0.00001] 25 | max_batches: 160200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000, 60000] 30 | backup_rates: [10000, 200] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: ~ 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "7" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [544, 544] 45 | batch_size: 16 46 | weights: "outputs/RegionShufflenetv2/baseline/weights/backup.pt" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 
50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/region_squeezenext.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "7" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000,80000, 120000,140000] 24 | lr_rates: [0.0005,0.001,0.002,0.02, 0.002,0.0002,0.001, 0.0001, 0.00001] 25 | max_batches: 160200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000, 60000] 30 | backup_rates: [10000, 200] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: ~ 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "7" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [544, 544] 45 | batch_size: 16 46 | weights: "outputs/RegionSqueezenext/baseline/weights/backup.pt" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/region_xception.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "1" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000] 24 | lr_rates: [0.0005,0.001,0.002,0.02, 0.002,0.0002] 25 | max_batches: 80200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000] 30 | backup_rates: [10000] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: ~ 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "3" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [544, 544] 45 | batch_size: 16 46 | weights: "outputs/RegionXception/baseline/weights/backup.pt" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/tiny_yolov2.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: 
["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "3" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000] 24 | lr_rates: [0.0005,0.001,0.002,0.02, 0.002,0.0002] 25 | max_batches: 80200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000] 30 | backup_rates: [10000] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: ~ 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "1" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [416, 416] 45 | batch_size: 16 46 | weights: "weights/tiny_yolov2_80000.pt" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/tiny_yolov3.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "2" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000] 24 | lr_rates: [0.0005,0.001,0.002,0.02, 0.002,0.0002] 25 | max_batches: 80200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000] 30 | backup_rates: [10000] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: ~ 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "0" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [416, 416] 45 | batch_size: 16 46 | weights: "weights/tiny_yolov3_80200.pt" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/yolov2.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "2" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.0001 23 | lr_steps: [400,700,900,1000, 40000,60000] 24 | lr_rates: [0.0001,0.0005,0.0005,0.001, 
0.0001,0.00001] 25 | max_batches: 70200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000, 60000] 30 | backup_rates: [10000, 200] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: "weights/darknet19_448.conv.23" 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "3" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [544, 544] 45 | batch_size: 16 46 | weights: "weights/yolov2_60200.dw" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /cfgs/yolov3.yml: -------------------------------------------------------------------------------- 1 | output_root: "outputs" 2 | output_version: "baseline" 3 | backup_name: "weights" 4 | log_name: "logs" 5 | 6 | labels: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 7 | 8 | data_root_dir: "/data2/yichaoxiong/data/VOCdevkit/onedet_cache" 9 | 10 | train: 11 | dataset: "train" 12 | stdout: False 13 | gpus: "6" 14 | nworkers: 16 15 | pin_mem: True 16 | 17 | momentum: 0.9 18 | decay: 0.0005 19 | 20 | clear: False 21 | 22 | warmup_lr: 0.00005 23 | lr_steps: [400,700,900,1000, 40000,45000] 24 | lr_rates: [0.0001,0.0002,0.0005,0.001, 0.0001,0.00001] 25 | max_batches: 50200 26 | resize_interval: 10 27 | 28 | backup_interval: 200 29 | backup_steps: [1000] 30 | backup_rates: [10000] 31 | 32 | input_shape: [608, 608] 33 | batch_size: 64 34 | mini_batch_size: 16 35 | weights: "weights/darknet53.conv.74" 36 | 37 | test: 38 | dataset: "test" 39 | stdout: True 40 | gpus: "3" 41 | nworkers: 8 42 | pin_mem: True 43 | 44 | input_shape: [544, 544] 45 | batch_size: 8 46 | weights: "weights/yolov3_50200.dw" 47 | 48 | conf_thresh: 0.005 49 | nms_thresh: 0.45 50 | 51 | results: "results" 52 | 53 | speed: 54 | gpus: "7" 55 | batch_size: 1 56 | max_iters: 200 57 | input_shape: [544, 544] 58 | -------------------------------------------------------------------------------- /examples/labels.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright EAVISE 4 | # Example: Transform annotations for VOCdevkit to the brambox pickle format 5 | # 6 | 7 | # modified by mileistone 8 | 9 | import os 10 | import sys 11 | import xml.etree.ElementTree as ET 12 | sys.path.insert(0, '.') 13 | import brambox.boxes as bbb 14 | 15 | DEBUG = True # Enable some debug prints with extra information 16 | ROOT = '/data2/yichaoxiong/data/VOCdevkit' # Root folder where the VOCdevkit is located 17 | 18 | TRAINSET = [ 19 | ('2012', 'train'), 20 | ('2012', 'val'), 21 | ('2007', 'train'), 22 | ('2007', 'val'), 23 | ] 24 | 25 | TESTSET = [ 26 | ('2007', 'test'), 27 | ] 28 | 29 | def identify(xml_file): 30 | root_dir = ROOT 31 | root = ET.parse(xml_file).getroot() 32 | folder = root.find('folder').text 33 | filename = root.find('filename').text 34 | return f'{root_dir}/{folder}/JPEGImages/{filename}' 35 | 36 | 37 | if __name__ == '__main__': 38 | print('Getting training annotation filenames') 39 | train = [] 40 | for (year, img_set) in TRAINSET: 41 | with open(f'{ROOT}/VOC{year}/ImageSets/Main/{img_set}.txt', 'r') as f: 42 | ids = f.read().strip().split() 43 | train += 
[f'{ROOT}/VOC{year}/Annotations/{xml_id}.xml' for xml_id in ids] 44 | 45 | if DEBUG: 46 | print(f'\t{len(train)} xml files') 47 | 48 | print('Parsing training annotation files') 49 | train_annos = bbb.parse('anno_pascalvoc', train, identify) 50 | # Remove difficult for training 51 | for k,annos in train_annos.items(): 52 | for i in range(len(annos)-1, -1, -1): 53 | if annos[i].difficult: 54 | del annos[i] 55 | 56 | print('Generating training annotation file') 57 | bbb.generate('anno_pickle', train_annos, f'{ROOT}/onedet_cache/train.pkl') 58 | 59 | print() 60 | 61 | print('Getting testing annotation filenames') 62 | test = [] 63 | for (year, img_set) in TESTSET: 64 | with open(f'{ROOT}/VOC{year}/ImageSets/Main/{img_set}.txt', 'r') as f: 65 | ids = f.read().strip().split() 66 | test += [f'{ROOT}/VOC{year}/Annotations/{xml_id}.xml' for xml_id in ids] 67 | 68 | if DEBUG: 69 | print(f'\t{len(test)} xml files') 70 | 71 | print('Parsing testing annotation files') 72 | test_annos = bbb.parse('anno_pascalvoc', test, identify) 73 | 74 | print('Generating testing annotation file') 75 | bbb.generate('anno_pickle', test_annos, f'{ROOT}/onedet_cache/test.pkl') 76 | 77 | -------------------------------------------------------------------------------- /examples/simple_speed.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import os 4 | 5 | import sys 6 | sys.path.insert(0, '.') 7 | 8 | import vedanet as vn 9 | 10 | __all__ = ['speed'] 11 | 12 | 13 | def speed(): 14 | print('Creating network') 15 | 16 | batch = 1 17 | gpus = '7' 18 | network_size = (544, 544) 19 | max_iters = 200 20 | 21 | use_cuda = True if gpus is not None else False 22 | if gpus is not None: 23 | os.environ['CUDA_VISIBLE_DEVICES'] = gpus 24 | 25 | net = vn.network.backbone.Mobilenet() 26 | net.eval() 27 | print('Net structure\n%s' % net) 28 | 29 | if use_cuda: 30 | net.cuda() 31 | 32 | data = torch.randn(batch, 3, network_size[1], network_size[0], dtype=torch.float) 33 | if use_cuda: 34 | data = data.cuda() 35 | 36 | torch.cuda.synchronize() 37 | start_time = time.time() 38 | for idx in range(max_iters): 39 | with torch.no_grad(): 40 | net(data) 41 | torch.cuda.synchronize() 42 | end_time = time.time() 43 | elapse = (end_time - start_time) 44 | 45 | print('Average %.3fms per forward in %d iteration (batch size %d, shape %dx%d)' % 46 | (1000 * elapse / max_iters, max_iters, batch, network_size[1], network_size[0])) 47 | 48 | 49 | if __name__ == '__main__': 50 | speed() 51 | -------------------------------------------------------------------------------- /examples/speed.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging as log 4 | import time 5 | from statistics import mean 6 | import numpy as np 7 | import torch 8 | from torchvision import transforms as tf 9 | from pprint import pformat 10 | 11 | import sys 12 | sys.path.insert(0, '.') 13 | 14 | import brambox.boxes as bbb 15 | import vedanet as vn 16 | from utils.envs import initEnv 17 | 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser(description='OneDet: an one stage framework based on PyTorch') 21 | parser.add_argument('model_name', help='model name', default=None) 22 | args = parser.parse_args() 23 | 24 | train_flag = 0 25 | config = initEnv(train_flag=train_flag, model_name=args.model_name) 26 | 27 | log.info('Config\n\n%s\n' % pformat(config)) 28 | 29 | # init env 30 | hyper_params = 
vn.hyperparams.HyperParams(config, train_flag=train_flag) 31 | 32 | # init and run eng 33 | vn.engine.speed(hyper_params) 34 | -------------------------------------------------------------------------------- /examples/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging as log 4 | import time 5 | from statistics import mean 6 | import numpy as np 7 | import torch 8 | from torchvision import transforms as tf 9 | from pprint import pformat 10 | 11 | import sys 12 | sys.path.insert(0, '.') 13 | 14 | import brambox.boxes as bbb 15 | import vedanet as vn 16 | from utils.envs import initEnv 17 | 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser(description='OneDet: an one stage framework based on PyTorch') 21 | parser.add_argument('model_name', help='model name', default=None) 22 | args = parser.parse_args() 23 | 24 | train_flag = 2 25 | config = initEnv(train_flag=train_flag, model_name=args.model_name) 26 | 27 | log.info('Config\n\n%s\n' % pformat(config)) 28 | 29 | # init env 30 | hyper_params = vn.hyperparams.HyperParams(config, train_flag=train_flag) 31 | 32 | # init and run eng 33 | vn.engine.VOCTest(hyper_params) 34 | -------------------------------------------------------------------------------- /examples/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging as log 4 | import time 5 | from statistics import mean 6 | import numpy as np 7 | import torch 8 | from torchvision import transforms as tf 9 | from pprint import pformat 10 | 11 | import sys 12 | sys.path.insert(0, '.') 13 | 14 | import brambox.boxes as bbb 15 | import vedanet as vn 16 | from utils.envs import initEnv, randomSeeding 17 | 18 | 19 | if __name__ == '__main__': 20 | 21 | parser = argparse.ArgumentParser(description='OneDet: an one stage framework based on PyTorch') 22 | parser.add_argument('model_name', help='model name', default=None) 23 | args = parser.parse_args() 24 | 25 | train_flag = 1 26 | config = initEnv(train_flag=train_flag, model_name=args.model_name) 27 | #randomSeeding(0) 28 | 29 | log.info('Config\n\n%s\n' % pformat(config)) 30 | 31 | # init env 32 | hyper_params = vn.hyperparams.HyperParams(config, train_flag=train_flag) 33 | 34 | # int eng 35 | eng = vn.engine.VOCTrainingEngine(hyper_params) 36 | 37 | # run eng 38 | b1 = eng.batch 39 | t1 = time.time() 40 | eng() 41 | t2 = time.time() 42 | b2 = eng.batch 43 | 44 | log.info(f'\nDuration of {b2-b1} batches: {t2-t1} seconds [{round((t2-t1)/(b2-b1), 3)} sec/batch]') 45 | -------------------------------------------------------------------------------- /figures/OSD_logo.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/ObjectDetection-OneStageDet/d29f69cdce32b006bd040edb6e66427b3c987c70/figures/OSD_logo.PNG -------------------------------------------------------------------------------- /outputs/README.md: -------------------------------------------------------------------------------- 1 | The snapshots and logs when training a model will be saved here. 
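For example, with the defaults in the cfgs (`output_root: "outputs"`, `output_version: "baseline"`, `backup_name: "weights"`, `log_name: "logs"`), training the `RegionShufflenet` model writes weight snapshots to `outputs/RegionShufflenet/baseline/weights/` and timestamped log files to `outputs/RegionShufflenet/baseline/logs/`; the paths are assembled by `initEnv` in `utils/envs.py`.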
2 | -------------------------------------------------------------------------------- /results/README.md: -------------------------------------------------------------------------------- 1 | Every line of the files in this folder has the following format: 2 | 3 | `img_name confidence xmin ymin xmax ymax` 4 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import envs 2 | -------------------------------------------------------------------------------- /utils/cfg_parser.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import sys 3 | import logging as log 4 | import os 5 | 6 | def parse(fp): 7 | with open(fp, 'r') as fd: 8 | cont = fd.read() 9 | y = yaml.safe_load(cont) 10 | return y 11 | 12 | def getConfig(cfgs_root, model_name): 13 | #cfgs_root = 'cfgs' 14 | main_cfg = parse('%s/main.yml' % cfgs_root) 15 | #model_name = main_cfg['model'] 16 | if model_name not in main_cfg['cfg_dict'].keys(): 17 | models = ', '.join(main_cfg['cfg_dict'].keys()) 18 | print('Unknown model %s. Available models: %s\n' % (model_name, models), file=sys.stderr) 19 | raise KeyError('unknown model: %s' % model_name) 20 | cfg_fp = './' + cfgs_root + '/' + main_cfg['cfg_dict'][model_name] 21 | config = parse(cfg_fp) 22 | #config['model_name'] = model_name 23 | #print(config) 24 | return config 25 | -------------------------------------------------------------------------------- /utils/envs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import copy 4 | from datetime import datetime 5 | import logging 6 | import torch 7 | import random 8 | import numpy as np 9 | 10 | 11 | # individual packages 12 | from .fileproc import safeMakeDirs 13 | from .cfg_parser import getConfig 14 | 15 | 16 | def setLogging(log_dir, stdout_flag): 17 | safeMakeDirs(log_dir) 18 | dt = datetime.now() 19 | log_name = dt.strftime('%Y-%m-%d_time_%H_%M_%S') + '.log' 20 | 21 | log_fp = os.path.join(log_dir, log_name) 22 | #print os.path.abspath(log_fp) 23 | 24 | if stdout_flag: 25 | logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', level=logging.DEBUG) 26 | else: 27 | logging.basicConfig(filename=log_fp, format='%(asctime)s:%(levelname)s:%(message)s', level=logging.DEBUG) 28 | 29 | 30 | def combineConfig(cur_cfg, train_flag): 31 | ret_cfg = {} 32 | for k, v in cur_cfg.items(): 33 | if k == 'train' or k == 'test' or k == 'speed': 34 | continue 35 | ret_cfg[k] = v 36 | if train_flag == 1: 37 | key = 'train' 38 | elif train_flag == 2: 39 | key = 'test' 40 | else: 41 | key = 'speed' 42 | for k, v in cur_cfg[key].items(): 43 | ret_cfg[k] = v 44 | return ret_cfg 45 | 46 | 47 | def initEnv(train_flag, model_name): 48 | cfgs_root = 'cfgs' 49 | cur_cfg = getConfig(cfgs_root, model_name) 50 | 51 | root_dir = cur_cfg['output_root'] 52 | cur_cfg['model_name'] = model_name 53 | version = cur_cfg['output_version'] 54 | work_dir = os.path.join(root_dir, model_name, version) 55 | 56 | backup_name = cur_cfg['backup_name'] 57 | log_name = cur_cfg['log_name'] 58 | 59 | backup_dir = os.path.join(work_dir, backup_name) 60 | log_dir = os.path.join(work_dir, log_name) 61 | 62 | 63 | if train_flag == 1: 64 | safeMakeDirs(backup_dir) 65 | stdout_flag = cur_cfg['train']['stdout'] 66 | setLogging(log_dir, stdout_flag) 67 | 68 | gpus = cur_cfg['train']['gpus'] 69 | os.environ['CUDA_VISIBLE_DEVICES'] = gpus 70 | 71 | cur_cfg['train']['backup_dir'] = backup_dir 72 | elif 
train_flag == 2: 73 | stdout_flag = cur_cfg['test']['stdout'] 74 | setLogging(log_dir, stdout_flag) 75 | 76 | gpus = cur_cfg['test']['gpus'] 77 | os.environ['CUDA_VISIBLE_DEVICES'] = gpus 78 | else: 79 | gpus = cur_cfg['speed']['gpus'] 80 | os.environ['CUDA_VISIBLE_DEVICES'] = gpus 81 | 82 | ret_cfg = combineConfig(cur_cfg, train_flag) 83 | 84 | return ret_cfg 85 | 86 | 87 | def randomSeeding(seed): 88 | np.random.seed(seed) 89 | torch.manual_seed(seed) 90 | torch.cuda.manual_seed(seed) 91 | torch.cuda.manual_seed_all(seed) 92 | random.seed(seed) 93 | 94 | 95 | if __name__ == '__main__': 96 | pass 97 | -------------------------------------------------------------------------------- /utils/fileproc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def safeMakeDir(tdir): 4 | if not os.path.isdir(tdir): 5 | os.mkdir(tdir) 6 | 7 | def safeMakeDirs(tdir): 8 | if not os.path.isdir(tdir): 9 | os.makedirs(tdir) 10 | -------------------------------------------------------------------------------- /utils/test/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /utils/test/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /utils/test/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | 9 | def unique_boxes(boxes, scale=1.0): 10 | """Return indices of unique boxes.""" 11 | v = np.array([1, 1e3, 1e6, 1e9]) 12 | hashes = np.round(boxes * scale).dot(v) 13 | _, index = np.unique(hashes, return_index=True) 14 | return np.sort(index) 15 | 16 | def xywh_to_xyxy(boxes): 17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 19 | 20 | def xyxy_to_xywh(boxes): 21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 23 | 24 | def validate_boxes(boxes, width=0, height=0): 25 | """Check that a set of boxes are valid.""" 26 | x1 = boxes[:, 0] 27 | y1 = boxes[:, 1] 28 | x2 = boxes[:, 2] 29 | y2 = boxes[:, 3] 30 | assert (x1 >= 0).all() 31 | assert (y1 >= 0).all() 32 | assert (x2 >= x1).all() 33 | assert (y2 >= y1).all() 34 | assert (x2 < width).all() 35 | assert (y2 < height).all() 36 | 37 | def filter_small_boxes(boxes, min_size): 38 | w = boxes[:, 2] - boxes[:, 0] 39 | h = boxes[:, 3] - boxes[:, 1] 40 | keep = np.where((w >= min_size) & (h > min_size))[0] 41 | return keep 42 | -------------------------------------------------------------------------------- /utils/test/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # 
-------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | from datasets.pascal_voc import pascal_voc 13 | from datasets.coco import coco 14 | from datasets.person_head import person_head 15 | import numpy as np 16 | 17 | # Set up voc__ using selective search "fast" mode 18 | for year in ['2007', '2012', '0712']: 19 | for split in ['train', 'val', 'trainval', 'test']: 20 | name = 'voc_{}_{}'.format(year, split) 21 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 22 | 23 | 24 | # Set up coco_2014_ 25 | for year in ['2014']: 26 | for split in ['train', 'val', 'minival', 'valminusminival']: 27 | name = 'coco_{}_{}'.format(year, split) 28 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 29 | 30 | # Set up coco_2015_ 31 | for year in ['2015']: 32 | for split in ['test', 'test-dev']: 33 | name = 'coco_{}_{}'.format(year, split) 34 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 35 | 36 | __sets['person_head'] = (lambda: person_head()) 37 | 38 | def get_imdb(name): 39 | """Get an imdb (image database) by name.""" 40 | if name not in __sets: 41 | print(__sets) 42 | raise KeyError('Unknown dataset: {}'.format(name)) 43 | return __sets[name]() 44 | 45 | def list_imdbs(): 46 | """List all registered imdbs.""" 47 | return list(__sets.keys()) 48 | -------------------------------------------------------------------------------- /utils/test/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /utils/test/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from ..nms.gpu_nms import gpu_nms 9 | from ..nms.cpu_nms import cpu_nms, cpu_soft_nms 10 | import numpy as np 11 | 12 | def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.001, method=1): 13 | 14 | keep = cpu_soft_nms(np.ascontiguousarray(dets, dtype=np.float32), 15 | np.float32(sigma), np.float32(Nt), 16 | np.float32(threshold), 17 | np.uint8(method)) 18 | return keep 19 | 20 | 21 | # Original NMS implementation 22 | def nms(dets, thresh, force_cpu=False, gpu_id=None): 23 | """Dispatch to either CPU or GPU NMS implementations.""" 24 | if dets.shape[0] == 0: 25 | return [] 26 | if gpu_id is not None and not force_cpu: 27 | return gpu_nms(dets, thresh, device_id=gpu_id) 28 | else: 29 | return cpu_nms(dets, thresh) 30 | -------------------------------------------------------------------------------- /utils/test/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | 
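A minimal usage sketch of the `nms` dispatcher above (an illustration, not part of the original repository): it assumes the Cython/CUDA extensions have been built from utils/test with `python setup.py build_ext --inplace` (see the Makefile earlier) and that Python is started from the repository root, which is how vedanet/engine uses these modules.

import numpy as np
from utils.test.fast_rcnn.nms_wrapper import nms

# Each row is [xmin, ymin, xmax, ymax, confidence], the same layout that
# voc_wrapper.genResults feeds to nms() when writing the VOC result files.
dets = np.array([
    [ 10.0,  10.0, 110.0, 120.0, 0.90],
    [ 12.0,  14.0, 108.0, 118.0, 0.75],  # overlaps the first box heavily -> suppressed
    [200.0,  50.0, 260.0, 140.0, 0.60],
], dtype=np.float32)

keep = nms(dets, thresh=0.45, force_cpu=True)  # 0.45 matches nms_thresh in the cfgs; CPU path
print(keep)  # expected: [0, 2]

The pure-Python `py_cpu_nms` shown further below produces the same keep list without compiling the extensions, which can be handy for a quick sanity check.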
-------------------------------------------------------------------------------- /utils/test/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/ObjectDetection-OneStageDet/d29f69cdce32b006bd040edb6e66427b3c987c70/utils/test/nms/__init__.py -------------------------------------------------------------------------------- /utils/test/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /utils/test/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /utils/test/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /utils/test/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | 
https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /utils/test/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /utils/test/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /utils/test/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. 
Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /utils/test/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. 
version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 
55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /utils/test/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.so 3 | -------------------------------------------------------------------------------- /utils/test/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /utils/test/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /utils/test/voc_wrapper.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from PIL import Image 4 | from .fast_rcnn.nms_wrapper import nms, soft_nms 5 | 6 | 7 | def genResults(reorg_dets, results_folder, nms_thresh=0.45): 8 | for label, pieces in reorg_dets.items(): 9 | ret = [] 10 | dst_fp = '%s/comp4_det_test_%s.txt' % (results_folder, label) 11 | for name in pieces.keys(): 12 | pred = np.array(pieces[name], dtype=np.float32) 13 | keep = nms(pred, nms_thresh, force_cpu=True) 14 | #keep = soft_nms(pred, sigma=0.5, Nt=0.3, method=1) 15 | #print k, len(keep), len(pred_dets[k]) 16 | for ik in keep: 17 | #print k, pred_left[ik][-1], ' '.join([str(int(num)) for num in pred_left[ik][:4]]) 18 | line ='%s %f %s' % (name, pred[ik][-1], ' '.join([str(num) for num in pred[ik][:4]])) 19 | ret.append(line) 20 | 21 | with open(dst_fp, 'w') as fd: 22 | 
fd.write('\n'.join(ret)) 23 | 24 | 25 | def reorgDetection(dets, netw, neth): #, prefix): 26 | reorg_dets = {} 27 | for k, v in dets.items(): 28 | #img_fp = '%s/%s.jpg' % (prefix, k) 29 | img_fp = k #'%s/%s.jpg' % (prefix, k) 30 | #name = k.split('/')[-1] 31 | name = k.split('/')[-1][:-4] 32 | 33 | with Image.open(img_fp) as fd: 34 | orig_width, orig_height = fd.size 35 | scale = min(float(netw)/orig_width, float(neth)/orig_height) 36 | new_width = orig_width * scale 37 | new_height = orig_height * scale 38 | pad_w = (netw - new_width) / 2.0 39 | pad_h = (neth - new_height) / 2.0 40 | 41 | for iv in v: 42 | xmin = iv.x_top_left 43 | ymin = iv.y_top_left 44 | xmax = xmin + iv.width 45 | ymax = ymin + iv.height 46 | conf = iv.confidence 47 | class_label = iv.class_label 48 | #print(xmin, ymin, xmax, ymax) 49 | 50 | xmin = max(0, float(xmin - pad_w)/scale) 51 | xmax = min(orig_width - 1,float(xmax - pad_w)/scale) 52 | ymin = max(0, float(ymin - pad_h)/scale) 53 | ymax = min(orig_height - 1, float(ymax - pad_h)/scale) 54 | 55 | reorg_dets.setdefault(class_label, {}) 56 | reorg_dets[class_label].setdefault(name, []) 57 | #line = '%s %f %f %f %f %f' % (name, conf, xmin, ymin, xmax, ymax) 58 | piece = (xmin, ymin, xmax, ymax, conf) 59 | reorg_dets[class_label][name].append(piece) 60 | 61 | return reorg_dets 62 | 63 | 64 | def main(): 65 | netw, neth = 416, 416 66 | results_folder = 'results_test' 67 | prefix = '/data2/yichaoxiong/data/VOCdevkit' 68 | with open('yolov2_bilinear_85000_416_bilinear.pkl', 'rb') as fd: 69 | dets = pickle.load(fd) 70 | reorg_dets = reorgDetection(dets, netw, neth, prefix) 71 | genResults(reorg_dets, results_folder) 72 | 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /vedanet/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Lightnet : Darknet building blocks implemented in pytorch 3 | # Copyright EAVISE 4 | # 5 | 6 | __all__ = ['network', 'data', 'engine', 'models'] 7 | 8 | 9 | #from .version import __version__ 10 | 11 | from . import network 12 | from . import data 13 | from . import engine 14 | from . import models 15 | from . import loss 16 | from . import hyperparams 17 | -------------------------------------------------------------------------------- /vedanet/data/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lightnet Data Module |br| 3 | This module contains everything related to pre- and post-processing of your data. 4 | It also has functionality to create datasets from images and annotations that are parseable with brambox_. 5 | """ 6 | 7 | from ._dataloading import * 8 | from . import transform 9 | # Lightnet 10 | from ._dataset_brambox import * 11 | from ._dataset_darknet import * 12 | -------------------------------------------------------------------------------- /vedanet/data/_dataset_brambox.py: -------------------------------------------------------------------------------- 1 | # 2 | # Lightnet dataset that works with brambox annotations 3 | # Copyright EAVISE 4 | # 5 | 6 | import os 7 | import copy 8 | import logging as log 9 | from PIL import Image 10 | import random 11 | 12 | import brambox.boxes as bbb 13 | from ._dataloading import Dataset 14 | 15 | __all__ = ['BramboxDataset'] 16 | 17 | 18 | class BramboxDataset(Dataset): 19 | """ Dataset for any brambox parsable annotation format. 
20 | 21 | Args: 22 | anno_format (brambox.boxes.formats): Annotation format 23 | anno_filename (list or str): Annotation filename, list of filenames or expandable sequence 24 | input_dimension (tuple): (width,height) tuple with default dimensions of the network 25 | class_label_map (list): List of class_labels 26 | identify (function, optional): Lambda/function to get image based of annotation filename or image id; Default **replace/add .png extension to filename/id** 27 | img_transform (torchvision.transforms.Compose): Transforms to perform on the images 28 | anno_transform (torchvision.transforms.Compose): Transforms to perform on the annotations 29 | kwargs (dict): Keyword arguments that are passed to the brambox parser 30 | """ 31 | def __init__(self, anno_format, anno_filename, input_dimension, class_label_map=None, identify=None, img_transform=None, anno_transform=None, **kwargs): 32 | super().__init__(input_dimension) 33 | self.img_tf = img_transform 34 | self.anno_tf = anno_transform 35 | if callable(identify): 36 | self.id = identify 37 | else: 38 | self.id = lambda name: os.path.splitext(name)[0] + '.png' 39 | 40 | # Get annotations 41 | self.annos = bbb.parse(anno_format, anno_filename, identify=lambda f: f, class_label_map=class_label_map, **kwargs) 42 | self.keys = list(self.annos) 43 | 44 | # Add class_ids 45 | if class_label_map is None: 46 | log.warn(f'No class_label_map given, annotations wont have a class_id values for eg. loss function') 47 | for k, annos in self.annos.items(): 48 | for a in annos: 49 | if class_label_map is not None: 50 | try: 51 | a.class_id = class_label_map.index(a.class_label) 52 | except ValueError as err: 53 | raise ValueError(f'{a.class_label} is not found in the class_label_map') from err 54 | else: 55 | a.class_id = 0 56 | 57 | log.info(f'Dataset loaded: {len(self.keys)} images') 58 | 59 | def __len__(self): 60 | return len(self.keys) 61 | 62 | @Dataset.resize_getitem 63 | def __getitem__(self, index): 64 | """ Get transformed image and annotations based of the index of ``self.keys`` 65 | 66 | Args: 67 | index (int): index of the ``self.keys`` list containing all the image identifiers of the dataset. 68 | 69 | Returns: 70 | tuple: (transformed image, list of transformed brambox boxes) 71 | """ 72 | if index >= len(self): 73 | raise IndexError(f'list index out of range [{index}/{len(self)-1}]') 74 | 75 | # Load 76 | img = Image.open(self.id(self.keys[index])) 77 | anno = copy.deepcopy(self.annos[self.keys[index]]) 78 | random.shuffle(anno) 79 | 80 | # Transform 81 | if self.img_tf is not None: 82 | img = self.img_tf(img) 83 | if self.anno_tf is not None: 84 | anno = self.anno_tf(anno) 85 | 86 | return img, anno 87 | -------------------------------------------------------------------------------- /vedanet/data/_dataset_darknet.py: -------------------------------------------------------------------------------- 1 | # 2 | # Lightnet dataset that uses the same files and structure as darknet and performs the same data augmentations. 3 | # Copyright EAVISE 4 | # 5 | 6 | import os 7 | from PIL import Image 8 | from torchvision import transforms as tf 9 | 10 | from ._dataset_brambox import BramboxDataset 11 | from . import transform as vnd_transform 12 | 13 | __all__ = ['DarknetDataset'] 14 | 15 | 16 | class DarknetDataset(BramboxDataset): 17 | """ Dataset that works with darknet files and performs the same data augmentations. 18 | You must use this dataset with the :meth:`~lightnet.data.list_collate` function in a dataloader. 
19 | If you enable the data augmentation you must also use the :class:`~lightnet.data.DataLoader` class as dataloader. 20 | 21 | Args: 22 | data_file (str): File containing path to image files (relative from where command is run) 23 | augment (Boolean, optional): Whether or not you want data augmentation; Default **True** 24 | input_dimension (tuple): Input dimension of the network width,height; Default **416,416** 25 | jitter (Number [0-1], optional): Determines random crop sizes; Default **0.2** 26 | flip (Number [0-1], optional): Determines whether image will be flipped; Default **0.5** 27 | hue (Number, optional): Determines hue shift; Default **0.1** 28 | saturation (Number, optional): Determines saturation shift; Default **1.5** 29 | value (Number, optional): Determines value (exposure) shift; Default **1.5** 30 | class_label_map (list, optional): class label map to convert class names to an index; Default **None** 31 | 32 | Returns: 33 | tuple: image_tensor, list of brambox boxes 34 | """ 35 | def __init__(self, data_file, augment=True, input_dimension=(416, 416), jitter=.2, flip=.5, hue=.1, saturation=1.5, value=1.5, class_label_map=None): 36 | def identify(name): 37 | return self.img_paths[self.anno_paths.index(name)] 38 | 39 | with open(data_file, 'r') as f: 40 | self.img_paths = f.read().splitlines() 41 | 42 | # Prepare variables for brambox init 43 | anno_format = 'anno_darknet' 44 | self.anno_paths = [os.path.splitext(p)[0]+'.txt' for p in self.img_paths] 45 | 46 | lb = vnd_transform.Letterbox(dataset=self) 47 | rf = vnd_transform.RandomFlip(flip) 48 | rc = vnd_transform.RandomCrop(jitter, True) 49 | hsv = vnd_transform.HSVShift(hue, saturation, value) 50 | it = tf.ToTensor() 51 | if augment: 52 | img_tf = vnd_transform.Compose([hsv, rc, rf, lb, it]) 53 | anno_tf = vnd_transform.Compose([rc, rf, lb]) 54 | else: 55 | img_tf = vnd_transform.Compose([lb, it]) 56 | anno_tf = vnd_transform.Compose([lb]) 57 | 58 | first_img = Image.open(self.img_paths[0]) 59 | w, h = first_img.size 60 | kwargs = {'image_width': w, 'image_height': h} 61 | 62 | super().__init__(anno_format, self.anno_paths, input_dimension, class_label_map, identify, img_tf, anno_tf, **kwargs) 63 | -------------------------------------------------------------------------------- /vedanet/data/transform/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Lightnet data transforms 3 | # Copyright EAVISE 4 | # 5 | 6 | from ._preprocess import * 7 | from ._postprocess import * 8 | from .util import * 9 | -------------------------------------------------------------------------------- /vedanet/data/transform/util.py: -------------------------------------------------------------------------------- 1 | # 2 | # Lightnet related data processing 3 | # Utilitary classes and functions for the data subpackage 4 | # Copyright EAVISE 5 | # 6 | 7 | from abc import ABC, abstractmethod 8 | 9 | __all__ = ['Compose'] 10 | 11 | 12 | class Compose(list): 13 | """ This is lightnet's own version of :class:`torchvision.transforms.Compose`. 14 | 15 | Note: 16 | The reason we have our own version is because this one offers more freedom to the user. 17 | For all intends and purposes this class is just a list. 18 | This `Compose` version allows the user to access elements through index, append items, extend it with another list, etc. 19 | When calling instances of this class, it behaves just like :class:`torchvision.transforms.Compose`. 
20 | 21 | Note: 22 | I proposed to change :class:`torchvision.transforms.Compose` to something similar to this version, 23 | which would render this class useless. In the meantime, we use our own version 24 | and you can track `the issue`_ to see if and when this comes to torchvision. 25 | 26 | Example: 27 | >>> tf = ln.data.transform.Compose([lambda n: n+1]) 28 | >>> tf(10) # 10+1 29 | 11 30 | >>> tf.append(lambda n: n*2) 31 | >>> tf(10) # (10+1)*2 32 | 22 33 | >>> tf.insert(0, lambda n: n//2) 34 | >>> tf(10) # ((10//2)+1)*2 35 | 12 36 | >>> del tf[2] 37 | >>> tf(10) # (10//2)+1 38 | 6 39 | 40 | .. _the issue: https://github.com/pytorch/vision/issues/456 41 | """ 42 | def __call__(self, data): 43 | for tf in self: 44 | data = tf(data) 45 | return data 46 | 47 | def __repr__(self): 48 | format_string = self.__class__.__name__ + ' [' 49 | for tf in self: 50 | format_string += f'\n    {tf}' 51 | format_string += '\n]' 52 | return format_string 53 | 54 | 55 | class BaseTransform(ABC): 56 | """ Base transform class for the pre- and post-processing functions. 57 | This class allows you to create an object with case-specific settings, and then call it with the data to perform the transformation. 58 | It also allows you to call the classmethod ``apply`` with the data and settings. This is useful if you want to transform a single data object. 59 | """ 60 | def __init__(self, **kwargs): 61 | for key in kwargs: 62 | setattr(self, key, kwargs[key]) 63 | 64 | def __call__(self, data): 65 | return self.apply(data, **self.__dict__) 66 | 67 | @classmethod 68 | @abstractmethod 69 | def apply(cls, data, **kwargs): 70 | """ Classmethod that applies the transformation once. 71 | 72 | Args: 73 | data: Data to transform (e.g. image) 74 | **kwargs: Same arguments that are passed to the ``__init__`` function 75 | """ 76 | return data 77 | 78 | 79 | class BaseMultiTransform(ABC): 80 | """ Base multiple transform class that is mainly used in pre-processing functions. 81 | This class exists for transforms that affect both images and annotations. 82 | It provides a classmethod ``apply`` that will perform the transformation on one (data, target) pair. 83 | """ 84 | def __init__(self, **kwargs): 85 | for key in kwargs: 86 | setattr(self, key, kwargs[key]) 87 | 88 | @abstractmethod 89 | def __call__(self, data): 90 | return data 91 | 92 | @classmethod 93 | def apply(cls, data, target=None, **kwargs): 94 | """ Classmethod that applies the transformation once. 95 | 96 | Args: 97 | data: Data to transform (e.g. image) 98 | target (optional): ground truth for that data; Default **None** 99 | **kwargs: Same arguments that are passed to the ``__init__`` function 100 | """ 101 | obj = cls(**kwargs) 102 | res_data = obj(data) 103 | 104 | if target is None: 105 | return res_data 106 | 107 | res_target = obj(target) 108 | return res_data, res_target 109 | -------------------------------------------------------------------------------- /vedanet/engine/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lightnet Engine Module |br| 3 | This module contains classes and functions to manage the training of your networks. 4 | It has an engine, capable of orchestrating your training and test cycles, and also contains functions to easily visualise data with visdom_. 
5 | """ 6 | 7 | 8 | #from .engine import * 9 | from ._voc_train import * 10 | from ._voc_test import * 11 | from ._speed import * 12 | -------------------------------------------------------------------------------- /vedanet/engine/_speed.py: -------------------------------------------------------------------------------- 1 | import logging as log 2 | import time 3 | import torch 4 | from torchvision import transforms as tf 5 | from statistics import mean 6 | import os 7 | 8 | from .. import data as vn_data 9 | from .. import models 10 | from . import engine 11 | from utils.test import voc_wrapper 12 | 13 | __all__ = ['speed'] 14 | 15 | 16 | def speed(hyper_params): 17 | log.debug('Creating network') 18 | 19 | model_name = hyper_params.model_name 20 | batch = hyper_params.batch 21 | use_cuda = hyper_params.cuda 22 | network_size = hyper_params.network_size 23 | max_iters = hyper_params.max_iters 24 | 25 | net = models.__dict__[model_name](hyper_params.classes, train_flag=0) 26 | net.eval() 27 | print('Net structure\n%s' % net) 28 | 29 | if use_cuda: 30 | net.cuda() 31 | 32 | log.debug('Running network') 33 | 34 | data = torch.randn(batch, 3, network_size[1], network_size[0], dtype=torch.float) 35 | if use_cuda: 36 | data = data.cuda() 37 | 38 | torch.cuda.synchronize() 39 | start_time = time.time() 40 | for idx in range(max_iters): 41 | with torch.no_grad(): 42 | net(data) 43 | torch.cuda.synchronize() 44 | end_time = time.time() 45 | elapse = (end_time - start_time) 46 | 47 | print('%s: Average %.3fms per forward in %d iteration (batch size %d, shape %dx%d)' % 48 | (model_name, 1000 * elapse / max_iters, max_iters, batch, network_size[0], network_size[1])) 49 | 50 | 51 | -------------------------------------------------------------------------------- /vedanet/engine/_voc_test.py: -------------------------------------------------------------------------------- 1 | import logging as log 2 | import torch 3 | from torchvision import transforms as tf 4 | from statistics import mean 5 | import os 6 | 7 | from .. import data as vn_data 8 | from .. import models 9 | from . 
import engine 10 | from utils.test import voc_wrapper 11 | 12 | __all__ = ['VOCTest'] 13 | 14 | class CustomDataset(vn_data.BramboxDataset): 15 | def __init__(self, hyper_params): 16 | anno = hyper_params.testfile 17 | root = hyper_params.data_root 18 | network_size = hyper_params.network_size 19 | labels = hyper_params.labels 20 | 21 | 22 | lb = vn_data.transform.Letterbox(network_size) 23 | it = tf.ToTensor() 24 | img_tf = vn_data.transform.Compose([lb, it]) 25 | anno_tf = vn_data.transform.Compose([lb]) 26 | 27 | def identify(img_id): 28 | return f'{img_id}' 29 | 30 | super(CustomDataset, self).__init__('anno_pickle', anno, network_size, labels, identify, img_tf, anno_tf) 31 | 32 | def __getitem__(self, index): 33 | img, anno = super(CustomDataset, self).__getitem__(index) 34 | for a in anno: 35 | a.ignore = a.difficult # Mark difficult annotations as ignore for pr metric 36 | return img, anno 37 | 38 | 39 | def VOCTest(hyper_params): 40 | log.debug('Creating network') 41 | 42 | model_name = hyper_params.model_name 43 | batch = hyper_params.batch 44 | use_cuda = hyper_params.cuda 45 | weights = hyper_params.weights 46 | conf_thresh = hyper_params.conf_thresh 47 | network_size = hyper_params.network_size 48 | labels = hyper_params.labels 49 | nworkers = hyper_params.nworkers 50 | pin_mem = hyper_params.pin_mem 51 | nms_thresh = hyper_params.nms_thresh 52 | #prefix = hyper_params.prefix 53 | results = hyper_params.results 54 | 55 | test_args = {'conf_thresh': conf_thresh, 'network_size': network_size, 'labels': labels} 56 | net = models.__dict__[model_name](hyper_params.classes, weights, train_flag=2, test_args=test_args) 57 | net.eval() 58 | log.info('Net structure\n%s' % net) 59 | #import pdb 60 | #pdb.set_trace() 61 | if use_cuda: 62 | net.cuda() 63 | 64 | log.debug('Creating dataset') 65 | loader = torch.utils.data.DataLoader( 66 | CustomDataset(hyper_params), 67 | batch_size = batch, 68 | shuffle = False, 69 | drop_last = False, 70 | num_workers = nworkers if use_cuda else 0, 71 | pin_memory = pin_mem if use_cuda else False, 72 | collate_fn = vn_data.list_collate, 73 | ) 74 | 75 | log.debug('Running network') 76 | tot_loss = [] 77 | coord_loss = [] 78 | conf_loss = [] 79 | cls_loss = [] 80 | anno, det = {}, {} 81 | num_det = 0 82 | 83 | for idx, (data, box) in enumerate(loader): 84 | if (idx + 1) % 20 == 0: 85 | log.info('%d/%d' % (idx + 1, len(loader))) 86 | if use_cuda: 87 | data = data.cuda() 88 | with torch.no_grad(): 89 | output, loss = net(data, box) 90 | 91 | key_val = len(anno) 92 | anno.update({loader.dataset.keys[key_val+k]: v for k,v in enumerate(box)}) 93 | det.update({loader.dataset.keys[key_val+k]: v for k,v in enumerate(output)}) 94 | 95 | netw, neth = network_size 96 | reorg_dets = voc_wrapper.reorgDetection(det, netw, neth) #, prefix) 97 | voc_wrapper.genResults(reorg_dets, results, nms_thresh) 98 | 99 | 100 | -------------------------------------------------------------------------------- /vedanet/hyperparams.py: -------------------------------------------------------------------------------- 1 | import logging as log 2 | import torch 3 | 4 | __all__ = ['HyperParams'] 5 | 6 | class HyperParams(object): 7 | def __init__(self, config, train_flag=1): 8 | 9 | self.cuda = True 10 | self.labels = config['labels'] 11 | self.classes = len(self.labels) 12 | self.data_root = config['data_root_dir'] 13 | self.model_name = config['model_name'] 14 | 15 | # cuda check 16 | if self.cuda: 17 | if not torch.cuda.is_available(): 18 | log.debug('CUDA not available') 19 | self.cuda = 
False 20 | else: 21 | log.debug('CUDA enabled') 22 | 23 | if train_flag == 1: 24 | cur_cfg = config 25 | 26 | self.nworkers = cur_cfg['nworkers'] 27 | self.pin_mem = cur_cfg['pin_mem'] 28 | dataset = cur_cfg['dataset'] 29 | self.trainfile = f'{self.data_root}/{dataset}.pkl' 30 | 31 | self.network_size = cur_cfg['input_shape'] 32 | 33 | self.batch = cur_cfg['batch_size'] 34 | self.mini_batch = cur_cfg['mini_batch_size'] 35 | self.max_batches = cur_cfg['max_batches'] 36 | 37 | self.jitter = 0.3 38 | self.flip = 0.5 39 | self.hue = 0.1 40 | self.sat = 1.5 41 | self.val = 1.5 42 | 43 | self.learning_rate = cur_cfg['warmup_lr'] 44 | self.momentum = cur_cfg['momentum'] 45 | self.decay = cur_cfg['decay'] 46 | self.lr_steps = cur_cfg['lr_steps'] 47 | self.lr_rates = cur_cfg['lr_rates'] 48 | 49 | self.backup = cur_cfg['backup_interval'] 50 | self.bp_steps = cur_cfg['backup_steps'] 51 | self.bp_rates = cur_cfg['backup_rates'] 52 | self.backup_dir = cur_cfg['backup_dir'] 53 | 54 | self.resize = cur_cfg['resize_interval'] 55 | self.rs_steps = [] 56 | self.rs_rates = [] 57 | 58 | self.weights = cur_cfg['weights'] 59 | self.clear = cur_cfg['clear'] 60 | elif train_flag == 2: 61 | cur_cfg = config 62 | 63 | dataset = cur_cfg['dataset'] 64 | self.testfile = f'{self.data_root}/{dataset}.pkl' 65 | self.nworkers = cur_cfg['nworkers'] 66 | self.pin_mem = cur_cfg['pin_mem'] 67 | self.network_size = cur_cfg['input_shape'] 68 | self.batch = cur_cfg['batch_size'] 69 | self.weights = cur_cfg['weights'] 70 | self.conf_thresh = cur_cfg['conf_thresh'] 71 | self.nms_thresh = cur_cfg['nms_thresh'] 72 | self.results = cur_cfg['results'] 73 | 74 | else: 75 | cur_cfg = config 76 | 77 | self.network_size = cur_cfg['input_shape'] 78 | self.batch = cur_cfg['batch_size'] 79 | self.max_iters = cur_cfg['max_iters'] 80 | 81 | -------------------------------------------------------------------------------- /vedanet/loss/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Lightnet loss functions 3 | # Copyright EAVISE 4 | # 5 | 6 | from ._regionloss import * 7 | from ._yololoss import * 8 | -------------------------------------------------------------------------------- /vedanet/loss/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def bbox_ious(boxes1, boxes2): 4 | """ Compute IOU between all boxes from ``boxes1`` with all boxes from ``boxes2``. 5 | 6 | Args: 7 | boxes1 (torch.Tensor): List of bounding boxes 8 | boxes2 (torch.Tensor): List of bounding boxes 9 | 10 | Note: 11 | List format: [[xc, yc, w, h],...] 
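Example (an illustrative sketch only; it assumes the centre-based ``[[xc, yc, w, h], ...]`` layout described above):
    >>> a = torch.tensor([[50., 50., 20., 20.]])                          # one 20x20 box centred at (50, 50)
    >>> b = torch.tensor([[50., 50., 20., 20.], [100., 100., 20., 20.]])  # the same box plus a disjoint one
    >>> bbox_ious(a, b)                                                   # tensor([[1., 0.]]) with shape (len(boxes1), len(boxes2))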
12 | """ 13 | b1_len = boxes1.size(0) 14 | b2_len = boxes2.size(0) 15 | 16 | b1x1, b1y1 = (boxes1[:, :2] - (boxes1[:, 2:4] / 2)).split(1, 1) 17 | b1x2, b1y2 = (boxes1[:, :2] + (boxes1[:, 2:4] / 2)).split(1, 1) 18 | b2x1, b2y1 = (boxes2[:, :2] - (boxes2[:, 2:4] / 2)).split(1, 1) 19 | b2x2, b2y2 = (boxes2[:, :2] + (boxes2[:, 2:4] / 2)).split(1, 1) 20 | 21 | dx = (b1x2.min(b2x2.t()) - b1x1.max(b2x1.t())).clamp(min=0) 22 | dy = (b1y2.min(b2y2.t()) - b1y1.max(b2y1.t())).clamp(min=0) 23 | intersections = dx * dy 24 | 25 | areas1 = (b1x2 - b1x1) * (b1y2 - b1y1) 26 | areas2 = (b2x2 - b2x1) * (b2y2 - b2y1) 27 | unions = (areas1 + areas2.t()) - intersections 28 | 29 | return intersections / unions 30 | 31 | 32 | -------------------------------------------------------------------------------- /vedanet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from ._yolov3 import * 2 | from ._yolov2 import * 3 | from ._region_mobilenet import * 4 | from ._region_mobilenetv2 import * 5 | from ._tiny_yolov2 import * 6 | from ._tiny_yolov3 import * 7 | from ._region_shufflenet import * 8 | from ._region_shufflenetv2 import * 9 | from ._region_squeezenext import * 10 | from ._region_xception import * 11 | from ._region_light_xception import * 12 | -------------------------------------------------------------------------------- /vedanet/models/_region_light_xception.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['RegionLightXception'] 11 | 12 | 13 | class RegionLightXception(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors = [(42.31,55.41), (102.17,128.30), (161.79,259.17), (303.08,154.90), (359.56,320.23)], 16 | anchors_mask=[(0,1,2,3,4)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | self.backbone = backbone.LightXception() 32 | self.head = head.RegionLightXception(num_anchors=len(anchors_mask[0]), num_classes=num_classes) 33 | 34 | if weights_file is not None: 35 | self.load_weights(weights_file, clear) 36 | else: 37 | self.init_weights(slope=0.1) 38 | 39 | def _forward(self, x): 40 | middle_feats = self.backbone(x) 41 | features = self.head(middle_feats) 42 | loss_fn = loss.RegionLoss 43 | 44 | self.compose(x, features, loss_fn) 45 | 46 | return features 47 | 48 | def modules_recurse(self, mod=None): 49 | """ This function will recursively loop over all module children. 
50 | 51 | Args: 52 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 53 | """ 54 | if mod is None: 55 | mod = self 56 | 57 | for module in mod.children(): 58 | if isinstance(module, (nn.ModuleList, nn.Sequential)): 59 | yield from self.modules_recurse(module) 60 | else: 61 | yield module 62 | -------------------------------------------------------------------------------- /vedanet/models/_region_mobilenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['RegionMobilenet'] 11 | 12 | 13 | class RegionMobilenet(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors = [(42.31,55.41), (102.17,128.30), (161.79,259.17), (303.08,154.90), (359.56,320.23)], 16 | anchors_mask=[(0,1,2,3,4)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | self.backbone = backbone.Mobilenet() 32 | self.head = head.RegionMobilenet(num_anchors=len(anchors_mask[0]), num_classes=num_classes) 33 | 34 | if weights_file is not None: 35 | self.load_weights(weights_file, clear) 36 | else: 37 | self.init_weights(slope=0.1) 38 | 39 | def _forward(self, x): 40 | middle_feats = self.backbone(x) 41 | features = self.head(middle_feats) 42 | loss_fn = loss.RegionLoss 43 | 44 | self.compose(x, features, loss_fn) 45 | 46 | return features 47 | 48 | def modules_recurse(self, mod=None): 49 | """ This function will recursively loop over all module children. 50 | 51 | Args: 52 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 53 | """ 54 | if mod is None: 55 | mod = self 56 | 57 | for module in mod.children(): 58 | if isinstance(module, (nn.ModuleList, nn.Sequential)): 59 | yield from self.modules_recurse(module) 60 | else: 61 | yield module 62 | -------------------------------------------------------------------------------- /vedanet/models/_region_mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. 
import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['RegionMobilenetv2'] 11 | 12 | 13 | class RegionMobilenetv2(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, 15 | anchors = [(42.31,55.41), (102.17,128.30), (161.79,259.17), (303.08,154.90), (359.56,320.23)], 16 | anchors_mask=[(0,1,2,3,4)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | self.backbone = backbone.Mobilenetv2() 32 | self.head = head.RegionMobilenetv2(num_anchors=len(anchors_mask[0]), num_classes=num_classes) 33 | 34 | if weights_file is not None: 35 | self.load_weights(weights_file, clear) 36 | else: 37 | self.init_weights(slope=0.1) 38 | 39 | def _forward(self, x): 40 | middle_feats = self.backbone(x) 41 | features = self.head(middle_feats) 42 | loss_fn = loss.RegionLoss 43 | 44 | self.compose(x, features, loss_fn) 45 | 46 | return features 47 | 48 | def modules_recurse(self, mod=None): 49 | """ This function will recursively loop over all module children. 50 | 51 | Args: 52 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 53 | """ 54 | if mod is None: 55 | mod = self 56 | 57 | for module in mod.children(): 58 | if isinstance(module, (nn.ModuleList, nn.Sequential, backbone.Mobilenetv2, head.RegionMobilenetv2)): 59 | yield from self.modules_recurse(module) 60 | else: 61 | yield module 62 | -------------------------------------------------------------------------------- /vedanet/models/_region_shufflenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['RegionShufflenet'] 11 | 12 | 13 | class RegionShufflenet(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors = [(42.31,55.41), (102.17,128.30), (161.79,259.17), (303.08,154.90), (359.56,320.23)], 16 | anchors_mask=[(0,1,2,3,4)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | self.backbone = backbone.shufflenetg2() 32 | self.head = head.RegionShufflenet(num_anchors=len(anchors_mask[0]), num_classes=num_classes) 33 | 34 | if weights_file is not None: 35 | self.load_weights(weights_file, clear) 36 | else: 37 | self.init_weights(slope=0.1) 38 | 39 | def _forward(self, x): 40 | middle_feats = self.backbone(x) 41 | features = self.head(middle_feats) 42 | loss_fn = loss.RegionLoss 43 | 44 | self.compose(x, features, loss_fn) 45 | 46 | return features 47 | 48 | def modules_recurse(self, mod=None): 49 | """ This function will recursively loop over all module children. 
50 | 51 | Args: 52 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 53 | """ 54 | if mod is None: 55 | mod = self 56 | 57 | for module in mod.children(): 58 | if isinstance(module, (nn.ModuleList, nn.Sequential)): 59 | yield from self.modules_recurse(module) 60 | else: 61 | yield module 62 | -------------------------------------------------------------------------------- /vedanet/models/_region_shufflenetv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['RegionShufflenetv2'] 11 | 12 | 13 | class RegionShufflenetv2(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors = [(42.31,55.41), (102.17,128.30), (161.79,259.17), (303.08,154.90), (359.56,320.23)], 16 | anchors_mask=[(0,1,2,3,4)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | self.backbone = backbone.shufflenetv2() 32 | self.head = head.RegionShufflenetv2(num_anchors=len(anchors_mask[0]), num_classes=num_classes) 33 | 34 | if weights_file is not None: 35 | self.load_weights(weights_file, clear) 36 | else: 37 | self.init_weights(slope=0.1) 38 | 39 | def _forward(self, x): 40 | middle_feats = self.backbone(x) 41 | features = self.head(middle_feats) 42 | loss_fn = loss.RegionLoss 43 | 44 | self.compose(x, features, loss_fn) 45 | 46 | return features 47 | 48 | def modules_recurse(self, mod=None): 49 | """ This function will recursively loop over all module children. 50 | 51 | Args: 52 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 53 | """ 54 | if mod is None: 55 | mod = self 56 | 57 | for module in mod.children(): 58 | if isinstance(module, (nn.ModuleList, nn.Sequential)): 59 | yield from self.modules_recurse(module) 60 | else: 61 | yield module 62 | -------------------------------------------------------------------------------- /vedanet/models/_region_squeezenext.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. 
import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['RegionSqueezenext'] 11 | 12 | 13 | class RegionSqueezenext(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors = [(42.31,55.41), (102.17,128.30), (161.79,259.17), (303.08,154.90), (359.56,320.23)], 16 | anchors_mask=[(0,1,2,3,4)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | self.backbone = backbone.Squeezenext() 32 | self.head = head.RegionSqueezenext(num_anchors=len(anchors_mask[0]), num_classes=num_classes) 33 | 34 | if weights_file is not None: 35 | self.load_weights(weights_file, clear) 36 | else: 37 | self.init_weights(slope=0.1) 38 | 39 | def _forward(self, x): 40 | middle_feats = self.backbone(x) 41 | features = self.head(middle_feats) 42 | loss_fn = loss.RegionLoss 43 | 44 | self.compose(x, features, loss_fn) 45 | 46 | return features 47 | 48 | def modules_recurse(self, mod=None): 49 | """ This function will recursively loop over all module children. 50 | 51 | Args: 52 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 53 | """ 54 | if mod is None: 55 | mod = self 56 | 57 | for module in mod.children(): 58 | if isinstance(module, (nn.ModuleList, nn.Sequential)): 59 | yield from self.modules_recurse(module) 60 | else: 61 | yield module 62 | -------------------------------------------------------------------------------- /vedanet/models/_region_xception.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['RegionXception'] 11 | 12 | 13 | class RegionXception(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors = [(42.31,55.41), (102.17,128.30), (161.79,259.17), (303.08,154.90), (359.56,320.23)], 16 | anchors_mask=[(0,1,2,3,4)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | self.backbone = backbone.Xception() 32 | self.head = head.RegionXception(num_anchors=len(anchors_mask[0]), num_classes=num_classes) 33 | 34 | if weights_file is not None: 35 | self.load_weights(weights_file, clear) 36 | else: 37 | self.init_weights(slope=0.1) 38 | 39 | def _forward(self, x): 40 | middle_feats = self.backbone(x) 41 | features = self.head(middle_feats) 42 | loss_fn = loss.RegionLoss 43 | 44 | self.compose(x, features, loss_fn) 45 | 46 | return features 47 | 48 | def modules_recurse(self, mod=None): 49 | """ This function will recursively loop over all module children. 
50 | 51 | Args: 52 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 53 | """ 54 | if mod is None: 55 | mod = self 56 | 57 | for module in mod.children(): 58 | if isinstance(module, (nn.ModuleList, nn.Sequential)): 59 | yield from self.modules_recurse(module) 60 | else: 61 | yield module 62 | -------------------------------------------------------------------------------- /vedanet/models/_tiny_yolov2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['TinyYolov2'] 11 | 12 | 13 | class TinyYolov2(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors = [(42.31,55.41), (102.17,128.30), (161.79,259.17), (303.08,154.90), (359.56,320.23)], 16 | #anchors = [(18,22), (60,66), (107,175), (252,113), (313,293)], 17 | anchors_mask=[(0,1,2,3,4)], train_flag=1, clear=False, test_args=None): 18 | """ Network initialisation """ 19 | super().__init__() 20 | 21 | # Parameters 22 | self.num_classes = num_classes 23 | self.anchors = anchors 24 | self.anchors_mask = anchors_mask 25 | self.nloss = len(self.anchors_mask) 26 | self.train_flag = train_flag 27 | self.test_args = test_args 28 | 29 | self.loss = None 30 | self.postprocess = None 31 | 32 | self.backbone = backbone.TinyYolov2() 33 | self.head = head.TinyYolov2(num_anchors=len(anchors_mask[0]), num_classes=num_classes) 34 | 35 | if weights_file is not None: 36 | self.load_weights(weights_file, clear) 37 | else: 38 | self.init_weights(slope=0.1) 39 | 40 | def _forward(self, x): 41 | middle_feats = self.backbone(x) 42 | features = self.head(middle_feats) 43 | loss_fn = loss.RegionLoss 44 | #loss_fn = loss.MultiboxLoss 45 | 46 | self.compose(x, features, loss_fn) 47 | 48 | return features 49 | 50 | def modules_recurse(self, mod=None): 51 | """ This function will recursively loop over all module children. 52 | 53 | Args: 54 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 55 | """ 56 | if mod is None: 57 | mod = self 58 | 59 | for module in mod.children(): 60 | #print(module) 61 | if isinstance(module, (nn.ModuleList, nn.Sequential, backbone.TinyYolov2, head.TinyYolov2)): 62 | yield from self.modules_recurse(module) 63 | else: 64 | yield module 65 | -------------------------------------------------------------------------------- /vedanet/models/_tiny_yolov3.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. 
import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['TinyYolov3'] 11 | 12 | 13 | class TinyYolov3(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors=[(10,14), (23,27), (37,58), (81,82), (135,169), (344,319)], 16 | anchors_mask=[(3,4,5), (0,1,2)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | num_anchors_list = [len(x) for x in anchors_mask] 32 | 33 | self.backbone = backbone.TinyYolov3() 34 | self.head = head.TinyYolov3(num_classes, num_anchors_list) 35 | 36 | if weights_file is not None: 37 | self.load_weights(weights_file, clear) 38 | else: 39 | self.init_weights(slope=0.1) 40 | 41 | def _forward(self, x): 42 | middle_feats = self.backbone(x) 43 | features = self.head(middle_feats) 44 | loss_fn = loss.YoloLoss 45 | 46 | self.compose(x, features, loss_fn) 47 | 48 | return features 49 | 50 | def modules_recurse(self, mod=None): 51 | """ This function will recursively loop over all module children. 52 | 53 | Args: 54 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 55 | """ 56 | if mod is None: 57 | mod = self 58 | 59 | for module in mod.children(): 60 | if isinstance(module, (nn.ModuleList, nn.Sequential, backbone.Darknet53, 61 | backbone.Darknet53.custom_layers, head.Yolov3, head.Yolov3.custom_layers)): 62 | yield from self.modules_recurse(module) 63 | else: 64 | yield module 65 | -------------------------------------------------------------------------------- /vedanet/models/_yolov2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['Yolov2'] 11 | 12 | 13 | class Yolov2(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors = [(42.31,55.41), (102.17,128.30), (161.79,259.17), (303.08,154.90), (359.56,320.23)], 16 | anchors_mask=[(0,1,2,3,4)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | self.backbone = backbone.Darknet19() 32 | self.head = head.Yolov2(num_anchors=len(anchors_mask[0]), num_classes=num_classes) 33 | 34 | if weights_file is not None: 35 | self.load_weights(weights_file, clear) 36 | else: 37 | self.init_weights(slope=0.1) 38 | 39 | def _forward(self, x): 40 | middle_feats = self.backbone(x) 41 | features = self.head(middle_feats) 42 | loss_fn = loss.RegionLoss 43 | 44 | self.compose(x, features, loss_fn) 45 | 46 | return features 47 | 48 | def modules_recurse(self, mod=None): 49 | """ This function will recursively loop over all module children. 
50 | 51 | Args: 52 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 53 | """ 54 | if mod is None: 55 | mod = self 56 | 57 | for module in mod.children(): 58 | if isinstance(module, (nn.ModuleList, nn.Sequential, backbone.Darknet19, head.Yolov2)): 59 | yield from self.modules_recurse(module) 60 | else: 61 | yield module 62 | -------------------------------------------------------------------------------- /vedanet/models/_yolov3.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict, Iterable 3 | import torch 4 | import torch.nn as nn 5 | from .. import loss 6 | from .yolo_abc import YoloABC 7 | from ..network import backbone 8 | from ..network import head 9 | 10 | __all__ = ['Yolov3'] 11 | 12 | 13 | class Yolov3(YoloABC): 14 | def __init__(self, num_classes=20, weights_file=None, input_channels=3, 15 | anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)], 16 | anchors_mask=[(6,7,8), (3,4,5), (0,1,2)], train_flag=1, clear=False, test_args=None): 17 | """ Network initialisation """ 18 | super().__init__() 19 | 20 | # Parameters 21 | self.num_classes = num_classes 22 | self.anchors = anchors 23 | self.anchors_mask = anchors_mask 24 | self.nloss = len(self.anchors_mask) 25 | self.train_flag = train_flag 26 | self.test_args = test_args 27 | 28 | self.loss = None 29 | self.postprocess = None 30 | 31 | num_anchors_list = [len(x) for x in anchors_mask] 32 | in_channels_list = [512, 256, 128] 33 | 34 | self.backbone = backbone.Darknet53() 35 | self.head = head.Yolov3(num_classes, in_channels_list, num_anchors_list) 36 | 37 | if weights_file is not None: 38 | self.load_weights(weights_file, clear) 39 | else: 40 | self.init_weights(slope=0.1) 41 | 42 | def _forward(self, x): 43 | middle_feats = self.backbone(x) 44 | features = self.head(middle_feats) 45 | loss_fn = loss.YoloLoss 46 | 47 | self.compose(x, features, loss_fn) 48 | 49 | return features 50 | 51 | def modules_recurse(self, mod=None): 52 | """ This function will recursively loop over all module children. 53 | 54 | Args: 55 | mod (torch.nn.Module, optional): Module to loop over; Default **self** 56 | """ 57 | if mod is None: 58 | mod = self 59 | 60 | for module in mod.children(): 61 | if isinstance(module, (nn.ModuleList, nn.Sequential, backbone.Darknet53, 62 | backbone.Darknet53.custom_layers, head.Yolov3, head.Yolov3.custom_layers)): 63 | yield from self.modules_recurse(module) 64 | else: 65 | yield module 66 | -------------------------------------------------------------------------------- /vedanet/models/yolo_abc.py: -------------------------------------------------------------------------------- 1 | # 2 | # Darknet YOLOv2 model 3 | # Copyright EAVISE 4 | # 5 | 6 | import os 7 | from collections import OrderedDict, Iterable 8 | import torch 9 | import torch.nn as nn 10 | from .. 
import data as vnd 11 | from ._darknet import Darknet 12 | 13 | __all__ = ['YoloABC'] 14 | 15 | 16 | class YoloABC(Darknet): 17 | def __init__(self): 18 | """ Network initialisation """ 19 | super().__init__() 20 | 21 | # Parameters 22 | self.num_classes = None 23 | self.anchors = None 24 | self.anchors_mask = None 25 | self.nloss = None 26 | self.loss = None 27 | self.postprocess = None 28 | self.train_flag = None # 1 for train, 2 for test, 0 for speed 29 | self.test_args = None 30 | 31 | # Network 32 | # backbone 33 | #self.backbone = backbone() 34 | # head 35 | #self.head = head() 36 | 37 | #if weights_file is not None: 38 | # self.load_weights(weights_file, clear) 39 | 40 | def _forward(self, x): 41 | pass 42 | 43 | def compose(self, x, features, loss_fn): 44 | """ 45 | generate loss and postprocess 46 | """ 47 | if self.train_flag == 1: # train 48 | if self.loss is None: 49 | self.loss = [] # for training 50 | 51 | for idx in range(self.nloss): 52 | reduction = float(x.shape[2] / features[idx].shape[2]) # n, c, h, w 53 | self.loss.append(loss_fn(self.num_classes, self.anchors, self.anchors_mask[idx], 54 | reduction, self.seen, head_idx=idx)) 55 | elif self.train_flag == 2: # test 56 | if self.postprocess is None: 57 | self.postprocess = [] # for testing 58 | 59 | conf_thresh = self.test_args['conf_thresh'] 60 | network_size = self.test_args['network_size'] 61 | labels = self.test_args['labels'] 62 | for idx in range(self.nloss): 63 | reduction = float(x.shape[2] / features[idx].shape[2]) # n, c, h, w 64 | cur_anchors = [self.anchors[ii] for ii in self.anchors_mask[idx]] 65 | cur_anchors = [(ii[0] / reduction, ii[1] / reduction) for ii in cur_anchors] # abs to relative 66 | self.postprocess.append(vnd.transform.Compose([ 67 | vnd.transform.GetBoundingBoxes(self.num_classes, cur_anchors, conf_thresh), 68 | vnd.transform.TensorToBrambox(network_size, labels) 69 | ])) 70 | # else, speed 71 | -------------------------------------------------------------------------------- /vedanet/network/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lightnet Models Module |br| 3 | This module contains networks that were recreated with this library. 4 | Take a look at the code to learn how to use this library, or just use these models if that is all you need. 5 | """ 6 | 7 | # layer 8 | from . import layer 9 | # backbone 10 | from . import backbone 11 | # head 12 | from . import head 13 | -------------------------------------------------------------------------------- /vedanet/network/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from ._darknet19 import * 2 | from ._darknet53 import * 3 | from ._mobilenet import * 4 | from ._mobilenetv2 import * 5 | from ._tiny_yolov2 import * 6 | from ._tiny_yolov3 import * 7 | from ._shufflenet import * 8 | from ._shufflenetv2 import * 9 | from ._squeezenext import * 10 | from ._xception import * 11 | from ._light_xception import * 12 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_darknet19.py: -------------------------------------------------------------------------------- 1 | # 2 | # Darknet Darknet19 model 3 | # Copyright EAVISE 4 | # 5 | 6 | 7 | # modified by mileistone 8 | 9 | import os 10 | from collections import OrderedDict 11 | import torch 12 | import torch.nn as nn 13 | 14 | from .. 
import layer as vn_layer 15 | 16 | __all__ = ['Darknet19'] 17 | 18 | 19 | class Darknet19(nn.Module): 20 | """ `Darknet19`_ implementation with pytorch. 21 | 22 | Todo: 23 | - Loss function: L2 (Crossentropyloss in pytorch) 24 | 25 | Args: 26 | weights_file (str, optional): Path to the saved weights; Default **None** 27 | input_channels (Number, optional): Number of input channels; Default **3** 28 | 29 | Attributes: 30 | self.loss (fn): loss function. Usually this is :class:`~lightnet.network.RegionLoss` 31 | self.postprocess (fn): Postprocessing function. By default this is :class:`~lightnet.data.GetBoundingBoxes` 32 | 33 | .. _Darknet19: https://github.com/pjreddie/darknet/blob/master/cfg/darknet19.cfg 34 | """ 35 | def __init__(self): 36 | """ Network initialisation """ 37 | super().__init__() 38 | 39 | # Network 40 | layer_list = [ 41 | OrderedDict([ 42 | ('1_convbatch', vn_layer.Conv2dBatchLeaky(3, 32, 3, 1)), 43 | ('2_max', nn.MaxPool2d(2, 2)), 44 | ('3_convbatch', vn_layer.Conv2dBatchLeaky(32, 64, 3, 1)), 45 | ('4_max', nn.MaxPool2d(2, 2)), 46 | ('5_convbatch', vn_layer.Conv2dBatchLeaky(64, 128, 3, 1)), 47 | ('6_convbatch', vn_layer.Conv2dBatchLeaky(128, 64, 1, 1)), 48 | ('7_convbatch', vn_layer.Conv2dBatchLeaky(64, 128, 3, 1)), 49 | ]), 50 | 51 | OrderedDict([ 52 | ('8_max', nn.MaxPool2d(2, 2)), 53 | ('9_convbatch', vn_layer.Conv2dBatchLeaky(128, 256, 3, 1)), 54 | ('10_convbatch', vn_layer.Conv2dBatchLeaky(256, 128, 1, 1)), 55 | ('11_convbatch', vn_layer.Conv2dBatchLeaky(128, 256, 3, 1)), 56 | ]), 57 | 58 | OrderedDict([ 59 | ('12_max', nn.MaxPool2d(2, 2)), 60 | ('13_convbatch', vn_layer.Conv2dBatchLeaky(256, 512, 3, 1)), 61 | ('14_convbatch', vn_layer.Conv2dBatchLeaky(512, 256, 1, 1)), 62 | ('15_convbatch', vn_layer.Conv2dBatchLeaky(256, 512, 3, 1)), 63 | ('16_convbatch', vn_layer.Conv2dBatchLeaky(512, 256, 1, 1)), 64 | ('17_convbatch', vn_layer.Conv2dBatchLeaky(256, 512, 3, 1)), 65 | ]), 66 | 67 | OrderedDict([ 68 | ('18_max', nn.MaxPool2d(2, 2)), 69 | ('19_convbatch', vn_layer.Conv2dBatchLeaky(512, 1024, 3, 1)), 70 | ('20_convbatch', vn_layer.Conv2dBatchLeaky(1024, 512, 1, 1)), 71 | ('21_convbatch', vn_layer.Conv2dBatchLeaky(512, 1024, 3, 1)), 72 | ('22_convbatch', vn_layer.Conv2dBatchLeaky(1024, 512, 1, 1)), 73 | ('23_convbatch', vn_layer.Conv2dBatchLeaky(512, 1024, 3, 1)), 74 | # the following is extra 75 | ('24_convbatch', vn_layer.Conv2dBatchLeaky(1024, 1024, 3, 1)), 76 | ('25_convbatch', vn_layer.Conv2dBatchLeaky(1024, 1024, 3, 1)), 77 | ]), 78 | ] 79 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 80 | 81 | def forward(self, x): 82 | stem = self.layers[0](x) 83 | stage4 = self.layers[1](stem) 84 | stage5 = self.layers[2](stage4) 85 | stage6 = self.layers[3](stage5) 86 | features = [stage6, stage5, stage4] 87 | return features 88 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_darknet53.py: -------------------------------------------------------------------------------- 1 | # 2 | # Darknet YOLOv3 model 3 | # Copyright EAVISE 4 | # 5 | 6 | 7 | # modified by mileistone 8 | 9 | import os 10 | from collections import OrderedDict, Iterable 11 | import logging 12 | import torch 13 | import torch.nn as nn 14 | 15 | from .. 
import layer as vn_layer 16 | from .brick import darknet53 as bdkn 17 | 18 | __all__ = ['Darknet53'] 19 | 20 | 21 | class Darknet53(nn.Module): 22 | custom_layers = (bdkn.Stage, bdkn.HeadBody, bdkn.Transition, 23 | bdkn.Stage.custom_layers, bdkn.HeadBody.custom_layers, bdkn.Transition.custom_layers) 24 | def __init__(self): 25 | super().__init__() 26 | 27 | input_channels = 32 28 | stage_cfg = {'stage_2': 2, 'stage_3': 3, 'stage_4': 9, 'stage_5': 9, 'stage_6': 5} 29 | 30 | # Network 31 | layer_list = [ 32 | # layer 0 33 | # first scale, smallest 34 | OrderedDict([ 35 | ('stage_1', vn_layer.Conv2dBatchLeaky(3, input_channels, 3, 1, 1)), 36 | ('stage_2', bdkn.Stage(input_channels, stage_cfg['stage_2'])), 37 | ('stage_3', bdkn.Stage(input_channels*(2**1), stage_cfg['stage_3'])), 38 | ('stage_4', bdkn.Stage(input_channels*(2**2), stage_cfg['stage_4'])), 39 | ]), 40 | 41 | # layer 1 42 | # second scale 43 | OrderedDict([ 44 | ('stage_5', bdkn.Stage(input_channels*(2**3), stage_cfg['stage_5'])), 45 | ]), 46 | 47 | # layer 2 48 | # third scale, largest 49 | OrderedDict([ 50 | ('stage_6', bdkn.Stage(input_channels*(2**4), stage_cfg['stage_6'])), 51 | ]), 52 | 53 | # the following is extra 54 | # layer 3 55 | # output third scale, largest 56 | OrderedDict([ 57 | ('head_body_1', bdkn.HeadBody(input_channels*(2**5), first_head=True)), 58 | ]), 59 | 60 | # layer 4 61 | OrderedDict([ 62 | ('trans_1', bdkn.Transition(input_channels*(2**4))), 63 | ]), 64 | 65 | # layer 5 66 | # output second scale 67 | OrderedDict([ 68 | ('head_body_2', bdkn.HeadBody(input_channels*(2**4+2**3))), 69 | ]), 70 | 71 | # layer 6 72 | OrderedDict([ 73 | ('trans_2', bdkn.Transition(input_channels*(2**3))), 74 | ]), 75 | 76 | # layer 7 77 | # output first scale, smallest 78 | OrderedDict([ 79 | ('head_body_3', bdkn.HeadBody(input_channels*(2**3+2**2))), 80 | ]), 81 | ] 82 | 83 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 84 | 85 | def forward(self, x): 86 | features = [] 87 | outputs = [] 88 | 89 | stage_4 = self.layers[0](x) 90 | stage_5 = self.layers[1](stage_4) 91 | stage_6 = self.layers[2](stage_5) 92 | 93 | head_body_1 = self.layers[3](stage_6) 94 | trans_1 = self.layers[4](head_body_1) 95 | 96 | concat_2 = torch.cat([trans_1, stage_5], 1) 97 | head_body_2 = self.layers[5](concat_2) 98 | trans_2 = self.layers[6](head_body_2) 99 | 100 | concat_3 = torch.cat([trans_2, stage_4], 1) 101 | head_body_3 = self.layers[7](concat_3) 102 | 103 | # stage 6, stage 5, stage 4 104 | features = [head_body_1, head_body_2, head_body_3] 105 | 106 | return features 107 | 108 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_light_xception.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. 
import layer as vn_layer 7 | from .brick import light_xception as blx 8 | 9 | __all__ = ['LightXception'] 10 | 11 | 12 | class LightXception(nn.Module): 13 | def __init__(self): 14 | """ Network initialisation """ 15 | super().__init__() 16 | 17 | # Network 18 | layers_list= [ 19 | OrderedDict([ 20 | ('stage3/conv1', vn_layer.Conv2dBatchReLU(3, 24, 3, 2)), 21 | ('stage3/downsample2', nn.MaxPool2d(3, 2, 1)), 22 | ]), 23 | 24 | 25 | OrderedDict([ 26 | ('stage4/miniblock1', blx.MiniBlock(24, 144, 2, 2)), 27 | ('stage4/stage2', blx.Block(144, 144, 3, 3)), 28 | ]), 29 | 30 | OrderedDict([ 31 | ('stage5/miniblock1', blx.MiniBlock(144, 288, 2, 2)), 32 | ('stage5/stage2', blx.Block(288, 288, 3, 7)), 33 | ]), 34 | 35 | OrderedDict([ 36 | ('stage6/miniblock1', blx.MiniBlock(288, 576, 2, 2)), 37 | ('stage6/stage2', blx.Block(576, 576, 3, 3)), 38 | # the following is extra 39 | ]), 40 | ] 41 | 42 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layers_list]) 43 | 44 | def forward(self, x): 45 | stem = self.layers[0](x) 46 | stage4 = self.layers[1](stem) 47 | stage5 = self.layers[2](stage4) 48 | stage6 = self.layers[3](stage5) 49 | features = [stage6, stage5, stage4] 50 | return features 51 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_mobilenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. import layer as vn_layer 7 | from .brick import mobilenet as bmnv 8 | 9 | __all__ = ['Mobilenet'] 10 | 11 | # default Mobilenet 1.0 12 | class Mobilenet(nn.Module): 13 | """ 14 | """ 15 | def __init__(self, alpha=1): 16 | """ Network initialisation """ 17 | super().__init__() 18 | 19 | # Network 20 | layer_list = [ 21 | # Sequence 0 : input = image tensor 22 | OrderedDict([ 23 | ('1_convbatch', vn_layer.Conv2dBatchReLU(3, int(alpha*32), 3, 2)), 24 | ('2_convdw', bmnv.Conv2dDepthWise(int(alpha*32), int(alpha*64), 3, 1)), 25 | ('3_convdw', bmnv.Conv2dDepthWise(int(alpha*64), int(alpha*128), 3, 2)), 26 | ('4_convdw', bmnv.Conv2dDepthWise(int(alpha*128), int(alpha*128), 3, 1)), 27 | ]), 28 | 29 | OrderedDict([ 30 | ('5_convdw', bmnv.Conv2dDepthWise(int(alpha*128), int(alpha*256), 3, 2)), 31 | ('6_convdw', bmnv.Conv2dDepthWise(int(alpha*256), int(alpha*256), 3, 1)), 32 | ]), 33 | 34 | OrderedDict([ 35 | ('7_convdw', bmnv.Conv2dDepthWise(int(alpha*256), int(alpha*512), 3, 2)), 36 | ('8_convdw', bmnv.Conv2dDepthWise(int(alpha*512), int(alpha*512), 3, 1)), 37 | ('9_convdw', bmnv.Conv2dDepthWise(int(alpha*512), int(alpha*512), 3, 1)), 38 | ('10_convdw', bmnv.Conv2dDepthWise(int(alpha*512), int(alpha*512), 3, 1)), 39 | ('11_convdw', bmnv.Conv2dDepthWise(int(alpha*512), int(alpha*512), 3, 1)), 40 | ('12_convdw', bmnv.Conv2dDepthWise(int(alpha*512), int(alpha*512), 3, 1)), 41 | ]), 42 | 43 | OrderedDict([ 44 | ('13_convdw', bmnv.Conv2dDepthWise(int(alpha*512), int(alpha*1024), 3, 2)), 45 | ('14_convdw', bmnv.Conv2dDepthWise(int(alpha*1024), int(alpha*1024), 3, 1)), 46 | # the following is extra 47 | ('15_convdw', bmnv.Conv2dDepthWise(int(alpha*1024), int(alpha*1024), 3, 1)), 48 | ('16_convdw', bmnv.Conv2dDepthWise(int(alpha*1024), int(alpha*1024), 3, 1)), 49 | ]), 50 | ] 51 | 52 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 53 | 54 | def forward(self, x): 55 | stem = self.layers[0](x) 56 | stage4 = self.layers[1](stem) 57 | stage5 = 
self.layers[2](stage4) 58 | stage6 = self.layers[3](stage5) 59 | features = [stage6, stage5, stage4] 60 | return features 61 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_mobilenetv2.py: -------------------------------------------------------------------------------- 1 | # 2 | # Darknet Darknet19 model 3 | # Copyright EAVISE 4 | # 5 | 6 | 7 | # modified by mileistone 8 | 9 | import os 10 | from collections import OrderedDict 11 | import torch 12 | import torch.nn as nn 13 | 14 | from .. import layer as vn_layer 15 | from .brick import mobilenetv2 as bmnv2 16 | 17 | __all__ = ['Mobilenetv2'] 18 | 19 | # default mobilenetv2 1.0 20 | class Mobilenetv2(nn.Module): 21 | """ 22 | mobilenetv2 23 | """ 24 | def __init__(self, width_mult=1): 25 | """ Network initialisation """ 26 | super().__init__() 27 | 28 | # setting of inverted residual blocks 29 | cfg = [ 30 | # t, c, n, s 31 | # stage 3 32 | [ 33 | [1, 16, 1, 1], 34 | [6, 24, 2, 2], 35 | ], 36 | # stage 4 37 | [ 38 | [6, 32, 3, 2], 39 | ], 40 | # stage 5 41 | [ 42 | [6, 64, 4, 2], 43 | [6, 96, 3, 1], 44 | ], 45 | # stage 6 46 | [ 47 | [6, 160, 3, 2], 48 | [6, 320, 1, 1], 49 | ], 50 | ] 51 | 52 | self.layers = bmnv2.buildMobilenetv2(cfg, width_mult) 53 | 54 | def forward(self, x): 55 | stem = self.layers[0](x) 56 | stage4 = self.layers[1](stem) 57 | #print(stage4.shape) 58 | #print(self.layers[0], self.layers[1], self.layers[2]) 59 | stage5 = self.layers[2](stage4) 60 | stage6 = self.layers[3](stage5) 61 | features = [stage6, stage5, stage4] 62 | #print(stage4.shape, stage5.shape, stage6.shape) 63 | return features 64 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_shufflenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. 
import layer as vn_layer 7 | from .brick import shufflenet as bsn 8 | 9 | __all__ = ['shufflenetg2', 'shufflenetg3'] 10 | 11 | # default shufflenet g2 12 | class Shufflenet(nn.Module): 13 | def __init__(self, cfg): 14 | super().__init__() 15 | out_planes = cfg['out_planes'] 16 | num_blocks = cfg['num_blocks'] 17 | groups = cfg['groups'] 18 | 19 | # Network 20 | layers_list = [ 21 | # Sequence 0 : input = image tensor 22 | OrderedDict([ 23 | ('stage3/convbatchrelu', vn_layer.Conv2dBatchReLU(3, 24, 3, 2)), 24 | ('stage3/max', nn.MaxPool2d(3, 2, 1)), 25 | ]), 26 | 27 | OrderedDict([ 28 | ('Stage4', bsn.Stage(24, out_planes[0], groups, num_blocks[0])), 29 | ]), 30 | 31 | OrderedDict([ 32 | ('Stage5', bsn.Stage(out_planes[0], out_planes[1], groups, num_blocks[1])), 33 | ]), 34 | 35 | OrderedDict([ 36 | ('Stage6', bsn.Stage(out_planes[1], out_planes[2], groups, num_blocks[2])), 37 | # the following is extra 38 | ]), 39 | ] 40 | 41 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layers_list]) 42 | 43 | def forward(self, x): 44 | stem = self.layers[0](x) 45 | stage4 = self.layers[1](stem) 46 | stage5 = self.layers[2](stage4) 47 | stage6 = self.layers[3](stage5) 48 | features = [stage6, stage5, stage4] 49 | return features 50 | 51 | 52 | def shufflenetg2(): 53 | cfg = { 54 | 'out_planes': [200, 400, 800], 55 | 'num_blocks': [4, 8, 4], 56 | 'groups': 2 57 | } 58 | return Shufflenet(cfg) 59 | 60 | 61 | def shufflenetg3(): 62 | cfg = { 63 | 'out_planes': [240, 480, 960], 64 | 'num_blocks': [4, 8, 4], 65 | 'groups': 3 66 | } 67 | return Shufflenet(cfg) 68 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_shufflenetv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. 
import layer as vn_layer 7 | from .brick import shufflenetv2 as bsnv2 8 | 9 | __all__ = ['shufflenetv2'] 10 | 11 | # default shufflenetv2 1x 12 | class Shufflenetv2(nn.Module): 13 | def __init__(self, cfg): 14 | super().__init__() 15 | 16 | out_planes = cfg['out_channels'] 17 | num_blocks = cfg['num_blocks'] 18 | groups = cfg['groups'] 19 | 20 | # Network 21 | layers_list = [ 22 | # Sequence 0 : input = image tensor 23 | OrderedDict([ 24 | ('stage3/convbatchrelu', vn_layer.Conv2dBatchReLU(3, 24, 3, 2)), 25 | ('stage3/max', nn.MaxPool2d(3, 2, 1)), 26 | ]), 27 | 28 | OrderedDict([ 29 | ('Stage4', bsnv2.Stage(24, out_planes[0], groups, num_blocks[0])), 30 | ]), 31 | 32 | OrderedDict([ 33 | ('Stage5', bsnv2.Stage(out_planes[0], out_planes[1], groups, num_blocks[1])), 34 | ]), 35 | 36 | OrderedDict([ 37 | ('Stage6', bsnv2.Stage(out_planes[1], out_planes[2], groups, num_blocks[2])), 38 | # the following is extra 39 | ]), 40 | ] 41 | 42 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layers_list]) 43 | 44 | def forward(self, x): 45 | stem = self.layers[0](x) 46 | stage4 = self.layers[1](stem) 47 | stage5 = self.layers[2](stage4) 48 | stage6 = self.layers[3](stage5) 49 | features = [stage6, stage5, stage4] 50 | return features 51 | 52 | 53 | def shufflenetv2(): 54 | cfg = { 55 | 'out_channels': (116, 232, 464), 56 | 'num_blocks': (3, 7, 3), 57 | 'groups': 2 58 | } 59 | return Shufflenetv2(cfg) 60 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_squeezenext.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. import layer as vn_layer 7 | from .brick import squeezenext as bsn 8 | 9 | __all__ = ['Squeezenext'] 10 | 11 | # default 1.0-SqNxt-23v5 12 | # there are some difference with orignal 1.0xSqNxt-23v5 in downsample part, 13 | # where we just use a 3x3 conv with stride 2 to downsample. 
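# For example, stage 4 below enters through ('stage4/trans', vn_layer.Conv2dBatchReLU(sqz_chns[2], sqz_chns[3], 3, 2)),
# i.e. a plain 3x3 / stride-2 convolution; stages 5 and 6 downsample the same way.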
14 | class Squeezenext(nn.Module): 15 | """ 16 | """ 17 | def __init__(self, width_mul=1): 18 | """ Network initialisation """ 19 | super().__init__() 20 | 21 | 22 | sqz_chns = [3, 64, 23 | 32*width_mul, 64*width_mul, 128*width_mul, 256*width_mul] 24 | sqz_stage_cfg = [None, None, 25 | 2, 4, 14, 1] 26 | 27 | # Network 28 | layer_list = [ 29 | # Sequence 0 : input = image tensor 30 | OrderedDict([ 31 | # stage 1 32 | ('stage1/downsample', vn_layer.Conv2dBatchReLU(sqz_chns[0], sqz_chns[1], 5, 2)), 33 | # stage 2 34 | # pass 35 | 36 | # stage 3 37 | ('stage3/downsample', nn.MaxPool2d(3, 2, 1)), 38 | ('stage3/trans', vn_layer.Conv2dBatchReLU(sqz_chns[1], sqz_chns[2], 1, 1)), 39 | ('stage3/squeeze', bsn.Stage(sqz_chns[2], sqz_chns[2], sqz_stage_cfg[2])), 40 | ]), 41 | 42 | # stage 4 43 | OrderedDict([ 44 | ('stage4/trans', vn_layer.Conv2dBatchReLU(sqz_chns[2], sqz_chns[3], 3, 2)), 45 | ('stage4/squeeze', bsn.Stage(sqz_chns[3], sqz_chns[3], sqz_stage_cfg[3])), 46 | ]), 47 | 48 | # stage 5 49 | OrderedDict([ 50 | ('stage5/trans', vn_layer.Conv2dBatchReLU(sqz_chns[3], sqz_chns[4], 3, 2)), 51 | ('stage5/squeeze', bsn.Stage(sqz_chns[4], sqz_chns[4], sqz_stage_cfg[4])), 52 | ]), 53 | 54 | # Sequence 1 : input = sequence0 55 | OrderedDict([ 56 | # stage 6 57 | ('stage6/trans', vn_layer.Conv2dBatchReLU(sqz_chns[4], sqz_chns[5], 3, 2)), 58 | ('stage6/squeeze', bsn.Stage(sqz_chns[5], sqz_chns[5], sqz_stage_cfg[5])), 59 | ]), 60 | ] 61 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 62 | 63 | def forward(self, x): 64 | stem = self.layers[0](x) 65 | stage4 = self.layers[1](stem) 66 | stage5 = self.layers[2](stage4) 67 | stage6 = self.layers[3](stage5) 68 | features = [stage6, stage5, stage4] 69 | return features 70 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_tiny_yolov2.py: -------------------------------------------------------------------------------- 1 | # 2 | # Darknet Darknet19 model 3 | # Copyright EAVISE 4 | # 5 | 6 | 7 | # modified by mileistone 8 | 9 | import os 10 | from collections import OrderedDict 11 | import torch 12 | import torch.nn as nn 13 | 14 | from .. 
import layer as vn_layer 15 | 16 | __all__ = ['TinyYolov2'] 17 | 18 | 19 | class TinyYolov2(nn.Module): 20 | """ 21 | """ 22 | def __init__(self): 23 | """ Network initialisation """ 24 | super().__init__() 25 | 26 | # Network 27 | layer_list = [ 28 | OrderedDict([ 29 | ('1_convbatch', vn_layer.Conv2dBatchLeaky(3, 16, 3, 1)), 30 | ('2_max', nn.MaxPool2d(2, 2)), 31 | ('3_convbatch', vn_layer.Conv2dBatchLeaky(16, 32, 3, 1)), 32 | ('4_max', nn.MaxPool2d(2, 2)), 33 | ('5_convbatch', vn_layer.Conv2dBatchLeaky(32, 64, 3, 1)), 34 | ('6_max', nn.MaxPool2d(2, 2)), 35 | ('7_convbatch', vn_layer.Conv2dBatchLeaky(64, 128, 3, 1)), 36 | ]), 37 | OrderedDict([ 38 | ('8_max', nn.MaxPool2d(2, 2)), 39 | ('9_convbatch', vn_layer.Conv2dBatchLeaky(128, 256, 3, 1)), 40 | ]), 41 | OrderedDict([ 42 | ('10_max', nn.MaxPool2d(2, 2)), 43 | ('11_convbatch', vn_layer.Conv2dBatchLeaky(256, 512, 3, 1)), 44 | ]), 45 | OrderedDict([ 46 | ('12_max', vn_layer.PaddedMaxPool2d(2, 1, (0, 1, 0, 1))), 47 | ('13_convbatch', vn_layer.Conv2dBatchLeaky(512, 1024, 3, 1)), 48 | ]), 49 | ] 50 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 51 | 52 | def forward(self, x): 53 | stem = self.layers[0](x) 54 | stage4 = self.layers[1](stem) 55 | stage5 = self.layers[2](stage4) 56 | stage6 = self.layers[3](stage5) 57 | #print(stage5.shape, stage6.shape) 58 | features = [stage6, stage5, stage4] 59 | return features 60 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_tiny_yolov3.py: -------------------------------------------------------------------------------- 1 | # 2 | # Darknet Darknet19 model 3 | # Copyright EAVISE 4 | # 5 | 6 | 7 | # modified by mileistone 8 | 9 | import os 10 | from collections import OrderedDict 11 | import torch 12 | import torch.nn as nn 13 | 14 | from .. 
import layer as vn_layer 15 | 16 | __all__ = ['TinyYolov3'] 17 | 18 | 19 | class TinyYolov3(nn.Module): 20 | """ 21 | """ 22 | def __init__(self): 23 | """ Network initialisation """ 24 | super().__init__() 25 | 26 | # Network 27 | layer_list = [ 28 | # Sequence 0 : input = image tensor 29 | # output redutcion 16 30 | # backbone 31 | OrderedDict([ 32 | ('0_convbatch', vn_layer.Conv2dBatchLeaky(3, 16, 3, 1)), 33 | ('1_max', nn.MaxPool2d(2, 2)), 34 | ('2_convbatch', vn_layer.Conv2dBatchLeaky(16, 32, 3, 1)), 35 | ('3_max', nn.MaxPool2d(2, 2)), 36 | ('4_convbatch', vn_layer.Conv2dBatchLeaky(32, 64, 3, 1)), 37 | ]), 38 | 39 | OrderedDict([ 40 | ('5_max', nn.MaxPool2d(2, 2)), 41 | ('6_convbatch', vn_layer.Conv2dBatchLeaky(64, 128, 3, 1)), 42 | ]), 43 | 44 | OrderedDict([ 45 | ('7_max', nn.MaxPool2d(2, 2)), 46 | ('8_convbatch', vn_layer.Conv2dBatchLeaky(128, 256, 3, 1)), 47 | ]), 48 | 49 | # Sequence 1 : input = sequence0 50 | # output redutcion 32 51 | # backbone 52 | OrderedDict([ 53 | ('9_max', nn.MaxPool2d(2, 2)), 54 | ('10_convbatch', vn_layer.Conv2dBatchLeaky(256, 512, 3, 1)), 55 | ('11_max', nn.MaxPool2d(3, 1, 1)), 56 | ('12_convbatch', vn_layer.Conv2dBatchLeaky(512, 1024, 3, 1)), 57 | ('13_convbatch', vn_layer.Conv2dBatchLeaky(1024, 256, 1, 1)), 58 | ]), 59 | 60 | ] 61 | 62 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 63 | 64 | def forward(self, x): 65 | stem = self.layers[0](x) 66 | stage4 = self.layers[1](stem) 67 | stage5 = self.layers[2](stage4) 68 | stage6 = self.layers[3](stage5) 69 | features = [stage6, stage5, stage4] 70 | return features 71 | -------------------------------------------------------------------------------- /vedanet/network/backbone/_xception.py: -------------------------------------------------------------------------------- 1 | # modified by mileistone 2 | 3 | import os 4 | from collections import OrderedDict 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .. 
import layer as vn_layer 9 | from .brick import xception as bx 10 | 11 | __all__ = ['Xception'] 12 | 13 | 14 | class Xception(nn.Module): 15 | """ 16 | """ 17 | def __init__(self): 18 | """ Network initialisation """ 19 | super().__init__() 20 | 21 | layers_list = [ 22 | # Sequence 0 : input = image tensor 23 | OrderedDict([ 24 | ('stage3/convbatchrelu1', vn_layer.Conv2dBatchReLU(3, 32, 3, 2)), 25 | ('stage3/convbatchrelu2', vn_layer.Conv2dBatchReLU(32, 64, 3, 1)), 26 | ('stage3/miniblock3', bx.MiniBlock(64, 128, 2, 2, False)), 27 | ]), 28 | 29 | OrderedDict([ 30 | ('stage4/miniblock1', bx.MiniBlock(128, 256, 2, 2)), 31 | ]), 32 | 33 | OrderedDict([ 34 | ('stage5/miniblock1', bx.MiniBlock(256, 728, 2, 2)), 35 | ('stage5/stage2', bx.Block(728, 728, 3, 8)), 36 | ]), 37 | 38 | OrderedDict([ 39 | ('stage6/miniblock1', bx.MiniBlock(728, 1024, 2, 2)), 40 | # the following is extra 41 | ]), 42 | ] 43 | 44 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layers_list]) 45 | 46 | def forward(self, x): 47 | stem = self.layers[0](x) 48 | stage4 = self.layers[1](stem) 49 | stage5 = self.layers[2](stage4) 50 | stage6 = self.layers[3](stage5) 51 | features = [stage6, stage5, stage4] 52 | return features 53 | -------------------------------------------------------------------------------- /vedanet/network/backbone/brick/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/ObjectDetection-OneStageDet/d29f69cdce32b006bd040edb6e66427b3c987c70/vedanet/network/backbone/brick/__init__.py -------------------------------------------------------------------------------- /vedanet/network/backbone/brick/darknet53.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from ... 
import layer as vn_layer 7 | 8 | 9 | class StageBlock(nn.Module): 10 | custom_layers = () 11 | def __init__(self, nchannels): 12 | super().__init__() 13 | self.features = nn.Sequential( 14 | vn_layer.Conv2dBatchLeaky(nchannels, int(nchannels/2), 1, 1), 15 | vn_layer.Conv2dBatchLeaky(int(nchannels/2), nchannels, 3, 1) 16 | ) 17 | 18 | def forward(self, data): 19 | return data + self.features(data) 20 | 21 | 22 | class Stage(nn.Module): 23 | custom_layers = (StageBlock, StageBlock.custom_layers) 24 | def __init__(self, nchannels, nblocks, stride=2): 25 | super().__init__() 26 | blocks = [] 27 | blocks.append(vn_layer.Conv2dBatchLeaky(nchannels, 2*nchannels, 3, stride)) 28 | for ii in range(nblocks - 1): 29 | blocks.append(StageBlock(2*nchannels)) 30 | self.features = nn.Sequential(*blocks) 31 | 32 | def forward(self, data): 33 | return self.features(data) 34 | 35 | 36 | class HeadBody(nn.Module): 37 | custom_layers = () 38 | def __init__(self, nchannels, first_head=False): 39 | super().__init__() 40 | if first_head: 41 | half_nchannels = int(nchannels/2) 42 | else: 43 | half_nchannels = int(nchannels/3) 44 | in_nchannels = 2 * half_nchannels 45 | layers = [ 46 | vn_layer.Conv2dBatchLeaky(nchannels, half_nchannels, 1, 1), 47 | vn_layer.Conv2dBatchLeaky(half_nchannels, in_nchannels, 3, 1), 48 | vn_layer.Conv2dBatchLeaky(in_nchannels, half_nchannels, 1, 1), 49 | vn_layer.Conv2dBatchLeaky(half_nchannels, in_nchannels, 3, 1), 50 | vn_layer.Conv2dBatchLeaky(in_nchannels, half_nchannels, 1, 1) 51 | ] 52 | self.feature = nn.Sequential(*layers) 53 | 54 | def forward(self, data): 55 | x = self.feature(data) 56 | return x 57 | 58 | 59 | class Transition(nn.Module): 60 | custom_layers = () 61 | def __init__(self, nchannels): 62 | super().__init__() 63 | half_nchannels = int(nchannels/2) 64 | layers = [ 65 | vn_layer.Conv2dBatchLeaky(nchannels, half_nchannels, 1, 1), 66 | nn.Upsample(scale_factor=2) 67 | ] 68 | 69 | self.features = nn.Sequential(*layers) 70 | 71 | def forward(self, data): 72 | x = self.features(data) 73 | return x 74 | 75 | -------------------------------------------------------------------------------- /vedanet/network/backbone/brick/light_xception.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ... 
import layer as vn_layer 5 | 6 | class SeparableConv2d(nn.Module): 7 | """ 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, relu_in_middle=True): 10 | super().__init__() 11 | 12 | # Parameters 13 | self.in_channels = in_channels 14 | self.out_channels = out_channels 15 | self.kernel_size = kernel_size 16 | self.stride = stride 17 | self.relu_in_middle = relu_in_middle 18 | 19 | if isinstance(kernel_size, (list, tuple)): 20 | self.padding = [int(ii/2) for ii in kernel_size] 21 | else: 22 | self.padding = int(kernel_size/2) 23 | 24 | # Layer 25 | if relu_in_middle: 26 | self.layers = nn.Sequential( 27 | nn.Conv2d(self.in_channels, self.in_channels, self.kernel_size, self.stride, self.padding, groups=self.in_channels, bias=False), 28 | nn.BatchNorm2d(self.in_channels), 29 | nn.ReLU(inplace=True), 30 | 31 | nn.Conv2d(self.in_channels, self.out_channels, 1, 1, 0, bias=False), 32 | nn.BatchNorm2d(self.out_channels), 33 | nn.ReLU(inplace=True), 34 | ) 35 | else: 36 | self.layers = nn.Sequential( 37 | nn.Conv2d(self.in_channels, self.in_channels, self.kernel_size, self.stride, self.padding, groups=self.in_channels, bias=False), 38 | nn.BatchNorm2d(self.in_channels), 39 | 40 | nn.Conv2d(self.in_channels, self.out_channels, 1, 1, 0, bias=False), 41 | nn.BatchNorm2d(self.out_channels), 42 | nn.ReLU(inplace=True), 43 | ) 44 | 45 | 46 | def __repr__(self): 47 | s = '{name} ({in_channels}, {out_channels}, kernel_size={kernel_size}, stride={stride}, padding={padding}, relu_in_middle={relu_in_middle})' 48 | return s.format(name=self.__class__.__name__, **self.__dict__) 49 | 50 | def forward(self, x): 51 | x = self.layers(x) 52 | return x 53 | 54 | 55 | class MiniBlock(nn.Module): 56 | def __init__(self, in_channels, out_channels, stride, separable_conv_num): 57 | super().__init__() 58 | layer_list = [] 59 | 60 | # start 61 | layer_list.append(SeparableConv2d(in_channels, out_channels, 3, 1)) 62 | # middle 63 | for _ in range(separable_conv_num - 1): 64 | layer_list.append( 65 | SeparableConv2d(out_channels, out_channels, 3, 1), 66 | ) 67 | # end 68 | if stride > 1: 69 | self.shortcut = vn_layer.Conv2dBatchReLU(in_channels, out_channels, 1, 2) 70 | layer_list.append(nn.MaxPool2d(3, stride, 1)) 71 | else: 72 | self.shortcut = nn.Sequential() 73 | 74 | self.layers = nn.Sequential(*layer_list) 75 | 76 | def forward(self, x): 77 | y = self.layers(x) + self.shortcut(x) 78 | return y 79 | 80 | 81 | class Block(nn.Module): 82 | def __init__(self, in_channels, out_channels, block_layer, repeat): 83 | super().__init__() 84 | layer_list = [] 85 | layer_list.append(MiniBlock(in_channels, out_channels, 1, block_layer)) 86 | for idx in range(repeat - 1): 87 | layer = MiniBlock(out_channels, out_channels, 1, block_layer) 88 | layer_list.append(layer) 89 | self.layers = nn.Sequential(*layer_list) 90 | 91 | def forward(self, x): 92 | y = self.layers(x) 93 | return y 94 | -------------------------------------------------------------------------------- /vedanet/network/backbone/brick/mobilenet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from collections import OrderedDict 3 | 4 | from ... import layer as vn_layer 5 | 6 | 7 | class Conv2dDepthWise(nn.Module): 8 | """ This layer implements the depthwise separable convolution from Mobilenets_. 
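In this implementation that means a per-channel (grouped) k x k convolution with batch norm and ReLU, followed by a 1x1 ``Conv2dBatchReLU`` that mixes channels across the full width; see ``self.layers`` in ``__init__`` below.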
9 | 10 | Args: 11 | in_channels (int): Number of input channels 12 | out_channels (int): Number of output channels 13 | kernel_size (int or tuple): Size of the kernel of the convolution 14 | stride (int or tuple): Stride of the convolution 15 | padding (int or tuple): padding of the convolution 16 | 17 | .. _Mobilenets: https://arxiv.org/pdf/1704.04861.pdf 18 | """ 19 | def __init__(self, in_channels, out_channels, kernel_size, stride): 20 | super(Conv2dDepthWise, self).__init__() 21 | 22 | # Parameters 23 | self.in_channels = in_channels 24 | self.out_channels = out_channels 25 | self.kernel_size = kernel_size 26 | self.stride = stride 27 | if isinstance(kernel_size, (list, tuple)): 28 | self.padding = [int(ii/2) for ii in kernel_size] 29 | else: 30 | self.padding = int(kernel_size/2) 31 | 32 | # Layer 33 | self.layers = nn.Sequential( 34 | nn.Conv2d(self.in_channels, self.in_channels, self.kernel_size, self.stride, self.padding, groups=self.in_channels, bias=False), 35 | nn.BatchNorm2d(self.in_channels), 36 | nn.ReLU(inplace=True), 37 | 38 | vn_layer.Conv2dBatchReLU(in_channels, out_channels, 1, 1), 39 | ) 40 | 41 | def __repr__(self): 42 | s = '{name} ({in_channels}, {out_channels}, kernel_size={kernel_size}, stride={stride}, padding={padding})' 43 | return s.format(name=self.__class__.__name__, **self.__dict__) 44 | 45 | def forward(self, x): 46 | x = self.layers(x) 47 | return x 48 | -------------------------------------------------------------------------------- /vedanet/network/backbone/brick/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from collections import OrderedDict 3 | 4 | from ... import layer as vn_layer 5 | 6 | 7 | class InvertedResidual(nn.Module): 8 | def __init__(self, inp, oup, stride, expand_ratio): 9 | super().__init__() 10 | self.stride = stride 11 | assert stride in [1, 2] 12 | 13 | self.use_res_connect = self.stride == 1 and inp == oup 14 | 15 | if abs(expand_ratio - 1) < .01: 16 | self.conv = nn.Sequential( 17 | # dw 18 | nn.Conv2d(inp, inp * expand_ratio, 3, stride, 1, groups=inp * expand_ratio, bias=False), 19 | nn.BatchNorm2d(inp * expand_ratio), 20 | nn.ReLU6(inplace=True), 21 | # pw-linear 22 | nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False), 23 | nn.BatchNorm2d(oup), 24 | ) 25 | else: 26 | self.conv = nn.Sequential( 27 | # pw 28 | nn.Conv2d(inp, inp * expand_ratio, 1, 1, 0, bias=False), 29 | nn.BatchNorm2d(inp * expand_ratio), 30 | nn.ReLU6(inplace=True), 31 | # dw 32 | nn.Conv2d(inp * expand_ratio, inp * expand_ratio, 3, stride, 1, groups=inp * expand_ratio, bias=False), 33 | nn.BatchNorm2d(inp * expand_ratio), 34 | nn.ReLU6(inplace=True), 35 | # pw-linear 36 | nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False), 37 | nn.BatchNorm2d(oup), 38 | ) 39 | 40 | def forward(self, x): 41 | if self.use_res_connect: 42 | return x + self.conv(x) 43 | else: 44 | return self.conv(x) 45 | 46 | 47 | def buildInvertedResBlock(residual_setting, input_channel, width_mult): 48 | # building inverted residual blocks 49 | features = [] 50 | for t, c, n, s in residual_setting: 51 | output_channel = int(c * width_mult) 52 | for i in range(n): 53 | if i == 0: 54 | features.append(InvertedResidual(input_channel, output_channel, s, t)) 55 | else: 56 | features.append(InvertedResidual(input_channel, output_channel, 1, t)) 57 | input_channel = output_channel 58 | layers = nn.Sequential(*features) 59 | return layers, input_channel 60 | 61 | 62 | def buildMobilenetv2(cfg, width_mult): 63 | """ 
64 | """ 65 | # building first layer 66 | input_channel = int(32 * width_mult) 67 | 68 | #### stage 3 69 | s3_layer1 = vn_layer.Conv2dBatchReLU(3, input_channel, 3, 2) 70 | residual_setting = cfg[0] 71 | s3_layer2, output_channel_stage3 = buildInvertedResBlock(residual_setting, input_channel, 72 | width_mult) 73 | 74 | 75 | #### stage 4 76 | residual_setting = cfg[1] 77 | s4_layer1, output_channel_stage4 = buildInvertedResBlock(residual_setting, output_channel_stage3, 78 | width_mult) 79 | 80 | #### stage 5 81 | residual_setting = cfg[2] 82 | s5_layer1, output_channel_stage5 = buildInvertedResBlock(residual_setting, output_channel_stage4, 83 | width_mult) 84 | 85 | #### stage 6 86 | residual_setting = cfg[3] 87 | s6_layer1, output_channel_stage6 = buildInvertedResBlock(residual_setting, output_channel_stage5, 88 | width_mult) 89 | layer_list = [ 90 | # stage 3 91 | OrderedDict([ 92 | ('stage3/layer1', s3_layer1), 93 | ('stage3/layer2', s3_layer2), 94 | ]), 95 | # stage 4 96 | OrderedDict([ 97 | ('stage4/layer1', s4_layer1), 98 | ]), 99 | # stage 5 100 | OrderedDict([ 101 | ('stage5/layer1', s5_layer1), 102 | ]), 103 | # stage 6 104 | OrderedDict([ 105 | ('stage6/layer1', s6_layer1), 106 | ]), 107 | ] 108 | 109 | layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 110 | 111 | return layers 112 | -------------------------------------------------------------------------------- /vedanet/network/backbone/brick/shufflenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from ... import layer as vn_layer 10 | 11 | 12 | class Block(nn.Module): 13 | def __init__(self, in_planes, out_planes, stride, groups): 14 | super().__init__() 15 | self.stride = stride 16 | 17 | mid_planes = int(out_planes / 4) 18 | g = 1 if in_planes==24 else groups 19 | 20 | layer_list = [ 21 | nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False), 22 | nn.BatchNorm2d(mid_planes), 23 | nn.ReLU(inplace=True), 24 | vn_layer.Shuffle(groups=g), 25 | nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False), 26 | nn.BatchNorm2d(mid_planes), 27 | nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False), 28 | nn.BatchNorm2d(out_planes), 29 | ] 30 | self.layers = nn.Sequential(*layer_list) 31 | self.shortcut = nn.Sequential() 32 | if stride == 2: 33 | self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1)) 34 | self.activation = nn.ReLU(inplace=True) 35 | 36 | def forward(self, x): 37 | y = self.layers(x) 38 | res = self.shortcut(x) 39 | if self.stride == 2: 40 | out = torch.cat([y, res], 1) 41 | else: 42 | out = y + res 43 | out = self.activation(out) 44 | return out 45 | 46 | 47 | class Stage(nn.Module): 48 | def __init__(self, in_planes, out_planes, groups, num_blocks): 49 | super().__init__() 50 | layer_list = [] 51 | for i in range(num_blocks): 52 | stride = 2 if i == 0 else 1 53 | cat_planes = in_planes if i == 0 else 0 54 | layer_list.append(Block(in_planes, out_planes - cat_planes, stride=stride, groups=groups)) 55 | in_planes = out_planes 56 | self.layers = nn.Sequential(*layer_list) 57 | 58 | def forward(self, x): 59 | return self.layers(x) 60 | 61 | 62 | -------------------------------------------------------------------------------- 
/vedanet/network/backbone/brick/shufflenetv2.py: -------------------------------------------------------------------------------- 1 | """ 2 | ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from ... import layer as vn_layer 10 | 11 | 12 | class Split(nn.Module): 13 | def __init__(self, ratio): 14 | super().__init__() 15 | self.ratio = ratio 16 | 17 | def forward(self, x): 18 | c = int(x.size(1) * self.ratio) 19 | return x[:, :c, :, :], x[:, c:, :, :] 20 | 21 | 22 | class BasicBlock(nn.Module): 23 | def __init__(self, in_channels, split_ratio=0.5, groups=2): 24 | super().__init__() 25 | in_channels = int(in_channels * split_ratio) 26 | 27 | layer_list = [ 28 | vn_layer.Conv2dBatchReLU(in_channels, in_channels, 1, 1), 29 | nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False), 30 | nn.BatchNorm2d(in_channels), 31 | vn_layer.Conv2dBatchReLU(in_channels, in_channels, 1, 1), 32 | ] 33 | 34 | self.split = Split(split_ratio) 35 | self.layers = nn.Sequential(*layer_list) 36 | self.shuffle = vn_layer.Shuffle(groups) 37 | 38 | def forward(self, x): 39 | x1, x2 = self.split(x) 40 | x2_1 = self.layers(x2) 41 | x_1 = torch.cat([x1, x2_1], 1) 42 | out = self.shuffle(x_1) 43 | return out 44 | 45 | 46 | class DownBlock(nn.Module): 47 | def __init__(self, in_channels, out_channels, groups=2): 48 | super().__init__() 49 | mid_channels = out_channels // 2 50 | # left 51 | left_list = [ 52 | nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False), 53 | nn.BatchNorm2d(in_channels), 54 | vn_layer.Conv2dBatchReLU(in_channels, mid_channels, 1, 1), 55 | ] 56 | # right 57 | right_list = [ 58 | vn_layer.Conv2dBatchReLU(in_channels, mid_channels, 1, 1), 59 | nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False), 60 | nn.BatchNorm2d(mid_channels), 61 | vn_layer.Conv2dBatchReLU(mid_channels, mid_channels, 1, 1), 62 | ] 63 | 64 | self.left_layers = nn.Sequential(*left_list) 65 | self.right_layers = nn.Sequential(*right_list) 66 | self.shuffle = vn_layer.Shuffle(groups) 67 | 68 | def forward(self, x): 69 | left_x = self.left_layers(x) 70 | right_x = self.right_layers(x) 71 | # concat 72 | concat = torch.cat([left_x, right_x], 1) 73 | out = self.shuffle(concat) 74 | return out 75 | 76 | 77 | class Stage(nn.Module): 78 | def __init__(self, in_channels, out_channels, groups, num_blocks): 79 | super().__init__() 80 | layer_list = [DownBlock(in_channels, out_channels)] 81 | for i in range(num_blocks): 82 | layer_list.append(BasicBlock(out_channels)) 83 | in_channels = out_channels 84 | 85 | self.layers = nn.Sequential(*layer_list) 86 | 87 | def forward(self, x): 88 | return self.layers(x) 89 | 90 | -------------------------------------------------------------------------------- /vedanet/network/backbone/brick/squeezenext.py: -------------------------------------------------------------------------------- 1 | """ 2 | SqueezeNext: Hardware-Aware Neural Network Design 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from ...
import layer as vn_layer 10 | 11 | 12 | class Block(nn.Module): 13 | ''' 14 | ''' 15 | def __init__(self, in_channels, out_channels): 16 | super().__init__() 17 | self.in_channels = in_channels 18 | self.out_channels = out_channels 19 | 20 | in_list = [int(out_channels * ii) for ii in (0.5, 0.25, 0.5, 0.5)] 21 | in_list.insert(0, in_channels) 22 | out_list = in_list[1:] + [out_channels] 23 | kernel_list = [(1, 1), (1, 1), (3, 1), (1, 3), (1, 1)] 24 | assert(len(in_list) == len(out_list) == len(kernel_list)) 25 | layer_list = [] 26 | for ii in range(len(in_list)): 27 | in_ch = in_list[ii] 28 | out_ch = out_list[ii] 29 | kernel_size = kernel_list[ii] 30 | layer = vn_layer.Conv2dBatchReLU(in_ch, out_ch, kernel_size, 1) 31 | layer_list.append(layer) 32 | self.layer = nn.Sequential(*layer_list) 33 | 34 | def forward(self, x): 35 | out = self.layer(x) 36 | if self.in_channels == self.out_channels: 37 | out = out + x 38 | return out 39 | 40 | 41 | class Stage(nn.Module): 42 | ''' 43 | ''' 44 | def __init__(self, in_channels, out_channels, repeat_times): 45 | super().__init__() 46 | layer_list = [] 47 | 48 | if repeat_times >= 1: 49 | layer = Block(in_channels, out_channels) 50 | layer_list.append(layer) 51 | for ii in range(repeat_times - 1): 52 | layer = Block(out_channels, out_channels) 53 | layer_list.append(layer) 54 | self.layer = nn.Sequential(*layer_list) 55 | 56 | def forward(self, x): 57 | out = self.layer(x) 58 | return out 59 | 60 | 61 | -------------------------------------------------------------------------------- /vedanet/network/backbone/brick/xception.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ... import layer as vn_layer 5 | 6 | class SeparableConv2d(nn.Module): 7 | """ 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, relu_in_middle=True): 10 | super().__init__() 11 | 12 | # Parameters 13 | self.in_channels = in_channels 14 | self.out_channels = out_channels 15 | self.kernel_size = kernel_size 16 | self.stride = stride 17 | self.relu_in_middle = relu_in_middle 18 | 19 | if isinstance(kernel_size, (list, tuple)): 20 | self.padding = [int(ii/2) for ii in kernel_size] 21 | else: 22 | self.padding = int(kernel_size/2) 23 | 24 | # Layer 25 | if relu_in_middle: 26 | self.layers = nn.Sequential( 27 | nn.Conv2d(self.in_channels, self.in_channels, self.kernel_size, self.stride, self.padding, groups=self.in_channels, bias=False), 28 | nn.BatchNorm2d(self.in_channels), 29 | nn.ReLU(inplace=True), 30 | 31 | nn.Conv2d(self.in_channels, self.out_channels, 1, 1, 0, bias=False), 32 | nn.BatchNorm2d(self.out_channels), 33 | ) 34 | else: 35 | self.layers = nn.Sequential( 36 | nn.Conv2d(self.in_channels, self.in_channels, self.kernel_size, self.stride, self.padding, groups=self.in_channels, bias=False), 37 | nn.BatchNorm2d(self.in_channels), 38 | 39 | nn.Conv2d(self.in_channels, self.out_channels, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(self.out_channels), 41 | ) 42 | 43 | 44 | def __repr__(self): 45 | s = '{name} ({in_channels}, {out_channels}, kernel_size={kernel_size}, stride={stride}, padding={padding}, relu_in_middle={relu_in_middle})' 46 | return s.format(name=self.__class__.__name__, **self.__dict__) 47 | 48 | def forward(self, x): 49 | x = self.layers(x) 50 | return x 51 | 52 | 53 | class MiniBlock(nn.Module): 54 | def __init__(self, in_channels, out_channels, stride, separable_conv_num, start_with_relu=True): 55 | super().__init__() 56 | layer_list = [] 57 | 58 
| # start 59 | if start_with_relu: 60 | layer_list.append(nn.ReLU(inplace=True)) 61 | layer_list.append(SeparableConv2d(in_channels, out_channels, 3, 1)) 62 | # middle 63 | for _ in range(separable_conv_num - 1): 64 | layer_list.extend( 65 | [ 66 | nn.ReLU(inplace=True), 67 | SeparableConv2d(out_channels, out_channels, 3, 1), 68 | ] 69 | ) 70 | # end 71 | if stride > 1: 72 | self.shortcut = vn_layer.Conv2dBatchReLU(in_channels, out_channels, 1, 2) 73 | layer_list.append(nn.MaxPool2d(3, stride, 1)) 74 | else: 75 | self.shortcut = nn.Sequential() 76 | 77 | self.layers = nn.Sequential(*layer_list) 78 | 79 | def forward(self, x): 80 | y = self.layers(x) + self.shortcut(x) 81 | return y 82 | 83 | 84 | class Block(nn.Module): 85 | def __init__(self, in_channels, out_channels, block_layer, repeat): 86 | super().__init__() 87 | layer_list = [] 88 | layer_list.append(MiniBlock(in_channels, out_channels, 1, block_layer)) 89 | for idx in range(repeat - 1): 90 | layer = MiniBlock(out_channels, out_channels, 1, block_layer) 91 | layer_list.append(layer) 92 | self.layers = nn.Sequential(*layer_list) 93 | 94 | def forward(self, x): 95 | y = self.layers(x) 96 | return y 97 | -------------------------------------------------------------------------------- /vedanet/network/head/__init__.py: -------------------------------------------------------------------------------- 1 | from ._yolov3 import * 2 | from ._yolov2 import * 3 | from ._region_mobilenet import * 4 | from ._region_mobilenetv2 import * 5 | from ._tiny_yolov2 import * 6 | from ._tiny_yolov3 import * 7 | from ._region_shufflenet import * 8 | from ._region_shufflenetv2 import * 9 | from ._region_squeezenext import * 10 | from ._region_xception import * 11 | from ._region_light_xception import * 12 | -------------------------------------------------------------------------------- /vedanet/network/head/_region_light_xception.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. import layer as vn_layer 7 | 8 | __all__ = ['RegionLightXception'] 9 | 10 | 11 | class RegionLightXception(nn.Module): 12 | def __init__(self, num_anchors, num_classes): 13 | """ Network initialisation """ 14 | super().__init__() 15 | layer_list = [ 16 | # Sequence 2 : input = sequence0 17 | OrderedDict([ 18 | ('1_convbatch', vn_layer.Conv2dBatchReLU(288, 32, 1, 1)), 19 | ('2_reorg', vn_layer.Reorg(2)), 20 | ]), 21 | 22 | # Sequence 3 : input = sequence2 + sequence1 23 | OrderedDict([ 24 | ('3_convbatch', vn_layer.Conv2dBatchReLU((4*32)+576, 576, 3, 1)), 25 | ('4_conv', nn.Conv2d(576, num_anchors*(5+num_classes), 1, 1, 0)), 26 | ]), 27 | ] 28 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 29 | 30 | def forward(self, middle_feats): 31 | outputs = [] 32 | # stage 5 33 | # Route : layers=-9 34 | stage6_reorg = self.layers[0](middle_feats[1]) 35 | # stage 6 36 | stage6 = middle_feats[0] 37 | # Route : layers=-1, -4 38 | out = self.layers[1](torch.cat((stage6_reorg, stage6), 1)) 39 | features = [out] 40 | return features 41 | -------------------------------------------------------------------------------- /vedanet/network/head/_region_mobilenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. 
import layer as vn_layer 7 | 8 | __all__ = ['RegionMobilenet'] 9 | 10 | 11 | class RegionMobilenet(nn.Module): 12 | def __init__(self, num_anchors, num_classes, input_channels=48): 13 | """ Network initialisation """ 14 | super().__init__() 15 | layer_list = [ 16 | # Sequence 2 : input = sequence0 17 | OrderedDict([ 18 | ('1_convbatch', vn_layer.Conv2dBatchReLU(512, 64, 1, 1)), 19 | ('2_reorg', vn_layer.Reorg(2)), 20 | ]), 21 | 22 | # Sequence 3 : input = sequence2 + sequence1 23 | OrderedDict([ 24 | ('3_convbatch', vn_layer.Conv2dBatchReLU((4*64)+1024, 1024, 3, 1)), 25 | ('4_conv', nn.Conv2d(1024, num_anchors*(5+num_classes), 1, 1, 0)), 26 | ]), 27 | ] 28 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 29 | 30 | def forward(self, middle_feats): 31 | outputs = [] 32 | # stage 5 33 | # Route : layers=-9 34 | stage6_reorg = self.layers[0](middle_feats[1]) 35 | # stage 6 36 | stage6 = middle_feats[0] 37 | # Route : layers=-1, -4 38 | out = self.layers[1](torch.cat((stage6_reorg, stage6), 1)) 39 | features = [out] 40 | return features 41 | -------------------------------------------------------------------------------- /vedanet/network/head/_region_mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. import layer as vn_layer 7 | 8 | __all__ = ['RegionMobilenetv2'] 9 | 10 | 11 | class RegionMobilenetv2(nn.Module): 12 | def __init__(self, num_anchors, num_classes, input_channels=48): 13 | """ Network initialisation """ 14 | super().__init__() 15 | layer_list = [ 16 | # Sequence 2 : input = sequence0 17 | OrderedDict([ 18 | ('1_convbatch', vn_layer.Conv2dBatchReLU(96, 24, 1, 1)), 19 | ('2_reorg', vn_layer.Reorg(2)), 20 | ]), 21 | 22 | # Sequence 3 : input = sequence2 + sequence1 23 | OrderedDict([ 24 | ('3_convbatch', vn_layer.Conv2dBatchReLU((4*24)+320, 320, 3, 1)), 25 | ('4_conv', nn.Conv2d(320, num_anchors*(5+num_classes), 1, 1, 0)), 26 | ]), 27 | ] 28 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 29 | 30 | def forward(self, middle_feats): 31 | outputs = [] 32 | # stage 5 33 | # Route : layers=-9 34 | stage6_reorg = self.layers[0](middle_feats[1]) 35 | # stage 6 36 | stage6 = middle_feats[0] 37 | # Route : layers=-1, -4 38 | out = self.layers[1](torch.cat((stage6_reorg, stage6), 1)) 39 | features = [out] 40 | return features 41 | -------------------------------------------------------------------------------- /vedanet/network/head/_region_shufflenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. 
import layer as vn_layer 7 | 8 | __all__ = ['RegionShufflenet'] 9 | 10 | 11 | class RegionShufflenet(nn.Module): 12 | def __init__(self, num_anchors, num_classes, input_channels=48): 13 | """ Network initialisation """ 14 | super().__init__() 15 | layer_list = [ 16 | # Sequence 2 : input = sequence0 17 | OrderedDict([ 18 | ('1_convbatch', vn_layer.Conv2dBatchReLU(400, 64, 1, 1)), 19 | ('2_reorg', vn_layer.Reorg(2)), 20 | ]), 21 | 22 | # Sequence 3 : input = sequence2 + sequence1 23 | OrderedDict([ 24 | ('3_convbatch', vn_layer.Conv2dBatchReLU((4*64)+800, 800, 3, 1)), 25 | ('4_conv', nn.Conv2d(800, num_anchors*(5+num_classes), 1, 1, 0)), 26 | ]), 27 | ] 28 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 29 | 30 | def forward(self, middle_feats): 31 | outputs = [] 32 | # stage 5 33 | # Route : layers=-9 34 | stage6_reorg = self.layers[0](middle_feats[1]) 35 | # stage 6 36 | stage6 = middle_feats[0] 37 | # Route : layers=-1, -4 38 | out = self.layers[1](torch.cat((stage6_reorg, stage6), 1)) 39 | features = [out] 40 | return features 41 | -------------------------------------------------------------------------------- /vedanet/network/head/_region_shufflenetv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. import layer as vn_layer 7 | 8 | __all__ = ['RegionShufflenetv2'] 9 | 10 | 11 | class RegionShufflenetv2(nn.Module): 12 | def __init__(self, num_anchors, num_classes, input_channels=48): 13 | """ Network initialisation """ 14 | super().__init__() 15 | layer_list = [ 16 | # Sequence 2 : input = sequence0 17 | OrderedDict([ 18 | ('1_convbatch', vn_layer.Conv2dBatchReLU(232, 32, 1, 1)), 19 | ('2_reorg', vn_layer.Reorg(2)), 20 | ]), 21 | 22 | # Sequence 3 : input = sequence2 + sequence1 23 | OrderedDict([ 24 | ('3_convbatch', vn_layer.Conv2dBatchReLU((4*32)+464, 464, 3, 1)), 25 | ('4_conv', nn.Conv2d(464, num_anchors*(5+num_classes), 1, 1, 0)), 26 | ]), 27 | ] 28 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 29 | 30 | def forward(self, middle_feats): 31 | outputs = [] 32 | # stage 5 33 | # Route : layers=-9 34 | stage6_reorg = self.layers[0](middle_feats[1]) 35 | # stage 6 36 | stage6 = middle_feats[0] 37 | # Route : layers=-1, -4 38 | out = self.layers[1](torch.cat((stage6_reorg, stage6), 1)) 39 | features = [out] 40 | return features 41 | -------------------------------------------------------------------------------- /vedanet/network/head/_region_squeezenext.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. 
import layer as vn_layer 7 | 8 | __all__ = ['RegionSqueezenext'] 9 | 10 | 11 | class RegionSqueezenext(nn.Module): 12 | def __init__(self, num_anchors, num_classes, input_channels=48): 13 | """ Network initialisation """ 14 | super().__init__() 15 | layer_list = [ 16 | # Sequence 2 : input = sequence0 17 | OrderedDict([ 18 | ('1_convbatch', vn_layer.Conv2dBatchReLU(128, 16, 1, 1)), 19 | ('2_reorg', vn_layer.Reorg(2)), 20 | ]), 21 | 22 | # Sequence 3 : input = sequence2 + sequence1 23 | OrderedDict([ 24 | ('3_convbatch', vn_layer.Conv2dBatchReLU((4*16)+256, 256, 3, 1)), 25 | ('4_conv', nn.Conv2d(256, num_anchors*(5+num_classes), 1, 1, 0)), 26 | ]), 27 | ] 28 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 29 | 30 | def forward(self, middle_feats): 31 | outputs = [] 32 | # stage 5 33 | # Route : layers=-9 34 | stage6_reorg = self.layers[0](middle_feats[1]) 35 | # stage 6 36 | stage6 = middle_feats[0] 37 | # Route : layers=-1, -4 38 | out = self.layers[1](torch.cat((stage6_reorg, stage6), 1)) 39 | features = [out] 40 | return features 41 | -------------------------------------------------------------------------------- /vedanet/network/head/_region_xception.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. import layer as vn_layer 7 | 8 | __all__ = ['RegionXception'] 9 | 10 | 11 | class RegionXception(nn.Module): 12 | def __init__(self, num_anchors, num_classes): 13 | """ Network initialisation """ 14 | super().__init__() 15 | layer_list = [ 16 | # Sequence 2 : input = sequence0 17 | OrderedDict([ 18 | ('1_convbatch', vn_layer.Conv2dBatchReLU(728, 96, 1, 1)), 19 | ('2_reorg', vn_layer.Reorg(2)), 20 | ]), 21 | 22 | # Sequence 3 : input = sequence2 + sequence1 23 | OrderedDict([ 24 | ('3_convbatch', vn_layer.Conv2dBatchReLU((4*96)+1024, 1024, 3, 1)), 25 | ('4_conv', nn.Conv2d(1024, num_anchors*(5+num_classes), 1, 1, 0)), 26 | ]), 27 | ] 28 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 29 | 30 | def forward(self, middle_feats): 31 | outputs = [] 32 | # stage 5 33 | # Route : layers=-9 34 | stage6_reorg = self.layers[0](middle_feats[1]) 35 | # stage 6 36 | stage6 = middle_feats[0] 37 | # Route : layers=-1, -4 38 | out = self.layers[1](torch.cat((stage6_reorg, stage6), 1)) 39 | features = [out] 40 | return features 41 | -------------------------------------------------------------------------------- /vedanet/network/head/_tiny_yolov2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. 
import layer as vn_layer 7 | 8 | __all__ = ['TinyYolov2'] 9 | 10 | 11 | class TinyYolov2(nn.Module): 12 | def __init__(self, num_anchors, num_classes): 13 | """ Network initialisation """ 14 | super().__init__() 15 | layer_list = [ 16 | OrderedDict([ 17 | ('14_convbatch', vn_layer.Conv2dBatchLeaky(1024, 1024, 3, 1)), 18 | ('15_conv', nn.Conv2d(1024, num_anchors*(5+num_classes), 1, 1, 0)), 19 | ]), 20 | ] 21 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 22 | 23 | def forward(self, middle_feats): 24 | outputs = [] 25 | # stage 6 26 | stage6 = middle_feats[0] 27 | # Route : layers=-1, -4 28 | out = self.layers[0](stage6) 29 | features = [out] 30 | return features 31 | -------------------------------------------------------------------------------- /vedanet/network/head/_tiny_yolov3.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. import layer as vn_layer 7 | 8 | __all__ = ['TinyYolov3'] 9 | 10 | 11 | class TinyYolov3(nn.Module): 12 | custom_layers = () 13 | def __init__(self, num_classes, num_anchors_list): 14 | """ Network initialisation """ 15 | super().__init__() 16 | layer_list = [ 17 | # stage 6 18 | OrderedDict([ 19 | ('14_convbatch', vn_layer.Conv2dBatchLeaky(256, 512, 3, 1)), 20 | ('15_conv', nn.Conv2d(512, num_anchors_list[0]*(5+num_classes), 1, 1, 0)), 21 | ]), 22 | # stage 5 23 | # stage5 / upsample 24 | OrderedDict([ 25 | ('18_convbatch', vn_layer.Conv2dBatchLeaky(256, 128, 1, 1)), 26 | ('19_upsample', nn.Upsample(scale_factor=2)), 27 | ]), 28 | # stage5 / head 29 | OrderedDict([ 30 | ('21_convbatch', vn_layer.Conv2dBatchLeaky(256 + 128, 256, 3, 1)), 31 | ('22_conv', nn.Conv2d(256, num_anchors_list[0]*(5+num_classes), 1, 1, 0)), 32 | ]), 33 | ] 34 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 35 | 36 | def forward(self, middle_feats): 37 | outputs = [] 38 | #print(middle_feats[0].shape, middle_feats[1].shape) 39 | stage6 = self.layers[0](middle_feats[0]) 40 | stage5_upsample = self.layers[1](middle_feats[0]) 41 | stage5 = self.layers[2](torch.cat((middle_feats[1], stage5_upsample), 1)) 42 | features = [stage6, stage5] 43 | return features 44 | -------------------------------------------------------------------------------- /vedanet/network/head/_yolov2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .. 
import layer as vn_layer 7 | 8 | __all__ = ['Yolov2'] 9 | 10 | 11 | class Yolov2(nn.Module): 12 | def __init__(self, num_anchors, num_classes, input_channels=48): 13 | """ Network initialisation """ 14 | super().__init__() 15 | layer_list = [ 16 | # Sequence 2 : input = sequence0 17 | OrderedDict([ 18 | ('1_convbatch', vn_layer.Conv2dBatchLeaky(512, 64, 1, 1)), 19 | ('2_reorg', vn_layer.Reorg(2)), 20 | ]), 21 | 22 | # Sequence 3 : input = sequence2 + sequence1 23 | OrderedDict([ 24 | ('3_convbatch', vn_layer.Conv2dBatchLeaky((4*64)+1024, 1024, 3, 1)), 25 | ('4_conv', nn.Conv2d(1024, num_anchors*(5+num_classes), 1, 1, 0)), 26 | ]), 27 | ] 28 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 29 | 30 | def forward(self, middle_feats): 31 | outputs = [] 32 | # stage 5 33 | # Route : layers=-9 34 | stage6_reorg = self.layers[0](middle_feats[1]) 35 | # stage 6 36 | stage6 = middle_feats[0] 37 | # Route : layers=-1, -4 38 | out = self.layers[1](torch.cat((stage6_reorg, stage6), 1)) 39 | features = [out] 40 | return features 41 | -------------------------------------------------------------------------------- /vedanet/network/head/_yolov3.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | from .brick import yolov3 as byolov3 8 | 9 | __all__ = ['Yolov3'] 10 | 11 | 12 | class Yolov3(nn.Module): 13 | custom_layers = (byolov3.Head, byolov3.Head.custom_layers) 14 | def __init__(self, num_classes, in_channels_list, num_anchors_list): 15 | """ Network initialisation """ 16 | super().__init__() 17 | layer_list = [ 18 | # stage 6, largest 19 | OrderedDict([ 20 | ('1_head', byolov3.Head(in_channels_list[0], num_anchors_list[0], num_classes)), 21 | ]), 22 | 23 | OrderedDict([ 24 | ('2_head', byolov3.Head(in_channels_list[1], num_anchors_list[1], num_classes)), 25 | ]), 26 | 27 | # smallest 28 | OrderedDict([ 29 | ('2_head', byolov3.Head(in_channels_list[2], num_anchors_list[2], num_classes)), 30 | ]), 31 | ] 32 | self.layers = nn.ModuleList([nn.Sequential(layer_dict) for layer_dict in layer_list]) 33 | 34 | def forward(self, middle_feats): 35 | outputs = [] 36 | stage6 = self.layers[0](middle_feats[0]) 37 | stage5 = self.layers[1](middle_feats[1]) 38 | stage4 = self.layers[2](middle_feats[2]) 39 | features = [stage6, stage5, stage4] 40 | return features 41 | -------------------------------------------------------------------------------- /vedanet/network/head/brick/__init__.py: -------------------------------------------------------------------------------- 1 | from . import yolov3 2 | -------------------------------------------------------------------------------- /vedanet/network/head/brick/yolov3.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | 6 | from ... 
import layer as vn_layer 7 | 8 | 9 | class Head(nn.Module): 10 | custom_layers = () 11 | def __init__(self, nchannels, nanchors, nclasses): 12 | super().__init__() 13 | mid_nchannels = 2 * nchannels 14 | layer_list = [ 15 | vn_layer.Conv2dBatchLeaky(nchannels, mid_nchannels, 3, 1), 16 | nn.Conv2d(mid_nchannels, nanchors*(5+nclasses), 1, 1, 0), 17 | ] 18 | self.feature = nn.Sequential(*layer_list) 19 | 20 | def forward(self, data): 21 | x = self.feature(data) 22 | return x 23 | 24 | -------------------------------------------------------------------------------- /vedanet/network/layer/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Extra lightnet layers 3 | # Copyright EAVISE 4 | # 5 | """ 6 | .. Note:: 7 | Every parameter that can get an int or tuple will behave as follows. |br| 8 | If a tuple of 2 ints is given, the first int is used for the height and the second for the width. |br| 9 | If an int is given, both the width and height are set to this value. 10 | """ 11 | 12 | from ._darknet import * 13 | -------------------------------------------------------------------------------- /weights/README.md: -------------------------------------------------------------------------------- 1 | Download weights from [baidudrive](https://pan.baidu.com/s/1a3Z5IUylBs6rI-GYg3RGbw#list/path=%2F) or [googledrive](https://drive.google.com/open?id=1nW3u35_5b0ILs2u9TOQ5Nubjx8-1ewwc) 2 | 3 | format: model_name train_iterations network_size mAP_on_VOC2007Test 4 | 5 | ### Train on pretrain models 6 | ###### 7 | Yolov2 60200 544x544 77.6 8 | 9 | Yolov3 50200 544x544 79.6 10 | 11 | ### Train from scratch 12 | ###### 13 | TinyYolov2 80000 544x544 57.2 14 | 15 | TinyYolov2 80000 416x416 57.5 16 | TinyYolov2 80200 416x416 57.0 17 | 18 | ###### 19 | TinyYolov3 80200 416x416 61.3 20 | 21 | TinyYolov3 80200 544x544 62.6 22 | 23 | ###### 24 | RegionMobilenet 160200 544x544 70.6 25 | 26 | RegionMobilenetv2 80200 416x416 62.8 27 | 28 | RegionMobilenetv2 160200 544x544 69.2 29 | 30 | ###### 31 | RegionShufflenet 160200 544x544 66.1 32 | 33 | RegionShufflenetv2 160200 544x544 64.3 34 | 35 | ###### 36 | RegionLightXception 160200 544x544 71.0 37 | 38 | ###### 39 | RegionSqueezenext 160200 544x544 59.6 40 | --------------------------------------------------------------------------------