├── LICENSE ├── README.md ├── checkClasses.py ├── format.py ├── labelme2coco.py └── rename.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 He Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
"""Report which class ids are used by the labelme annotations in a folder.

checkClasses.py scans every ``*.json`` file in ``dir_path``, collects the
class ids found in the ``"label"`` fields and prints them in the three
layouts needed by the other scripts (id list, id-to-index mapping, readable
names).  Labels that carry trailing text after the five-character class id
(for example ``"CA001 copy"``) are cut back to the id and the file is
rewritten.
"""
import os
import re

CLASS_NAMES = ['CA001', 'CA002', 'CA003', 'CA004',
               'CD001', 'CD002', 'CD003', 'CD004',
               'CD005', 'CD006',
               'ZA001', 'ZA002', 'ZA003', 'ZA004',
               'ZA005', 'ZA006',
               'ZB001', 'ZB002', 'ZB003', 'ZB004',
               'ZB005', 'ZB006', 'ZB007', 'ZB008',
               'ZB009', 'ZB010',
               'ZC001', 'ZC002', 'ZC003', 'ZC004',
               'ZC005', 'ZC006', 'ZC007', 'ZC008',
               'ZC009', 'ZC010', 'ZC011', 'ZC012',
               'ZC013', 'ZC014', 'ZC015', 'ZC016',
               'ZC017', 'ZC018', 'ZC019', 'ZC020',
               'ZC021', 'ZC022', 'ZC023',
               ]
CLASS_REAL_NAMES = ['draw_paper', 'roll_paper', 'toothbrush', 'tape',
                    'apple', 'pear', 'melon', 'kiwi',
                    'grapefruit', 'banana',
                    'soap', 'fulid', 'toothpaste', 'flower',
                    'duck', 'pencilbox',
                    'porridge', 'godmother', 'cookie', 'powder',
                    'gum', 'noodle', 'biscuit', 'chips',
                    'fries', 'seeds',
                    'sprite', 'cola', 'fenta', 'redbull',
                    'ADCaMilk', 'juice', 'WLJ', 'JDB',
                    'ice_tea', 'green_tea', 'Sydney', 'tea_pi',
                    'coco', 'NF_Spring', 'wahaha', 'ganten',
                    'c\'est_bon', 'hengda', 'master', 'JML', 'KLS',
                    'QCYH', 'ICE',
                    ]
# Derived from the two parallel lists above so the three tables cannot drift
# apart; the resulting mapping is identical to the former hand-written dict.
CLASS_NAME_DICT = dict(zip(CLASS_NAMES, CLASS_REAL_NAMES))

# A valid class id is two capital letters followed by three digits.
CLASS_ID_LENGTH = 5

dir_path = './'
# ``[^"]*`` keeps the match inside one string literal.  The previous ``.+``
# was greedy and could swallow several labels when they share a line.
pattern = re.compile(r'"label": "([A-Z]{2}[0-9]{3}[^"]*)",')


def normalize_labels(content):
    """Cut over-long labels in *content* back to their class id.

    Args:
        content: Text of one labelme annotation file.

    Returns:
        A ``(fixed_content, class_ids)`` tuple.  ``fixed_content`` is
        *content* with every matched label reduced to its first
        ``CLASS_ID_LENGTH`` characters; ``class_ids`` lists the distinct ids
        in order of first appearance.
    """
    found = []

    def _truncate(match):
        class_id = match.group(1)[:CLASS_ID_LENGTH]
        if class_id not in found:
            found.append(class_id)
        # Only the label field is rewritten, never other text that happens
        # to contain the same characters.
        return '"label": "{}",'.format(class_id)

    return pattern.sub(_truncate, content), found


def collect_class_ids(directory=dir_path):
    """Return the distinct class ids used by the ``*.json`` files in *directory*.

    Files containing over-long labels are rewritten in place with the
    corrected labels.  Files are visited in sorted order so the result is
    reproducible.

    Args:
        directory: Folder holding the labelme annotation files.

    Returns:
        List of class ids in order of first appearance.
    """
    class_ids = []
    for name in sorted(os.listdir(directory)):
        if os.path.splitext(name)[-1] != '.json':
            continue
        path = os.path.join(directory, name)
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        fixed, found = normalize_labels(content)
        if fixed != content:
            print("Find invalid id !!")
            # The read handle is already closed here, so the rewrite cannot
            # race with it.
            with open(path, 'w', encoding='utf-8') as f:
                f.write(fixed)
        for class_id in found:
            if class_id not in class_ids:
                class_ids.append(class_id)
    return class_ids


def main():
    """Scan ``dir_path`` and print the class report."""
    class_ids = collect_class_ids(dir_path)
    print('一共有{}种class'.format(len(class_ids)))
    print('分别是')
    for class_id in class_ids:
        print('"{}",'.format(class_id), end="")
    print()
    for index, class_id in enumerate(class_ids, start=1):
        print('"{}":{},'.format(class_id, index))

    for class_id in class_ids:
        # Ids missing from the table are printed as-is instead of aborting
        # the report with a KeyError.
        print("'{}',".format(CLASS_NAME_DICT.get(class_id, class_id)), end="")


if __name__ == '__main__':
    main()
"""Repair the ``imagePath`` field of labelme annotation files.

format.py rewrites the ``imagePath`` entry of every ``*.json`` file in
``dir_path`` so that it names the PNG that shares the annotation's base name
(``0433_Color.json`` -> ``0433_Color.png``).
"""
import os
import re

# dir_path = os.path.dirname(__file__)
dir_path = './'
pattern = re.compile(r'"imagePath": "(.+?png)",')


def fix_image_path(content, json_name):
    """Point the ``imagePath`` field of *content* at the PNG next to *json_name*.

    Args:
        content: Text of one labelme annotation file.
        json_name: File name of that annotation; only its base name is used.

    Returns:
        ``(old_image_path, new_content)``, or ``None`` when *content* has no
        ``imagePath`` entry ending in ``png``.
    """
    match = pattern.search(content)
    if match is None:
        return None
    new_name = os.path.splitext(json_name)[0] + '.png'
    # Splice on the matched span so only the field itself changes; a global
    # str.replace would also rewrite any other occurrence of the old path.
    start, end = match.span(1)
    return match.group(1), content[:start] + new_name + content[end:]


def format_directory(directory=dir_path):
    """Fix the ``imagePath`` of every ``*.json`` file in *directory*.

    Files without a matching ``imagePath`` entry are reported and left
    untouched instead of aborting the run.

    Args:
        directory: Folder holding the labelme annotation files.

    Returns:
        Sorted list of the file names that were rewritten.
    """
    updated = []
    for name in sorted(os.listdir(directory)):
        if os.path.splitext(name)[-1] != '.json':
            continue
        path = os.path.join(directory, name)
        with open(path, encoding='utf-8') as f:
            content = f.read()
        result = fix_image_path(content, name)
        if result is None:
            print('no imagePath found, skipped ', name)
            continue
        image_path, new_content = result
        print('imagePath ', image_path)
        # Written only after the read handle has been closed.
        with open(path, 'w', encoding='utf-8') as nf:
            nf.write(new_content)
        updated.append(name)
    return updated


if __name__ == '__main__':
    format_directory(dir_path)
"""Build a COCO-style dataset from a folder of labelme annotations.

labelme2coco.py splits the ``*.json`` files into a train and a validation
set, writes ``instances_train2017.json`` / ``instances_val2017.json`` and
converts the matching PNG images to JPG.
"""
import glob
import json
import os
import shutil

import numpy as np

np.random.seed(41)

# Category id 0 is reserved for the background.
classname_to_id = {
    "CA002": 1,
    "CA004": 2,
    "CA003": 3,
    "CD006": 4,
    "CD002": 5,
    "CD001": 6,
    "ZA001": 7,
    "ZA003": 8,
    "ZA002": 9,
    "ZA005": 10,
    "ZB005": 11,
    "ZA004": 12,
    "ZC009": 13,
    "ZB003": 14,
    "ZB004": 15,
    "ZC012": 16,
    "ZC011": 17,
    "ZC010": 18,
    "ZC023": 19,
    "ZC013": 20,
    "ZC014": 21,
    "ZC007": 22,
    "ZB006": 23,
    "ZC008": 24,
    "ZC002": 25,
    "ZB001": 26,
    "ZC001": 27,
    "CD003": 28,
    "CA001": 29,
}


class Lableme2CoCo:
    """Accumulate labelme annotation files into one COCO instance dict."""

    def __init__(self, class_map=None):
        """Create an empty converter.

        Args:
            class_map: Optional ``{label: category_id}`` mapping.  Defaults
                to the module-level ``classname_to_id``.
        """
        self.images = []
        self.annotations = []
        self.categories = []
        self.img_id = 0
        self.ann_id = 0
        self.class_map = classname_to_id if class_map is None else class_map

    def save_coco_json(self, instance, save_path):
        """Write *instance* to *save_path* as UTF-8 JSON."""
        # The context manager closes the handle even if serialisation fails.
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=1)

    def to_coco(self, json_path_list):
        """Convert the labelme files in *json_path_list* to a COCO dict.

        Args:
            json_path_list: Paths of the labelme ``*.json`` files.

        Returns:
            Dict with the keys ``info``, ``license``, ``images``,
            ``annotations`` and ``categories``.
        """
        self._init_categories()
        for json_path in json_path_list:
            obj = self.read_jsonfile(json_path)
            self.images.append(self._image(obj, json_path))
            for shape in obj['shapes']:
                self.annotations.append(self._annotation(shape))
                self.ann_id += 1
            self.img_id += 1
        instance = {}
        instance['info'] = 'spytensor created'
        instance['license'] = ['license']
        instance['images'] = self.images
        instance['annotations'] = self.annotations
        instance['categories'] = self.categories
        return instance

    def _init_categories(self):
        """Rebuild the category list from the class map."""
        # Assigning a fresh list (rather than appending) keeps a second
        # to_coco() call from duplicating every category.
        self.categories = [{'id': v, 'name': k} for k, v in self.class_map.items()]

    def _image(self, obj, path):
        """Build the COCO ``image`` entry for one labelme document.

        Args:
            obj: Parsed labelme JSON.
            path: Path of the annotation file; its base name becomes the
                ``.jpg`` file name.

        Returns:
            Dict with ``height``, ``width``, ``id`` and ``file_name``.
        """
        if obj.get('imageData'):
            from labelme import utils
            img_x = utils.img_b64_to_arr(obj['imageData'])
            # [:2] also works for 2-D (grayscale) arrays, where [:-1] would
            # leave a single value to unpack.
            h, w = img_x.shape[:2]
        else:
            # No embedded pixels: use the size fields stored next to them.
            h, w = obj['imageHeight'], obj['imageWidth']
        image = {}
        image['height'] = h
        image['width'] = w
        image['id'] = self.img_id
        # splitext only touches the extension; str.replace would also hit a
        # ".json" that appears earlier in the name.
        image['file_name'] = os.path.splitext(os.path.basename(path))[0] + '.jpg'
        return image

    def _annotation(self, shape):
        """Build the COCO ``annotation`` entry for one labelme shape.

        Args:
            shape: One element of the labelme ``shapes`` list.

        Returns:
            Dict with ``id``, ``image_id``, ``category_id``,
            ``segmentation``, ``bbox``, ``iscrowd`` and ``area``.

        Raises:
            KeyError: If the shape's label is missing from the class map.
        """
        label = shape['label']
        try:
            category_id = int(self.class_map[label])
        except KeyError:
            raise KeyError('label {!r} is not listed in the class map'.format(label)) from None
        points = shape['points']
        if shape.get('shape_type') == 'rectangle' and len(points) == 2:
            # Two opposite corners are not a polygon; COCO polygons need at
            # least three vertices, so spell out all four corners.
            x, y, w, h = self._get_box(points)
            points = [[x, y], [x + w, y], [x + w, y + h], [x, y + h]]
        annotation = {}
        annotation['id'] = self.ann_id
        annotation['image_id'] = self.img_id
        annotation['category_id'] = category_id
        annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
        annotation['bbox'] = self._get_box(points)
        annotation['iscrowd'] = 0
        annotation['area'] = self._polygon_area(points)
        return annotation

    def read_jsonfile(self, path):
        """Load and return the JSON document stored at *path*."""
        with open(path, "r", encoding='utf-8') as f:
            return json.load(f)

    def _get_box(self, points):
        """Return the COCO bbox ``[x, y, width, height]`` enclosing *points*.

        Raises:
            ValueError: If *points* is empty.
        """
        if len(points) == 0:
            raise ValueError('cannot compute a bounding box without points')
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        min_x, min_y = min(xs), min(ys)
        # Taking the real maxima (instead of starting from 0) keeps shapes
        # with negative coordinates correct.
        return [min_x, min_y, max(xs) - min_x, max(ys) - min_y]

    @staticmethod
    def _polygon_area(points):
        """Return the area enclosed by *points* (shoelace formula)."""
        doubled = 0.0
        for (x1, y1), (x2, y2) in zip(points, list(points[1:]) + list(points[:1])):
            doubled += x1 * y2 - x2 * y1
        return abs(doubled) / 2.0


def _export_images(json_paths, image_dir):
    """Convert the PNG beside each annotation to a JPG inside *image_dir*."""
    # Imported here so the converter class stays usable without OpenCV.
    import cv2
    for json_path in json_paths:
        stem = os.path.splitext(json_path)[0]
        img_name = stem + '.png'
        # Only the base name goes into the target; joining the full source
        # path would place the file outside image_dir.
        target = os.path.join(image_dir, os.path.basename(stem) + '.jpg')
        temp_img = cv2.imread(img_name)
        # imread yields None for an unreadable file and imwrite reports
        # failure through its return value.
        if temp_img is None or not cv2.imwrite(target, temp_img):
            print('Wrong Image:', img_name)
            continue
        print(img_name + '-->', target)


def main(labelme_path="../../../xianjin_data-3/", saved_coco_path="../../../xianjin_data-3/"):
    """Convert the labelme folder into ``<saved_coco_path>/coco``.

    Args:
        labelme_path: Folder with the labelme ``*.json`` files and PNGs.
        saved_coco_path: Folder in which the ``coco`` tree is created.
    """
    from sklearn.model_selection import train_test_split

    print('reading...')
    annotation_dir = os.path.join(saved_coco_path, 'coco', 'annotations')
    train_dir = os.path.join(saved_coco_path, 'coco', 'images', 'train2017')
    val_dir = os.path.join(saved_coco_path, 'coco', 'images', 'val2017')
    for directory in (annotation_dir, train_dir, val_dir):
        os.makedirs(directory, exist_ok=True)

    json_glob = os.path.join(labelme_path, '*.json')
    print(json_glob)
    # Sorted so the seeded split is the same on every machine.
    json_list_path = sorted(glob.glob(json_glob))
    print('json_list_path: ', len(json_list_path))
    train_path, val_path = train_test_split(json_list_path, test_size=0.1, train_size=0.9)
    print("train_n:", len(train_path), 'val_n:', len(val_path))

    # Training set.
    l2c_train = Lableme2CoCo()
    train_instance = l2c_train.to_coco(train_path)
    l2c_train.save_coco_json(train_instance, os.path.join(annotation_dir, 'instances_train2017.json'))
    _export_images(train_path, train_dir)

    # Validation set.
    _export_images(val_path, val_dir)
    l2c_val = Lableme2CoCo()
    val_instance = l2c_val.to_coco(val_path)
    l2c_val.save_coco_json(val_instance, os.path.join(annotation_dir, 'instances_val2017.json'))


if __name__ == '__main__':
    main()
"""Give the images and annotations of a dataset folder uniform names.

rename.py renames every ``*.png`` and ``*.json`` file in ``this_dir_path``
to ``NNNN_Color.png`` / ``NNNN_Color.json``, numbering from ``START_INDEX``.
"""
import os

# this_file_path = __file__
# this_dir_path = os.path.dirname(this_file_path)
this_dir_path = './'
START_INDEX = 433


def rename_dataset(dir_path=this_dir_path, start_index=START_INDEX):
    """Rename the PNG/JSON files in *dir_path* to ``NNNN_Color.<ext>``.

    Files that share a base name (an image and its annotation) receive the
    same number.  Base names are processed in sorted order, so the numbering
    does not depend on the order in which the OS lists the directory.

    Args:
        dir_path: Folder whose files are renamed.
        start_index: Number given to the first base name.

    Returns:
        List of ``(old_path, new_path)`` pairs, one per PNG/JSON file.
    """
    extensions_by_stem = {}
    for name in os.listdir(dir_path):
        stem, ext = os.path.splitext(name)
        if ext in ('.png', '.json'):
            extensions_by_stem.setdefault(stem, []).append(ext)

    plan = []
    for offset, stem in enumerate(sorted(extensions_by_stem)):
        new_stem = '{:0>4}_Color'.format(start_index + offset)
        for ext in sorted(extensions_by_stem[stem]):
            plan.append((os.path.join(dir_path, stem + ext),
                         os.path.join(dir_path, new_stem + ext)))

    # Two passes: first move every file to a temporary name, then to its
    # final name.  A target that is still occupied by a not-yet-renamed file
    # (e.g. an existing 0434_Color.png) therefore cannot be overwritten.
    staged = []
    for old_path, new_path in plan:
        if old_path == new_path:
            continue
        temp_path = new_path + '.renaming'
        os.rename(old_path, temp_path)
        staged.append((old_path, temp_path, new_path))
    for old_path, temp_path, new_path in staged:
        print(old_path + '---->' + new_path)
        os.rename(temp_path, new_path)
    return plan


if __name__ == '__main__':
    rename_dataset(this_dir_path, START_INDEX)