├── LICENSE
├── README.md
├── checkClasses.py
├── format.py
├── labelme2coco.py
└── rename.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 He Wang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # datasetapi
 2 | 规范化管理labelme数据集并生成coco数据集
 3 | 
 4 | ## rename.py
 5 | 用于对数据集中图片以及json文件的重命名
 6 | 
 7 | ## format.py
 8 | 用于替换json中不合法的imagePath
 9 | 
10 | ## checkClasses.py
11 | 用于检测当前一共标注了多少class 并对检测结果进行输出
12 | 
13 | ## labelme2coco.py
14 | 用于生成coco形式的数据集
15 | 
16 | # 具体操作可以参考视频
17 | 
18 | ## https://www.bilibili.com/video/bv1Fk4y1U72f
19 | 


--------------------------------------------------------------------------------
/checkClasses.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | 
  4 | CLASS_NAMES = ['CA001', 'CA002', 'CA003', 'CA004',
  5 |                'CD001', 'CD002', 'CD003', 'CD004',
  6 |                'CD005', 'CD006',
  7 |                'ZA001', 'ZA002', 'ZA003', 'ZA004',
  8 |                'ZA005', 'ZA006',
  9 |                'ZB001', 'ZB002', 'ZB003', 'ZB004',
 10 |                'ZB005', 'ZB006', 'ZB007', 'ZB008',
 11 |                'ZB009', 'ZB010',
 12 |                'ZC001', 'ZC002', 'ZC003', 'ZC004',
 13 |                'ZC005', 'ZC006', 'ZC007', 'ZC008',
 14 |                'ZC009', 'ZC010', 'ZC011', 'ZC012',
 15 |                'ZC013', 'ZC014', 'ZC015', 'ZC016',
 16 |                'ZC017', 'ZC018', 'ZC019', 'ZC020',
 17 |                'ZC021', 'ZC022', 'ZC023'
 18 |                ]
 19 | CLASS_REAL_NAMES = ['draw_paper', 'roll_paper', 'toothbrush', 'tape',
 20 |                     'apple', 'pear', 'melon', 'kiwi',
 21 |                     'grapefruit', 'banana',
 22 |                     'soap', 'fulid', 'toothpaste', 'flower',
 23 |                     'duck', 'pencilbox',
 24 |                     'porridge', 'godmother', 'cookie', 'powder',
 25 |                     'gum', 'noodle', 'biscuit', 'chips',
 26 |                     'fries', 'seeds',
 27 |                     'sprite', 'cola', 'fenta', 'redbull',
 28 |                     'ADCaMilk', 'juice', 'WLJ', 'JDB',
 29 |                     'ice_tea', 'green_tea', 'Sydney', 'tea_pi',
 30 |                     'coco', 'NF_Spring', 'wahaha', 'ganten',
 31 |                     'c\'est_bon', 'hengda', 'master', 'JML', 'KLS',
 32 |                     'QCYH', 'ICE'
 33 |                     ]
 34 | CLASS_NAME_DICT = {
 35 |     'CA001': 'draw_paper',
 36 |     'CA002': 'roll_paper',
 37 |     'CA003': 'toothbrush',
 38 |     'CA004': 'tape',
 39 |     'CD001': 'apple',
 40 |     'CD002': 'pear',
 41 |     'CD003': 'melon',
 42 |     'CD004': 'kiwi',
 43 |     'CD005': 'grapefruit',
 44 |     'CD006': 'banana',
 45 |     'ZA001': 'soap',
 46 |     'ZA002': 'fulid',
 47 |     'ZA003': 'toothpaste',
 48 |     'ZA004': 'flower',
 49 |     'ZA005': 'duck',
 50 |     'ZA006': 'pencilbox',
 51 |     'ZB001': 'porridge',
 52 |     'ZB002': 'godmother',
 53 |     'ZB003': 'cookie',
 54 |     'ZB004': 'powder',
 55 |     'ZB005': 'gum',
 56 |     'ZB006': 'noodle',
 57 |     'ZB007': 'biscuit',
 58 |     'ZB008': 'chips',
 59 |     'ZB009': 'fries',
 60 |     'ZB010': 'seeds',
 61 |     'ZC001': 'sprite',
 62 |     'ZC002': 'cola',
 63 |     'ZC003': 'fenta',
 64 |     'ZC004': 'redbull',
 65 |     'ZC005': 'ADCaMilk',
 66 |     'ZC006': 'juice',
 67 |     'ZC007': 'WLJ',
 68 |     'ZC008': 'JDB',
 69 |     'ZC009': 'ice_tea',
 70 |     'ZC010': 'green_tea',
 71 |     'ZC011': 'Sydney',
 72 |     'ZC012': 'tea_pi',
 73 |     'ZC013': 'coco',
 74 |     'ZC014': 'NF_Spring',
 75 |     'ZC015': 'wahaha',
 76 |     'ZC016': 'ganten',
 77 |     'ZC017': 'c\'est_bon',
 78 |     'ZC018': 'hengda',
 79 |     'ZC019': 'master',
 80 |     'ZC020': 'JML',
 81 |     'ZC021': 'KLS',
 82 |     'ZC022': 'QCYH',
 83 |     'ZC023': 'ICE'
 84 | }
 85 | 
 86 | dir_path = './'
 87 | pattern = re.compile('"label": "([A-Z]{2}[0-9]{3}(?:.+)?)",')
 88 | class_ids = []
 89 | for file in os.listdir(dir_path):
 90 |     if os.path.splitext(file)[-1] != '.json':
 91 |         continue
 92 |     with open(os.path.join(dir_path, file), 'r+', encoding='utf-8') as f:
 93 |         content = f.read()
 94 |         image_class_ids = pattern.findall(content)
 95 |         for id in image_class_ids:
 96 |             if id not in class_ids:
 97 |                 if len(id) > 5:
 98 |                     print("Find invalid id !!")
 99 |                     content = content.replace(id, id[:5])
100 |                     with open(os.path.join(dir_path, file), 'w', encoding='utf-8') as f:
101 |                         f.write(content)
102 |                 else:
103 |                     class_ids.append(id)
104 | print('一共有{}种class'.format(len(class_ids)))
105 | print('分别是')
106 | index = 1
107 | for id in class_ids:
108 |     print('"{}",'.format(id), end="")
109 |     index += 1
110 | print()
111 | index = 1
112 | for id in class_ids:
113 |     print('"{}":{},'.format(id, index))
114 |     index += 1
115 | 
116 | for id in class_ids:
117 |     print("'{}',".format(CLASS_NAME_DICT[id]),end="")
118 | 
119 | 
120 | 
121 | 


--------------------------------------------------------------------------------
/format.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | 
 4 | # dir_path = os.path.dirname(__file__)
 5 | # print(dir_path)
 6 | dir_path = './'
 7 | pattern = re.compile('"imagePath": "(.+?png)",')
 8 | for file in os.listdir(dir_path):
 9 |     if os.path.splitext(file)[-1] != '.json':
10 |         continue
11 |     with open(os.path.join(dir_path, file), encoding='utf-8') as f:
12 |         content = f.read()
13 |         imagePath = pattern.findall(content)[0]
14 |         print('imagePath ',imagePath)
15 |         new_content = content.replace(imagePath, os.path.splitext(file)[0]+'.png')
16 |     with open(os.path.join(dir_path, file), 'w', encoding='utf-8') as nf:
17 |         nf.write(new_content)
18 | 
19 | 


--------------------------------------------------------------------------------
/labelme2coco.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import numpy as np
  4 | import glob
  5 | import shutil
  6 | import cv2
  7 | from sklearn.model_selection import train_test_split
  8 | 
  9 | np.random.seed(41)
 10 | 
 11 | # 0为背景
 12 | classname_to_id = {
 13 |     "CA002": 1,
 14 |     "CA004": 2,
 15 |     "CA003": 3,
 16 |     "CD006": 4,
 17 |     "CD002": 5,
 18 |     "CD001": 6,
 19 |     "ZA001": 7,
 20 |     "ZA003": 8,
 21 |     "ZA002": 9,
 22 |     "ZA005": 10,
 23 |     "ZB005": 11,
 24 |     "ZA004": 12,
 25 |     "ZC009": 13,
 26 |     "ZB003": 14,
 27 |     "ZB004": 15,
 28 |     "ZC012": 16,
 29 |     "ZC011": 17,
 30 |     "ZC010": 18,
 31 |     "ZC023": 19,
 32 |     "ZC013": 20,
 33 |     "ZC014": 21,
 34 |     "ZC007": 22,
 35 |     "ZB006": 23,
 36 |     "ZC008": 24,
 37 |     "ZC002": 25,
 38 |     "ZB001": 26,
 39 |     "ZC001": 27,
 40 |     "CD003": 28,
 41 |     "CA001": 29,
 42 | }
 43 | 
 44 | 
 45 | class Lableme2CoCo:
 46 | 
 47 |     def __init__(self):
 48 |         self.images = []
 49 |         self.annotations = []
 50 |         self.categories = []
 51 |         self.img_id = 0
 52 |         self.ann_id = 0
 53 | 
 54 |     def save_coco_json(self, instance, save_path):
 55 |         json.dump(instance, open(save_path, 'w', encoding='utf-8'), ensure_ascii=False, indent=1)  # indent=2 更加美观显示
 56 | 
 57 |     # 由json文件构建COCO
 58 |     def to_coco(self, json_path_list):
 59 |         self._init_categories()
 60 |         for json_path in json_path_list:
 61 |             obj = self.read_jsonfile(json_path)
 62 |             self.images.append(self._image(obj, json_path))
 63 |             shapes = obj['shapes']
 64 |             for shape in shapes:
 65 |                 annotation = self._annotation(shape)
 66 |                 self.annotations.append(annotation)
 67 |                 self.ann_id += 1
 68 |             self.img_id += 1
 69 |         instance = {}
 70 |         instance['info'] = 'spytensor created'
 71 |         instance['license'] = ['license']
 72 |         instance['images'] = self.images
 73 |         instance['annotations'] = self.annotations
 74 |         instance['categories'] = self.categories
 75 |         return instance
 76 | 
 77 |     # 构建类别
 78 |     def _init_categories(self):
 79 |         for k, v in classname_to_id.items():
 80 |             category = {}
 81 |             category['id'] = v
 82 |             category['name'] = k
 83 |             self.categories.append(category)
 84 | 
 85 |     # 构建COCO的image字段
 86 |     def _image(self, obj, path):
 87 |         image = {}
 88 |         from labelme import utils
 89 |         img_x = utils.img_b64_to_arr(obj['imageData'])
 90 |         h, w = img_x.shape[:-1]
 91 |         image['height'] = h
 92 |         image['width'] = w
 93 |         image['id'] = self.img_id
 94 |         image['file_name'] = os.path.basename(path).replace(".json", ".jpg")
 95 |         return image
 96 | 
 97 |     # 构建COCO的annotation字段
 98 |     def _annotation(self, shape):
 99 |         # print('shape', shape)
100 |         label = shape['label']
101 |         points = shape['points']
102 |         annotation = {}
103 |         annotation['id'] = self.ann_id
104 |         annotation['image_id'] = self.img_id
105 |         annotation['category_id'] = int(classname_to_id[label])
106 |         annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
107 |         annotation['bbox'] = self._get_box(points)
108 |         annotation['iscrowd'] = 0
109 |         annotation['area'] = 1.0
110 |         return annotation
111 | 
112 |     # 读取json文件，返回一个json对象
113 |     def read_jsonfile(self, path):
114 |         with open(path, "r", encoding='utf-8') as f:
115 |             return json.load(f)
116 | 
117 |     # COCO的格式： [x1,y1,w,h] 对应COCO的bbox格式
118 |     def _get_box(self, points):
119 |         min_x = min_y = np.inf
120 |         max_x = max_y = 0
121 |         for x, y in points:
122 |             min_x = min(min_x, x)
123 |             min_y = min(min_y, y)
124 |             max_x = max(max_x, x)
125 |             max_y = max(max_y, y)
126 |         return [min_x, min_y, max_x - min_x, max_y - min_y]
127 | 
128 | 
def resolve_image_paths(json_path, target_dir):
    """Map a labelme JSON path to its source PNG and destination JPG paths.

    Only the file extension is swapped, so a directory whose name happens to
    contain "json" or "png" is left intact, and the destination uses just the
    base name of the file.

    Args:
        json_path: Path of a labelme ``.json`` file.
        target_dir: Directory the converted image is written to.

    Returns:
        ``(png_path, jpg_path)``: the PNG next to *json_path* and the JPG
        path inside *target_dir*.
    """
    stem = os.path.splitext(json_path)[0]
    return stem + '.png', os.path.join(target_dir, os.path.basename(stem) + '.jpg')


def export_images(json_paths, target_dir):
    """Convert the PNG belonging to each JSON file into a JPG in *target_dir*.

    An image that cannot be read or written is reported and skipped.
    """
    for json_path in json_paths:
        img_name, jpg_name = resolve_image_paths(json_path, target_dir)
        try:
            temp_img = cv2.imread(img_name)
            # imread signals failure by returning None and imwrite by
            # returning False, so check both instead of relying on exceptions.
            written = temp_img is not None and cv2.imwrite(jpg_name, temp_img)
        except Exception as e:
            print(e)
            written = False
        if not written:
            print('Wrong Image:', img_name)
            continue
        print(img_name + '-->', jpg_name)


if __name__ == '__main__':
    labelme_path = "../../../xianjin_data-3/"
    saved_coco_path = "../../../xianjin_data-3/"
    print('reading...')
    # Create the output directory tree.
    coco_root = os.path.join(saved_coco_path, 'coco')
    annotations_dir = os.path.join(coco_root, 'annotations')
    train_images_dir = os.path.join(coco_root, 'images', 'train2017')
    val_images_dir = os.path.join(coco_root, 'images', 'val2017')
    for directory in (annotations_dir, train_images_dir, val_images_dir):
        os.makedirs(directory, exist_ok=True)
    # Collect every labelme JSON file in the input directory.
    print(labelme_path + "/*.json")
    json_list_path = glob.glob(labelme_path + "/*.json")
    print('json_list_path: ', len(json_list_path))
    # Split 90% / 10% into training and validation sets.
    train_path, val_path = train_test_split(json_list_path, test_size=0.1, train_size=0.9)
    print("train_n:", len(train_path), 'val_n:', len(val_path))

    # Convert the training set to COCO JSON and export its images.
    l2c_train = Lableme2CoCo()
    train_instance = l2c_train.to_coco(train_path)
    l2c_train.save_coco_json(train_instance, os.path.join(annotations_dir, 'instances_train2017.json'))
    export_images(train_path, train_images_dir)

    # Export the validation images, then convert the validation set to COCO JSON.
    export_images(val_path, val_images_dir)
    l2c_val = Lableme2CoCo()
    val_instance = l2c_val.to_coco(val_path)
    l2c_val.save_coco_json(val_instance, os.path.join(annotations_dir, 'instances_val2017.json'))
180 | 
181 | 


--------------------------------------------------------------------------------
/rename.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | # this_file_path = __file__
 4 | # this_dir_path = os.path.dirname(this_file_path)
 5 | this_dir_path = './'
 6 | json_index = 433
 7 | png_index = 433
 8 | for file in os.listdir(this_dir_path):
 9 |     file_path = os.path.join(this_dir_path, file)
10 |     if os.path.splitext(file_path)[-1] == '.png':
11 |         new_file_path = '.'+'/'.join((os.path.splitext(file_path)[0].split('\\'))[:-1]) + '/{:0>4}_Color.png'.format(png_index)
12 |         png_index += 1
13 |         print(file_path+'---->'+new_file_path)
14 |         os.rename(file_path, new_file_path)
15 |     elif os.path.splitext(file_path)[-1] == '.json':
16 |         new_file_path = '.'+'/'.join((os.path.splitext(file_path)[0].split('\\'))[:-1]) + '/{:0>4}_Color.json'.format(json_index)
17 |         json_index += 1
18 |         print(file_path+'---->'+new_file_path)
19 |         os.rename(file_path,new_file_path)
20 | 


--------------------------------------------------------------------------------