├── .vscode
│   ├── launch.json
│   └── settings.json
├── 123.py
├── README.md
├── bin
│   ├── app_test.py
│   ├── demo.py
│   ├── my_test.py
│   └── my_train.py
├── data
│   └── readme.md
├── gcnet
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-37.pyc
│   │   ├── classifier.cpython-37.pyc
│   │   ├── json_utils.cpython-37.pyc
│   │   ├── logger.cpython-37.pyc
│   │   ├── model.cpython-37.pyc
│   │   ├── resnet.cpython-37.pyc
│   │   ├── train.cpython-37.pyc
│   │   ├── transforms.cpython-37.pyc
│   │   └── utils.cpython-37.pyc
│   ├── classifier.py
│   ├── json_utils.py
│   ├── logger.py
│   ├── resnet.py
│   ├── test.py
│   ├── train.py
│   ├── transforms.py
│   └── utils.py
├── img
│   ├── readme.md
│   └── 垃圾分类.png
├── models
│   └── readme.md
├── preprocess
│   ├── .ipynb_checkpoints
│   │   ├── 01 垃圾分类_一级分类 数据分布-checkpoint.ipynb
│   │   ├── 02 垃圾分类_二级分类 数据分析-checkpoint.ipynb
│   │   ├── 03 数据预处理方法介绍-checkpoint.ipynb
│   │   └── 04 垃圾分类 数据预处理与可视化-checkpoint.ipynb
│   ├── 01-原始数据集分布可视化分析.py
│   ├── 01.html
│   ├── 02-原始数据集train-val划分.py
│   ├── 03-train和val数据分布可视化.py
│   ├── 03.html
│   ├── 04-四大类垃圾分布可视化.py
│   ├── 04.html
│   ├── 05-四大类垃圾train-val分布可视化.py
│   ├── 05.html
│   ├── 06-数据增强transform.py
│   ├── 07-原始数据可视化.py
│   ├── 08-预处理数据加载.py
│   ├── 09-测试resnext101模型.py
│   ├── 10-Web服务环境搭建.py
│   ├── 11-分类网络环境搭建.py
│   └── images
│       ├── cat.jpg
│       ├── dog.jpg
│       ├── dog_2.jpg
│       ├── hua.jpg
│       ├── img_1204.jpg
│       ├── img_1221.jpg
│       ├── img_1237.jpg
│       ├── img_1522.jpg
│       ├── img_229.jpg
│       ├── img_4869.jpg
│       ├── ren_l.jpg
│       └── yindu.jpg
└── 垃圾分类.png

--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal"
        }
    ]
}

--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
{
    "python.pythonPath": "/home/ubuntu/anaconda3/envs/pytorch/bin/python",
    "open-in-browser.default": "Chrome"
}

--------------------------------------------------------------------------------
/123.py:
--------------------------------------------------------------------------------
123

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Garbage_Classification

![image](./img/垃圾分类.png)

## Description

- [1] https://www.bilibili.com/video/BV1gz41187Hk
- [2] https://www.bilibili.com/video/BV1MT4y1V74w

We also provide [PyTorch code](https://github.com/HonglinChu/NanoCls) for deployment on mobile devices.

## Tree
```
.
├── bin
│   ├── app_test.py
│   ├── demo.py
│   ├── my_test.py
│   └── my_train.py
├── data
├── gcnet
│   ├── classifier.py
│   ├── __init__.py
│   ├── json_utils.py
│   ├── logger.py
│   ├── __pycache__
│   ├── resnet.py
│   ├── test.py
│   ├── train.py
│   ├── transforms.py
│   └── utils.py
├── models
└── preprocess
    ├── 01.html
    ├── 01-原始数据集分布可视化分析.py
    ├── 02-原始数据集train-val划分.py
    ├── 03.html
    ├── 03-train和val数据分布可视化.py
    ├── 04.html
    ├── 04-四大类垃圾分布可视化.py
    ├── 05.html
    ├── 05-四大类垃圾train-val分布可视化.py
    ├── 06-数据增强transform.py
    ├── 07-原始数据可视化.py
    ├── 08-预处理数据加载.py
    ├── 09-测试resnext101模型.py
    ├── 10-Web服务环境搭建.py
    ├── 11-分类网络环境搭建.py
    └── images
```

## Demo

```
python ./demo.py
```

## Train

Download the data and pretrained models from Baidu Yun first: https://pan.baidu.com/s/1g9RoIGxf2OD1zo4bgbMQWg (password: cdz5).

```
python ./my_train.py
```

## Test

```
python ./my_test.py
```

## Web

```
python ./app_test.py
```

## Experiment

| Model             | Epochs | Precision | Recall | F1-score |
|-------------------|--------|-----------|--------|----------|
| resnext101_32x16d | 10     | 0.9827    | 0.9826 | 0.9826   |
| resnext101_32x8d  | 30     | 0.9589    | 0.9588 | 0.9583   |
| resnext101_32x8d  | 10     | 0.9473    | 0.9472 | 0.9472   |
| resnet18          | 10     | 0.8968    | 0.8959 | 0.8940   |

- resnext101_32x16d

| LR    | Epoch | Train Loss | Valid Loss | Train Acc. | Valid Acc. |
|-------|-------|------------|------------|------------|------------|
| 0.001 | 1     | 0.281912   | 0.241702   | 90.296428  | 91.276075  |
| 0.001 | 2     | 0.177333   | 0.147571   | 93.530952  | 94.628832  |
| 0.001 | 3     | 0.163498   | 0.135344   | 94.181235  | 95.118656  |
| 0.001 | 4     | 0.148997   | 0.081726   | 94.586606  | 96.968161  |
| 0.001 | 5     | 0.133063   | 0.090255   | 95.110210  | 96.807702  |
| 0.001 | 6     | 0.125995   | 0.069795   | 95.346677  | 97.415759  |
| 0.001 | 7     | 0.122259   | 0.102625   | 95.642260  | 96.351659  |
| 0.001 | 8     | 0.127478   | 0.068116   | 95.422684  | 97.660671  |
| 0.001 | 9     | 0.132976   | 0.053337   | 95.312896  | 98.268727  |
| 0.001 | 10    | 0.118554   | 0.068123   | 95.616924  | 97.669116  |
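## Client example

A minimal sketch of querying the running service from Python (assumes `python ./app_test.py` is serving on localhost:5000 and that the `requests` package is installed; the sample image path is only an illustration):

```
import requests

url = 'http://localhost:5000/predict'
# The Flask handler reads the upload from request.files['image']
with open('preprocess/images/cat.jpg', 'rb') as f:
    resp = requests.post(url, files={'image': f})

# Expected response shape (values are illustrative):
# {"error": 0, "errmsg": "success", "consume": 12,
#  "data": [{"label": 2, "c": 0.97, "name": "可回收物"}, ...]}
print(resp.json())
```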
--------------------------------------------------------------------------------
/bin/app_test.py:
--------------------------------------------------------------------------------

import torch
from flask import Flask, request
from gcnet.json_utils import jsonify
from gcnet.train import GarbageClassifier
from gcnet.transforms import transforms_image
import time
from collections import OrderedDict
import codecs

# Build the application and load all configuration
app = Flask(__name__)
# Keep non-ASCII characters intact in JSON responses (avoids garbled Chinese labels)
app.config['JSON_AS_ASCII'] = False

class_id2name = {0: '其他垃圾', 1: '厨余垃圾', 2: '可回收物', 3: '有害垃圾'}

# for line in codecs.open('data/garbage_label.txt', 'r', encoding='utf-8'):
#     line = line.strip()
#     _id = line.split(":")[0]
#     _name = line.split(":")[1]
#     class_id2name[int(_id)] = _name

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # serving device
print('Pytorch garbage-classification Serving on {} ...'.format(device))
num_classes = len(class_id2name)
model_name = 'resnext101_32x16d'
model_path = './models/checkpoint/checkpoint_8.pth.tar'  # args.resume, e.g. --resume checkpoint/garbage_resnext101_model_2_1111_4211.pth
# print("model_name = ", model_name)
# print("model_path = ", model_path)

GCNet = GarbageClassifier(model_name, num_classes, ngpu=0, feature_extract=True)
GCNet.model.to(device)  # move the model to the serving device
# To serve on a CPU-only machine, pass map_location='cpu' to torch.load
state_dict = torch.load(model_path)['state_dict']  # or: state_dict = torch.load(model_path)
# Checkpoints saved from multi-GPU training prefix parameter keys with `module.`; strip it
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    head = k[:7]
    if head == 'module.':
        name = k[7:]  # remove `module.`
    else:
        name = k
    new_state_dict[name] = v
# load params
GCNet.model.load_state_dict(new_state_dict)
GCNet.model.eval()

@app.route('/')
def hello():
    return "Nice To Meet You!"

@app.route('/predict', methods=['POST'])
def predict():
    # read the uploaded image
    file = request.files['image']
    img_bytes = file.read()
    # preprocess into a batched tensor
    feature = transforms_image(img_bytes)
    feature = feature.to(device)  # predict on the same device as the model
    # model prediction
    with torch.no_grad():
        t1 = time.time()
        outputs = GCNet.model(feature)
        consume = (time.time() - t1) * 1000  # ms
        consume = int(consume)

    # package the API result
    label_c_mapping = {}
    ## The output has unnormalized scores. To get probabilities, run a softmax on it,
    ## which yields one probability per label.
    outputs = torch.nn.functional.softmax(outputs[0], dim=0)

    pred_list = outputs.cpu().numpy().tolist()

    for i, prob in enumerate(pred_list):
        label_c_mapping[int(i)] = prob
    ## sort by probability (descending) and keep the top K = 4
    dict_list = []
    for label_prob in sorted(label_c_mapping.items(), key=lambda x: x[1], reverse=True)[:4]:
        label = int(label_prob[0])
        result = {'label': label, 'c': label_prob[1], 'name': class_id2name[label]}
        dict_list.append(result)
    ## return the fields of the dict in insertion order
    result = OrderedDict(error=0, errmsg='success', consume=consume, data=dict_list)
    return jsonify(result)

if __name__ == '__main__':
    # curl -X POST -F image=@cat_pic.jpeg http://localhost:5000/predict
    app.run()

--------------------------------------------------------------------------------
/bin/demo.py:
--------------------------------------------------------------------------------




--------------------------------------------------------------------------------
/bin/my_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/bin/my_test.py

--------------------------------------------------------------------------------
/bin/my_train.py:
--------------------------------------------------------------------------------
import os
import argparse
from gcnet.train import train

# Do not enable the following lines when running from the command line
#import multiprocessing
#multiprocessing.set_start_method('spawn',True)
# for (dirpath, dirname, filename) in os.walk(data_dir):  # inspect the directory structure
#     if filename:
#         print('*'*100)
#         print('dirpath:', dirpath)
#         print('dirname:', dirname)
#         print('filename:', filename)

# metavar - the argument's name as shown in usage messages; defaults to the argument name for positionals and to the upper-cased name for optionals.
# dest - the attribute name after parsing; for optionals it defaults to the longest option name, with dashes converted to underscores.

data_path = './data/garbage_classify_4'
save_path = './models/checkpoint'

if __name__ == '__main__':
    # create the argument parser
    parser = argparse.ArgumentParser(description='Pytorch garbage Training')

    # configure the arguments
    ## model name, choices=['resnext101_32x8d', 'resnext101_32x16d'],
    parser.add_argument('--model_name', default='resnext101_32x16d', type=str, help='model_name selected in train')

    parser.add_argument('--data_path', default=data_path, type=str, help='the path of training data')

    parser.add_argument('--save_path', default=save_path, type=str, help='the path of model to save')

    # learning rate, metavar='LR'
    parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate, e.g. 1e-2, 1e-4, 0.001')

    # batch_size
    parser.add_argument('--batch_size', default=64, type=int, help='batch size')
    # num_workers
    parser.add_argument('--num_workers', default=8, type=int, help='num_workers')
    # ngpu
    parser.add_argument('--ngpu', default=1, type=int, help='num_gpu')

    # path of the stored checkpoint, metavar='PATH'
    parser.add_argument('--resume', default='./models/checkpoint/ngpu_checkpoint_8.pth.tar', type=str, help='path to latest checkpoint')

    # evaluate
    parser.add_argument('--evaluate', default=0, type=int, help='choose if to evaluate')

    # parser.add_argument('--checkpoint', default="checkpoint", type=str, help='path to save checkpoint')
    ## number of training epochs, metavar='N'
    parser.add_argument('--epochs', default=10, type=int, help='number of total epochs to run')

    # number of image classes, metavar='N'
    parser.add_argument('--num_classes', default=4, type=int, help='number of classes')

    # which epoch to start training from, metavar='N'
    parser.add_argument('--start_epoch', default=1, type=int, help='manual epoch number')

    # parse the arguments
    args = parser.parse_args()
    # train
    train(args)
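Editor's note: a usage sketch for the entry point above. The flag values are illustrative, not prescriptive; `PYTHONPATH=.` assumes you run from the repository root so the `gcnet` package is importable. Note that `--resume` defaults to a checkpoint path, so passing an empty string is how you train from scratch with this parser.

```
# Train on the 4-class dataset from scratch
PYTHONPATH=. python bin/my_train.py --model_name resnext101_32x16d \
    --data_path ./data/garbage_classify_4 --save_path ./models/checkpoint \
    --lr 0.001 --batch_size 64 --epochs 10 --num_classes 4 --ngpu 0 --resume ''

# Resume from a saved checkpoint
PYTHONPATH=. python bin/my_train.py --resume ./models/checkpoint/checkpoint_8.pth.tar
```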
--------------------------------------------------------------------------------
/data/readme.md:
--------------------------------------------------------------------------------
13

--------------------------------------------------------------------------------
/gcnet/__init__.py:
--------------------------------------------------------------------------------
from .classifier import *
from .resnet import *
from .train import *
from .transforms import *
from .utils import *

--------------------------------------------------------------------------------
/gcnet/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/__pycache__/__init__.cpython-37.pyc

--------------------------------------------------------------------------------
/gcnet/__pycache__/classifier.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/__pycache__/classifier.cpython-37.pyc

--------------------------------------------------------------------------------
/gcnet/__pycache__/json_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/__pycache__/json_utils.cpython-37.pyc

--------------------------------------------------------------------------------
/gcnet/__pycache__/logger.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/__pycache__/logger.cpython-37.pyc

--------------------------------------------------------------------------------
/gcnet/__pycache__/model.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/__pycache__/model.cpython-37.pyc

--------------------------------------------------------------------------------
/gcnet/__pycache__/resnet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/__pycache__/resnet.cpython-37.pyc

--------------------------------------------------------------------------------
/gcnet/__pycache__/train.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/__pycache__/train.cpython-37.pyc

--------------------------------------------------------------------------------
/gcnet/__pycache__/transforms.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/__pycache__/transforms.cpython-37.pyc

--------------------------------------------------------------------------------
/gcnet/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/__pycache__/utils.cpython-37.pyc

--------------------------------------------------------------------------------
/gcnet/classifier.py:
--------------------------------------------------------------------------------
import time
import torch
import torch.nn as nn
import numpy as np
from progress.bar import Bar
from gcnet import resnet
from gcnet.utils import AverageMeter, accuracy

class GarbageClassifier:

    def __init__(self, model_name, num_classes, ngpu, feature_extract=True):

        self.name = 'GarbageClassifier'

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        if model_name == 'resnext101_32x16d':
            model = resnet.resnext101_32x16d_wsl()  # load the backbone; 1000 classes by default
        elif model_name == 'resnext101_32x8d':
            model = resnet.resnext101_32x8d()       # load the backbone; 1000 classes by default
        else:
            model = resnet.resnet50()

        if feature_extract:
            for param in model.parameters():
                # freeze the backbone: these parameters get no gradient updates
                param.requires_grad = False

        input_feat = model.fc.in_features  # input features of the fully connected layer

        model.fc = nn.Sequential(
            nn.Dropout(0.2),  # redefine the classification head; dropout counters overfitting
            nn.Linear(in_features=input_feat, out_features=num_classes)
        )

        if ngpu:
            model = nn.DataParallel(model, device_ids=[0, 1])
        model.to(self.device)
        print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))  # parameter count

        self.model = model

    def train_model(self, train_loader, criterion, optimizer):

        # meters for the running statistics
        data_time = AverageMeter()
        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        end = time.time()

        self.model.train()

        # iterate over the batches and fit the model
        ## progress bar
        bar = Bar('Processing', max=len(train_loader))
        for batch_index, (inputs, targets) in enumerate(train_loader):
            data_time.update(time.time() - end)
            # move tensors to GPU if cuda is_available
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            # clear the accumulated gradients before backpropagation
            optimizer.zero_grad()
            # forward pass
            outputs = self.model(inputs)
            # compute the loss
            loss = criterion(outputs, targets)
            # backward pass
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()

            # update accuracy and the running statistics
            prec1, _ = accuracy(outputs.data, targets.data, topk=(1, 1))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            ## pack the key statistics into the progress bar
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f}'.format(
                batch=batch_index + 1,
                size=len(train_loader),
                data=data_time.val,
                bt=batch_time.val,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=losses.avg,
                top1=top1.avg
            )
            bar.next()
        bar.finish()
        return (losses.avg, top1.avg)


    def test_model(self, val_loader, criterion, test=None):

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()

        predict_all = np.array([], dtype=int)
        labels_all = np.array([], dtype=int)

        self.model.eval()
        end = time.time()

        # iterate over the validation batches
        ## progress bar
        bar = Bar('Processing', max=len(val_loader))
        for batch_index, (inputs, targets) in enumerate(val_loader):
            data_time.update(time.time() - end)
            # move tensors to GPU if cuda is_available
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            # forward pass
            outputs = self.model(inputs)
            # compute the loss
            loss = criterion(outputs, targets)

            # update accuracy and the running statistics
            prec1, _ = accuracy(outputs.data, targets.data, topk=(1, 1))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            batch_time.update(time.time() - end)
            end = time.time()

            # collect data for the confusion matrix
            targets = targets.data.cpu().numpy()                  # ground-truth labels
            predic = torch.max(outputs.data, 1)[1].cpu().numpy()  # predicted labels
            labels_all = np.append(labels_all, targets)
            predict_all = np.append(predict_all, predic)

            ## pack the key statistics into the progress bar
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f}'.format(
                batch=batch_index + 1,
                size=len(val_loader),
                data=data_time.val,
                bt=batch_time.val,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=losses.avg,
                top1=top1.avg
            )
            bar.next()
        bar.finish()

        if test:
            return (losses.avg, top1.avg, predict_all, labels_all)
        else:
            return (losses.avg, top1.avg)

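Editor's note: a minimal sketch of how this class is driven, mirroring `gcnet/train.py` and `bin/my_train.py`; the data path, batch size and learning rate below are illustrative assumptions.

```
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets

from gcnet.classifier import GarbageClassifier
from gcnet.transforms import preprocess

# ImageFolder-style directory with one subfolder per class (path is an assumption)
train_data = datasets.ImageFolder(root='./data/garbage_classify_4/train', transform=preprocess)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

# Frozen backbone + trainable 4-class head, CPU/single-process (ngpu=0)
clf = GarbageClassifier('resnext101_32x16d', num_classes=4, ngpu=0, feature_extract=True)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(clf.model.parameters(), lr=0.001)

loss, acc = clf.train_model(train_loader, criterion, optimizer)  # one epoch
print('epoch loss %.4f, top-1 acc %.2f' % (loss, acc))
```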
--------------------------------------------------------------------------------
/gcnet/json_utils.py:
--------------------------------------------------------------------------------
from flask import current_app, json
"""
Purpose: Flask's built-in jsonify re-orders dict keys (ascending) when serializing;
this module overrides jsonify so that dicts are returned in their original
insertion order.
"""
def jsonify(*args, **kwargs):
    """This function wraps :func:`dumps` to add a few enhancements that make
    life easier. It turns the JSON output into a :class:`~flask.Response`
    object with the :mimetype:`application/json` mimetype. For convenience, it
    also converts multiple arguments into an array or multiple keyword arguments
    into a dict. This means that both ``jsonify(1,2,3)`` and
    ``jsonify([1,2,3])`` serialize to ``[1,2,3]``.

    For clarity, the JSON serialization behavior has the following differences
    from :func:`dumps`:

    1. Single argument: Passed straight through to :func:`dumps`.
    2. Multiple arguments: Converted to an array before being passed to
       :func:`dumps`.
    3. Multiple keyword arguments: Converted to a dict before being passed to
       :func:`dumps`.
    4. Both args and kwargs: Behavior undefined and will throw an exception.

    Example usage::

        from flask import jsonify

        @app.route('/_get_current_user')
        def get_current_user():
            return jsonify(username=g.user.username,
                           email=g.user.email,
                           id=g.user.id)

    This will send a JSON response like this to the browser::

        {
            "username": "admin",
            "email": "admin@localhost",
            "id": 42
        }

    .. versionchanged:: 0.11
       Added support for serializing top-level arrays. This introduces a
       security risk in ancient browsers. See :ref:`json-security` for details.

    This function's response will be pretty printed if the
    ``JSONIFY_PRETTYPRINT_REGULAR`` config parameter is set to True or the
    Flask app is running in debug mode. Compressed (not pretty) formatting
    currently means no indents and no spaces after separators.

    .. versionadded:: 0.2
    """

    indent = None
    separators = (',', ':')

    if current_app.config['JSONIFY_PRETTYPRINT_REGULAR'] or current_app.debug:
        indent = 2
        separators = (', ', ': ')

    if args and kwargs:
        raise TypeError('jsonify() behavior undefined when passed both args and kwargs')
    elif len(args) == 1:  # single args are passed directly to dumps()
        data = args[0]
    else:
        data = args or kwargs

    return current_app.response_class(
        json.dumps(data, indent=indent, separators=separators, sort_keys=False) + '\n',
        mimetype=current_app.config['JSONIFY_MIMETYPE']
    )
--------------------------------------------------------------------------------
/gcnet/logger.py:
--------------------------------------------------------------------------------
# A simple torch style logger
from __future__ import absolute_import
# import matplotlib.pyplot as plt
import numpy as np

__all__ = ['Logger', 'LoggerMonitor']

# def savefig(fname, dpi=None):
#     dpi = 150 if dpi == None else dpi
#     plt.savefig(fname, dpi=dpi)

# def plot_overlap(logger, names=None):
#     names = logger.names if names == None else names
#     numbers = logger.numbers
#     for _, name in enumerate(names):
#         x = np.arange(len(numbers[name]))
#         plt.plot(x, np.asarray(numbers[name]))
#     return [logger.title + '(' + name + ')' for name in names]

class Logger(object):
    '''Save training process to log file with simple plot function.'''

    def __init__(self, fpath, title=None, resume=False):
        self.file = None
        self.resume = resume
        self.title = '' if title is None else title
        if fpath is not None:
            if resume:
                self.file = open(fpath, 'r')
                name = self.file.readline()
                self.names = name.rstrip().split('\t')
                self.numbers = {}
                for _, name in enumerate(self.names):
                    self.numbers[name] = []

                for numbers in self.file:
                    numbers = numbers.rstrip().split('\t')
                    for i in range(0, len(numbers)):
                        self.numbers[self.names[i]].append(numbers[i])
                self.file.close()
                self.file = open(fpath, 'a')
            else:
                self.file = open(fpath, 'w')

    def set_names(self, names):
        if self.resume:
            pass
        # initialize numbers as empty list
        self.numbers = {}
        self.names = names
        for _, name in enumerate(self.names):
            self.file.write(name)
            self.file.write('\t')
            self.numbers[name] = []
        self.file.write('\n')
        self.file.flush()

    def append(self, numbers):
        assert len(self.names) == len(numbers), 'Numbers do not match names'
        for index, num in enumerate(numbers):
            self.file.write("{0:.6f}".format(num))
            self.file.write('|')
            self.numbers[self.names[index]].append(num)
        self.file.write('\n')
        self.file.flush()

    # def plot(self, names=None):
    #     names = self.names if names == None else names
    #     numbers = self.numbers
    #     for _, name in enumerate(names):
    #         x = np.arange(len(numbers[name]))
    #         plt.plot(x, np.asarray(numbers[name]))
    #     plt.legend([self.title + '(' + name + ')' for name in names])
    #     plt.grid(True)
    #     # plt.savefig('1.jpg')

    def close(self):
        if self.file is not None:
            self.file.close()


class LoggerMonitor(object):
    '''Load and visualize multiple logs.'''

    def __init__(self, paths):
        '''paths is a dictionary with {name:filepath} pairs'''
self.loggers = [] 88 | for title, path in paths.items(): 89 | logger = Logger(path, title=title, resume=True) 90 | self.loggers.append(logger) 91 | 92 | # def plot(self, names=None): 93 | # plt.figure() 94 | # plt.subplot(121) 95 | # legend_text = [] 96 | # for logger in self.loggers: 97 | # legend_text += plot_overlap(logger, names) 98 | # plt.legend(legend_text, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) 99 | # plt.grid(True) 100 | # 101 | if __name__ == '__main__': 102 | pass 103 | # Example 104 | # logger = Logger( '../saved_dict/log.txt') 105 | # logger.set_names(['Train loss', 'Valid loss', 'Test loss']) 106 | # 107 | # length = 100 108 | # t = np.arange(length) 109 | # train_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 110 | # valid_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 111 | # test_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 112 | # 113 | # for i in range(0, length): 114 | # logger.append([train_loss[i], valid_loss[i], test_loss[i]]) 115 | # logger.plot() 116 | # 117 | # # Example: logger monitor 118 | # paths = { 119 | # 'resadvnet20': '../saved_dict/log.txt' 120 | # } 121 | # 122 | # field = ['Valid Acc.'] 123 | # 124 | # monitor = LoggerMonitor(paths) 125 | # monitor.plot(names=field) 126 | # savefig('val.eps') 127 | -------------------------------------------------------------------------------- /gcnet/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | # from .utils import load_state_dict_from_url 3 | # add by shenfuli at 2019-10-28 4 | try: 5 | from torch.hub import load_state_dict_from_url 6 | except ImportError: 7 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 8 | 9 | 10 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 11 | 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 12 | 'resnext101_32x16d_wsl'] 13 | 14 | model_urls = { 15 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 16 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 17 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 18 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 19 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 20 | 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', 21 | 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', 22 | 23 | # https://github.com/facebookresearch/WSL-Images/blob/master/hubconf.py 24 | 'resnext101_32x16d': 'https://download.pytorch.org/models/ig_resnext101_32x16-c6f796b0.pth', 25 | 'resnext101_32x32d': 'https://download.pytorch.org/models/ig_resnext101_32x32-e4b90b00.pth', 26 | 'resnext101_32x48d': 'https://download.pytorch.org/models/ig_resnext101_32x48-3e41cc8a.pth', 27 | } 28 | 29 | 30 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 31 | """3x3 convolution with padding""" 32 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 33 | padding=dilation, groups=groups, bias=False, dilation=dilation) 34 | 35 | 36 | def conv1x1(in_planes, out_planes, stride=1): 37 | """1x1 convolution""" 38 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 39 | 40 | 41 | class BasicBlock(nn.Module): 42 | expansion = 1 43 | 44 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 45 | base_width=64, dilation=1, norm_layer=None): 46 | 
super(BasicBlock, self).__init__() 47 | if norm_layer is None: 48 | norm_layer = nn.BatchNorm2d 49 | if groups != 1 or base_width != 64: 50 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 51 | if dilation > 1: 52 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 53 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 54 | self.conv1 = conv3x3(inplanes, planes, stride) 55 | self.bn1 = norm_layer(planes) 56 | self.relu = nn.ReLU(inplace=True) 57 | self.conv2 = conv3x3(planes, planes) 58 | self.bn2 = norm_layer(planes) 59 | self.downsample = downsample 60 | self.stride = stride 61 | 62 | def forward(self, x): 63 | identity = x 64 | 65 | out = self.conv1(x) 66 | out = self.bn1(out) 67 | out = self.relu(out) 68 | 69 | out = self.conv2(out) 70 | out = self.bn2(out) 71 | 72 | if self.downsample is not None: 73 | identity = self.downsample(x) 74 | 75 | out += identity 76 | out = self.relu(out) 77 | 78 | return out 79 | 80 | 81 | class Bottleneck(nn.Module): 82 | expansion = 4 83 | 84 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 85 | base_width=64, dilation=1, norm_layer=None): 86 | super(Bottleneck, self).__init__() 87 | if norm_layer is None: 88 | norm_layer = nn.BatchNorm2d 89 | width = int(planes * (base_width / 64.)) * groups 90 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 91 | self.conv1 = conv1x1(inplanes, width) 92 | self.bn1 = norm_layer(width) 93 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 94 | self.bn2 = norm_layer(width) 95 | self.conv3 = conv1x1(width, planes * self.expansion) 96 | self.bn3 = norm_layer(planes * self.expansion) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.downsample = downsample 99 | self.stride = stride 100 | 101 | def forward(self, x): 102 | identity = x 103 | 104 | out = self.conv1(x) 105 | out = self.bn1(out) 106 | out = self.relu(out) 107 | 108 | out = self.conv2(out) 109 | out = self.bn2(out) 110 | out = self.relu(out) 111 | 112 | out = self.conv3(out) 113 | out = self.bn3(out) 114 | 115 | if self.downsample is not None: 116 | identity = self.downsample(x) 117 | 118 | out += identity 119 | out = self.relu(out) 120 | 121 | return out 122 | 123 | 124 | class ResNet(nn.Module): 125 | 126 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, 127 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 128 | norm_layer=None): 129 | super(ResNet, self).__init__() 130 | if norm_layer is None: 131 | norm_layer = nn.BatchNorm2d 132 | self._norm_layer = norm_layer 133 | 134 | self.inplanes = 64 135 | self.dilation = 1 136 | if replace_stride_with_dilation is None: 137 | # each element in the tuple indicates if we should replace 138 | # the 2x2 stride with a dilated convolution instead 139 | replace_stride_with_dilation = [False, False, False] 140 | if len(replace_stride_with_dilation) != 3: 141 | raise ValueError("replace_stride_with_dilation should be None " 142 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 143 | self.groups = groups 144 | self.base_width = width_per_group 145 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 146 | bias=False) 147 | self.bn1 = norm_layer(self.inplanes) 148 | self.relu = nn.ReLU(inplace=True) 149 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 150 | self.layer1 = self._make_layer(block, 64, layers[0]) 151 | self.layer2 = self._make_layer(block, 128, layers[1], 
stride=2, 152 | dilate=replace_stride_with_dilation[0]) 153 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 154 | dilate=replace_stride_with_dilation[1]) 155 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 156 | dilate=replace_stride_with_dilation[2]) 157 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 158 | self.fc = nn.Linear(512 * block.expansion, num_classes) 159 | 160 | for m in self.modules(): 161 | if isinstance(m, nn.Conv2d): 162 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 163 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 164 | nn.init.constant_(m.weight, 1) 165 | nn.init.constant_(m.bias, 0) 166 | 167 | # Zero-initialize the last BN in each residual branch, 168 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 169 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 170 | if zero_init_residual: 171 | for m in self.modules(): 172 | if isinstance(m, Bottleneck): 173 | nn.init.constant_(m.bn3.weight, 0) 174 | elif isinstance(m, BasicBlock): 175 | nn.init.constant_(m.bn2.weight, 0) 176 | 177 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 178 | norm_layer = self._norm_layer 179 | downsample = None 180 | previous_dilation = self.dilation 181 | if dilate: 182 | self.dilation *= stride 183 | stride = 1 184 | if stride != 1 or self.inplanes != planes * block.expansion: 185 | downsample = nn.Sequential( 186 | conv1x1(self.inplanes, planes * block.expansion, stride), 187 | norm_layer(planes * block.expansion), 188 | ) 189 | 190 | layers = [] 191 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 192 | self.base_width, previous_dilation, norm_layer)) 193 | self.inplanes = planes * block.expansion 194 | for _ in range(1, blocks): 195 | layers.append(block(self.inplanes, planes, groups=self.groups, 196 | base_width=self.base_width, dilation=self.dilation, 197 | norm_layer=norm_layer)) 198 | 199 | return nn.Sequential(*layers) 200 | 201 | def forward(self, x): 202 | x = self.conv1(x) 203 | x = self.bn1(x) 204 | x = self.relu(x) 205 | x = self.maxpool(x) 206 | 207 | x = self.layer1(x) 208 | x = self.layer2(x) 209 | x = self.layer3(x) 210 | x = self.layer4(x) 211 | 212 | x = self.avgpool(x) 213 | x = x.reshape(x.size(0), -1) 214 | x = self.fc(x) 215 | 216 | return x 217 | 218 | 219 | # add by shenfuli at 2019-10-28 220 | def _resnext(arch, block, layers, pretrained, progress, **kwargs): 221 | model = ResNet(block, layers, **kwargs) 222 | if pretrained: 223 | state_dict = load_state_dict_from_url(model_urls[arch], progress=progress) 224 | model.load_state_dict(state_dict) 225 | return model 226 | 227 | 228 | def _resnet(arch, block, layers, pretrained, progress, **kwargs): 229 | model = ResNet(block, layers, **kwargs) 230 | if pretrained: 231 | state_dict = load_state_dict_from_url(model_urls[arch], progress=progress) 232 | model.load_state_dict(state_dict) 233 | return model 234 | 235 | 236 | def resnet18(pretrained=False, progress=True, **kwargs): 237 | """Constructs a ResNet-18 model. 238 | Args: 239 | pretrained (bool): If True, returns a model pre-trained on ImageNet 240 | progress (bool): If True, displays a progress bar of the download to stderr 241 | """ 242 | return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, 243 | **kwargs) 244 | 245 | 246 | def resnet34(pretrained=False, progress=True, **kwargs): 247 | """Constructs a ResNet-34 model. 
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)


def resnet50(pretrained=False, progress=True, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)


def resnet101(pretrained=False, progress=True, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
                   **kwargs)


def resnet152(pretrained=False, progress=True, **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
                   **kwargs)


def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
    """Constructs a ResNeXt-50 32x4d model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4
    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
                   pretrained, progress, **kwargs)


def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
    """Constructs a ResNeXt-101 32x8d model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 8
    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
                   pretrained, progress, **kwargs)


def resnext101_32x16d_wsl(progress=True, **kwargs):
    """Constructs a ResNeXt-101 32x16 model pre-trained on weakly-supervised data
    and finetuned on ImageNet from Figure 5 in
    `"Exploring the Limits of Weakly Supervised Pretraining" <https://arxiv.org/abs/1805.00932>`_
    Args:
        progress (bool): If True, displays a progress bar of the download to stderr.
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 16
    return _resnext('resnext101_32x16d', Bottleneck, [3, 4, 23, 3], True, progress, **kwargs)


def resnext101_32x32d_wsl(progress=True, **kwargs):
    """Constructs a ResNeXt-101 32x32 model pre-trained on weakly-supervised data
    and finetuned on ImageNet from Figure 5 in
    `"Exploring the Limits of Weakly Supervised Pretraining" <https://arxiv.org/abs/1805.00932>`_
    Args:
        progress (bool): If True, displays a progress bar of the download to stderr.
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 32
    return _resnext('resnext101_32x32d', Bottleneck, [3, 4, 23, 3], True, progress, **kwargs)


def resnext101_32x48d_wsl(progress=True, **kwargs):
    """Constructs a ResNeXt-101 32x48 model pre-trained on weakly-supervised data
    and finetuned on ImageNet from Figure 5 in
    `"Exploring the Limits of Weakly Supervised Pretraining" <https://arxiv.org/abs/1805.00932>`_
    Args:
        progress (bool): If True, displays a progress bar of the download to stderr.
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 48
    return _resnext('resnext101_32x48d', Bottleneck, [3, 4, 23, 3], True, progress, **kwargs)
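Editor's note: a quick sketch of exercising one of these factories. Note that the `_wsl` variants above hard-wire `pretrained=True` in their `_resnext` call, so the weights are downloaded on first use; the 224x224 input size is the standard ImageNet assumption.

```
import torch
from gcnet import resnet

# Downloads the WSL-pretrained weights on first call (cached afterwards)
model = resnet.resnext101_32x16d_wsl(progress=True)
model.eval()

with torch.no_grad():
    x = torch.randn(1, 3, 224, 224)  # one fake RGB image at the expected size
    logits = model(x)
print(logits.shape)  # torch.Size([1, 1000]) -- ImageNet head until it is replaced
```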
--------------------------------------------------------------------------------
/gcnet/test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/gcnet/test.py

--------------------------------------------------------------------------------
/gcnet/train.py:
--------------------------------------------------------------------------------
# (1) import the required libraries
# (2) handle the input arguments
# (3) load and preprocess the data
# (4) utilities: logger and optimizer
# (5) build, train, evaluate and save the model
import os
import torch
import time
import torchvision
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from sklearn import metrics  # confusion matrix / classification report
from gcnet.transforms import preprocess
from gcnet.classifier import GarbageClassifier
from gcnet.utils import AverageMeter, save_checkpoint, accuracy
from gcnet.logger import Logger

#data_path='./data/garbage_classify_test'

class_id2name = {0: '其他垃圾', 1: '厨余垃圾', 2: '可回收物', 3: '有害垃圾'}

def train(args):

    data_path = args.data_path
    save_path = args.save_path
    # (1) load data
    TRAIN = '{}/train'.format(data_path)
    VAL = '{}/val'.format(data_path)
    train_data = datasets.ImageFolder(root=TRAIN, transform=preprocess)
    val_data = datasets.ImageFolder(root=VAL, transform=preprocess)

    class_list = [class_id2name[i] for i in list(range(len(train_data.class_to_idx.keys())))]

    train_loader = DataLoader(
        train_data,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        shuffle=True)

    val_loader = DataLoader(
        val_data,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        shuffle=False)

    # (2) initialize the model
    GCNet = GarbageClassifier(args.model_name, args.num_classes, args.ngpu, feature_extract=True)

    # (3) loss; evaluation reports confusion matrix, precision, recall and F1-score
    criterion = nn.CrossEntropyLoss()

    # (4) optimizer
    optimizer = torch.optim.Adam(GCNet.model.parameters(), args.lr)

    # (5) load checkpoint: resume training from the specified epoch
    epochs = args.epochs
    start_epoch = args.start_epoch
    if args.resume:
        # --resume checkpoint/checkpoint.pth.tar
        # load checkpoint
        print('Resuming from checkpoint...')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!!'
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        state_dict = checkpoint['state_dict']
        optim = checkpoint['optimizer']
        # If the checkpoint was saved from a multi-GPU run, its parameter keys carry a
        # `module.` prefix; build a new OrderedDict without the prefix, mirroring how
        # the model was constructed for training:
        # from collections import OrderedDict
        # new_state_dict = OrderedDict()
        # for k, v in state_dict.items():
        #     head = k[:7]
        #     if head == 'module.':
        #         name = k[7:]  # remove `module.`
        #     else:
        #         name = k
        #     new_state_dict[name] = v
        GCNet.model.load_state_dict(state_dict)
        optimizer.load_state_dict(optim)

    # # Evaluation: confusion matrix; precision, recall, F1-score
    # if args.evaluate and args.resume:
    #     print('\nEvaluate only')
    #     test_loss, test_acc, predict_all, labels_all = GCNet.test_model(val_loader, criterion, test=True)
    #     print('Test Loss:%.8f,Test Acc:%.2f' % (test_loss, test_acc))
    #     # confusion matrix
    #     report = metrics.classification_report(labels_all, predict_all, target_names=class_list, digits=4)
    #     confusion = metrics.confusion_matrix(labels_all, predict_all)
    #     print('\n report ', report)
    #     print('\n confusion', confusion)
    #     return

    # (6) train and validate
    best_acc = best_acc if args.resume else 0  # keep the best accuracy restored from the checkpoint
    if not args.ngpu:
        logger = Logger(os.path.join(save_path, 'log.txt'), title=None)
    else:
        logger = Logger(os.path.join(save_path, 'log_ngpu.txt'), title=None)
    ## header row of the log file
    logger.set_names(['LR', 'epoch', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])
    for epoch in range(start_epoch, epochs + 1):
        print('[{}/{}] Training'.format(epoch, args.epochs))
        # train
        train_loss, train_acc = GCNet.train_model(train_loader, criterion, optimizer)
        # val
        test_loss, test_acc = GCNet.test_model(val_loader, criterion, test=None)
        # log the key metrics
        logger.append([args.lr, int(epoch), train_loss, test_loss, train_acc, test_acc])
        print('train_loss:%f, val_loss:%f, train_acc:%f, val_acc:%f' % (train_loss, test_loss, train_acc, test_acc))
        # save the model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        if not args.ngpu:
            name = 'checkpoint_' + str(epoch) + '.pth.tar'
        else:
            name = 'ngpu_checkpoint_' + str(epoch) + '.pth.tar'
        save_checkpoint({
            'epoch': epoch,
            'state_dict': GCNet.model.state_dict(),
            'train_acc': train_acc,
            'test_acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict()
        }, is_best, checkpoint=save_path, filename=name)
    print('Best acc:')
    print(best_acc)

--------------------------------------------------------------------------------
/gcnet/transforms.py:
--------------------------------------------------------------------------------
# (1) transforms
import io
import torchvision.transforms as transforms
from PIL import Image

# preprocessing pipeline
preprocess = transforms.Compose([
    transforms.Resize((256, 256)),      # resize to 256x256
    transforms.CenterCrop((224, 224)),  # center-crop to the 224x224 network input
    transforms.ToTensor(),              # convert to a tensor scaled to [0,1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization
])

def transforms_image(img_bytes):
    # decode the raw bytes into a PIL image
    img = Image.open(io.BytesIO(img_bytes))
    img = preprocess(img)         # apply the preprocessing pipeline
    img_batch = img.unsqueeze(0)  # add a batch dimension: (b, c, h, w)
    return img_batch

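Editor's note: a small sketch of driving these helpers on a local file, mirroring what `bin/app_test.py` does with an uploaded image; the sample path is an assumption.

```
from gcnet.transforms import transforms_image

# Read raw bytes exactly as the Flask endpoint receives them
with open('preprocess/images/cat.jpg', 'rb') as f:
    img_bytes = f.read()

batch = transforms_image(img_bytes)
print(batch.shape)  # torch.Size([1, 3, 224, 224])
```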
--------------------------------------------------------------------------------
/gcnet/utils.py:
--------------------------------------------------------------------------------
'''
Some helper functions for PyTorch: AverageMeter, get_optimizer,
save_checkpoint and accuracy.
'''
import torch
import os

__all__ = ['AverageMeter', 'get_optimizer', 'save_checkpoint', 'accuracy']

def get_optimizer(model, args):
    if args.optimizer == 'sgd':
        return torch.optim.SGD(model.parameters(), args.lr)
    elif args.optimizer == 'rmsprop':
        return torch.optim.RMSprop(model.parameters(), args.lr)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(model.parameters(), args.lr)
    else:
        raise NotImplementedError


class AverageMeter(object):
    """Computes and stores the average and current value
    Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def save_checkpoint(state, is_best, checkpoint='checkpoint', filename='checkpoint.pth.tar'):

    if not os.path.exists(checkpoint):
        os.makedirs(checkpoint)
    # save the checkpoint state
    filepath = os.path.join(checkpoint, filename)
    print('checkpoint filepath = ', filepath)
    torch.save(state, filepath)
    # additionally save a copy of the best model so far
    if is_best:
        # model_name = 'best_' + str(state['epoch']) + '_' + str(
        #     int(round(state['train_acc'] * 100, 0))) + '_' + str(
        #     int(round(state['test_acc'] * 100, 0))) + '.pth'
        model_name = 'best_' + filename
        #print('Validation loss decreased. Saving model ..., model_name = ', model_name)
        model_path = os.path.join(checkpoint, model_name)
        #print('model_path = ', model_path)
        #torch.save(state['state_dict'], model_path)
        torch.save(state, model_path)

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.
    Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

--------------------------------------------------------------------------------
/img/readme.md:
--------------------------------------------------------------------------------
123

--------------------------------------------------------------------------------
/img/垃圾分类.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HonglinChu/Garbage_Classification/35a58c2a8d0d743505dfdf22df1cfaf0bc30cb9f/img/垃圾分类.png

--------------------------------------------------------------------------------
/models/readme.md:
--------------------------------------------------------------------------------
123

--------------------------------------------------------------------------------
/preprocess/.ipynb_checkpoints/01 垃圾分类_一级分类 数据分布-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    
"cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 数据分析概括\n", 8 | "\n", 9 | "讲师:沈福利 北京工业大学硕士学位\n", 10 | "\n", 11 | "主要目录\n", 12 | "\n", 13 | "* 整体数据探测\n", 14 | "* 分析数据不同类别分布\n", 15 | "* 分析图片长宽比例分布\n", 16 | "* 切分数据集和验证集\n", 17 | "\n", 18 | "通过本章节学习,大家可以掌握如下知识点 \n", 19 | "\n", 20 | "* 掌握 pyecharts ,seaborn 如何快速绘制图表;\n", 21 | "\n", 22 | "* 加深python 基本操作,如类似list 列表推导式,dict 类型数据排序,walk 文件遍历操作,glob 文件匹配处理\n", 23 | "\n", 24 | "另外:如果大家对python 可视化图表不太熟悉的同学,大家可以学习老师 所讲的课程\n", 25 | "\n", 26 | "《Python 数据可视化库 Seaborn》 \n", 27 | "《Python 数据可视化 pyecharts 实战》\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "# 二级分类数据转一级分类数据" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2, 19: 2, 20: 2, 21: 2, 22: 2, 23: 2, 24: 2, 25: 2, 26: 2, 27: 2, 28: 2, 29: 2, 30: 2, 31: 2, 32: 2, 33: 2, 34: 2, 35: 2, 36: 2, 37: 3, 38: 3, 39: 3}\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "garbage_classify_rule = {\n", 52 | " \"0\": \"其他垃圾/一次性快餐盒\",\n", 53 | " \"1\": \"其他垃圾/污损塑料\",\n", 54 | " \"2\": \"其他垃圾/烟蒂\",\n", 55 | " \"3\": \"其他垃圾/牙签\",\n", 56 | " \"4\": \"其他垃圾/破碎花盆及碟碗\",\n", 57 | " \"5\": \"其他垃圾/竹筷\",\n", 58 | " \"6\": \"厨余垃圾/剩饭剩菜\",\n", 59 | " \"7\": \"厨余垃圾/大骨头\",\n", 60 | " \"8\": \"厨余垃圾/水果果皮\",\n", 61 | " \"9\": \"厨余垃圾/水果果肉\",\n", 62 | " \"10\": \"厨余垃圾/茶叶渣\",\n", 63 | " \"11\": \"厨余垃圾/菜叶菜根\",\n", 64 | " \"12\": \"厨余垃圾/蛋壳\",\n", 65 | " \"13\": \"厨余垃圾/鱼骨\",\n", 66 | " \"14\": \"可回收物/充电宝\",\n", 67 | " \"15\": \"可回收物/包\",\n", 68 | " \"16\": \"可回收物/化妆品瓶\",\n", 69 | " \"17\": \"可回收物/塑料玩具\",\n", 70 | " \"18\": \"可回收物/塑料碗盆\",\n", 71 | " \"19\": \"可回收物/塑料衣架\",\n", 72 | " \"20\": \"可回收物/快递纸袋\",\n", 73 | " \"21\": \"可回收物/插头电线\",\n", 74 | " \"22\": \"可回收物/旧衣服\",\n", 75 | " \"23\": \"可回收物/易拉罐\",\n", 76 | " \"24\": \"可回收物/枕头\",\n", 77 | " \"25\": \"可回收物/毛绒玩具\",\n", 78 | " \"26\": \"可回收物/洗发水瓶\",\n", 79 | " \"27\": \"可回收物/玻璃杯\",\n", 80 | " \"28\": \"可回收物/皮鞋\",\n", 81 | " \"29\": \"可回收物/砧板\",\n", 82 | " \"30\": \"可回收物/纸板箱\",\n", 83 | " \"31\": \"可回收物/调料瓶\",\n", 84 | " \"32\": \"可回收物/酒瓶\",\n", 85 | " \"33\": \"可回收物/金属食品罐\",\n", 86 | " \"34\": \"可回收物/锅\",\n", 87 | " \"35\": \"可回收物/食用油桶\",\n", 88 | " \"36\": \"可回收物/饮料瓶\",\n", 89 | " \"37\": \"有害垃圾/干电池\",\n", 90 | " \"38\": \"有害垃圾/软膏\",\n", 91 | " \"39\": \"有害垃圾/过期药物\"\n", 92 | "}\n", 93 | "\n", 94 | "garbage_classify_index = {\"0\": \"其他垃圾\", \"1\": \"厨余垃圾\", \"2\": \"可回收物\", \"3\": \"有害垃圾\"}\n", 95 | "garbage_index_classify = {\"其他垃圾\":\"0\",\"厨余垃圾\":\"1\", \"可回收物\":\"2\",\"有害垃圾\":\"3\"}\n", 96 | "\n", 97 | "data_list = []\n", 98 | "rank1_garbage_classify_rule = {}\n", 99 | "for k,v in garbage_classify_rule.items():\n", 100 | " rank1_k = v.split('/')[0] \n", 101 | " rank1_v = k\n", 102 | " data_list.append([rank1_k,int(garbage_index_classify[rank1_k]),int(rank1_v)])\n", 103 | " \n", 104 | "# 获取一级分类label 对应的原始数据label \n", 105 | "rank_k_v_dict = {}\n", 106 | "for data in data_list:\n", 107 | " k = data[2] # 原标签\n", 108 | " v = data[1]# 新标签\n", 109 | " rank_k_v_dict[k]=v\n", 110 | "print(rank_k_v_dict)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "k_vlist_dict 就是一级分类需要处理的结果" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## 整体数据探测" 125 | ] 126 | }, 
127 | { 128 | "cell_type": "code", 129 | "execution_count": 2, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "************************************************************\n", 137 | "Directory path: ../data/garbage_classify/train_data\n", 138 | "total examples: 29605\n", 139 | "File name Example: ['.DS_Store', 'img_1.jpg', 'img_1.txt', 'img_10.jpg', 'img_10.txt']\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "import os\n", 145 | "from os import walk\n", 146 | "\n", 147 | "base_path = '../data/'\n", 148 | "data_path = os.path.join(base_path,'garbage_classify/train_data')\n", 149 | "for (dirpath, dirnames, filenames) in walk(data_path):\n", 150 | " if len(filenames)>0:\n", 151 | " print('*'*60)\n", 152 | " print(\"Directory path: \", dirpath)\n", 153 | " print(\"total examples: \", len(filenames))\n", 154 | " print(\"File name Example: \", filenames[:5])" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "我们来分析*.txt读取内容,然后获取img.txt\n", 162 | "\n", 163 | "首先,我们需要 匹配txt 文件进行输出" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 6, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "from glob import glob\n", 173 | "import os\n", 174 | "\n", 175 | "def get_img_info():\n", 176 | " data_path_txt = os.path.join(data_path,'*.txt')\n", 177 | " txt_file_list = glob(data_path_txt)\n", 178 | "\n", 179 | " # 存储txt 文件\n", 180 | " img_path_txt = 'img.txt'\n", 181 | " img_path_list = []\n", 182 | " img_label_dict = dict()# <标签,次数> \n", 183 | " img_name2label_dict = {}\n", 184 | " for file_path in txt_file_list:\n", 185 | " with open(file_path,'r') as f:\n", 186 | " line = f.readline()\n", 187 | " \n", 188 | " line = line.strip()\n", 189 | " img_name = line.split(',')[0]\n", 190 | " img_label = line.split(',')[1]\n", 191 | " img_label = int(img_label.strip())\n", 192 | " # 图片路径+标签\n", 193 | " img_name_path = os.path.join(base_path,'garbage_classify/train_data/{}'.format(img_name))\n", 194 | " img_path_list.append( \n", 195 | " {'img_name_path':img_name_path,\n", 196 | " 'img_label':img_label})\n", 197 | " return img_path_list" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 7, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "img_path_list = [{'img_name_path': '../data/garbage_classify/train_data/img_1.jpg', 'img_label': 0}, {'img_name_path': '../data/garbage_classify/train_data/img_10.jpg', 'img_label': 0}, {'img_name_path': '../data/garbage_classify/train_data/img_100.jpg', 'img_label': 0}, {'img_name_path': '../data/garbage_classify/train_data/img_1000.jpg', 'img_label': 2}, {'img_name_path': '../data/garbage_classify/train_data/img_10000.jpg', 'img_label': 21}, {'img_name_path': '../data/garbage_classify/train_data/img_10001.jpg', 'img_label': 21}, {'img_name_path': '../data/garbage_classify/train_data/img_10002.jpg', 'img_label': 21}, {'img_name_path': '../data/garbage_classify/train_data/img_10003.jpg', 'img_label': 21}, {'img_name_path': '../data/garbage_classify/train_data/img_10005.jpg', 'img_label': 21}, {'img_name_path': '../data/garbage_classify/train_data/img_10006.jpg', 'img_label': 21}]\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "print('img_path_list = ',get_img_info()[:10])" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 8, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 
223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "img_path_list = [{'img_name_path': '../data/garbage_classify/train_data/img_1.jpg', 'img_label': 0}, {'img_name_path': '../data/garbage_classify/train_data/img_10.jpg', 'img_label': 0}, {'img_name_path': '../data/garbage_classify/train_data/img_100.jpg', 'img_label': 0}]\n", 227 | "img_label_dict = {0: 1652, 2: 8611, 3: 1150, 1: 3389}\n" 228 | ] 229 | } 230 | ], 231 | "source": [ 232 | "# 对img_path_list 的img_label 进行修改为一级分类的标签\n", 233 | "img_path_list = []\n", 234 | "img_label_dict={}\n", 235 | "for img_info in get_img_info():\n", 236 | " img_label = img_info['img_label']# 修正前的标签\n", 237 | " img_label = rank_k_v_dict[img_label]\n", 238 | " img_info.update({'img_label':img_label}) # 修正后的标签\n", 239 | " \n", 240 | " # 图片路径+标签\n", 241 | " img_path_list.append(img_info)\n", 242 | "\n", 243 | " # 统计每个标签出现次数\n", 244 | " img_label = int(img_label)\n", 245 | " img_label_count = img_label_dict.get(img_label,0)\n", 246 | " if img_label_count:\n", 247 | " img_label_dict[img_label] = img_label_count + 1\n", 248 | " else:\n", 249 | " img_label_dict[img_label] = 1\n", 250 | "\n", 251 | "\n", 252 | "print('img_path_list = ',img_path_list[:3])\n", 253 | "print('img_label_dict = ',img_label_dict)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "## 数据不同类别分布" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "我们这里通过柱状图 来分析不同类别分布情况,我们使用pyecharts 这种效果非常棒的工具来展示" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "x 轴数据: 标签名称\n", 275 | "\n", 276 | "y 轴数据: 标签名称对应次数" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "首先我们 img_label_dict 按照key 进行排序,这样方便查看数据" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 9, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "name": "stdout", 293 | "output_type": "stream", 294 | "text": [ 295 | "{0: 1652, 1: 3389, 2: 8611, 3: 1150}\n", 296 | "{'0': '其他垃圾', '1': '厨余垃圾', '2': '可回收物', '3': '有害垃圾'}\n", 297 | "['其他垃圾', '厨余垃圾', '可回收物', '有害垃圾']\n", 298 | "[1652, 3389, 8611, 1150]\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "img_label_dict = dict(sorted(img_label_dict.items()))\n", 304 | "print(img_label_dict)\n", 305 | "print(garbage_classify_index)\n", 306 | "print([garbage_classify_index[str(k)] for k in img_label_dict.keys()])\n", 307 | "print(list(img_label_dict.values()))" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 10, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/html": [ 318 | "\n", 325 | "\n", 326 | "
\n", 327 | "\n", 328 | "\n", 329 | "\n" 465 | ], 466 | "text/plain": [ 467 | "" 468 | ] 469 | }, 470 | "execution_count": 10, 471 | "metadata": {}, 472 | "output_type": "execute_result" 473 | } 474 | ], 475 | "source": [ 476 | "# 导入库\n", 477 | "from pyecharts import options as opts\n", 478 | "from pyecharts.charts import Bar\n", 479 | "\n", 480 | "# 构建满足pyecharts 格式数据\n", 481 | "x = [garbage_classify_index[str(k)] for k in img_label_dict.keys()]\n", 482 | "y = list(img_label_dict.values())\n", 483 | "\n", 484 | "# 创建Bar 示例对象,同时x,y轴数据填充\n", 485 | "bar = Bar()\n", 486 | "\n", 487 | "bar.add_xaxis(xaxis_data=x)\n", 488 | "bar.add_yaxis(series_name='',yaxis_data=y)\n", 489 | "\n", 490 | "# 设置全局参数\n", 491 | "bar.set_global_opts(\n", 492 | " title_opts=opts.TitleOpts(title='垃圾分类 4类垃圾数量统计'),\n", 493 | " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=15))\n", 494 | ")\n", 495 | "\n", 496 | "# 展示图表\n", 497 | "bar.render()\n", 498 | "bar.render_notebook()" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "数据分析,可以得出一下的结论:\n", 506 | "\n", 507 | "1. 公共4 个分类,如上图分析Bar 图所示\n", 508 | "\n", 509 | "2. 较少数据为其他垃圾\n", 510 | "\n", 511 | "3. 较多的数据类别可以回收的垃圾\n", 512 | "\n", 513 | "\n", 514 | "我们的模型通过深度学习的迁移模型来完成,小数据量的样本也可以达到很好的效果,这些数据可以直接参与模型的训练输入" 515 | ] 516 | }, 517 | { 518 | "cell_type": "markdown", 519 | "metadata": {}, 520 | "source": [ 521 | "# 切分训练集和测试集" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 11, 527 | "metadata": {}, 528 | "outputs": [ 529 | { 530 | "data": { 531 | "text/plain": [ 532 | "14802" 533 | ] 534 | }, 535 | "execution_count": 11, 536 | "metadata": {}, 537 | "output_type": "execute_result" 538 | } 539 | ], 540 | "source": [ 541 | "len(img_path_list)" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 12, 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "data": { 551 | "text/plain": [ 552 | "{'img_name_path': '../data/garbage_classify/train_data/img_1.jpg',\n", 553 | " 'img_label': 0}" 554 | ] 555 | }, 556 | "execution_count": 12, 557 | "metadata": {}, 558 | "output_type": "execute_result" 559 | } 560 | ], 561 | "source": [ 562 | "img_path_list[0]" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": 15, 568 | "metadata": {}, 569 | "outputs": [ 570 | { 571 | "name": "stdout", 572 | "output_type": "stream", 573 | "text": [ 574 | "train_size= 11841\n", 575 | "valid_size= 2961\n" 576 | ] 577 | } 578 | ], 579 | "source": [ 580 | "import random\n", 581 | "random.shuffle(img_path_list)\n", 582 | "\n", 583 | "# 0.8 0.2 切分\n", 584 | "img_count = len(img_path_list)\n", 585 | "train_img_list = img_path_list[:int(img_count*0.8)]\n", 586 | "val_img_list = img_path_list[int(img_count*0.8):]\n", 587 | "\n", 588 | "print('train_size=',len(train_img_list))\n", 589 | "print('valid_size=',len(val_img_list))\n" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 | "数据切分后,我们生成训练和验证集的数据" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 19, 602 | "metadata": {}, 603 | "outputs": [], 604 | "source": [ 605 | "import shutil\n", 606 | "\n", 607 | "# 训练数据处理\n", 608 | "with open(os.path.join(base_path,'4_garbage-classify-for-pytorch/train.txt'),'w') as f:\n", 609 | " for img_dict in train_img_list:\n", 610 | " #文本格式数据\n", 611 | " img_name_path = img_dict['img_name_path'] # ../data/garbage_classify/img_11674.jpg\n", 612 | " img_label = img_dict['img_label']\n", 613 | " 
f.write(\"{}\\t{}\\n\".format(img_name_path,img_label))\n", 614 | " # 图片-标签目录\n", 615 | " garbage_classify_dir = os.path.join(base_path,'4_garbage-classify-for-pytorch/train/{}'.format(img_label))\n", 616 | " #print(garbage_classify_dir)\n", 617 | " if not os.path.exists(garbage_classify_dir):\n", 618 | " os.makedirs(garbage_classify_dir)\n", 619 | " # 拷贝数据到目录下\n", 620 | " #print(garbage_classify_dir,img_name_path)\n", 621 | " shutil.copy(img_name_path,garbage_classify_dir)\n", 622 | " \n", 623 | "\n", 624 | "# 验证数据处理\n", 625 | "with open(os.path.join(base_path,'4_garbage-classify-for-pytorch/val.txt'),'w') as f:\n", 626 | " for img_dict in val_img_list:\n", 627 | " #文本格式数据\n", 628 | " img_name_path = img_dict['img_name_path'] # ../data/garbage_classify/img_11674.jpg\n", 629 | " img_label = img_dict['img_label']\n", 630 | " f.write(\"{}\\t{}\\n\".format(img_name_path,img_label))\n", 631 | " # 图片-标签目录\n", 632 | " garbage_classify_dir = os.path.join(base_path,'4_garbage-classify-for-pytorch/val/{}'.format(img_label))\n", 633 | " #print(garbage_classify_dir)\n", 634 | " if not os.path.exists(garbage_classify_dir):\n", 635 | " os.makedirs(garbage_classify_dir)\n", 636 | " # 拷贝数据到目录下\n", 637 | " #print(garbage_classify_dir,img_name_path)\n", 638 | " shutil.copy(img_name_path,garbage_classify_dir)\n", 639 | " " 640 | ] 641 | }, 642 | { 643 | "cell_type": "markdown", 644 | "metadata": {}, 645 | "source": [ 646 | "数据处理完成后,处理结果就是我们后续需要训练模型的格式了。" 647 | ] 648 | }, 649 | { 650 | "cell_type": "markdown", 651 | "metadata": {}, 652 | "source": [ 653 | "```\n", 654 | "├── test\n", 655 | "│   ├── 0\n", 656 | "│   ├── 1\n", 657 | "│   ├── 2\n", 658 | "│   └── 3\n", 659 | "└── train\n", 660 | " ├── 0\n", 661 | " ├── 1\n", 662 | " ├── 2\n", 663 | " └── 3\n", 664 | "```" 665 | ] 666 | }, 667 | { 668 | "cell_type": "markdown", 669 | "metadata": {}, 670 | "source": [ 671 | "最后,我们来分析下。切分后验证集和训练集的分布" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 20, 677 | "metadata": {}, 678 | "outputs": [ 679 | { 680 | "name": "stdout", 681 | "output_type": "stream", 682 | "text": [ 683 | "../data/4_garbage-classify-for-pytorch/train.txt\n", 684 | "../data/4_garbage-classify-for-pytorch/val.txt\n", 685 | "train_dict = {0: 1312, 1: 2733, 2: 6871, 3: 925}\n", 686 | "test_dict = {0: 340, 1: 656, 2: 1740, 3: 225}\n", 687 | "garbage_classify_index = {'0': '其他垃圾', '1': '厨余垃圾', '2': '可回收物', '3': '有害垃圾'}\n" 688 | ] 689 | } 690 | ], 691 | "source": [ 692 | "train_path = os.path.join(base_path,'4_garbage-classify-for-pytorch/train.txt')\n", 693 | "test_path = os.path.join(base_path,'4_garbage-classify-for-pytorch/val.txt')\n", 694 | "print(train_path)\n", 695 | "print(test_path)\n", 696 | "\n", 697 | "\n", 698 | "def get_label_idx_list(data_path):\n", 699 | " label_idx_list = []\n", 700 | " import codecs\n", 701 | " for line in codecs.open(data_path,'r'):\n", 702 | " line = line.strip()\n", 703 | " label_idx = line.split('\\t')[1]\n", 704 | " label_idx_list.append(int(label_idx))\n", 705 | " return label_idx_list\n", 706 | "\n", 707 | "\n", 708 | "from collections import Counter\n", 709 | "train_dict = dict(Counter(get_label_idx_list(train_path)))\n", 710 | "test_dict = dict(Counter(get_label_idx_list(test_path)))\n", 711 | "\n", 712 | "train_dict = dict(sorted(train_dict.items()))\n", 713 | "test_dict = dict(sorted(test_dict.items()))\n", 714 | "\n", 715 | "print(\"train_dict = \",train_dict)\n", 716 | "print(\"test_dict = \",test_dict)\n", 717 | "print('garbage_classify_index = ',garbage_classify_index)" 
 ]
},
{
 "cell_type": "code", "execution_count": 22, "metadata": {},
 "outputs": [ { "data": {
  "text/html": [ "[pyecharts render script omitted: grouped bar chart '垃圾分类 不同类别数据数量统计' comparing Train vs Val counts per class]" ],
  "text/plain": [ "" ]
 }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ],
 "source": [
  "# 导入库\n",
  "from pyecharts import options as opts\n",
  "from pyecharts.charts import Bar\n",
  "\n",
  "# 创建Bar 示例对象,同时x,y轴数据填充\n",
  "bar = Bar(init_opts=opts.InitOpts())\n",
  "\n",
  "assert len(train_dict.keys())==len(test_dict.keys())\n",
  "x = [ \"{}-{}\".format(label_idx, garbage_classify_index.get(str(label_idx),\"\"))\n",
  "      for label_idx in train_dict.keys()]\n",
  "\n",
  "bar.add_xaxis(xaxis_data=list(x))\n",
  "bar.add_yaxis(series_name='Train',yaxis_data=list(train_dict.values()))\n",
  "bar.add_yaxis(series_name='Val',yaxis_data=list(test_dict.values()))\n",
  "\n",
  "# 设置全局参数\n",
  "bar.set_global_opts(\n",
  "    title_opts=opts.TitleOpts(title='垃圾分类 不同类别数据数量统计'),\n",
  "    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=15))\n",
  ")\n",
  "\n",
  "# 展示图表\n",
  "bar.render('rank1_render.html')\n",
  "bar.render_notebook()"
 ]
},
{
 "cell_type": "markdown", "metadata": {},
 "source": [ "# Summary" ]
},
{
 "cell_type": "markdown", "metadata": {},
 "source": [
  "With the model chosen, the remaining gains come mainly from the data side:\n",
  "\n",
  "* The class distribution is imbalanced; additional images can be collected from the web to enlarge the smaller classes\n",
  "* Given the image-size distribution, we resize the images before training"
 ]
},
{
 "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [],
 "source": []
}
],
"metadata": {
 "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" },
 "language_info": {
  "codemirror_mode": { "name": "ipython", "version": 3 },
  "file_extension": ".py", "mimetype": "text/x-python", "name": "python",
  "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2"
 },
 "toc": {
  "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true,
  "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents",
  "toc_cell": false,
  "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "232.997px" },
  "toc_section_display": true, "toc_window_display": false
 }
},
"nbformat": 4,
"nbformat_minor": 2
}
--------------------------------------------------------------------------------
/preprocess/.ipynb_checkpoints/02 垃圾分类_二级分类 数据分析-checkpoint.ipynb:
--------------------------------------------------------------------------------
{
"cells": [
{
 "cell_type": "markdown", "metadata": {},
 "source": [
  "# Data Analysis Overview\n",
  "\n",
  "Instructor: Shen Fuli, M.S., Beijing University of Technology\n",
  "\n",
  "Main topics:\n",
  "\n",
  "* Explore the dataset as a whole\n",
  "* Analyze the distribution across classes\n",
  "* Analyze the distribution of image aspect ratios\n",
  "* Split the data into training and validation sets\n",
  "\n",
  "After working through this chapter you should be able to:\n",
  "\n",
  "* plot charts quickly with pyecharts and seaborn;\n",
  "\n",
  "* apply core Python operations: list comprehensions, sorting dicts, directory traversal with walk, file matching with glob\n",
  "\n",
  "If you are not yet familiar with Python charting libraries, see the instructor's courses:\n",
  "\n",
  "《Python 数据可视化库 Seaborn》 \n",
  "《Python 数据可视化 pyecharts 实战》"
 ]
},
{
 "cell_type": "markdown", "metadata": {},
 "source": [ "# Exploring the dataset" ]
},
{
 "cell_type": "code", "execution_count": 11, "metadata": {},
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "data_path =  ../data/garbage_classify/train_data\n",
  "************************************************************\n",
  "Directory path: ../data/garbage_classify/train_data\n",
  "total examples =  29605\n",
  "file name example: ['.DS_Store', 'img_1.jpg', 'img_1.txt', 'img_10.jpg', 'img_10.txt']\n"
 ] } ],
 "source": [
  "import os\n",
  "from os import walk\n",
  "\n",
  "base_path = '../data/'\n",
  "data_path = os.path.join(base_path,'garbage_classify/train_data')\n",
  "\n",
  "# 数据路径\n",
  "print('data_path = ',data_path)\n",
  "\n",
  "\n",
  "for (dirpath,dirnames,filenames) in walk(data_path):\n",
  "    print('*' * 60)\n",
  "\n",
  "    print('Directory path:',dirpath)\n",
  "    print('total examples = ',len(filenames))\n",
  "    print('file name example:',filenames[:5]) # 文件的列表"
 ]
},
{
 "cell_type": "markdown", "metadata": {},
 "source": [
  "We parse each *.txt annotation file to recover the (image, label) pairs.\n",
  "\n",
  "First, match all the .txt files with glob."
 ]
},
{
 "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [],
 "source": [
  "from glob import glob\n",
  "\n",
  "def get_image_info():\n",
  "\n",
  "    data_path_txt = os.path.join(data_path,'*.txt') # 所有*.txt 文件\n",
  "    txt_file_list = glob(data_path_txt)\n",
  "\n",
  "    # 存储txt 文件变量\n",
  "    img_path_list = []\n",
  "    img_name2label_dict = {}\n",
  "\n",
  "    img_label_dict = {} # <标签,次数>\n",
  "    # 读取文件的内容\n",
  "    for file_path in txt_file_list:\n",
  "\n",
  "        with open(file_path,'r') as f:\n",
  "            line = f.readline()\n",
  "\n",
  "        line = line.strip()\n",
  "        img_name = line.split(',')[0]\n",
  "        img_label = line.split(',')[1]\n",
  "        img_label = int(img_label)\n",
  "        # 文件路径 ../data/garbage_classify/train_data/img_10.jpg\n",
  "        img_name_path = os.path.join(base_path,'garbage_classify/train_data/{}'.format(img_name))\n",
  "\n",
  "        img_path_list.append({'img_name_path':img_name_path,'img_label':img_label})\n",
  "\n",
  "        # 图片名称-标签\n",
  "        img_name2label_dict[img_name] = img_label\n",
  "\n",
  "        # 统计每个标签出现的次数\n",
  "        img_label_dict[img_label] = img_label_dict.get(img_label,0) + 1\n",
  "\n",
  "    # 最终返回的结果\n",
  "    return img_path_list,img_label_dict,img_name2label_dict"
 ]
},
{
 "cell_type": "code", "execution_count": 54, "metadata": {},
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "img_path_list =  [{'img_name_path': '../data/garbage_classify/train_data/img_1.jpg', 'img_label': 0}, {'img_name_path': '../data/garbage_classify/train_data/img_10.jpg', 'img_label': 0}, {'img_name_path': '../data/garbage_classify/train_data/img_100.jpg', 'img_label': 0}]\n",
  "img_label_dict =  {0: 242, 2: 279, 21: 657, 22: 375, 23: 309, 24: 318, 3: 85, 25: 550, 26: 351, 4: 387, 27: 536, 28: 382, 29: 416, 30: 321, 31: 446, 32: 280, 33: 322, 34: 395, 35: 351, 36: 265, 5: 289, 37: 322, 38: 391, 39: 437, 6: 395, 7: 362, 8: 380, 1: 370, 9: 389, 10: 387, 11: 736, 12: 331, 13: 409, 14: 357, 15: 419, 16: 352, 17: 309, 18: 362, 19: 312, 20: 226}\n",
  "img_label_dict len =  40\n"
 ] } ],
 "source": [
  "img_path_list,img_label_dict,img_name2label_dict = get_image_info()\n",
  "print('img_path_list = ',img_path_list[:3])\n",
  "print('img_label_dict = ',img_label_dict)\n",
  "print('img_label_dict len = ',len(img_label_dict))"
 ]
},
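{
 "cell_type": "markdown", "metadata": {},
 "source": [ "*(Added note)* A quick way to spot the extremes of this 40-class distribution, a sketch using only the img_label_dict computed above:" ]
},
{
 "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [],
 "source": [
  "# most and least represented of the 40 classes\n",
  "most = max(img_label_dict, key=img_label_dict.get)\n",
  "least = min(img_label_dict, key=img_label_dict.get)\n",
  "print('most:', most, img_label_dict[most], '| least:', least, img_label_dict[least])"
 ]
},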
{
 "cell_type": "markdown", "metadata": {},
 "source": [
  "# Class distribution\n",
  "\n",
  "We use a bar chart to examine how the 40 classes are distributed, rendered with pyecharts\n",
  "\n",
  "x axis: class names\n",
  "\n",
  "y axis: number of images per class\n",
  "\n",
  "First sort img_label_dict by key so the data is easier to read\n",
  "\n",
  "For more on pyecharts, see 《Python 数据可视化 pyecharts 实战》"
 ]
},
{
 "cell_type": "code", "execution_count": 70, "metadata": {},
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "Help on built-in function sorted in module builtins:\n",
  "\n",
  "sorted(iterable, /, *, key=None, reverse=False)\n",
  "    Return a new list containing all items from the iterable in ascending order.\n",
  "    \n",
  "    A custom key function can be supplied to customize the sort order, and the\n",
  "    reverse flag can be set to request the result in descending order.\n",
  "\n"
 ] } ],
 "source": [ "help(sorted)" ]
},
{
 "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [],
 "source": [
  "# 默认情况按key 进行排序\n",
  "img_label_dict = dict(sorted(img_label_dict.items()))"
 ]
},
{
 "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [],
 "source": [
  "label_dict = {\n",
  "    \"0\": \"其他垃圾/一次性快餐盒\",\n",
  "    \"1\": \"其他垃圾/污损塑料\",\n",
  "    \"2\": \"其他垃圾/烟蒂\",\n",
  "    \"3\": \"其他垃圾/牙签\",\n",
  "    \"4\": \"其他垃圾/破碎花盆及碟碗\",\n",
  "    \"5\": \"其他垃圾/竹筷\",\n",
  "    \"6\": \"厨余垃圾/剩饭剩菜\",\n",
  "    \"7\": \"厨余垃圾/大骨头\",\n",
  "    \"8\": \"厨余垃圾/水果果皮\",\n",
  "    \"9\": \"厨余垃圾/水果果肉\",\n",
  "    \"10\": \"厨余垃圾/茶叶渣\",\n",
  "    \"11\": \"厨余垃圾/菜叶菜根\",\n",
  "    \"12\": \"厨余垃圾/蛋壳\",\n",
  "    \"13\": \"厨余垃圾/鱼骨\",\n",
  "    \"14\": \"可回收物/充电宝\",\n",
  "    \"15\": \"可回收物/包\",\n",
  "    \"16\": \"可回收物/化妆品瓶\",\n",
  "    \"17\": \"可回收物/塑料玩具\",\n",
  "    \"18\": \"可回收物/塑料碗盆\",\n",
  "    \"19\": \"可回收物/塑料衣架\",\n",
  "    \"20\": \"可回收物/快递纸袋\",\n",
  "    \"21\": \"可回收物/插头电线\",\n",
  "    \"22\": \"可回收物/旧衣服\",\n",
  "    \"23\": \"可回收物/易拉罐\",\n",
  "    \"24\": \"可回收物/枕头\",\n",
  "    \"25\": \"可回收物/毛绒玩具\",\n",
  "    \"26\": \"可回收物/洗发水瓶\",\n",
  "    \"27\": \"可回收物/玻璃杯\",\n",
  "    \"28\": \"可回收物/皮鞋\",\n",
  "    \"29\": \"可回收物/砧板\",\n",
  "    \"30\": \"可回收物/纸板箱\",\n",
  "    \"31\": \"可回收物/调料瓶\",\n",
  "    \"32\": \"可回收物/酒瓶\",\n",
  "    \"33\": \"可回收物/金属食品罐\",\n",
  "    \"34\": \"可回收物/锅\",\n",
  "    \"35\": \"可回收物/食用油桶\",\n",
  "    \"36\": \"可回收物/饮料瓶\",\n",
  "    \"37\": \"有害垃圾/干电池\",\n",
  "    \"38\": \"有害垃圾/软膏\",\n",
  "    \"39\": \"有害垃圾/过期药物\"\n",
  "}"
 ]
},
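{
 "cell_type": "markdown", "metadata": {},
 "source": [ "*(Added note)* label_dict is typed by hand, so a cheap assertion that it really covers labels 0 through 39 is worth having (a sketch):" ]
},
{
 "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [],
 "source": [ "assert set(map(int, label_dict)) == set(range(40)), 'label_dict should cover 0..39'" ]
},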
| "cell_type": "code", 261 | "execution_count": 89, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "x = dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39])\n", 269 | "y = dict_values([242, 370, 279, 85, 387, 289, 395, 362, 380, 389, 387, 736, 331, 409, 357, 419, 352, 309, 362, 312, 226, 657, 375, 309, 318, 550, 351, 536, 382, 416, 321, 446, 280, 322, 395, 351, 265, 322, 391, 437])\n", 270 | "x = ['0-其他垃圾/一次性快餐盒', '1-其他垃圾/污损塑料', '2-其他垃圾/烟蒂', '3-其他垃圾/牙签', '4-其他垃圾/破碎花盆及碟碗', '5-其他垃圾/竹筷', '6-厨余垃圾/剩饭剩菜', '7-厨余垃圾/大骨头', '8-厨余垃圾/水果果皮', '9-厨余垃圾/水果果肉', '10-厨余垃圾/茶叶渣', '11-厨余垃圾/菜叶菜根', '12-厨余垃圾/蛋壳', '13-厨余垃圾/鱼骨', '14-可回收物/充电宝', '15-可回收物/包', '16-可回收物/化妆品瓶', '17-可回收物/塑料玩具', '18-可回收物/塑料碗盆', '19-可回收物/塑料衣架', '20-可回收物/快递纸袋', '21-可回收物/插头电线', '22-可回收物/旧衣服', '23-可回收物/易拉罐', '24-可回收物/枕头', '25-可回收物/毛绒玩具', '26-可回收物/洗发水瓶', '27-可回收物/玻璃杯', '28-可回收物/皮鞋', '29-可回收物/砧板', '30-可回收物/纸板箱', '31-可回收物/调料瓶', '32-可回收物/酒瓶', '33-可回收物/金属食品罐', '34-可回收物/锅', '35-可回收物/食用油桶', '36-可回收物/饮料瓶', '37-有害垃圾/干电池', '38-有害垃圾/软膏', '39-有害垃圾/过期药物']\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "# 首先,构建数据\n", 276 | "x = img_label_dict.keys()\n", 277 | "y = img_label_dict.values()\n", 278 | "print('x = ',x)\n", 279 | "print('y = ',y)\n", 280 | "\n", 281 | "# 借助python 中列表推导方法\n", 282 | "x = [\"{}-{}\".format(label_idx,label_dict[str(label_idx)] )\n", 283 | " for label_idx in img_label_dict.keys()]\n", 284 | "\n", 285 | "print('x = ',x)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 94, 291 | "metadata": {}, 292 | "outputs": [ 293 | { 294 | "data": { 295 | "text/html": [ 296 | "\n", 303 | "\n", 304 | "
\n", 305 | "\n", 306 | "\n", 307 | "\n" 515 | ], 516 | "text/plain": [ 517 | "" 518 | ] 519 | }, 520 | "execution_count": 94, 521 | "metadata": {}, 522 | "output_type": "execute_result" 523 | } 524 | ], 525 | "source": [ 526 | "# 通过pyecharts来进行绘制图表\n", 527 | "\n", 528 | "## 导入pyecharts 相关库\n", 529 | "from pyecharts import options as opts\n", 530 | "from pyecharts.charts import Bar\n", 531 | "\n", 532 | "## 构建pyecharts 需要的数据\n", 533 | "x = list(x)\n", 534 | "y = list(y)\n", 535 | "\n", 536 | "## 创建pyechats 的Bar 示例对象\n", 537 | "bar = Bar(init_opts=opts.InitOpts(width='1100px',height='500px'))\n", 538 | "\n", 539 | "## 填充数据\n", 540 | "bar.add_xaxis(xaxis_data=x)\n", 541 | "bar.add_yaxis(series_name='',yaxis_data=y)\n", 542 | "\n", 543 | "## 设置一些全局参数\n", 544 | "bar.set_global_opts(\n", 545 | " # 给我们的表增加一个标题\n", 546 | " title_opts=opts.TitleOpts(title='垃圾分类 不同类别数据分布'),\n", 547 | " # 使得我们的x 轴数据倾斜\n", 548 | " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate = 15) ) \n", 549 | " )\n", 550 | "## 展示我们的图表\n", 551 | "bar.render_notebook()" 552 | ] 553 | }, 554 | { 555 | "cell_type": "markdown", 556 | "metadata": {}, 557 | "source": [ 558 | "# 数据长度和高度分布比例" 559 | ] 560 | }, 561 | { 562 | "cell_type": "markdown", 563 | "metadata": {}, 564 | "source": [ 565 | "Python 如何获取JPG 图片的长度和高度信息?\n", 566 | "\n", 567 | "我们通过PIL 中Image 类处理" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": 103, 573 | "metadata": {}, 574 | "outputs": [ 575 | { 576 | "name": "stdout", 577 | "output_type": "stream", 578 | "text": [ 579 | "pic_file_name = ../data/garbage_classify/train_data/img_1.jpg\n", 580 | "img.size = (800, 575)\n" 581 | ] 582 | } 583 | ], 584 | "source": [ 585 | "# 导入PIL 库\n", 586 | "from PIL import Image\n", 587 | "\n", 588 | "# 获取一张图片\n", 589 | "base_path = '../data/'\n", 590 | "data_path = os.path.join(base_path,'garbage_classify/train_data')\n", 591 | "pic_file_name = os.path.join(data_path,'img_1.jpg')\n", 592 | "print('pic_file_name = ',pic_file_name)\n", 593 | "\n", 594 | "# 获取长度和高度\n", 595 | "img = Image.open(pic_file_name)\n", 596 | "print('img.size = ',img.size)" 597 | ] 598 | }, 599 | { 600 | "cell_type": "markdown", 601 | "metadata": {}, 602 | "source": [ 603 | "那么,接下来,我们来统计分析一些所有JPG 图片,来获取比例信息" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 123, 609 | "metadata": {}, 610 | "outputs": [ 611 | { 612 | "name": "stdout", 613 | "output_type": "stream", 614 | "text": [ 615 | "matched data_path_jpg = ../data/garbage_classify/train_data/*.jpg\n" 616 | ] 617 | }, 618 | { 619 | "data": { 620 | "text/plain": [ 621 | "[[1, 800, 575, 1.39, 0], [10, 750, 690, 1.09, 0], [100, 688, 357, 1.93, 0]]" 622 | ] 623 | }, 624 | "execution_count": 123, 625 | "metadata": {}, 626 | "output_type": "execute_result" 627 | } 628 | ], 629 | "source": [ 630 | "from glob import glob\n", 631 | "import os\n", 632 | "from PIL import Image\n", 633 | "data_path_jpg = os.path.join(data_path,'*.jpg')\n", 634 | "print('matched data_path_jpg = ',data_path_jpg)\n", 635 | "\n", 636 | "data_path_list = glob(data_path_jpg)\n", 637 | "data_list = []\n", 638 | "\n", 639 | "for file_path in data_path_list:\n", 640 | " #print(file_path)\n", 641 | " img = Image.open(file_path)\n", 642 | " width = img.size[0]\n", 643 | " height = img.size[1]\n", 644 | " \n", 645 | " ratio = float(\"{:.2f}\".format(width/height))\n", 646 | " \n", 647 | " img_name = file_path.split('/')[-1]\n", 648 | " img_id = img_name.split('.')[0].split('_')[1]\n", 649 | " img_label = 
{
 "cell_type": "markdown", "metadata": {},
 "source": [ "Next we scan all the JPG images and collect their width/height ratios" ]
},
{
 "cell_type": "code", "execution_count": 123, "metadata": {},
 "outputs": [
  { "name": "stdout", "output_type": "stream", "text": [
   "matched data_path_jpg =  ../data/garbage_classify/train_data/*.jpg\n"
  ] },
  { "data": { "text/plain": [
   "[[1, 800, 575, 1.39, 0], [10, 750, 690, 1.09, 0], [100, 688, 357, 1.93, 0]]"
  ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" }
 ],
 "source": [
  "from glob import glob\n",
  "import os\n",
  "from PIL import Image\n",
  "data_path_jpg = os.path.join(data_path,'*.jpg')\n",
  "print('matched data_path_jpg = ',data_path_jpg)\n",
  "\n",
  "data_path_list = glob(data_path_jpg)\n",
  "data_list = []\n",
  "\n",
  "for file_path in data_path_list:\n",
  "    img = Image.open(file_path)\n",
  "    width = img.size[0]\n",
  "    height = img.size[1]\n",
  "\n",
  "    ratio = float(\"{:.2f}\".format(width/height))\n",
  "\n",
  "    img_name = file_path.split('/')[-1]\n",
  "    img_id = img_name.split('.')[0].split('_')[1]\n",
  "    img_label = img_name2label_dict.get(img_name,0)\n",
  "    data_list.append([int(img_id),width,height,ratio,int(img_label)])\n",
  "\n",
  "data_list[:3]"
 ]
},
{
 "cell_type": "markdown", "metadata": {},
 "source": [
  "We start with a univariate analysis of the ratio, using a histogram.\n",
  "\n",
  "We render it with the seaborn visualization library"
 ]
},
{
 "cell_type": "code", "execution_count": 129, "metadata": {},
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "[1.39, 1.09, 1.93, 0.99, 1.11]\n"
 ] } ],
 "source": [
  "ratio_list = [ratio[3] for ratio in data_list]\n",
  "\n",
  "print(ratio_list[:5])"
 ]
},
{
 "cell_type": "code", "execution_count": 131, "metadata": {},
 "outputs": [ { "data": {
  "image/png": "[base64 PNG omitted: seaborn distplot, histogram plus KDE of the width/height ratios, mass concentrated between roughly 0.5 and 2]",
  "text/plain": [ "" ]
 }, "metadata": {}, "output_type": "display_data" } ],
 "source": [
  "# 导入可视化库\n",
  "import seaborn as sns\n",
  "# 导入计算库\n",
  "import numpy as np\n",
  "\n",
  "sns.set()\n",
  "np.random.seed(0)\n",
  "\n",
  "# seaborn 直方图展示\n",
  "ax = sns.distplot(ratio_list) # 告诉我们数据分布(0,2)"
 ]
},
"iVBORw0KGgoAAAANSUhEUgAAAX4AAAEBCAYAAAB/rs7oAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl8FHWe//FXpztJp3ORo3NwhJskYIBwCiqIAwSJEY2sFyOj/kRlnGWHmcVlgdXf4LrOCqs7DsP8GMfV0WHcjUc4XDlUPAlKuAkECEggdzr30UmnO6nfHzEtkKM7SXfSnf48H/KQ7m915Z3q4tPV36r6flWKoigIIYTwGF79HUAIIUTfksIvhBAeRgq/EEJ4GCn8QgjhYaTwCyGEh5HCL4QQHkYKvxBCeBgp/EII4WHsLvz//u//ztq1a9s9X1hYyLJly1i0aBErV66kvr7eoQGFEEI4ll2F/9ChQ6Snp3fY9pvf/IaHH36YvXv3ctNNN7F161aHBhRCCOFYNgt/VVUVr776Kk8//XS7NrPZTGZmJklJSQCkpqayd+9ex6cUQgjhMDYL/3PPPcfq1asJCgpq11ZZWUlAQAAajQYAvV5PSUmJ41MKIYRwmC4L/3vvvUd0dDSzZs3qsF1RFFQq1XXP3fhYCCGEa9F01fjxxx9jMBhYsmQJ1dXVGI1G/u3f/o1169YBEBoaSm1tLc3NzajVagwGAxERET0KUl5e16PX9ZWwsACXzwjukdMdMoJ75HSHjOAeOd0hI7TmrKysJyTEv8fr6LLwv/nmm9a/f/jhhxw+fNha9AG8vb2ZNm0aH3/8MSkpKezYsYM5c+b0KEhLi+uPDu0OGcE9crpDRnCPnO6QEdwjpztkhN7n7NF1/OvXr+ezzz4D4PnnnyctLY3Fixdz5MgRfvnLX/YqkBBCCOdSucpELAZDbX9H6JJeH+jyGcE9crpDRnCPnO6QEdwjpztkhNac5eV1hIUF9HgdcueuEEJ4GCn8QgjhYaTwCyGEh5HCL4QQHkYKvxBCeJgur+MXrsfSAiazpdN2rbGpD9MIIdyRFH43YzJbyMzufDykuVNjkEEzhBBdka4eIYTwMFL4hRDCw0jhF0IIDyOFXwghPIwUfiGE8DBS+IUQwsNI4RdCCA8jhV8IITyMFH4hhPAwUviFEMLDSOEXQggPY9dYPb/73e/Yt28fKpWKpUuX8thjj13XvmXLFj744AOCgoIAuP/++1m2bJnj0wohhOg1m4X/8OHDfPvtt+zatQuLxcLixYuZO3cuo0aNsi6TlZXFK6+8QmJiolPDCiGE6D2bXT0zZszg7bffRqPRUF5eTnNzMzqd7rplsrKy2LZtGykpKWzcuBGTyeS0wEIIIXrHrj5+b29vXnvtNZKTk5k1axaRkZHWtvr6euLj41mzZg3p6enU1NSwdetWpwUWQgjROypFURR7F25oaODpp59m8eLFPPDAAx0uc/bsWdatW8eOHTscFlL8qLTCyLHzpZ22T4mNICJU12m7EELY7OO/dOkSTU1NxMfH4+fnx8KFCzl//ry1vbCwkIyMDJYuXQqAoihoNN2f38VgqO32a/qSXh/oEhmNJgu1dY1dLuMKObviKtvSFnfI6Q4ZwT1yukNGaM1ZXl5HWFhAj9dhs6snPz+fDRs20NTURFNTE5999hlTp061tmu1WjZt2kReXh6KorB9+3YWLFjQ40BCCCGcy+ah+dy5czl16hT33HMParWahQsXkpyczIoVK1i1ahUJCQls3LiRlStXYjabmTJlSrvLPYUQQriObvXxO5Orf8Vyla+B9SY75txtbu7DRN3nKtvSFnfI6Q4ZwT1yukNG6KOuHiGEEAOLFH4hhPAwUviFEMLDSOEXQggPI4VfCCE8jBR+IYTwMFL4hRDCw0jhF0IIDyOFXwghPIwUfiGE8DBS+IUQwsNI4RdCCA8jhV8IITyMFH4hhPAwUviFEMLDSOEXQggPI4VfCCE8jBR+IYTwMHYV/t/97ncsXryY5ORk3nzzzXbt2dnZpKamkpSUxPr167FYLA4PKoQQwjFsFv7Dhw/z7bffsmvXLj744APeeecdvv/+++uWWbNmDc899xz79u1DURTS0tKcFlgIIUTv2Cz8M2bM4O2330aj0VBeXk5zczM6nc7aXlBQQGNjI5MnTwYgNTWVvXv3Oi+xEEKIXrGrq8fb25vXXnuN5ORkZs2aRWRkpLWttLQUvV5vfazX6ykpKXF8UiGEEA6hsXfBVatWsWLFCp5++mnS0tJ44IEHAGhpaUGlUlmXUxTlusf20usDu/2avuYKGZUKI4EB2i6XcYWctrhDRnCPnO6QEdwjpztkBAgLC+jV620W/kuXLtHU1ER8fDx+fn4sXLiQ8+fPW9ujoqIwGAzWx2VlZURERHQ7iMFQ2+3X9CW9PtAlMhpNFmrrGrtcxhVydsVVtqUt7pDTHTKCe+R0h4zQmrO8vK5Xxd9mV09+fj4bNmygqamJpqYmPvvsM6ZOnWptHzJkCL6+vhw9ehSAnTt3MmfOnB4HEkII4Vw2C//cuXO5/fbbueeee7jvvvtITEwkOTmZFStWcPr0aQA2b97MSy+9xKJFizAajSxfvtzpwYUQQvSMSlEUpb9DgHRP2KveZCEzu/OT53OnxqBqbu7DRN3nKtvSFnfI6Q4ZwT1yukNG6KOuHiGEEAOLFH4hhPAwUviFEMLDSOEXQggPI4VfCCE8jBR+IYTwMFL4hRDCw0jhF0IIDyOFXwghPIwUfiGE8DBS+IUQwsNI4RdCCA8jhV8IITyMFH4hhPAwUviFEMLDSOEXQggPI4VfCCE8jBR+IYTwMBp7FtqyZQt79uwBWufgffbZZ9u1f/DBBwQFBQFw//33s2zZMgdHFUII4Qg2C39GRgbffPMN6enpqFQqnnjiCT755BMWLFhgXSYrK4tXXnmFxMREp4YVQgjRezYLv16vZ+3atfj4+AAwevRoCgsLr1smKyuLbdu2UVBQwPTp0/mnf/onfH19nZNYCBdmaQGT2dJpu6+3Bo10sIp+ZrPwjx071vr33Nxc9uzZw7vvvmt9rr6+nvj4eNasWcPw4cNZu3YtW7duZfXq1c5JLIQLM5ktZGaXdNo+PT4Sja9dPaxCOI1KURTFngVzcnJ46qmn+Pu//3vuvffeTpc7e/Ys69atY8eOHQ4LKX5UWmHk2PnSTtunxEYQEarrw0TiWvL+CHdg16HH0aNHWbVqFevWrSM5Ofm6tsLCQjIyMli6dCkAiqKg0XT/iMZgqO32a/qSXh/oEhmNJgu1dY1dLuMKObviKtvSlp7ktPX+GI0mDM3NvY1mNZC3ZV9zh4zQmrO8vI6wsIAer8Nmb2NRURHPPPMMmzdvblf0AbRaLZs2bSIvLw9FUdi+fft1J36FEEK4FpuH5m+88QYmk4nf/va31ucefPBBDhw4wKpVq0hISGDjxo2sXLkSs9nMlClTeOyxx5waWgghRM/ZLPwbNmxgw4YN7Z5/6KGHrH9PSkoiKSnJscmEEEI4hVxYJo
QQHkYKvxBCeBgp/EII4WGk8AshhIeRwi+EEB5GCr8QQngYKfxCCOFhpPALIYSHkWECHUyG5RVCuDop/A4mw/IKIVydHHsKIYSHkcIvhBAeRgq/EEJ4GCn8QgjhYaTwCyGEh5HCL4QQHkYKvxBCeBgp/EII4WGk8AshhIexq/Bv2bKF5ORkkpOTefnll9u1Z2dnk5qaSlJSEuvXr8di6XzIAiGEEP3LZuHPyMjgm2++IT09nR07dnDmzBk++eST65ZZs2YNzz33HPv27UNRFNLS0pwWWAghRO/YLPx6vZ61a9fi4+ODt7c3o0ePprCw0NpeUFBAY2MjkydPBiA1NZW9e/c6L7EQQohesTla2NixY61/z83NZc+ePbz77rvW50pLS9Hr9dbHer2ekpLOBynrjF4f2O3X9DV7MioVRgIDtJ2263S+6EN1Pc5ga/0wcLalK+huTme//x0ZqNuyP7hDRoCwsIBevd7uYSJzcnJ46qmnePbZZxkxYoT1+ZaWFlQqlfWxoijXPbaXwVDb7df0Jb0+0K6MRpOF2rrGztuNJgzNzT3OYWv9MHC2ZX/rSU5nv/83Gsjbsq+5Q0ZozVleXter4m/Xyd2jR4/y6KOP8utf/5p77733uraoqCgMBoP1cVlZGRERET0OJIQQwrlsFv6ioiKeeeYZNm/eTHJycrv2IUOG4Ovry9GjRwHYuXMnc+bMcXxSIYQQDmGzq+eNN97AZDLx29/+1vrcgw8+yIEDB1i1ahUJCQls3ryZDRs2UFdXx4QJE1i+fLlTQwshhOg5m4V/w4YNbNiwod3zDz30kPXvcXFxvP/++45NJgYkSwuUVhgxmjq+10OmphTC+WQOQNGnTGYL574v7/QEqExNKYTzybGVEEJ4GCn8QgjhYaTwCyGEh5HCL4QQHkYKvxBCeBgp/EII4WGk8AshhIeRwi+EEB5GCr8QQngYKfxCCOFhpPALIYSHkcIvhBAeRgq/EEJ4GCn8A0iTuZkPPs/h61OF/R1FCOHCZPzbAeJyUQ1HzpXSYGqdz7WxqZkF04b1cyohhCuSI/4BIN9Qx9cni9D5avjnn01nyjg9736awyeZef0dTQjhgqTwuzlFUTh9qRx/rYY7bx7OmKGDeHrJhNbi/1kOReX1/R1RCOFi7C78dXV13HXXXeTn57dr27JlC/PmzWPJkiUsWbKE7du3OzSk6FxpZQOGqkYmjAzFy0sFgEbtxSNJsai9VHx+vKCfEwohXI1dffwnT55kw4YN5ObmdtielZXFK6+8QmJioiOzCTtkfV+B1kfNmKHB1z0f7O/DtLgIDp4u5r45o/H1UfdTQiGEq7HriD8tLY3nn3+eiIiIDtuzsrLYtm0bKSkpbNy4EZPJ5NCQomMVNY0UlNUTNzwEjbr9WzkvcQgNJgvfZZf0QzohhKuyq/C/+OKLTJs2rcO2+vp64uPjWbNmDenp6dTU1LB161aHhhQdO3elCo1aRWzMoA7bxw4NZqjenwNH81EUpY/TCSFcVa8v5/T39+f111+3Pn788cdZt24dq1ev7tZ69PrA3kZxOnsyKhVGAgO0nbbrdL7oQ3U9ztC2/hZFId9Qx6ghwYSH+Hea8+45o9n6wSkqjBbiRoT2+Oc6ilJhBOh0G/V2+zhad/dLZ7//HXGHfzvgHjndISNAWFhAr17f68JfWFhIRkYGS5cuBVqvMtFour9ag6G2t1GcSq8PtCuj0WShtq6x83ajCUNzc49ztK2/pMJIY1MzUaG6dj/v2pwTYgbho/FiX8Zlwvy9e/xzHcVosgB0uo16u30cyd73/FrOfv9v1JOM/cEdcrpDRmjNWV5e16vi3+vLObVaLZs2bSIvLw9FUdi+fTsLFizo7WqFDXmldXipYHB410ePfr4a4oaHcOpSuXT3CCGAXhT+FStWcPr0aUJDQ9m4cSMrV65k0aJFKIrCY4895siM4gaKopBXWkdUmA4fje2rdSaNDqO0qoHiH7pZhBCerVt9MgcOHLD+/dp+/aSkJJKSkhyXSnSppr6JWqOZ+BEhdi0/cXQ4cIFTl8qJDvO3ubwQYmCTO3fdUF5pHQDDIuzr4wsL1jJE78+pS+XOjCWEcBNS+N1QXmkdoUG++GvtP1k7cVQYF/KqaPjh5KoQwnNJ4Xcz9Q1mDFWNdh/tt5k4OozmFoUzlyuclEwI4S6k8LuZC3lVAN3uqx8zNBidr0a6e4QQUvjdzYW8KrzVXoQHd36TUEfUXl5MGBnK6ctyWacQnk4Kv5s5f7WSyFA/60ic3TF+RAjVdU1yWacQHk4Kvxspr27EUNVIVFjPbvmPH956+ee5K5WOjCWEcDNS+N3I2SutJ2Z7ei2+fpAfYUG+ZEvhF8KjSeF3I9m5lQTqvBkU4NOj16tUKuJiQjh3tYoW6ecXwmNJ4XcTiqKQfaWSccMGoVJ1v3+/TdzwEOoazBQYZEpGITyVFH43UVhWT3V9U6dj79urrZ9funuE8FxS+N3E2R8KdWyMfePzdCY0SEtEiJ+c4BXCg0nhdxPZuZVEDPIjNKh71+93JH54COfzKmluaXFAMiGEu5HC7waaW1o4n1dp92ictsQPD6HB1MzVkjqHrE/86GpJLbXGpv6OIUSXej0Dl3C+3KJaGkzNjHfQ1Ilt3UXZVyoZGR3kkHV6urzSOtI+v2gdCyksWMu4YYMYOzS4n5MJ0Z4c8buBs7mtxSSulyd22wT7+zAk3F9O8DrI4ewS/u+bh8ktquGeOaOYEqunpUXhUFYx2bmyjYXrkSN+N5B9pZKYiAACdT7UO2hY5bjhIXx9qhBLcwsatXz+91RZdQN/2XuOUYOD+Ielk1B5qcjMLmH8iBC+OlFI5rlStL5q+WYlXIr8i3dxJnMzFwuqHdbN0yZ+eAhN5ha+L6xx6Ho9SUuLwp93n0VR4MmUCQT4/Tg/gpdKxW0To4kM8ePgqSIMlQ39mFSI69ld+Ovq6rjrrrvIz89v15adnU1qaipJSUmsX78ei0Um+3CUnPwqLM2Kw07stomNGYQKGbenN/YevsqF/GqWLRiHfpBfu3a12ot5U4ag9dVw6EwxzS1yt7RwDXYV/pMnT/LQQw+Rm5vbYfuaNWt47rnn2LdvH4qikJaW5siMHi07txK1l4pxQx3Tv9/GX+tNTGSg9PP3UF2Dmd0ZuSSODWf2TVGdLufjrWbm+Eiq6po4K5PgCBdhV+FPS0vj+eefJyIiol1bQUEBjY2NTJ48GYDU1FT27t3r2JQe7ExuBaMHB+Hro3b4uuOHh3CpsJomc7PD1z3QfXokD1NTM/feNsrmEBrDIgKIiQzg1KVyyqqky0f0P7sK/4svvsi0adM6bCstLUWv11sf6/V6SkpKHJPOw5VVN3C1pI5JY8Kdsv644SFYmhUuFlQ7Zf0DVYPJwqdH8kkcG85QO6fAnBEfgZdKxXufX3RyOiFs6/VVPS0tLdcd8SiK0qNBxPT6wN5GcTp7MioVRgIDOr+7VqfzRR9q33j6h7JLAZh/8wj0+gC71m9vToBZgVpe++AUVwz1zJ0+3K7X9JbywyQwnf0O3dk+f
aGjbfn+gRyMJguPJI9v197Z+xMYoGX6+EgyTheRV97AlLj2354dmdEVuUNOd8gIEBbWvTm3b9Trwh8VFYXBYLA+Lisr67BLyBaDoba3UZxKrw+0K6PRZKG2rrHzdqMJQ7N9XStfHctnSLg/3ijWn21r/dC9bTlqcBCZZ4q5c/owu1/TG8YfLkft7HfozvZxto7ec5O5mQ8/z+GmkaEM0mratXf1/oyMDuDCVS1/Sj/F/318Omqv3l9UZ+9+2d/cIac7ZITWnOXldb0q/r3e84YMGYKvry9Hjx4FYOfOncyZM6e3q/V4NcYmLuRXkThOb3vhXpg4Kozc4lqq62WYAXscPltCrdHM4pu7/w1J7eXFkttGUlBWz9enipyQTgj79Ljwr1ixgtOnTwOwefNmXnrpJRYtWoTRaGT58uUOC+ipTuaUoSgw1dmFf3QYAKcvlTv15wwUnx8vYHC4f4+Hx540JpyxQ4NJ/+p7jI1y2bPoH93q6jlw4ID176+//rr173Fxcbz//vuOSyU4dsFAWJCWmMje9eXZMiwigOAAH059X86tE6Od+rMamyx8eaKAr04UofXxIjJUx7CIgOtufHJll4tqyC2u5eH5Y3s8GY5KpeKh+WN54a0j7Pj6ex5eMK5br7e0gMn84weGUmG0dp/5emvQyC2Zwg4yZIMLamyycCa3ktsTB/dqti17qFQqJo4K48h5g1OHbzicXcLbe89jNFkIH6SlosbM1ZI6TuSUMXfyYAaH92we4b70xfECfLy9mH1T7z4gR0QFcXviED47ls+tE6OJibT/hKLJbCEz+8er5gIDtNZzCtPjI9H4yj9pYZscHzjY+auVfHY0n4OnirhSXIvZ0v0x7w+dKcHS3MKMuEgnJGxv4ugwGkwWLjnpss4TOWX8addZosN1/OqByTwwP5bUuaNYcutI/LUaPjua77Sf7SjGRjPfnS3h5vGR6LS9L66pc0cR4OfNO/vOy/zHos9J4XeQ8upGXv7bMbZ8cJqKmkbyDHV8eaKQ9K++79ZNOy0tCvu+u8qowUGMHtI3A3uNHxGK2kvFqe8d389/7kolW3dkERMZwK/un8zIwT/+TsEBPiyaGUNkqI6Dp4u5Uuy6V1QczCqmydLCvMShDlmfv9ab++eN4VJhDV+eKHTIOoWwlxR+B7A0t/DHnVnkFtey9PbRpM4Zxf3zxrBg+lA0ai/2Hc7jaol9Re3YBQOlVQ3cOTPG6d08bfx8NYwbNsjhJ3gra038If00ESF+rL5/En4ddEP4eKv5ydQhhAdrycgqxuCCd7YqisIXxwsYGR3E8CjHXec9+6Yoxo8I4b8/yyGvVCbFEX1HCr8D7Dp4me8La3j0zjjmJg5BrfbCy0tFdJg/d94cQ0igL18cL+RiftfdGYqisOe7K0SG+JE41rlX89xo4ugw8g31lFQaHbI+RVF4e+85miwt/CI1gUCdT6fLqr28mDNpMCrgzf/N7lH3mDOdv1pFUbmReYlDHLpelUrFipQJ6Hw1bN2RRYODhtwWwhYp/L10/mol/5txhVsTopkR375P3s9Xw8IZw4gO05GRVXzdibkbnbtaxeWiWpJmxuDl1TdH+22mx0WgAg5lFTtkfYfOFHPyUjmpc0YRZceduAE6b2YnRJFXWscHX15ySAZH+fx4Af5aDTPiHXe3bZtgfx+eXjKB0koj//VxtsyDLPqEFP5eaGlR+Mve8+hD/Hh4wdhOl9P8MDxvVKiOd/ad77C4Vtaa+MuecwT7+3BLF6M9OktokJa44SF8e6YEpZcnG6vqTPztkxzGDA1mwTT77wiOiQzk1onRfJKZx6VC1zjZW11n4tgFA7ckROPj7fiB8qB1Ksz7543h6HkDW9OzMFtc485lMXBJ4e+F4zkGiiuMpM4Zhdan6ys92or/6MHBvP7RWd7acw5TU+s/8Or6Jja9e5waYxO/uC8Bb41zCowtsyZEUVrVwKWC3k3O8j8HLtJkaeHxxfHd/uZy960jGRToy1/2nMPS3P9Hv1+fKqK5ReF2B3fz3ChpRgwPzx/L8ZwyXk07KXdSC6eSi357SFEUPv72ChGD/JgWa18XgLfGi1/cl8D+w3ns+fYKJy+WEaDzpqa+CZO5mV/dP5nRg507OfeNNwBdK35kCN4aLw6dKWZMDycJz75SyXdnS7j7lhF2dfHcyM9Xw08XjuP3H5xmz3dXSZk9okc5HKG5uYUvTxQQPzykR79Ld82fNowAnTdvfJTN2m2HWDwzhoUzYvB10jcN4bmk8PdQW3/8I0mx3TqqVau9WHr7aCaMCOHz4wW0KDA4zJ+fTB3KuGGOnWylIzfeAHSjiaPDOJxdwkPzx3b7Zi5Lcwt/3X+e8GBtj8ayaZM4Vs/0uAh2H7zMtFg90WH9c3NXxukiymtMPDS/e3fX9sbN46MYGRXEe19cIv3ry3z83VUmjgpjyjg9ccND0MitucIBpPD30J5vrxDk78OtCT3rj48fEUq8g+fRdYTp8ZEcPW/g5MUyptr5TabNJ5l5FJUbWbV0Yq/7wx9eMI6zuRW8tecc/7RsCl59dGlrG0VRSP/iIpEhfkwe65z5EDoTGarjF6kJXMyv5mBWEccvGMg81zpEd0SIH2FBWoZG+BMZ4jrDVwv3IoW/BwoMdWRdruC+uaP6rT/eWeKGhxAerOWjjCtMGae3+16CkgojO765TOLYcCY7YOKYYH8f7r9jDG9+fI4vTxQ6/FJKW3Lyq8nJq+KRheP6/EOnzZihwYwZGswjC2O5XFzDhbwqzlyu4NzVKrKvVKL1UTN5nJ7hkQHSHSS6Rb439sDXp4pQe6mYM2lwf0dxOLWXirtvGcmVklqOXTDYfgHQoii8+XE2GrUXP10Y67AstyZEEz88hPc+v0hFTddzEDjavsNXCdT5MDvBuQPX2cPLS8XowcHcOXM4K+9N4IE7xnB74mBCg3z5NquYD764xNncClpkMndhJyn83WRpbuHQmWImjw3v8qYkdzbrpkiiw3Skf33ZrmLy+bECLuRX8+BPxhAS6OuwHCqVip8tiqWlReG/Ps7uszFtiiuMnMgpY/HsES55JO2t8SImMpD504bxwPxxRIbqOHLOwH/89wkKDHIHsLBNCn83nbpUTq3RzK0ucCToLGovL+65bRSFZfV8d7br+ZOvltTy/heXuGlkqFO2SUSIjgd/MpazuZV8mpnn8PV3JP2r7/H29iL51pF98vN6I3yQH3dMGcJtk6KpqGnkhbePcLiLk/dCgBT+bvvmVBHBAT7cNMr1Tsw60tRYPTGRAaR9cbHT8XPKqht4Ne0kOq2GxxbHO21sobmTBzN5TDjvf3nJ6WPaXCqsJvNcKYtmxBAS2PXcxs5gaYF6k6XTPx19AVOpVIyMDmLtT6cQExHI/9t5hvc+vyijfopOSeHvhuo6E6culTP7piiHzJfqyrxUKp64azwWSwub3j1OZa3puvaa+iZeTTuJ2dLCr+6f5NAunhupVCoeXRyHv9abP+7Ior7R7JSfoygKaQcuEuTfOmpof2i73LazP5YuhnQIDvDl2YcTuT1xCHu+
[... base64 PNG data omitted: seaborn distplot of the image width/height ratio distribution over (0, 2) ...]\n", 780 | "text/plain": [ 781 | "" 782 | ] 783 | }, 784 | "metadata": {}, 785 | "output_type": "display_data" 786 | } 787 | ], 788 | "source": [
789 | "ratio_list = list(filter(lambda x:x>0.5 and x<=2,ratio_list))\n", 790 | "print(len(ratio_list))\n", 791 | "# import the visualization library\n", 792 | "import seaborn as sns\n", 793 | "# import the numerical library\n", 794 | "import numpy as np\n", 795 | "\n", 796 | "sns.set()\n", 797 | "np.random.seed(0)\n", 798 | "\n", 799 | "# seaborn histogram\n", 800 | "ax = sns.distplot(ratio_list) # shows how the ratios are distributed over (0, 2)" 801 | ] 802 | },
803 | { 804 | "cell_type": "markdown", 805 | "metadata": {}, 806 | "source": [ 807 | "From the plot above, the aspect ratios are concentrated between 0.5 and 1.5." 808 | ] 809 | },
810 | { 811 | "cell_type": "markdown", 812 | "metadata": {}, 813 | "source": [ 814 | "Reference: *Seaborn, a Python data-visualization library*" 815 | ] 816 | },
817 | { 818 | "cell_type": "markdown", 819 | "metadata": {}, 820 | "source": [ 821 | "# Splitting the dataset into a training set and a validation set" 822 | ] 823 | },
824 | { 825 | "cell_type": "markdown", 826 | "metadata": {}, 827 | "source": [ 828 | "For a classification problem we need separate training and validation data." 829 | ] 830 | },
831 | { 832 | "cell_type": "code", 833 | "execution_count": 140, 834 | "metadata": {}, 835 | "outputs": [ 836 | { 837 | "data": { 838 | "text/plain": [ 839 | "[{'img_name_path': '../data/garbage_classify/train_data/img_1.jpg',\n", 840 | " 'img_label': 0},\n", 841 | " {'img_name_path': '../data/garbage_classify/train_data/img_10.jpg',\n", 842 | " 'img_label': 0}]" 843 | ] 844 | }, 845 | "execution_count": 140, 846 | "metadata": {}, 847 | "output_type": "execute_result" 848 | } 849 | ], 850 | "source": [ 851 | "# raw record format\n", 852 | "img_path_list[:2]" 853 | ] 854 | },
855 | { 856 | "cell_type": "code", 857 | "execution_count": 141, 858 | "metadata": {}, 859 | "outputs": [ 860 | { 861 | "data": { 862 | "text/plain": [ 863 | "14802" 864 | ] 865 | }, 866 | "execution_count": 141, 867 | "metadata": {}, 868 | "output_type": "execute_result" 869 | } 870 | ], 871 | "source": [ 872 | "# raw dataset size\n", 873 | "len(img_path_list)" 874 | ] 875 | },
876 | { 877 | "cell_type": "code", 878 | "execution_count": 147, 879 | "metadata": {}, 880 | "outputs": [ 881 | { 882 | "name": "stdout", 883 | "output_type": "stream", 884 | "text": [ 885 | "img_count = 14802\n", 886 | "train_size = 11841\n", 887 | "train_img_list size = 11841\n", 888 | "val_img_list size = 2961\n" 889 | ] 890 | } 891 | ], 892 | "source": [ 893 | "# shuffle the raw records\n", 894 | "import random \n", 895 | "random.shuffle(img_path_list)\n", 896 | "\n", 897 | "# 80/20 train/val split\n", 898 | "img_count = len(img_path_list)\n", 899 | "train_size = int(img_count*0.8)\n", 900 | "\n", 901 | "print('img_count = ',img_count)\n", 902 | "print('train_size = ',train_size)\n", 903 | "train_img_list = img_path_list[:train_size]\n", 904 | "val_img_list = img_path_list[train_size:]\n", 905 | "\n", 906 | "print('train_img_list size = ',len(train_img_list))\n", 907 | "print('val_img_list size = ',len(val_img_list))" 908 | ] 909 | },
910 | { 911 | "cell_type": "markdown", 912 | "metadata": {}, 913 | "source": [ 914 | "With the split done, we can generate the training and validation data. " 915 | ] 916 | },
917 | { 918 | "cell_type": "code", 919 | "execution_count": 156, 920 | "metadata": {}, 921 | "outputs": [], 922 | "source": [ 923 | "import shutil\n", 924 | "# generate the training data\n", 925 | "\n", 926 | "with open(os.path.join(base_path,'40_garbage-classify-for-pytorch/train.txt'),'w') as f:\n", 927 | "    for img_dict in train_img_list:\n", 928 | "        \n", 929 | "        # one text record per image\n", 930 | "        img_name_path = img_dict['img_name_path'] # e.g. '../data/garbage_classify/train_data/img_1.jpg'\n", 931 | "        img_label = img_dict['img_label']\n", 932 | "        \n", 933 | "        
f.write(\"{}\\t{}\\n\".format(img_name_path,img_label))\n", 934 | "        \n", 935 | "        # per-label image directory\n", 936 | "        garbage_classify_dir = os.path.join(base_path,'40_garbage-classify-for-pytorch/train/{}'.format(img_label))\n", 937 | "        \n", 938 | "        ## create the directory if it does not exist\n", 939 | "        if not os.path.exists(garbage_classify_dir):\n", 940 | "            os.makedirs(garbage_classify_dir)\n", 941 | "        \n", 942 | "        ## copy the image file\n", 943 | "        shutil.copy(img_name_path,garbage_classify_dir)\n", 944 | "\n", 945 | "# generate the validation data\n", 946 | "with open(os.path.join(base_path,'40_garbage-classify-for-pytorch/val.txt'),'w') as f:\n", 947 | "    for img_dict in val_img_list:\n", 948 | "        \n", 949 | "        # one text record per image\n", 950 | "        img_name_path = img_dict['img_name_path'] # e.g. '../data/garbage_classify/train_data/img_1.jpg'\n", 951 | "        img_label = img_dict['img_label']\n", 952 | "        \n", 953 | "        f.write(\"{}\\t{}\\n\".format(img_name_path,img_label))\n", 954 | "        \n", 955 | "        # per-label image directory\n", 956 | "        garbage_classify_dir = os.path.join(base_path,'40_garbage-classify-for-pytorch/val/{}'.format(img_label))\n", 957 | "        \n", 958 | "        ## create the directory if it does not exist\n", 959 | "        if not os.path.exists(garbage_classify_dir):\n", 960 | "            os.makedirs(garbage_classify_dir)\n", 961 | "        \n", 962 | "        ## copy the image file\n", 963 | "        shutil.copy(img_name_path,garbage_classify_dir)\n" 964 | ] 965 | },
966 | { 967 | "cell_type": "markdown", 968 | "metadata": {}, 969 | "source": [ 970 | "# Visualizing the train/val class distribution" 971 | ] 972 | },
973 | { 974 | "cell_type": "markdown", 975 | "metadata": {}, 976 | "source": [ 977 | "Now we count the labels in the training and validation sets and plot both distributions in a single chart." 978 | ] 979 | },
980 | { 981 | "cell_type": "code", 982 | "execution_count": 167, 983 | "metadata": {}, 984 | "outputs": [ 985 | { 986 | "name": "stdout", 987 | "output_type": "stream", 988 | "text": [ 989 | "train_path = ../data/40_garbage-classify-for-pytorch/train.txt\n", 990 | "val_path = ../data/40_garbage-classify-for-pytorch/val.txt\n", 991 | "train_dict = {0: 198, 1: 291, 2: 231, 3: 72, 4: 306, 5: 243, 6: 307, 7: 294, 8: 301, 9: 322, 10: 320, 11: 579, 12: 266, 13: 320, 14: 280, 15: 336, 16: 293, 17: 240, 18: 288, 19: 254, 20: 173, 21: 520, 22: 290, 23: 250, 24: 248, 25: 432, 26: 294, 27: 437, 28: 305, 29: 338, 30: 257, 31: 361, 32: 223, 33: 260, 34: 313, 35: 267, 36: 219, 37: 265, 38: 309, 39: 339}\n", 992 | "val_dict = {0: 44, 1: 79, 2: 48, 3: 13, 4: 81, 5: 46, 6: 88, 7: 68, 8: 79, 9: 67, 10: 67, 11: 157, 12: 65, 13: 89, 14: 77, 15: 83, 16: 59, 17: 69, 18: 74, 19: 58, 20: 53, 21: 137, 22: 85, 23: 59, 24: 70, 25: 118, 26: 57, 27: 99, 28: 77, 29: 78, 30: 64, 31: 85, 32: 57, 33: 62, 34: 82, 35: 84, 36: 46, 37: 57, 38: 82, 39: 98}\n" 993 | ] 994 | } 995 | ], 996 | "source": [ 997 | "import codecs\n", 998 | "# resolve the data files\n", 999 | "train_path = os.path.join(base_path,'40_garbage-classify-for-pytorch/train.txt')\n", 1000 | "val_path = os.path.join(base_path,'40_garbage-classify-for-pytorch/val.txt')\n", 1001 | "print('train_path = ',train_path)\n", 1002 | "print('val_path = ',val_path)\n", 1003 | "\n", 1004 | "# counting\n", 1005 | "def get_label_idx_list(data_path):\n", 1006 | "    label_idx_list = []\n", 1007 | "    \n", 1008 | "    for line in codecs.open(data_path,'r'):\n", 1009 | "        line = line.strip()\n", 1010 | "        label_idx = line.split('\\t')[1]\n", 1011 | "        label_idx_list.append(int(label_idx))\n", 1012 | "    return label_idx_list\n", 1013 | "\n", 1014 | "from collections import Counter\n", 1015 | "\n", 1016 | "## Counter counts how often each label occurs\n", 1017 | "## convert the result to a plain dict\n", 1018 | "## sort the dict keys ascending\n", 1019 | "train_dict = dict(Counter(get_label_idx_list(train_path)))\n", 1020 | "train_dict = 
dict(sorted(train_dict.items()))\n", 1021 | "\n", 1022 | "\n", 1023 | "val_dict = dict(Counter(get_label_idx_list(val_path)))\n", 1024 | "val_dict = dict(sorted(val_dict.items()))\n", 1025 | "\n", 1026 | "\n", 1027 | "print('train_dict = ',train_dict)\n", 1028 | "print('val_dict = ',val_dict)" 1029 | ] 1030 | },
1031 | { 1032 | "cell_type": "code", 1033 | "execution_count": 185, 1034 | "metadata": {}, 1035 | "outputs": [ 1036 | { 1037 | "data": { 1038 | "text/html": [ 1039 | "[... rendered pyecharts output omitted: require.config script, chart container div, and the generated ECharts JavaScript for the train/val bar chart ...]\n" 1313 | ], 1314 | "text/plain": [ 1315 | "" 1316 | ] 1317 | }, 1318 | "execution_count": 185, 1319 | "metadata": {}, 1320 | "output_type": "execute_result" 1321 | } 1322 | ],
1323 | "source": [ 1324 | "# visualization\n", 1325 | "from pyecharts import options as opts\n", 1326 | "from pyecharts.charts import Bar\n", 1327 | "\n", 1328 | "# build the data\n", 1329 | "\n", 1330 | "## check train == val keys \n", 1331 | "assert train_dict.keys() == val_dict.keys()\n", 1332 | "\n", 1333 | "\n", 1334 | "# build the x axis with a list comprehension\n", 1335 | "x = [\"{}-{}\".format(label_idx,label_dict[str(label_idx)] )\n", 1336 | "     for label_idx in train_dict.keys()]\n", 1337 | "\n", 1338 | "## train \n", 1339 | "train_y = list(train_dict.values())\n", 1340 | "## val\n", 1341 | "val_y = list(val_dict.values())\n", 1342 | "\n", 1343 | "# create the Bar instance\n", 1344 | "bar = Bar(init_opts=opts.InitOpts(width='1100px',height='500px'))\n", 1345 | "bar.add_xaxis(xaxis_data=x)\n", 1346 | "bar.add_yaxis(series_name='Train',yaxis_data=train_y)\n", 1347 | "bar.add_yaxis(series_name='Val',yaxis_data=val_y)\n", 1348 | "\n", 1349 | "# set global options\n", 1350 | "bar.set_global_opts(\n", 1351 | "    title_opts=opts.TitleOpts(title='垃圾分类 Train/Val 不同类别数据分布'),\n", 1352 | "    # rotate the x-axis labels\n", 1353 | "    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate = 15) ) \n", 1354 | ")\n", 1355 | "\n", 1356 | "# render the chart inside the notebook\n", 1357 | "bar.render_notebook()" 1358 | ] 1359 | },
1360 | { 1361 | "cell_type": "code", 1362 | "execution_count": null, 1363 | "metadata": {}, 1364 | "outputs": [], 1365 | "source": [] 1366 | }, 1367 | { 1368 | "cell_type": "code", 1369 | "execution_count": null, 1370 | "metadata": {}, 1371 | "outputs": [], 1372 | "source": [] 1373 | }, 1374 | { 1375 | "cell_type": "code", 1376 | "execution_count": null, 1377 | "metadata": {}, 1378 | "outputs": [], 1379 | "source": [] 1380 | }, 1381 | { 1382 | "cell_type": "code", 1383 | "execution_count": null, 1384 | "metadata": {}, 1385 | "outputs": [], 1386 | "source": [] 1387 | } 1388 | ],
1389 | "metadata": { 1390 | "kernelspec": { 1391 | "display_name": "Python 3", 1392 | "language": "python", 1393 | "name": "python3" 1394 | }, 1395 | "language_info": { 1396 | "codemirror_mode": { 1397 | "name": "ipython", 1398 | "version": 3 1399 | }, 1400 | "file_extension": ".py", 1401 | "mimetype": "text/x-python", 1402 | "name": "python", 1403 | "nbconvert_exporter": "python", 1404 | "pygments_lexer": "ipython3", 1405 | "version": "3.7.2" 1406 | }, 1407 | "toc": { 1408 | "base_numbering": 1, 1409 | "nav_menu": {}, 1410 | "number_sections": true, 1411 | "sideBar": true, 1412 | "skip_h1_title": false, 1413 | "title_cell": "Table of Contents", 1414 | "title_sidebar": "Contents", 1415 | "toc_cell": false, 1416 | "toc_position": { 1417 | "height": "calc(100% - 180px)", 1418 | "left": "10px", 1419 | "top": "150px", 1420 | "width": "165px" 1421 | }, 1422 | "toc_section_display": true, 1423 | "toc_window_display": false 1424 | } 1425 | }, 1426 | "nbformat": 4, 1427 | "nbformat_minor": 2 1428 | } 1429 | 
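The notebook above draws its 80/20 split from one global shuffle, so the per-class train/val ratio drifts with class size; in the counts printed above, class 3 ends up with 72 train versus 13 val samples, about a 15% split instead of 20%. A per-label stratified split keeps every class at the same ratio. A minimal sketch, assuming records shaped like `{'img_name_path': ..., 'img_label': ...}` as in the notebook:

```python
import random
from collections import defaultdict

def stratified_split(img_list, train_ratio=0.8, seed=42):
    """Shuffle and split each label's records separately."""
    random.seed(seed)
    by_label = defaultdict(list)
    for rec in img_list:
        by_label[rec['img_label']].append(rec)
    train_list, val_list = [], []
    for recs in by_label.values():
        random.shuffle(recs)
        cut = int(len(recs) * train_ratio)
        train_list.extend(recs[:cut])
        val_list.extend(recs[cut:])
    random.shuffle(train_list)  # avoid label-ordered batches downstream
    return train_list, val_list
```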
-------------------------------------------------------------------------------- /preprocess/01-原始数据集分布可视化分析.py: --------------------------------------------------------------------------------
1 | #(1) walk the raw data directory 2 | import os 3 | import cv2 4 | from os import walk 5 | data_path='./data/garbage_classify/raw_data' 6 | for (dirpath,dirnames,filenames) in walk(data_path): 7 |     print('dirpath:',dirpath) 8 |     # print('dirnames:',dirnames) # sub-directories 9 |     print('filenames:',filenames[:5]) 10 | 
11 | #(2) distribution of the second-level (fine-grained) classes 12 | from glob import glob 13 | data_path='./data/garbage_classify/raw_data' 14 | def get_image_txt_info(path): 15 |     data_path_txt=glob(os.path.join(path,'*.txt')) # all txt files 16 |     img_list=[] 17 |     img2label_dic={} 18 |     label_count_dic={} 19 |     for txt_path in data_path_txt: 20 |         with open(txt_path,'r') as f: 21 |             line=f.readline() # read the single line 22 |             #print(line) 23 |             line=line.strip() # strip leading/trailing whitespace 24 |             img_name=line.split(',')[0] # img_2778.jpg 25 |             img_label=int(line.split(',')[1]) # 7 26 |             img_name_path=os.path.join(data_path,img_name) # full image path 27 |             #print(img_name_path) 28 |             img_list.append({'img_name_path':img_name_path,'img_label':img_label}) 29 |             # map image_name -> img_label 30 |             img2label_dic[img_name]=img_label 31 |             # per-label sample count 32 |             img_label_count=label_count_dic.get(img_label,0) # defaults to 0 for an unseen label 33 |             if img_label_count: 34 |                 label_count_dic[img_label]+=1 35 |             else: 36 |                 label_count_dic[img_label]=1 37 |     return img_list,img2label_dic,label_count_dic 38 | img_list,img2label_dic,label_count_dic=get_image_txt_info(data_path) 39 | 40 | 
41 | # visualize the sample count behind every label with a pyecharts bar chart 42 | 
43 | from pyecharts import options as opts # pyecharts option helpers 44 | from pyecharts.charts import Bar 45 | label_dict = { 46 | "0": "其他垃圾/一次性快餐盒", 47 | "1": "其他垃圾/污损塑料", 48 | "2": "其他垃圾/烟蒂", 49 | "3": "其他垃圾/牙签", 50 | "4": "其他垃圾/破碎花盆及碟碗", 51 | "5": "其他垃圾/竹筷", 52 | "6": "厨余垃圾/剩饭剩菜", 53 | "7": "厨余垃圾/大骨头", 54 | "8": "厨余垃圾/水果果皮", 55 | "9": "厨余垃圾/水果果肉", 56 | "10": "厨余垃圾/茶叶渣", 57 | "11": "厨余垃圾/菜叶菜根", 58 | "12": "厨余垃圾/蛋壳", 59 | "13": "厨余垃圾/鱼骨", 60 | "14": "可回收物/充电宝", 61 | "15": "可回收物/包", 62 | "16": "可回收物/化妆品瓶", 63 | "17": "可回收物/塑料玩具", 64 | "18": "可回收物/塑料碗盆", 65 | "19": "可回收物/塑料衣架", 66 | "20": "可回收物/快递纸袋", 67 | "21": "可回收物/插头电线", 68 | "22": "可回收物/旧衣服", 69 | "23": "可回收物/易拉罐", 70 | "24": "可回收物/枕头", 71 | "25": "可回收物/毛绒玩具", 72 | "26": "可回收物/洗发水瓶", 73 | "27": "可回收物/玻璃杯", 74 | "28": "可回收物/皮鞋", 75 | "29": "可回收物/砧板", 76 | "30": "可回收物/纸板箱", 77 | "31": "可回收物/调料瓶", 78 | "32": "可回收物/酒瓶", 79 | "33": "可回收物/金属食品罐", 80 | "34": "可回收物/锅", 81 | "35": "可回收物/食用油桶", 82 | "36": "可回收物/饮料瓶", 83 | "37": "有害垃圾/干电池", 84 | "38": "有害垃圾/软膏", 85 | "39": "有害垃圾/过期药物" 86 | } 87 | 
88 | #import matplotlib.pyplot as plt 89 | # first sort label_count_dic by key 90 | label_count_dic=dict(sorted(label_count_dic.items())) # sorted() orders by key (0-39) by default 91 | #x=label_count_dic.keys() #0-39 92 | x=['{}-{}'.format(label_index,label_dict[str(label_index)]) for label_index in label_count_dic] 93 | y=label_count_dic.values() # counts 94 | x=list(x) 95 | y=list(y) 96 | # initialize the chart 97 | bar=Bar(init_opts=opts.InitOpts(width='1100px',height='500px')) 98 | bar.add_xaxis(xaxis_data=x) 99 | # bar.add_yaxis(series_name='',yaxis_data=y) 100 | bar.add_yaxis(series_name='counts',y_axis=y) 101 | # set global options 102 | bar.set_global_opts( 103 |     title_opts=opts.TitleOpts(title='垃圾分类-不同类别的数据分布'), # add a title 104 |     xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=15)) 105 | ) 106 | # export the chart 107 | # bar.load_javascript() 108 | # bar.render_notebook() 109 | bar.render('./preprocess/01.html') 110 | #print('Done!') 111 | 
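# --- Illustrative aside (not part of the original script): the get/if-else
# --- tally inside get_image_txt_info above collapses to collections.Counter.
# --- A minimal equivalent, assuming the same one-line txt format "img_2778.jpg, 7":
import os
from glob import glob
from collections import Counter

def count_labels(path):
    labels = []
    for txt_path in glob(os.path.join(path, '*.txt')):
        with open(txt_path, 'r') as f:
            labels.append(int(f.readline().strip().split(',')[1]))
    return dict(Counter(labels))  # {label: count}, same result as label_count_dic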
112 | #(3) width/height (aspect-ratio) distribution 113 | import os 114 | import json 115 | from glob import glob 116 | from PIL import Image 117 | data_path='./data/garbage_classify/raw_data' 118 | all_img_path=os.path.join(data_path,'*.jpg') 119 | #img=Image.open(image_path) #w,h 120 | img_path_list=glob(all_img_path) # every path matching the pattern 121 | data_list=[] 122 | filename='data_list.json' 123 | if os.path.exists(filename): 124 |     with open(filename) as file_obj: 125 |         data_list=json.load(file_obj) 126 | else: 127 |     for img_path in img_path_list: 128 |         img=Image.open(img_path) # reading every image takes about a minute 129 |         w,h=img.size 130 |         r=float('{:.02f}'.format(w/h)) 131 |         img_name=img_path.split('/')[-1] 132 |         img_id=img_name.split('.')[0].split('_')[-1] 133 |         img_label=img2label_dic[img_name] 134 |         data_list.append([int(img_id),w,h,r,int(img_label)]) 135 |     with open(filename,'w') as file_obj: 136 |         json.dump(data_list,file_obj) 137 | 
138 | #print(w,h,ratio) 139 | # analyze the single variable with a histogram 140 | # rendered with the seaborn visualization library 141 | import seaborn as sns # visualization library 142 | import numpy as np 143 | import matplotlib.pyplot as plt 144 | 
145 | ratio_list=[data[3] for data in data_list] 146 | ratio_list=list(filter(lambda x:x>0.5 and x<=2,ratio_list)) # keep only ratios inside (0.5, 2] 147 | sns.set() 148 | np.random.seed(0) 149 | ax=sns.distplot(ratio_list) 150 | plt.show() # the plot shows ratios concentrated between 0.5 and 1.5 151 | print('Done!')
-------------------------------------------------------------------------------- /preprocess/01.html: --------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | Awesome-pyecharts 6 | 7 | 8 | 9 | 10 | 11 | 233 | 234 | 235 | 
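After the split script that follows has run, the result is easy to sanity-check. A sketch, assuming the `train/<label>/*.jpg` and `val/<label>/*.jpg` layout that 02-原始数据集train-val划分.py writes under `./data/garbage_classify`:

```python
import os

def split_report(root='./data/garbage_classify'):
    """Per-class file counts and the overall train/val ratio."""
    counts = {}
    for part in ('train', 'val'):
        part_dir = os.path.join(root, part)
        counts[part] = {
            label: len(os.listdir(os.path.join(part_dir, label)))
            for label in sorted(os.listdir(part_dir), key=int)
        }
    n_train = sum(counts['train'].values())
    n_val = sum(counts['val'].values())
    print('train={} val={} val_ratio={:.2f}'.format(
        n_train, n_val, n_val / (n_train + n_val)))
    return counts
```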
-------------------------------------------------------------------------------- /preprocess/02-原始数据集train-val划分.py: --------------------------------------------------------------------------------
1 | 2 | 3 | #(4) split into a validation set (0.2) and a training set (0.8) 4 | # img_list holds each image path together with its label 5 | import random 6 | import json 7 | import os 8 | import cv2 9 | from os import walk 10 | # distribution of the second-level (fine-grained) classes 11 | from glob import glob 12 | 
13 | data_path='./data/garbage_classify/raw_data' 14 | def get_image_txt_info(path): 15 |     data_path_txt=glob(os.path.join(path,'*.txt')) # all txt files 16 |     img_list=[] 17 |     img2label_dic={} 18 |     label_count_dic={} 19 |     for txt_path in data_path_txt: 20 |         with open(txt_path,'r') as f: 21 |             line=f.readline() # read the single line 22 |             #print(line) 23 |             line=line.strip() # strip leading/trailing whitespace 24 |             img_name=line.split(',')[0] # img_2778.jpg 25 |             img_label=int(line.split(',')[1]) # 7 26 |             img_name_path=os.path.join(data_path,img_name) # full image path 27 |             #print(img_name_path) 28 |             img_list.append({'img_name_path':img_name_path,'img_label':img_label}) 29 |             # map image_name -> img_label 30 |             img2label_dic[img_name]=img_label 31 |             # per-label sample count 32 |             img_label_count=label_count_dic.get(img_label,0) # defaults to 0 for an unseen label 33 |             if img_label_count: 34 |                 label_count_dic[img_label]+=1 35 |             else: 36 |                 label_count_dic[img_label]=1 37 |     return img_list,img2label_dic,label_count_dic 38 | img_list,img2label_dic,label_count_dic=get_image_txt_info(data_path) 39 | 
40 | random.shuffle(img_list) # records hold img_name_path + img_label 41 | train_size=int(len(img_list)*0.8) 42 | train_list=img_list[:train_size] 43 | val_list=img_list[train_size:] 44 | 
45 | # write the train.txt and val.txt files 46 | import shutil # for copying files 47 | path='./data/garbage_classify/' 48 | type={'train':train_list,'val':val_list} # note: shadows the builtin name 'type' 49 | for key in type: 50 |     with open(os.path.join(path,key+'.txt'),'w') as f: 51 |         for img_dict in type[key]: 52 |             img_name_path=img_dict['img_name_path'] 53 |             img_label=img_dict['img_label'] 54 |             f.write('{}\t{}\n'.format(img_name_path,img_label)) 55 | 
56 |             # create train/<label> or val/<label> 57 |             sub_path=os.path.join(path,key,str(img_label)) 58 |             if not os.path.exists(sub_path): 59 |                 os.makedirs(sub_path) 60 |             # copy the image file 61 |             shutil.copy(img_name_path,sub_path) 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 
-------------------------------------------------------------------------------- /preprocess/03-train和val数据分布可视化.py: --------------------------------------------------------------------------------
1 | 2 | # visualize the train/val distributions: count the samples behind every label 3 | import os 4 | import codecs 5 | from collections import Counter 6 | 
7 | from pyecharts import options as opts 8 | from pyecharts.charts import Bar 9 | label_dict = { 10 | "0": "其他垃圾/一次性快餐盒", 11 | "1": "其他垃圾/污损塑料", 12 | "2": "其他垃圾/烟蒂", 13 | "3": "其他垃圾/牙签", 14 | "4": "其他垃圾/破碎花盆及碟碗", 15 | "5": "其他垃圾/竹筷", 16 | "6": "厨余垃圾/剩饭剩菜", 17 | "7": "厨余垃圾/大骨头", 18 | "8": "厨余垃圾/水果果皮", 19 | "9": "厨余垃圾/水果果肉", 20 | "10": "厨余垃圾/茶叶渣", 21 | "11": "厨余垃圾/菜叶菜根", 22 | "12": "厨余垃圾/蛋壳", 23 | "13": "厨余垃圾/鱼骨", 24 | "14": "可回收物/充电宝", 25 | "15": "可回收物/包", 26 | "16": "可回收物/化妆品瓶", 27 | "17": "可回收物/塑料玩具", 28 | "18": "可回收物/塑料碗盆", 29 | "19": "可回收物/塑料衣架", 30 | "20": "可回收物/快递纸袋", 31 | "21": "可回收物/插头电线", 32 | "22": "可回收物/旧衣服", 33 | "23": "可回收物/易拉罐", 34 | "24": "可回收物/枕头", 35 | "25": "可回收物/毛绒玩具", 36 | "26": "可回收物/洗发水瓶", 37 | "27": "可回收物/玻璃杯", 38 | "28": "可回收物/皮鞋", 39 | "29": "可回收物/砧板", 40 | "30": "可回收物/纸板箱", 41 | "31": "可回收物/调料瓶", 42 | "32": "可回收物/酒瓶", 43 | "33": "可回收物/金属食品罐", 44 | "34": "可回收物/锅", 45 | "35": "可回收物/食用油桶", 46 | "36": "可回收物/饮料瓶", 47 | "37": "有害垃圾/干电池", 48 | "38": "有害垃圾/软膏", 49 | "39": "有害垃圾/过期药物" 50 | } 51 | base_path='./data/garbage_classify' 52 | type=['train','val'] # note: shadows the builtin name 'type' 53 | res={} 54 | temp={} 55 | for i in type: 56 | 
57 |     sub_path=os.path.join(base_path,i+'.txt') 58 |     # read the txt, count each label, build the data 59 |     label_list=[] 60 |     for line in codecs.open(sub_path,'r'): # read the txt file 61 |         line=line.strip() 62 |         index=line.split('\t')[1] # 0-39 63 |         label_list.append(int(index)) 64 |     temp=dict(Counter(label_list)) # how many times each label occurs 65 |     res[i]=dict(sorted(temp.items())) # sort by key 66 | 
67 | # create the Bar chart 68 | bar=Bar(init_opts=opts.InitOpts(width='1000px',height='500px')) 69 | # build the axes 70 | #x=list(res['train'].keys()) 71 | #b=list(res['train'].keys()) 72 | a=[label_dict[str(b)] for b in res['train'].keys()] 73 | x=list(a) 74 | y_train=list(res['train'].values()) 75 | y_val=list(res['val'].values()) 76 | # set the title 77 | bar.set_global_opts( 78 |     title_opts=opts.TitleOpts(title='garbage-classify: Train/Val'), 79 |     xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30)) 80 | ) 81 | 
82 | bar.add_xaxis(xaxis_data=x) 83 | # yaxis_data ---> y_axis 84 | bar.add_yaxis(series_name='train',y_axis=y_train) 85 | bar.add_yaxis(series_name='val',y_axis=y_val) 86 | 
87 | # save 88 | bar.render('./preprocess/03.html') 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 
-------------------------------------------------------------------------------- /preprocess/03.html: --------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | Awesome-pyecharts 6 | 7 | 8 | 9 | 10 | 11 | 288 | 289 | 290 | 
-------------------------------------------------------------------------------- /preprocess/04-四大类垃圾分布可视化.py: --------------------------------------------------------------------------------
1 | 2 | # group the 40 labels into the four top-level categories 3 | import os 4 | import cv2 5 | # distribution of the second-level (fine-grained) classes 6 | from glob import glob 7 | 
8 | label_dict = { 9 | "0": "其他垃圾/一次性快餐盒", 10 | "1": "其他垃圾/污损塑料", 11 | "2": "其他垃圾/烟蒂", 12 | "3": "其他垃圾/牙签", 13 | "4": "其他垃圾/破碎花盆及碟碗", 14 | "5": "其他垃圾/竹筷", 15 | "6": "厨余垃圾/剩饭剩菜", 16 | "7": "厨余垃圾/大骨头", 17 | "8": "厨余垃圾/水果果皮", 18 | "9": "厨余垃圾/水果果肉", 19 | "10": "厨余垃圾/茶叶渣", 20 | "11": "厨余垃圾/菜叶菜根", 21 | "12": "厨余垃圾/蛋壳", 22 | "13": "厨余垃圾/鱼骨", 23 | "14": "可回收物/充电宝", 24 | "15": "可回收物/包", 25 | "16": "可回收物/化妆品瓶", 26 | "17": "可回收物/塑料玩具", 27 | "18": "可回收物/塑料碗盆", 28 | "19": "可回收物/塑料衣架", 29 | "20": "可回收物/快递纸袋", 30 | "21": "可回收物/插头电线", 31 | "22": "可回收物/旧衣服", 32 | "23": "可回收物/易拉罐", 33 | "24": "可回收物/枕头", 34 | "25": "可回收物/毛绒玩具", 35 | "26": "可回收物/洗发水瓶", 36 | "27": "可回收物/玻璃杯", 37 | "28": "可回收物/皮鞋", 38 | "29": "可回收物/砧板", 39 | "30": "可回收物/纸板箱", 40 | "31": "可回收物/调料瓶", 41 | "32": "可回收物/酒瓶", 42 | "33": "可回收物/金属食品罐", 43 | "34": "可回收物/锅", 44 | "35": "可回收物/食用油桶", 45 | "36": "可回收物/饮料瓶", 46 | "37": "有害垃圾/干电池", 47 | "38": "有害垃圾/软膏", 48 | "39": "有害垃圾/过期药物" 49 | } 50 | 51 | 
52 | data_path='./data/garbage_classify/raw_data' 53 | def get_image_txt_info(path): 54 |     data_path_txt=glob(os.path.join(path,'*.txt')) # all txt files 55 |     img_list=[] 56 |     img2label_dic={} 57 |     label_count_dic={} 58 |     for txt_path in data_path_txt: 59 |         with open(txt_path,'r') as f: 60 |             line=f.readline() # read the single line 61 |             #print(line) 62 |             line=line.strip() # strip leading/trailing whitespace 63 |             img_name=line.split(',')[0] # img_2778.jpg 64 |             img_label=int(line.split(',')[1]) # 7 65 |             img_name_path=os.path.join(data_path,img_name) # full image path 66 |             #print(img_name_path) 67 |             img_list.append({'img_name_path':img_name_path,'img_label':img_label}) 68 |             # map image_name -> img_label 69 |             img2label_dic[img_name]=img_label 70 |             # per-label sample count 71 |             img_label_count=label_count_dic.get(img_label,0) # defaults to 0 for an unseen label 72 |             if img_label_count: 73 |                 label_count_dic[img_label]+=1 74 |             else: 75 |                 label_count_dic[img_label]=1 76 |     return img_list,img2label_dic,label_count_dic 77 | img_list,img2label_dic,label_count_dic=get_image_txt_info(data_path) 78 | 
79 | # visualize the count behind every category with a pyecharts bar chart 80 | from pyecharts import options as opts # pyecharts option helpers 81 | from pyecharts.charts import Bar 82 | # label ranges: 0-5, 6-13, 14-36, 37-39 83 | label_4_name={0:'其他垃圾', 1:'厨余垃圾', 2:'可回收物', 3:'有害垃圾'} 84 | label_4_count={0:0,1:0,2:0,3:0} 85 | #import matplotlib.pyplot as plt 86 | # first sort label_count_dic by key 87 | label_count_dic=dict(sorted(label_count_dic.items())) # sorted() orders by key (0-39) by default 88 | for i in range(len(label_count_dic)): 89 |     if i<=5: 90 |         label_4_count[0]+=label_count_dic[i] 91 |     elif i>5 and i<=13: 92 |         label_4_count[1]+=label_count_dic[i] 93 |     elif i>13 and i<=36: 94 |         label_4_count[2]+=label_count_dic[i] 95 |     else: 96 |         label_4_count[3]+=label_count_dic[i] 97 | 
98 | #x=label_count_dic.keys() #0-39 99 | # x=['{}-{}'.format(label_index,label_dict[str(label_index)]) for label_index in label_count_dic] 100 | # y=label_count_dic.values() # counts 101 | x=label_4_name.values() 102 | y=label_4_count.values() 103 | x=list(x) 104 | y=list(y) 105 | # initialize the chart 106 | bar=Bar(init_opts=opts.InitOpts(width='1100px',height='500px')) 107 | bar.add_xaxis(xaxis_data=x) 108 | # yaxis_data ---> y_axis 109 | bar.add_yaxis(series_name='',y_axis=y) 110 | # set global options 111 | bar.set_global_opts( 112 |     title_opts=opts.TitleOpts(title='垃圾分类-不同类别的数据分布'), # add a title 113 |     xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=15)) 114 | ) 115 | bar.render('./preprocess/04.html') 116 | #print('Done!') 117 | 
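The script above hard-codes the index ranges 0-5/6-13/14-36/37-39. Since every `label_dict` value already carries its top-level category as a "大类/子类" prefix, the coarse class can also be derived instead of hard-coded. A sketch reusing the `label_dict` defined above:

```python
# Map each fine label index to its coarse category name via the '/' prefix.
coarse_of = {int(k): v.split('/')[0] for k, v in label_dict.items()}
# e.g. coarse_of[23] == '可回收物', coarse_of[3] == '其他垃圾'
```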
-------------------------------------------------------------------------------- /preprocess/04.html: --------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | Awesome-pyecharts 6 | 7 | 8 | 9 | 10 | 11 | 161 | 162 | 163 | 
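The next script, 05-四大类垃圾train-val分布可视化.py, repeats the same if/elif ladder once for the train counts and once for the val counts. A small helper (a sketch, using the same 0-5/6-13/14-36/37-39 boundaries) removes the duplication:

```python
def to_coarse_counts(fine_counts):
    """Collapse a {fine_label: count} dict into the four coarse classes."""
    coarse = {0: 0, 1: 0, 2: 0, 3: 0}
    for label, n in fine_counts.items():
        if label <= 5:
            coarse[0] += n
        elif label <= 13:
            coarse[1] += n
        elif label <= 36:
            coarse[2] += n
        else:
            coarse[3] += n
    return coarse

# y_train = list(to_coarse_counts(res['train']).values())
# y_val   = list(to_coarse_counts(res['val']).values())
```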
-------------------------------------------------------------------------------- /preprocess/05-四大类垃圾train-val分布可视化.py: --------------------------------------------------------------------------------
1 | # visualize the train/val distributions: count the samples behind every label 2 | import os 3 | import codecs 4 | from collections import Counter 5 | 
6 | from pyecharts import options as opts 7 | from pyecharts.charts import Bar 8 | label_dict = { 9 | "0": "其他垃圾/一次性快餐盒", 10 | "1": "其他垃圾/污损塑料", 11 | "2": "其他垃圾/烟蒂", 12 | "3": "其他垃圾/牙签", 13 | "4": "其他垃圾/破碎花盆及碟碗", 14 | "5": "其他垃圾/竹筷", 15 | "6": "厨余垃圾/剩饭剩菜", 16 | "7": "厨余垃圾/大骨头", 17 | "8": "厨余垃圾/水果果皮", 18 | "9": "厨余垃圾/水果果肉", 19 | "10": "厨余垃圾/茶叶渣", 20 | "11": "厨余垃圾/菜叶菜根", 21 | "12": "厨余垃圾/蛋壳", 22 | "13": "厨余垃圾/鱼骨", 23 | "14": "可回收物/充电宝", 24 | "15": "可回收物/包", 25 | "16": "可回收物/化妆品瓶", 26 | "17": "可回收物/塑料玩具", 27 | "18": "可回收物/塑料碗盆", 28 | "19": "可回收物/塑料衣架", 29 | "20": "可回收物/快递纸袋", 30 | "21": "可回收物/插头电线", 31 | "22": "可回收物/旧衣服", 32 | "23": "可回收物/易拉罐", 33 | "24": "可回收物/枕头", 34 | "25": "可回收物/毛绒玩具", 35 | "26": "可回收物/洗发水瓶", 36 | "27": "可回收物/玻璃杯", 37 | "28": "可回收物/皮鞋", 38 | "29": "可回收物/砧板", 39 | "30": "可回收物/纸板箱", 40 | "31": "可回收物/调料瓶", 41 | "32": "可回收物/酒瓶", 42 | "33": "可回收物/金属食品罐", 43 | "34": "可回收物/锅", 44 | "35": "可回收物/食用油桶", 45 | "36": "可回收物/饮料瓶", 46 | "37": "有害垃圾/干电池", 47 | "38": "有害垃圾/软膏", 48 | "39": "有害垃圾/过期药物" 49 | } 50 | 
51 | base_path='./data/garbage_classify' 52 | type=['train','val'] # note: shadows the builtin name 'type' 53 | res={} 54 | temp={} 55 | for i in type: 56 |     sub_path=os.path.join(base_path,i+'.txt') 57 |     # read the txt, count each label, build the data 58 |     label_list=[] 59 |     for line in codecs.open(sub_path,'r'): # read the txt file 60 |         line=line.strip() 61 |         index=line.split('\t')[1] # 0-39 62 |         label_list.append(int(index)) 63 |     temp=dict(Counter(label_list)) # how many times each label occurs 64 |     res[i]=dict(sorted(temp.items())) # sort by key 65 | 
66 | # create the Bar chart 67 | bar=Bar(init_opts=opts.InitOpts(width='1000px',height='500px')) 68 | # label ranges: 0-5, 6-13, 14-36, 37-39 69 | label_4_name={0:'其他垃圾', 1:'厨余垃圾', 2:'可回收物', 3:'有害垃圾'} 70 | x=list(label_4_name.values()) 71 | 
72 | label_4_count={0:0, 1:0, 2:0, 3:0} 73 | 
74 | temp=res['train'] 75 | for j in range(len(temp)): 76 |     if j<=5: 77 |         label_4_count[0]+=temp[j] 78 |     elif j>5 and j<=13: 79 |         label_4_count[1]+=temp[j] 80 |     elif j>13 and j<=36: 81 |         label_4_count[2]+=temp[j] 82 |     else: 83 |         label_4_count[3]+=temp[j] 84 | y_train=list(label_4_count.values()) 85 | 
86 | temp=res['val'] 87 | label_4_count={0:0, 1:0, 2:0, 3:0} 88 | for j in range(len(temp)): 89 |     if j<=5: 90 |         label_4_count[0]+=temp[j] 91 |     elif j>5 and j<=13: 92 |         label_4_count[1]+=temp[j] 93 |     elif j>13 and j<=36: 94 |         label_4_count[2]+=temp[j] 95 |     else: 96 |         label_4_count[3]+=temp[j] 97 | y_val=list(label_4_count.values()) 98 | # set the title 99 | bar.set_global_opts( 100 |     title_opts=opts.TitleOpts(title='garbage-classify: Train/Val'), 101 |     xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30)) 102 | ) 103 | 
104 | bar.add_xaxis(xaxis_data=x) 105 | # yaxis_data ---> y_axis 106 | bar.add_yaxis(series_name='train',y_axis=y_train) 107 | bar.add_yaxis(series_name='val',y_axis=y_val) 108 | 
109 | # save 110 | bar.render('./preprocess/05.html')
-------------------------------------------------------------------------------- /preprocess/05.html: --------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | Awesome-pyecharts 6 | 7 | 8 | 9 | 10 | 
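The next script only demos the deterministic Resize/CenterCrop pipeline; the transform catalog it lists is what a training-time augmentation would draw from (the project's actual training transforms presumably live in gcnet/transforms.py). A sketch of such a pipeline, built only from standard torchvision transforms:

```python
from torchvision import transforms

train_augment = transforms.Compose([
    transforms.RandomResizedCrop(224),              # random crop over scale and aspect ratio
    transforms.RandomHorizontalFlip(p=0.5),         # mirror half the samples
    transforms.ColorJitter(0.2, 0.2, 0.2),          # jitter brightness/contrast/saturation
    transforms.ToTensor(),                          # to tensor, scaled to [0,1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```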
11 | 180 | 181 | 182 | 
-------------------------------------------------------------------------------- /preprocess/06-数据增强transform.py: --------------------------------------------------------------------------------
1 | 2 | #(1) Cropping 3 | # center crop: .CenterCrop 4 | # random crop: .RandomCrop 5 | # random resized (aspect-ratio) crop: .RandomResizedCrop 6 | # four corners + center: .FiveCrop 7 | # FiveCrop plus flipped copies: .TenCrop 8 | 
9 | #(2) Flip and rotation 10 | # horizontal flip with probability p: RandomHorizontalFlip(p=0.5) 11 | # vertical flip with probability p: RandomVerticalFlip(p=0.5) 12 | # random rotation: .RandomRotation() 13 | 
14 | #(3) Image transforms 15 | # resize: transforms.Resize() 16 | # standardize: transforms.Normalize() 17 | # to tensor scaled to [0,1]: .ToTensor; padding: .Pad 18 | # brightness/contrast/saturation: .ColorJitter() 19 | # grayscale: .Grayscale() 20 | # linear transform: .LinearTransformation() 21 | # affine transform: .RandomAffine() 22 | # grayscale with probability p: .RandomGrayscale() 23 | # back to a PIL image: .ToPILImage() 24 | import torch 25 | import torchvision 26 | from torchvision import transforms 27 | from PIL import Image 28 | 
29 | img_path='./data/garbage_classify/train/0/img_4.jpg' 30 | img=Image.open(img_path) 31 | import matplotlib.pyplot as plt 32 | ax=plt.imshow(img) 33 | #plt.show() 34 | print(img.size) 35 | 
36 | preprocess=transforms.Compose([ 37 |     transforms.Resize((256,256)), # resize to 256x256 38 |     transforms.CenterCrop((224,224)), 39 |     transforms.ToTensor(), # to tensor, scaled to [0,1] 40 |     transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225]) # standardize (ImageNet mean/std) 41 | ]) 42 | res_img=preprocess(img) 43 | res=res_img.permute(1,2,0) # tensors are c,h,w 44 | ax=plt.imshow(res) # imshow expects h,w,c 45 | plt.show() 46 | print(res_img.shape) 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 
-------------------------------------------------------------------------------- /preprocess/07-原始数据可视化.py: --------------------------------------------------------------------------------
1 | import torch 2 | import torchvision 3 | from torchvision import datasets,models,transforms 4 | from matplotlib import pyplot as plt 5 | import numpy as np 6 | 
7 | device=torch.device('cuda' if torch.cuda.is_available() else 'cpu') 8 | 
9 | TRAIN='./data/garbage_classify/train' 10 | VAL='./data/garbage_classify/val' 11 | 
12 | train_data=datasets.ImageFolder(TRAIN) 13 | val_data=datasets.ImageFolder(VAL) 14 | #print(train_data.imgs) 15 | 
16 | from PIL import Image 17 | fig=plt.figure(figsize=(25,8)) 18 | 
19 | for idx,img in enumerate(train_data.imgs[:9]): 20 |     print(idx) 21 |     img_path=img[0] 22 |     img_name=img_path.split('/')[-1] 23 |     img_idx=img[1] 24 | 
25 |     img=Image.open(img[0]) 26 |     ax=fig.add_subplot(3,3,idx+1,xticks=[],yticks=[]) 27 |     plt.imshow(img) 28 |     ax.set_title('{}-{}'.format(img_idx,img_name)) 29 | 
30 | plt.show() 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 
-------------------------------------------------------------------------------- /preprocess/08-预处理数据加载.py: --------------------------------------------------------------------------------
1 | 2 | import torch 3 | import torchvision 4 | import matplotlib 5 | from torchvision import datasets,models,transforms 6 | from matplotlib import pyplot as plt 7 | import numpy as np 8 | 
9 | #matplotlib inline 10 | 
11 | #matplotlib.rcParams['axes.unicode_minus']=False # render minus signs correctly 12 | # rm -rf ~/.cache/matplotlib to clear the cache, then add the simhei.ttf font file 13 | plt.rcParams['font.sans-serif'] = ['simhei'] # make Chinese labels render 14 | 
15 | label_dict = { 16 | "0": "其他垃圾/一次性快餐盒", 17 | "1": "其他垃圾/污损塑料", 18 | "2": "其他垃圾/烟蒂", 19 | "3": "其他垃圾/牙签", 20 | "4": "其他垃圾/破碎花盆及碟碗", 21 | "5": "其他垃圾/竹筷", 22 | "6": "厨余垃圾/剩饭剩菜", 23 | "7": "厨余垃圾/大骨头", 24 | "8": "厨余垃圾/水果果皮", 25 | "9": "厨余垃圾/水果果肉", 26 | "10": "厨余垃圾/茶叶渣", 27 | "11": "厨余垃圾/菜叶菜根", 28 | "12": "厨余垃圾/蛋壳", 29 | "13": "厨余垃圾/鱼骨", 30 | "14": "可回收物/充电宝", 31 | "15": "可回收物/包", 32 | "16": "可回收物/化妆品瓶", 33 | "17": "可回收物/塑料玩具", 34 | "18": "可回收物/塑料碗盆", 35 | "19": "可回收物/塑料衣架", 36 | "20": "可回收物/快递纸袋", 37 | "21": "可回收物/插头电线", 38 | "22": "可回收物/旧衣服", 39 | "23": "可回收物/易拉罐", 40 | "24": "可回收物/枕头", 41 | "25": "可回收物/毛绒玩具", 42 | "26": "可回收物/洗发水瓶", 43 | "27": "可回收物/玻璃杯", 44 | "28": "可回收物/皮鞋", 45 | "29": "可回收物/砧板", 46 | "30": "可回收物/纸板箱", 47 | "31": "可回收物/调料瓶", 48 | "32": "可回收物/酒瓶", 49 | "33": "可回收物/金属食品罐", 50 | "34": "可回收物/锅", 51 | "35": "可回收物/食用油桶", 52 | "36": "可回收物/饮料瓶", 53 | "37": "有害垃圾/干电池", 54 | "38": "有害垃圾/软膏", 55 | "39": "有害垃圾/过期药物" 56 | } 57 | 58 | 
59 | TRAIN='./data/garbage_classify/train' 60 | VAL='./data/garbage_classify/val' 61 | 
62 | #(1) data preprocessing 63 | train_transforms=transforms.Compose([ 64 |     transforms.Resize((256,256)), # resize to 256x256 65 |     transforms.CenterCrop((224,224)), 66 |     transforms.ToTensor(), # to tensor, scaled to [0,1] 67 |     transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225]) # standardize (ImageNet mean/std) 68 | ]) 69 | 
70 | val_transforms=transforms.Compose([ 71 |     transforms.Resize((256,256)), # resize to 256x256 72 |     transforms.CenterCrop((224,224)), 73 |     transforms.ToTensor(), # to tensor, scaled to [0,1] 74 |     transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225]) # standardize (ImageNet mean/std) 75 | ]) 76 | #(2) build the datasets 77 | train_data=datasets.ImageFolder(TRAIN,transform=train_transforms) 78 | #train_data.imgs 79 | # TODO: assert train_data.class_to_idx == val_data.class_to_idx (the label mappings should match) 80 | val_data=datasets.ImageFolder(VAL,transform=val_transforms) 81 | 
82 | #(3) data loading 83 | batch_size=32 84 | num_workers=0 85 | train_loader=torch.utils.data.DataLoader(train_data,batch_size=batch_size,num_workers=num_workers,shuffle=True) 86 | val_loader=torch.utils.data.DataLoader(val_data,batch_size=batch_size,num_workers=num_workers,shuffle=False) 87 | #print(train_loader.dataset) 88 | 
89 | img,labels=next(iter(train_loader)) 90 | 
91 | classes=train_data.classes # ImageFolder sorts folder names as strings: ['0','1','10','11',...], not 0-39 in numeric order 92 | img=img.permute(0,2,3,1) 93 | fig=plt.figure(figsize=(25,8)) 94 | 95 | 
96 | for idx in range(batch_size//4*4): 97 |     ax=fig.add_subplot(4,batch_size//4,idx+1,xticks=[],yticks=[]) 98 |     target_idx=classes[labels[idx]] # map the loader's index back to the folder name, e.g. '7' 99 |     target_name=label_dict[target_idx] 100 |     ax.set_title('{}-{}'.format(target_name,target_idx)) 101 |     plt.imshow(img[idx]) 102 | plt.show() 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 
-------------------------------------------------------------------------------- /preprocess/09-测试resnext101模型.py: --------------------------------------------------------------------------------
1 | import torch 2 | import torch.nn as nn 3 | from torchvision import transforms 4 | from PIL import Image 5 | import matplotlib.pyplot as plt 6 | 
7 | import gcnet.resnet as resnet 8 | 
9 | #(1) read img 10 | img_path='./preprocess/images/cat.jpg' 11 | img=Image.open(img_path) 12 | 
13 | #(2) preprocess 14 | preprocess=transforms.Compose([ 15 |     transforms.Resize((256,256)), # resize to 256x256 16 |     transforms.CenterCrop((224,224)), 17 |     transforms.ToTensor(), # to tensor, scaled to [0,1] 18 |     transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225]) # standardize (ImageNet mean/std) 19 | ]) 20 | input_img=preprocess(img) # c,h,w = 3,224,224 21 | # plt.imshow(input_img) 22 | # plt.show() 23 | 
24 | input_batch=input_img.unsqueeze(0) # b,c,h,w = 1,3,224,224 25 | 
26 | #(3) load model 27 | model=resnet.resnext101_32x16d_wsl() # the first call downloads the weights to /home/ubuntu/.cache/torch/checkpoints/ig_resnext101_32x16-c6f796b0.pth 28 | model.eval() # the final layer is (fc): Linear(in_features=2048, out_features=1000, bias=True) 29 | # print(model_ft) 30 | 
31 | #(4) input 32 | if torch.cuda.is_available(): 33 |     input_batch=input_batch.to('cuda') # move the image to the gpu 34 |     model.to('cuda') 35 | with torch.no_grad(): 36 |     output=model(input_batch) # logits over the 1000 ImageNet classes 37 | print(output[0].shape) 38 | 
39 | #(5) index 40 | res=torch.nn.functional.softmax(output[0],dim=0) 41 | 
42 | #(6) result -> list 43 | res=res.cpu().numpy().tolist() 44 | max_v=0 45 | index=0 46 | for i ,c in enumerate(res): # arg-max over the class probabilities 47 |     if max_v
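For reference, the manual max loop keeps only the single best class; `torch.topk` returns the top-k in one call. A sketch of an equivalent, assuming `output` is the `(1, 1000)` logits tensor computed above:

```python
probs = torch.nn.functional.softmax(output[0], dim=0)
top_p, top_i = torch.topk(probs, k=5)               # top-5 probabilities and indices
for p, i in zip(top_p.tolist(), top_i.tolist()):
    print('class {:4d}  prob {:.4f}'.format(i, p))
```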