├── ArcFace
│   ├── util
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── utils.py
│   │   └── datasets.py
│   ├── loss.png
│   ├── cat_facebank.py
│   ├── config.py
│   ├── train.py
│   ├── make_facebank.py
│   ├── face_verify.py
│   ├── 效果评估.py
│   └── model.py
├── resnet 108人脸关键点
│   ├── __init__.py
│   ├── util
│   │   ├── __init__.py
│   │   ├── common_utils.py
│   │   ├── data_agu.py
│   │   └── datasets.py
│   ├── loss.py
│   ├── inference.py
│   ├── predict.py
│   └── train.py
├── resnet34 姿态检测
│   ├── __init__.py
│   ├── util
│   │   ├── __init__.py
│   │   ├── data_agu.py
│   │   ├── multi_proc_utils.py
│   │   ├── loss.py
│   │   ├── common_utils.py
│   │   └── datasets.py
│   ├── predict.py
│   ├── train.py
│   └── model.py
├── yoloV3 人脸检测
│   ├── util
│   │   ├── __init__.py
│   │   ├── data_mini_cat.py
│   │   ├── torch_utils.py
│   │   ├── parse_config.py
│   │   ├── dataset_teacherCode.py
│   │   └── datasets.py
│   ├── cfg
│   │   ├── face.names
│   │   └── face.data
│   ├── lr
│   │   └── 学习率动态调整.py
│   ├── data_load.py
│   ├── predict.py
│   ├── train.py
│   ├── train2.py
│   └── test.py
└── README.md

/ArcFace/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/resnet 108人脸关键点/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/resnet34 姿态检测/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/yoloV3 人脸检测/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/resnet 108人脸关键点/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/resnet34 姿态检测/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/resnet34 姿态检测/util/data_agu.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/yoloV3 人脸检测/cfg/face.names:
--------------------------------------------------------------------------------
1 | Face
--------------------------------------------------------------------------------
/ArcFace/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnstoppableCurry/Face-payment/HEAD/ArcFace/loss.png
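Taken together, the four folders form one face-payment pipeline: yoloV3 人脸检测 detects faces, resnet 108人脸关键点 and resnet34 姿态检测 supply landmarks and head pose for alignment and filtering, and ArcFace embeds each aligned 112x112 crop for comparison against the facebank built by make_facebank.py. face_verify.py and 效果评估.py are not reproduced in this dump, so the following is only a sketch of the final matching step under the conventions visible in cat_facebank.py and make_facebank.py (an (N, 512) embedding tensor plus a names array whose index 0 is 'Unknown'); the threshold of 1.5 is an assumed value, not one taken from this repository:

    import numpy as np
    import torch

    def match_faces(query, facebank, names, threshold=1.5):
        # query: (M, 512) l2-normalized embeddings of the detected faces
        # facebank: (N, 512) stored identity features; names: (N + 1,), names[0] == 'Unknown'
        diff = query.unsqueeze(-1) - facebank.transpose(1, 0).unsqueeze(0)  # (M, 512, N)
        dist = torch.sum(torch.pow(diff, 2), dim=1)                         # squared L2, (M, N)
        minimum, min_idx = torch.min(dist, dim=1)
        min_idx[minimum > threshold] = -1  # nothing close enough -> map to 'Unknown'
        return [names[int(i) + 1] for i in min_idx], minimum

The + 1 offset compensates for the 'Unknown' entry that prepare_facebank prepends to the names array.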
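train.py, make_facebank.py and the predictors all import Backbone, Arcface and l2_norm from model.py, which is not included in this dump. For orientation only, here is a minimal sketch of an additive-angular-margin (ArcFace) head with the call signature used in train.py (head_(embeddings, labels), with a kernel parameter that the optimizer weight-decays); the real Arcface class may differ in details such as easy-margin handling:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class ArcMarginSketch(nn.Module):
        # the target-class logit becomes cos(theta + m); everything is scaled by s
        def __init__(self, embedding_size=512, classnum=10, s=64.0, m=0.5):
            super().__init__()
            self.kernel = nn.Parameter(torch.randn(embedding_size, classnum))
            self.s = s
            self.m = m

        def forward(self, embeddings, labels):
            # cosine similarity between normalized features and normalized class weights
            cos_theta = F.normalize(embeddings, dim=1) @ F.normalize(self.kernel, dim=0)
            cos_theta = cos_theta.clamp(-1 + 1e-7, 1 - 1e-7)
            theta = torch.acos(cos_theta)
            cos_theta_m = torch.cos(theta + self.m)  # margin on the ground-truth class only
            one_hot = F.one_hot(labels, cos_theta.size(1)).bool()
            return torch.where(one_hot, cos_theta_m, cos_theta) * self.s

The scaled logits returned by such a head are what train.py feeds into conf.ce_loss (plain cross-entropy).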
--------------------------------------------------------------------------------
/yoloV3 人脸检测/util/data_mini_cat.py:
--------------------------------------------------------------------------------
1 | path = '/www/dataset/yolo_helmet_train/yolo_helmet_train/anno/train.txt'
2 |
3 | with open(path, 'r') as file:
4 |     img_files = file.read().splitlines()
5 | print(img_files)
6 | img_files = list(filter(lambda x: len(x) > 0, img_files))
--------------------------------------------------------------------------------
/resnet34 姿态检测/util/multi_proc_utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from multiprocessing import cpu_count, Pool
4 |
5 | # number of CPU cores
6 | cores = cpu_count()
7 | # number of data partitions
8 | partitions = cores
9 |
10 |
11 | def parallelize(df, func):
12 |     # split the DataFrame into chunks
13 |     data_split = np.array_split(df, partitions)
14 |     # process pool (multiprocessing.Pool spawns processes, not threads)
15 |     pool = Pool(cores)
16 |     # dispatch the chunks to the workers and concatenate the results
17 |     data = pd.concat(pool.map(func, data_split))
18 |     # close the pool
19 |     pool.close()
20 |     # after close() no new tasks can be submitted; join() waits for all workers to finish
21 |     pool.join()
22 |     return data
--------------------------------------------------------------------------------
/yoloV3 人脸检测/cfg/face.data:
--------------------------------------------------------------------------------
1 | cfg_model=yolo
2 | classes=1
3 | gpus = 0
4 | num_workers = 12
5 | batch_size = 12
6 | img_size = 416
7 | multi_scale = True
8 | epochs = 20
9 | train=/root/cv/dataset/人脸/datasets/yolo_widerface_open_train/anno/train.txt
10 | valid=/root/cv/dataset/人脸/datasets/yolo_widerface_open_train/anno/train.txt
11 | names=./cfg/face.names
12 | finetune_model =
13 | lr_step = 5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90
14 | lr0 = 4e-6
15 |
16 | #4e-6
17 | #valid=/www/dataset/yolo_helmet_train/yolo_helmet_train/anno/train.txt
18 |
19 |
20 |
--------------------------------------------------------------------------------
/yoloV3 人脸检测/lr/学习率动态调整.py:
--------------------------------------------------------------------------------
1 | # step the learning rate down at fixed epoch milestones
2 | # lr_scheduler.MultiStepLR()
3 | # Assuming optimizer uses lr = 0.05 for all groups
4 | # lr = 0.05 if epoch < 30
5 | # lr = 0.005 if 30 <= epoch < 80
6 | # lr = 0.0005 if epoch >= 80
7 | import torch.optim as optim
8 | import matplotlib.pyplot as plt
9 | import yoloV3
10 | import torch
11 |
12 | model = yoloV3.Yolov3(1)
13 | optimizer = optim.SGD(params=model.parameters(), lr=0.05)
14 |
15 | plt.figure()
16 | y = []
17 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [30, 80], 0.1)
18 | for epoch in range(100):
19 |     optimizer.step()  # PyTorch >= 1.1 expects optimizer.step() before scheduler.step()
20 |     scheduler.step()
21 |     lr = optimizer.param_groups[0]['lr']  # the learning rate actually in effect
22 |     print(epoch, 'lr={:.6f}'.format(lr))
23 |     y.append(lr)
24 |
25 | plt.plot(y)
26 | plt.show()
27 |
--------------------------------------------------------------------------------
/ArcFace/cat_facebank.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | # load the face features stored in the .pth / .npy files
6 | def load_facebank(facebank_path):
7 |     embeddings = torch.load(facebank_path + '/facebank.pth',
8 |                             map_location=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))
9 |     names = np.load(facebank_path + '/names.npy')
10 |     embeddings2 = torch.load(facebank_path + '/facebank_1.pth',
11 |                              map_location=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))
12 |     names2 = np.load(facebank_path + '/names_1.npy')
13 |     p_arr = np.concatenate((names, ['wtx_3']))  # np.concatenate expects a tuple of arrays
14 |     return torch.cat((embeddings, embeddings2), 0), p_arr
15 |
16 |
17 | if __name__ == '__main__':
18 |     a = load_facebank('./facebank')
19 |     print(a[0].shape, a[0], a[1])
20 |     torch.save(a[0], './facebank' + '/facebank.pth')
21 |     np.save('./facebank/names', a[1])
--------------------------------------------------------------------------------
/yoloV3 人脸检测/util/torch_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def init_seeds(seed=0):
5 |     torch.manual_seed(seed)
6 |     torch.cuda.manual_seed(seed)
7 |     torch.cuda.manual_seed_all(seed)
8 |
9 |
10 | def select_device(force_cpu=False):
11 |     if force_cpu:
12 |         cuda = False
13 |         device = torch.device('cpu')
14 |     else:
15 |         cuda = torch.cuda.is_available()
16 |         device = torch.device('cuda:0' if cuda else 'cpu')
17 |
18 |     if torch.cuda.device_count() > 1:
19 |         device = torch.device('cuda' if cuda else 'cpu')
20 |         print('Found %g GPUs' % torch.cuda.device_count())
21 |         # print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
22 |         # torch.cuda.set_device(0)  # OPTIONAL: Set your GPU if multiple available
23 |         # print('Using ', torch.cuda.device_count(), ' GPUs')
24 |
25 |     print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
26 |     return device
27 |
--------------------------------------------------------------------------------
/resnet 108人脸关键点/loss.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | # date:2019-05-20
3 | # function: wing loss
4 | import torch
5 | import torch.nn as nn
6 | import torch.optim as optim
7 | import os
8 | import math
9 |
10 |
11 | def wing_loss(landmarks, labels, w=0.06, epsilon=0.01):
12 |     """
13 |     :param landmarks: predictions
14 |     :param labels: ground truth
15 |     :param w:
16 |     :param epsilon:
17 |     :return:
18 |     """
19 |     # elementwise error (L1-style residual)
20 |     x = landmarks - labels
21 |     # constant C that keeps the two branches continuous at |x| = w
22 |     c = w * (1.0 - math.log(1.0 + w / epsilon))
23 |     # absolute error
24 |     absolute_x = torch.abs(x)
25 |     # piecewise loss: logarithmic near zero, linear for large errors
26 |     losses = torch.where(
27 |         (w > absolute_x),
28 |         w * torch.log(1.0 + absolute_x / epsilon),
29 |         absolute_x - c)
30 |     # average over the landmark dimension, then over the batch
31 |     losses = torch.mean(losses, dim=1, keepdim=True)
32 |     loss = torch.mean(losses)
33 |     # return the loss value
34 |     return loss
35 |
36 |
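# Worked example (illustrative, with this file's w = 0.06 and epsilon = 0.01): an error of
# |x| = 0.05 falls on the logarithmic branch, 0.06 * ln(1 + 0.05 / 0.01) = 0.06 * ln(6) ≈ 0.1075,
# while |x| = 0.5 falls on the linear branch, 0.5 - C, with
# C = 0.06 * (1 - ln(1 + 0.06 / 0.01)) ≈ -0.0567, giving ≈ 0.5567; small errors are thus
# amplified relative to plain L1, which is the point of the wing loss for landmark regression.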
37 | def got_total_wing_loss(output, crop_landmarks):
38 |     # loss over all predicted points
39 |     loss = wing_loss(output, crop_landmarks)
40 |     # return the loss value
41 |     return loss
42 |
--------------------------------------------------------------------------------
/resnet34 姿态检测/util/loss.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | # date:2019-05-20
3 | # function: wing loss
4 | import torch
5 | import torch.nn as nn
6 | import torch.optim as optim
7 | import os
8 | import math
9 |
10 |
11 | def wing_loss(landmarks, labels, w=10., epsilon=2.):
12 |     """
13 |     :param landmarks: predictions
14 |     :param labels: ground truth
15 |     :param w:
16 |     :param epsilon:
17 |     :return:
18 |     """
19 |     # elementwise error (L1-style residual)
20 |     x = landmarks - labels
21 |     # constant C that keeps the two branches continuous at |x| = w
22 |     c = w * (1.0 - math.log(1.0 + w / epsilon))
23 |     # absolute error
24 |     absolute_x = torch.abs(x)
25 |     # piecewise loss: logarithmic near zero, linear for large errors
26 |     losses = torch.where((w > absolute_x), w * torch.log(1.0 + absolute_x / epsilon), absolute_x - c)
27 |     # average over the landmark dimension, then over the batch
28 |     losses = torch.mean(losses, dim=1, keepdim=True)
29 |     loss = torch.mean(losses)
30 |     # return the loss value
31 |     return loss
32 |
33 |
34 | def got_total_wing_loss(output, crop_landmarks):
35 |     """
36 |     total loss
37 |     :param output:
38 |     :param crop_landmarks:
39 |     :return:
40 |     """
41 |     loss = wing_loss(output, crop_landmarks)
42 |
43 |     return loss
--------------------------------------------------------------------------------
/ArcFace/util/config.py:
--------------------------------------------------------------------------------
1 | from easydict import EasyDict as edict
2 | from pathlib import Path
3 | import torch
4 | from torch.nn import CrossEntropyLoss
5 | from torchvision import transforms as trans
6 |
7 |
8 | # parameter configuration for model training
9 | def get_config():
10 |     conf = edict()
11 |     # number of training epochs
12 |     conf.epochs = 64
13 |     # working directory
14 |     conf.work_path = "./"
15 |     # paths of the pretrained models used for fine-tuning
16 |     conf.finetune_backbone_model = ""
17 |     conf.finetune_head_model = ""
18 |     # training-set path
19 |     conf.datasets_train_path = "/root/cv/dataset/人脸/datasets/insight_face"
20 |     # where checkpoints are saved
21 |     conf.save_path = conf.work_path + 'save'
22 |     # input image size
23 |     conf.input_size = [112, 112]
24 |     # embedding size
25 |     conf.embedding_size = 512
26 |     # network depth
27 |     conf.net_depth = 50
28 |     # dropout probability
29 |     conf.drop_ratio = 0.6
30 |     # network mode
31 |     conf.net_mode = 'ir_se'  # or 'ir'
32 |     # device
33 |     conf.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
34 |     # batch size
35 |     conf.batch_size = 2
36 |     # learning rate
37 |     conf.lr = 1e-3
38 |     # epochs at which the learning rate is stepped down
39 |     conf.milestones = [12, 15, 18]
40 |     # momentum
41 |     conf.momentum = 0.9
42 |     conf.num_workers = 6
43 |     # loss function
44 |     conf.ce_loss = CrossEntropyLoss()
45 |     return conf
--------------------------------------------------------------------------------
/yoloV3 人脸检测/util/parse_config.py:
--------------------------------------------------------------------------------
1 | def parse_model_cfg(path):
2 |     """Parses the yolo-v3 layer configuration file and returns module definitions"""
3 |     file = open(path, 'r')
4 |     lines = file.read().split('\n')
5 |     lines = [x for x in lines if x and not x.startswith('#')]
6 |     lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespaces
7 |     module_defs = []
8 |     for line in lines:
9 |         if line.startswith('['):  # This marks the start of a new block
10 |             module_defs.append({})
11 |             module_defs[-1]['type'] = line[1:-1].rstrip()
12 |             if module_defs[-1]['type'] == 'convolutional':
13 |                 module_defs[-1]['batch_normalize'] = 0
14 |         else:
15 |             key, value = line.split("=")
16 |             value = value.strip()
17 |             module_defs[-1][key.rstrip()] =
value.strip() 18 | 19 | return module_defs 20 | 21 | 22 | def parse_data_cfg(path): 23 | """Parses the data configuration file""" 24 | print('data_cfg : ',path) 25 | options = dict() 26 | # options['gpus'] = '0,1,2,3' 27 | # options['num_workers'] = '10' 28 | with open(path, 'r') as fp: 29 | lines = fp.readlines() 30 | for line in lines: 31 | line = line.strip() 32 | if line == '' or line.startswith('#'): 33 | continue 34 | key, value = line.split('=') 35 | options[key.strip()] = value.strip() 36 | return options 37 | -------------------------------------------------------------------------------- /ArcFace/config.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | from pathlib import Path 3 | import torch 4 | from torch.nn import CrossEntropyLoss 5 | from torchvision import transforms as trans 6 | 7 | 8 | # 模型训练时的参数配置信息 9 | def get_config(): 10 | conf = edict() 11 | conf.name = '' 12 | # 训练的轮次 13 | conf.epochs = 50 14 | # 工作目录: 15 | conf.work_path = "./" 16 | # 微调模型的存储位置 17 | # conf.finetune_backbone_model = "/root/cv/pycharm/人脸检测/人脸识别/save/model_2022-01-31-02-22-13_step_19188.pth" 18 | # conf.finetune_head_model = "/root/cv/pycharm/人脸检测/人脸识别/save/head_2022-01-31-02-22-13_step_19188.pth" 19 | conf.finetune_backbone_model = "" 20 | conf.finetune_head_model = "" 21 | 22 | # 训练集路径 23 | # 训练集路径 24 | # conf.datasets_train_path = "C:\\Users\86183\Desktop\人脸识别数据集\\" 25 | # conf.datasets_train_path = "/root/cv/dataset/人脸/datasets/insight_face" 26 | conf.datasets_train_path = "/root/cv/pycharm/人脸检测/人脸识别/mydataset2" 27 | # 模型结果保存位置 28 | conf.save_path = conf.work_path + 'local_save' 29 | # 图像大小 30 | conf.input_size = [112, 112] 31 | # 特征向量的大小 32 | conf.embedding_size = 512 33 | # 网络深度 34 | conf.net_depth = 50 35 | # 随机失活的概率 36 | conf.drop_ratio = 0.6 37 | # 模型模式 38 | conf.net_mode = 'ir_se' # or 'ir' 39 | # 设备信息 40 | conf.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 41 | # bacth大小 42 | conf.batch_size = 128 43 | # conf.batch_size = 128 44 | # 学习率 45 | conf.lr = 1e-3 46 | # 步进式衰减的轮次 47 | conf.milestones = [12, 15, 18] 48 | # 动量 49 | conf.momentum = 0.9 50 | conf.num_workers = 6 51 | # 损失函数 52 | conf.ce_loss = CrossEntropyLoss() 53 | return conf 54 | -------------------------------------------------------------------------------- /ArcFace/util/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from PIL import Image 3 | import numpy as np 4 | import io 5 | from torchvision import transforms as trans 6 | import torch 7 | import sys, os 8 | 9 | # root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 10 | # sys.path.append(root_path) 11 | # print(root_path) 12 | import sys 13 | root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 14 | sys.path.append(root_path) 15 | from model import l2_norm 16 | from util.datasets import de_preprocess 17 | 18 | import pdb 19 | import cv2 20 | 21 | 22 | # 获取网络中的不同层 23 | def separate_bn_paras(modules): 24 | if not isinstance(modules, list): 25 | modules = [*modules.modules()] 26 | paras_only_bn = [] 27 | paras_wo_bn = [] 28 | for layer in modules: 29 | if 'model' in str(layer.__class__): 30 | continue 31 | if 'container' in str(layer.__class__): 32 | continue 33 | else: 34 | if 'batchnorm' in str(layer.__class__): 35 | paras_only_bn.extend([*layer.parameters()]) 36 | else: 37 | paras_wo_bn.extend([*layer.parameters()]) 38 | return paras_only_bn, 
paras_wo_bn 39 | 40 | 41 | # 将反归一化,类型转换,翻转,归一化处理合并在一起进行 42 | hflip = trans.Compose([ 43 | de_preprocess, 44 | trans.ToPILImage(), 45 | trans.functional.hflip, 46 | trans.ToTensor(), 47 | trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 48 | ]) 49 | 50 | 51 | def hflip_batch(imgs_tensor): 52 | hfliped_imgs = torch.empty_like(imgs_tensor) 53 | for i, img_ten in enumerate(imgs_tensor): 54 | hfliped_imgs[i] = hflip(img_ten) 55 | return hfliped_imgs 56 | 57 | 58 | # 绘制框及分类结果 59 | def draw_box_name(bbox, name, frame): 60 | frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 0, 255), 6) 61 | frame = cv2.putText(frame, 62 | name, 63 | (bbox[0], bbox[1]), 64 | cv2.FONT_HERSHEY_SIMPLEX, 65 | 2, 66 | (0, 255, 0), 67 | 3, 68 | cv2.LINE_AA) 69 | return frame 70 | 71 | 72 | # 学习率衰减策略 73 | def schedule_lr(optimizer): 74 | # 每次变为原来的0.1 75 | for params in optimizer.param_groups: 76 | params['lr'] /= 10 77 | print(optimizer) 78 | -------------------------------------------------------------------------------- /ArcFace/util/datasets.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from torch.utils.data import DataLoader, Dataset, ConcatDataset 3 | from torchvision import transforms as trans 4 | from torchvision.datasets import ImageFolder 5 | from PIL import Image, ImageFile 6 | 7 | ImageFile.LOAD_TRUNCATED_IMAGES = True 8 | import numpy as np 9 | import cv2 10 | import pickle 11 | import torch 12 | from tqdm import tqdm 13 | 14 | 15 | def get_train_dataset(imgs_folder): 16 | # 水平翻转,标准化 17 | train_transform = trans.Compose( 18 | [ 19 | trans.RandomResizedCrop(112, scale=(1.0, 1.0), ratio=(1.0, 1.0)), # scale随机裁剪的面积占比,ratio随机裁剪长宽比 20 | trans.RandomHorizontalFlip(), 21 | trans.ToTensor(), 22 | trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 23 | ] 24 | ) 25 | # 一个通用的数据加载器默认数据及已经按照分配的类型分成了不同的文件夹 26 | # 一种类型的文件架下面只存放一种类型的图片 27 | ds = ImageFolder(imgs_folder, train_transform) 28 | # ImageFolder(root, transform=None, target_transform=None, loader=default_loader) 29 | # root 指定路径 30 | # transform 对PIL IMAGE 进行的转换操作 transform的输入是使用loader读取图片返回的对象 31 | # target_tf 对标签label的转换 32 | # loader 给定路径后如何读取图片,默认读取RGB格式的PIL Image对象 33 | # lass_num类别个数 34 | class_num = len(ds.classes) 35 | return ds, class_num 36 | 37 | 38 | def get_train_loader(conf): 39 | ''' 40 | 加载训练集 41 | :param conf: 42 | :return: 43 | ''' 44 | ds, class_num = get_train_dataset(conf.datasets_train_path + '/imgs') 45 | loader = DataLoader(ds, batch_size=conf.batch_size, shuffle=True) 46 | return loader, class_num, ds.__len__() 47 | 48 | 49 | # 反归一化 50 | def de_preprocess(tensor): 51 | return tensor * 0.5 + 0.5 52 | 53 | 54 | if __name__ == "__main__": 55 | from config import get_config 56 | import matplotlib.pyplot as plt 57 | 58 | # 获取参数配置信息 59 | conf = get_config() 60 | # 设置数据的路径 61 | conf.datasets_train_path = "/root/cv/dataset/人脸/datasets/insight_face" 62 | # 获取送入网络中的数据 63 | data_loader, class_num, datasets_len = get_train_loader(conf) 64 | # 打印数据数量和类别个数 65 | print("train datasets len : {}".format(datasets_len)) 66 | print(" class_num:{} ".format(class_num)) 67 | # 遍历数据进行展示 68 | for i, (imgs, labels) in enumerate(data_loader): 69 | # 遍历每个batch中的每一副图像进行展示 70 | for j in range(conf.batch_size): 71 | # 展示 72 | # cv2.imshow('results', np.uint8(de_preprocess(imgs[j].permute(1, 2, 0)) * 255.0)[:, :, ::-1]) 73 | # cv2.waitKey(0) 74 | plt.imshow(np.uint8(de_preprocess(imgs[j].permute(1, 2, 0)) * 255.0)[:, :, ::-1]) 75 | plt.show() 76 | # 打印相应的目标值 77 | print(labels[j]) 
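# Illustrative aside: de_preprocess above inverts trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]).
# Normalize computes (x - 0.5) / 0.5, so x * 0.5 + 0.5 maps a tensor from [-1, 1] back to [0, 1]:
#     x = torch.rand(3, 112, 112)
#     n = trans.Normalize([0.5] * 3, [0.5] * 3)(x)
#     assert torch.allclose(de_preprocess(n), x, atol=1e-6)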
78 | cv2.destroyAllWindows() 79 | -------------------------------------------------------------------------------- /yoloV3 人脸检测/data_load.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | import os 3 | import cv2 4 | import numpy as np 5 | 6 | # 指定文件路径 7 | # 数据路径 8 | root_path = '/root/cv/dataset/人脸/datasets' 9 | # txt文件的路径 10 | path = '/root/cv/dataset/人脸/datasets/yolo_widerface_open_train/anno/train.txt' 11 | # 要检测的类别 12 | path_voc_names = './cfg/face.names' 13 | 14 | if __name__ == '__main__': 15 | # 读取检测的目标类别 16 | with open(path_voc_names, 'r') as f: 17 | label_map = f.readlines() 18 | # 获取所有的类别 19 | for i in range(len(label_map)): 20 | label_map[i] = label_map[i].strip() 21 | print(i, ') ', label_map[i].strip()) 22 | # 获取所有的图像文件 23 | with open(path, 'r') as file: 24 | img_files = file.read().splitlines() 25 | img_files = list(filter(lambda x: len(x) > 0, img_files)) 26 | 27 | for i in range(len(img_files)): 28 | img_files[i] = img_files[i].replace('./', '/root/cv/dataset/人脸/datasets/') 29 | # 获取所有的标注文件 30 | label_files = [ 31 | x.replace('images', 'labels').replace('.jpg', '.txt').replace('./', '/root/cv/dataset/人脸/') 32 | for x in img_files] 33 | # 读取图像并对标注信息进行绘制 34 | # for i in range(len(img_files)): 35 | for i in range(100): 36 | # print(img_files[i]) 37 | # 图像的绝对路径 38 | # print(img_files[i][2:]) 39 | img_file = os.path.join(img_files[i]) 40 | # 图像读取,获取宽高 41 | # print(img_file) 42 | img = cv2.imread(img_file) 43 | w = img.shape[1] 44 | h = img.shape[0] 45 | # 标签文件的绝对路径 46 | label_path = os.path.join(label_files[i]) 47 | # print(i, label_path) 48 | if os.path.isfile(label_path): 49 | # 获取每一行的标注信息 50 | with open(label_path, 'r') as file: 51 | lines = file.read().splitlines() 52 | # 获取每一行中的标准信息(cls,x,y,w,h) 53 | x = np.array([x.split() for x in lines], dtype=np.float32) 54 | for k in range(len(x)): 55 | anno = x[k] 56 | label = int(anno[0]) 57 | # 获取框的坐标值,左上角坐标和右下角坐标 58 | x1 = int((float(anno[1]) - float(anno[3]) / 2) * w) 59 | y1 = int((float(anno[2]) - float(anno[4]) / 2) * h) 60 | 61 | x2 = int((float(anno[1]) + float(anno[3]) / 2) * w) 62 | y2 = int((float(anno[2]) + float(anno[4]) / 2) * h) 63 | 64 | # 将标注框绘制在图像上 65 | cv2.rectangle(img, (x1, y1), (x2, y2), (255, 30, 30), 2) 66 | # 将标注类别绘制在图像上 67 | cv2.putText(img, ("%s" % (str(label_map[label]))), (x1, y1), \ 68 | cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 255, 55), 6) 69 | cv2.putText(img, ("%s" % (str(label_map[label]))), (x1, y1), \ 70 | cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 55, 255), 2) 71 | cv2.imwrite("./samples/results_{}".format(os.path.basename(img_file)), img) 72 | 73 | # 结果显示 74 | # cv2.namedWindow('image', 0) 75 | # cv2.imshow('image', img) 76 | # if cv2.waitKey(1) == 27: 77 | # break 78 | # print("./samples/results_{}".format(os.path.basename(img_file))) 79 | # print(img_file, '---------------') 80 | # print("./samples/results_{}".format(os.path.basename(img_file))) 81 | # cv2.destroyAllWindows() 82 | -------------------------------------------------------------------------------- /ArcFace/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | warnings.filterwarnings("ignore") 5 | 6 | from config import get_config 7 | import argparse 8 | # -------------------------------- 9 | from util.datasets import de_preprocess, get_train_loader 10 | from model import Backbone, Arcface, l2_norm 11 | import torch 12 | from torch import optim 13 | import numpy as np 14 | from matplotlib 
import pyplot as plt 15 | from util.utils import separate_bn_paras, schedule_lr 16 | from PIL import Image 17 | from torchvision import transforms as trans 18 | import math 19 | import time 20 | from torch.utils.tensorboard import SummaryWriter 21 | 22 | 23 | def trainer(conf): 24 | tb_writer = SummaryWriter(comment=conf.name) 25 | # 加载训练集数据 26 | data_loader, class_num, datasets_len = get_train_loader(conf) 27 | # 模型选择 28 | model_ = Backbone(conf.net_depth, conf.drop_ratio, conf.net_mode).to(conf.device) 29 | print('{}_{} model generated'.format(conf.net_mode, conf.net_depth)) 30 | if os.access(conf.finetune_backbone_model, os.F_OK): 31 | model_.load_state_dict(torch.load(conf.finetune_backbone_model)) 32 | print("-------->>> load model : {}".format(conf.finetune_backbone_model)) 33 | # 加载head模型 34 | head_ = Arcface(embedding_size=conf.embedding_size, classnum=class_num).to(conf.device) 35 | 36 | if os.access(conf.finetune_head_model, os.F_OK): 37 | head_.load_state_dict(torch.load(conf.finetune_head_model)) 38 | print("-------->>> load head : {}".format(conf.finetune_head_model)) 39 | # 优化器 40 | paras_only_bn, paras_wo_bn = separate_bn_paras(model_) 41 | optimizer = optim.SGD([ 42 | {'params': paras_wo_bn + [head_.kernel], 'weight_decay': 5e-4}, 43 | {'params': paras_only_bn} 44 | ], lr=conf.lr, momentum=conf.momentum) 45 | # bn层分离冻结不进行正则化 46 | model_.train() 47 | step_ = 0 48 | # 用来存放loss,和准确率进行绘图 49 | loss_list = [] 50 | tensorboard_step = 0 51 | loss_mean = 0 52 | # 遍历每一个epoch 53 | for e in range(conf.epochs): 54 | # 学习率衰减策略,变为原来的0.1倍 55 | print(" epoch < {} >".format(e)) 56 | if e == conf.milestones[0]: 57 | schedule_lr(optimizer) 58 | if e == conf.milestones[1]: 59 | schedule_lr(optimizer) 60 | if e == conf.milestones[2]: 61 | schedule_lr(optimizer) 62 | for i, (imgs, labels) in enumerate(data_loader): 63 | imgs = imgs.to(conf.device) 64 | labels = labels.to(conf.device) 65 | optimizer.zero_grad() 66 | embeddings = model_(imgs) 67 | thetas = head_(embeddings, labels) 68 | loss = conf.ce_loss(thetas, labels) 69 | loss_list.append(loss) 70 | loss.backward() 71 | optimizer.step() 72 | # 每10个迭代次数打印信息 73 | if i % 10 == 0: 74 | print( 75 | " epoch - < {}/{} >, [{}/{}], loss: {:.6f} , bs: {}".format( 76 | e, conf.epochs, i, int(datasets_len / conf.batch_size), loss.item(), 77 | conf.batch_size)) 78 | # 每100个迭代次数保存checkpoint 79 | # if step_ % 533 == 0: 80 | # 迭代次数加1 81 | step_ += 1 82 | tensorboard_step += 1 83 | loss_mean += loss.item() 84 | if tb_writer: 85 | tb_writer.add_scalar("epoch", e, tensorboard_step) 86 | tb_writer.add_scalar("tensorboard_step", tensorboard_step, tensorboard_step) 87 | tb_writer.add_scalar("loss", loss.item(), tensorboard_step) 88 | tb_writer.add_scalar("mean_loss", loss_mean / tensorboard_step, tensorboard_step) 89 | # 保存路径 90 | save_path = conf.save_path 91 | # 若不存在,则创建该路径 92 | if not os.path.exists(save_path): 93 | os.mkdir(save_path) 94 | # 获取当前时刻 95 | time_str = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) 96 | # 保存backbone的结果 97 | torch.save( 98 | model_.state_dict(), save_path + 99 | ('/model_{}_step_{}.pth'.format(time_str, step_))) 100 | # 保存head部分的结果 101 | torch.save( 102 | head_.state_dict(), save_path + 103 | ('/head_{}_step_{}.pth'.format(time_str, step_))) 104 | # 创建第一张画布 105 | plt.figure(0) 106 | # 绘制总损失曲线 , 颜色为蓝色 107 | plt.plot(loss_list, color="blue", label="Loss") 108 | # 曲线说明在左上方 109 | plt.legend(loc='upper left') 110 | # 保存图片 111 | plt.savefig("./loss.png") 112 | 113 | 114 | if __name__ == '__main__': 115 | # 获取配置信息 116 | conf = 
get_config() 117 | # 模型训练 118 | trainer(conf) 119 | -------------------------------------------------------------------------------- /yoloV3 人脸检测/predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import os 4 | import torch 5 | from util.datasets import * 6 | from util.utils import * 7 | from util.parse_config import parse_data_cfg 8 | from yoloV3 import Yolov3, Yolov3Tiny 9 | from util.torch_utils import select_device 10 | 11 | 12 | # os.environ['CUDA_VISIBLE_DEVICES'] = "0" 13 | # 图像预处理 14 | def process_data(img, img_size=416): 15 | img, _, _, _ = letterbox(img, height=img_size) 16 | # 通道转换 BGR to RGB 17 | img = img[:, :, ::-1].transpose(2, 0, 1) 18 | # 类型转换 uint8 to float32 19 | img = np.ascontiguousarray(img, dtype=np.float32) 20 | # 归一化 0 - 255 to 0.0 - 1.0 21 | img /= 255.0 22 | return img 23 | 24 | 25 | def detect(model_path, cfg, data_cfg, img_size=416, conf_thres=0.1, nms_thres=0.2, video_path=0): 26 | """ 27 | 28 | :param model_path: 模型路径 29 | :param cfg: 配置信息 30 | :param data_cfg: 数据配置信息 31 | :param img_size: 图像的大小 32 | :param conf_thres: 置信度阈值 33 | :param nms_thres: NMS阈值 34 | :param video_path: 要处理的视频路径 35 | :return: 36 | """ 37 | # 获取检测的类别信息 38 | classes = load_classes(parse_data_cfg(data_cfg)['names']) 39 | num_classes = len(classes) 40 | # 1.模型加载 41 | # 第一步:指定训练好的模型参数 42 | if "-tiny" in cfg: 43 | model = Yolov3Tiny(num_classes) 44 | else: 45 | model = Yolov3(num_classes) 46 | # 第二步:加载模型训练结果 47 | device = select_device() 48 | weights = model_path 49 | if os.access(weights, os.F_OK): 50 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 51 | else: 52 | return False 53 | use_cuda = torch.cuda.is_available() 54 | model.to(device).eval() 55 | 56 | # 2.数据加载 57 | video_capture = cv2.VideoCapture(video_path) 58 | 59 | # 3.遍历帧图像进行处理 60 | # 第一步:设置结果保存位置 61 | video_writer = None 62 | save_video_path = "result_{}.MP4".format(time.strftime("%Y-%m-%d_%H_%M_%S", time.localtime())) 63 | # 第二步:遍历视频中每一帧图像 64 | while (video_capture.isOpened()): 65 | ret, im0 = video_capture.read() 66 | # 第三步:图像预处理,并记录处理时间 67 | if ret: 68 | 69 | t = time.time() 70 | img = process_data(im0, img_size) 71 | if use_cuda: 72 | torch.cuda.synchronize() 73 | t1 = time.time() 74 | print("process time", t1 - t) 75 | # 第四步:模型前向推理进行检测,并记录推理时间 76 | img = torch.from_numpy(img).unsqueeze(0).to(device) 77 | pred, _ = model(img) 78 | if use_cuda: 79 | torch.cuda.synchronize() 80 | t2 = time.time() 81 | print("inference time", t2 - t1) 82 | # 第五步:非极大值抑制,NMS,并记录推理时间 83 | detections = non_max_suppression(pred, conf_thres, nms_thres)[0] 84 | if use_cuda: 85 | torch.cuda.synchronize() 86 | t3 = time.time() 87 | print("get res time", t3 - t2) 88 | if detections is None or len(detections) == 0: 89 | cv2.imshow("image", im0) 90 | cv2.waitKey(1) 91 | print('跳过-----------------------------------------') 92 | continue 93 | # 第六步:结果展示 94 | detections[:, :4] = scale_coords(img_size, detections[:, :4], im0.shape).round() 95 | 96 | for *xyxy, conf, cls_conf, cls in detections: 97 | label = "%s %.2f" % (classes[int(cls)], conf) 98 | xyxy = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3]) 99 | im0 = plot_one_box(xyxy, im0, label=label, color=(0, 0, 255), line_thickness=3) 100 | s2 = time.time() 101 | print("detect time", s2 - t) 102 | cv2.imshow("image", im0) 103 | cv2.waitKey(1) 104 | # if video_writer is None: 105 | # # fourcc = cv2.VideoWriter_fourcc(*"MJPG") 106 | # video_writer = 
cv2.VideoWriter('outpy.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps=25, framesize=(im0.shape[0], im0.shape[1]))
107 |             # video_writer.write(im0)
108 |     cv2.destroyAllWindows()
109 |     video_capture.release()
110 |
111 |
112 | # 4. run the model
113 | if __name__ == '__main__':
114 |     # configuration
115 |     # model/data config file
116 |     data_config = './cfg/face.data'
117 |     # path to the trained detector weights
118 |
119 |     # model_path = '/root/cv/pycharm/人脸检测/yolo3-人脸检测/weights-yolov3-faceyolov3_416_epoch_9.pt'
120 |     # model_path = 'weights-yolov3-face/yolov3_demo_416_epoch_7.pt'
121 |     model_path = 'weights-yolov3-face/yolov3_demo_416_epoch_2.pt'
122 |     model_cfg = 'yolo'
123 |     # video_path = "/root/cv/pycharm/人脸检测/yolo3-人脸检测/激励自己.mp4"  # test video
124 |     video_path = "./11.mp4"  # test video
125 |     # input image size
126 |     img_size = 416
127 |     # detection confidence threshold
128 |     conf_thres = 0.2
129 |     # NMS threshold
130 |     nms_thres = 0.6
131 |     with torch.no_grad():
132 |         detect(
133 |             model_path=model_path,
134 |             cfg=model_cfg,
135 |             data_cfg=data_config,
136 |             img_size=img_size,
137 |             conf_thres=conf_thres,
138 |             nms_thres=nms_thres,
139 |             video_path=video_path
140 |         )
141 |
--------------------------------------------------------------------------------
/resnet34 姿态检测/predict.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import torch
4 | import torch.nn as nn
5 | import numpy as np
6 |
7 | import math
8 | import cv2
9 | import torch.nn.functional as F
10 |
11 | from util.common_utils import *
12 | from model import resnet50, resnet34, resnet18
13 | import matplotlib.pyplot as plt
14 |
15 |
16 | def model_predict(ops, model_, img):
17 |     with torch.no_grad():
18 |         idx = 0
19 |         # 3. data loading
20 |         # iterate over the folder
21 |         # image width/height
22 |         img_width = img.shape[1]
23 |         img_height = img.shape[0]
24 |         # resize
25 |         # adjust to the model input size
26 |         img_ = cv2.resize(img, (ops.img_size[1], ops.img_size[0]), interpolation=cv2.INTER_CUBIC)
27 |         # type conversion
28 |         img_ = img_.astype(np.float32)
29 |         # normalization
30 |         img_ = (img_ - 128.) / 256.
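# Illustrative note: the (img_ - 128.) / 256. normalization above must match what
# util/datasets.py applied at training time, and the * 90 rescaling below undoes the
# angle / 90. normalization used there; e.g. a raw output of -0.197 maps back to
# -0.197 * 90 ≈ -17.7 degrees of yaw, matching the sample values in the comments above.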
31 | # HWC->CHW 32 | img_ = img_.transpose(2, 0, 1) 33 | img_ = torch.from_numpy(img_) 34 | # 增加一个batch通道(bs, 3, h, w) 35 | img_ = img_.unsqueeze_(0) 36 | # 4.模型预测 37 | if use_cuda: 38 | img_ = img_.cuda() 39 | pre_ = model_(img_) 40 | out_put = pre_.cpu().detach().numpy() 41 | out_put = np.squeeze(out_put) 42 | yaw, pitch, roll = out_put 43 | yaw = yaw * 90 44 | pitch = pitch * 90 45 | roll = roll * 90 46 | print("yaw: {:.1f}, pitch: {:.1f}, roll: {:.1f}".format(yaw, pitch, roll)) 47 | return [yaw, pitch, roll] 48 | 49 | 50 | if __name__ == "__main__": 51 | # 1.配置信息解析 52 | parser = argparse.ArgumentParser(description=' Project face euler angle Test') 53 | # 训练好的模型路径 54 | parser.add_argument('--test_model', type=str, #yaw: -17.7, pitch: 14.9, roll: 7.3 55 | # default='./model_exp/resnet_18_imgsize_256-epoch-15.pth',#yaw: -4.3, pitch: 10.6, roll: -3.1 56 | default='./model_exp/2022-01-24_00-46-50/resnet_18_imgsize_256-epoch-5.pth', 57 | help='test_model') 58 | # 模型类型 59 | parser.add_argument('--model', type=str, default='resnet_18', 60 | help='model : resnet_x') 61 | # 分类类别个数 62 | parser.add_argument('--num_classes', type=int, default=3, 63 | help='num_classes') 64 | # GPU选择 65 | parser.add_argument('--GPUS', type=str, default='0', 66 | help='GPUS') 67 | # 测试集路径 68 | parser.add_argument('--test_path', type=str, default='/root/cv/dataset/人脸/datasets/face_euler_angle_datasets_mini/', 69 | help='test_path') 70 | # 输入模型图片尺寸 71 | parser.add_argument('--img_size', type=tuple, default=(256, 256), 72 | help='img_size') 73 | # 是否可视化图片 74 | parser.add_argument('--vis', type=bool, default=True, 75 | help='vis') 76 | print('\n/******************* {} ******************/\n'.format(parser.description)) 77 | # -------------------------------------------------------------------------- 78 | # 解析添加参数 79 | ops = parser.parse_args() 80 | # parse_args()方法的返回值为namespace,用vars()内建函数化为字典 81 | unparsed = vars(ops) 82 | # 打印参数配置信息 83 | for key in unparsed.keys(): 84 | print('{} : {}'.format(key, unparsed[key])) 85 | # 设备设置 86 | os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS 87 | # 测试图片文件夹路径 88 | test_path = ops.test_path 89 | # 2.模型加载 90 | # 第一步:构建模型 91 | if ops.model == "resnet-50": 92 | model_ = resnet50(num_classes=ops.num_classes, img_size=ops.img_size[0]) 93 | elif ops.model == "resnet-34": 94 | model_ = resnet34(num_classes=ops.num_classes, img_size=ops.img_size[0]) 95 | else: 96 | model_ = resnet18(num_classes=ops.num_classes, img_size=ops.img_size[0]) 97 | # 第二步:获取设备信息 98 | use_cuda = torch.cuda.is_available() 99 | device = torch.device('cuda:0' if use_cuda else 'cpu') 100 | model_.to(device) 101 | model_.eval() 102 | # 第三步:加载预训练模型 103 | if os.access(ops.test_model, os.F_OK): 104 | ckpt = torch.load(ops.test_model, map_location=device) 105 | model_.load_state_dict(ckpt) 106 | 107 | font = cv2.FONT_HERSHEY_SIMPLEX 108 | cap = cv2.VideoCapture(0) 109 | # 获取属性 110 | frame_width = int(cap.get(3)) 111 | frame_height = int(cap.get(4)) 112 | while (cap.isOpened()): 113 | ret, frame = cap.read() # 获取每一帧图像 114 | if ret == True: 115 | result = model_predict(ops, model_, frame) 116 | img = cv2.putText(frame, "yaw:{:.1f},pitch:{:.1f},roll:{:.1f}".format(result[0], result[1], result[2]), (1, 80), 117 | cv2.FONT_HERSHEY_DUPLEX, 1, 118 | (55, 0, 220), 5) 119 | # img = cv2.putText(frame, "ypr:{:.1f},{:.1f},{:.1f}".format(result[0], result[1], result[2]), (1, 80), 120 | # cv2.FONT_HERSHEY_DUPLEX, 2, 121 | # (255, 50, 50), 2) 122 | if result is None: 123 | continue 124 | cv2.imshow('result', img) 125 | if cv2.waitKey(25) & 0xFF 
== ord("q"): 126 | break 127 | cap.release() 128 | cv2.destroyAllWindows() 129 | -------------------------------------------------------------------------------- /ArcFace/make_facebank.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | warnings.filterwarnings('ignore') 4 | import os 5 | import torch 6 | from model import Backbone 7 | import argparse 8 | from pathlib import Path 9 | from PIL import Image 10 | import numpy as np 11 | import io 12 | from torchvision import transforms as trans 13 | import torch 14 | from model import l2_norm 15 | 16 | 17 | def prepare_facebank(path_images, facebank_path, model, device, tta=True): 18 | ''' 19 | 创建人脸数据特征向量 20 | :param path_images:图像路径 21 | :param facebank_path:输出保存路径 22 | :param model:人脸特征提取使用的模型 23 | :param device:设备信息 24 | :param tta:是否获取镜像的特征 25 | :return: 26 | ''' 27 | test_transform_ = trans.Compose([ 28 | trans.RandomResizedCrop(112, scale=(1.0, 1.0), ratio=(1.0, 1.0)), # scale随机裁剪的面积占比,ratio随机裁剪长宽比 29 | trans.ToTensor(), 30 | trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 31 | ]) 32 | model.eval() 33 | embeddings = [] 34 | names = ['Unknown'] 35 | idx = 0 # 人脸类别数 36 | for path in path_images.iterdir(): 37 | if path.is_file(): 38 | continue 39 | else: 40 | idx += 1 41 | embs = [] 42 | for file in path.iterdir(): 43 | if not file.is_file(): 44 | continue 45 | else: 46 | try: 47 | img = Image.open(file).convert('RGB') # 读取图像 48 | print(" {}) {}".format(idx + 1, file)) 49 | except: 50 | continue 51 | if img.size != (112, 112): 52 | try: 53 | img = img.resize((112, 112)) 54 | except: 55 | continue 56 | with torch.no_grad(): 57 | if tta: 58 | mirror = trans.functional.hflip(img) 59 | emb = model(test_transform_(img).to(device).unsqueeze(0)) 60 | emb_mirror = model(test_transform_(mirror).to(device).unsqueeze(0)) 61 | embs.append(l2_norm(emb + emb_mirror)) 62 | else: 63 | embs.append(model(test_transform_(img).to(device).unsqueeze(0))) 64 | if len(embs) == 0: 65 | continue 66 | embedding = torch.cat(embs).mean(0, keepdim=True) 67 | embeddings.append(embedding) 68 | names.append(path.name) 69 | embeddings = torch.cat(embeddings) # shape(cls_num.1,embedding)->shape(cls_num,embedding) 70 | names = np.array(names) 71 | torch.save(embeddings, facebank_path + '/facebank_1.pth') 72 | np.save(facebank_path + '/names_1', names) 73 | return embeddings, names 74 | 75 | 76 | if __name__ == '__main__': 77 | # 参数配置 78 | parser = argparse.ArgumentParser(description='make facebank') 79 | # 模型 80 | parser.add_argument("--net_mode", help="which network, [ir, ir_se, mobilefacenet]", default='ir_se', type=str) 81 | # 网络深度 82 | parser.add_argument("--net_depth", help="how many layers [50,100,152]", default=50, type=int) 83 | # 预训练模型 84 | parser.add_argument("--finetune_backbone_model", help="finetune_backbone_model", 85 | # default="/root/cv/pycharm/人脸检测/人脸识别/save/model_2022-01-31-02-22-13_step_19188.pth", 86 | # default="/root/cv/pycharm/人脸检测/人脸识别/save/model_2022-01-31-05-27-40_step_2144.pth", 87 | default="/root/cv/pycharm/人脸检测/人脸识别/save/model_2022-01-31-12-05-37_step_12864.pth", #自己训练标准训练集99epoch 88 | # default="/root/cv/pycharm/人脸检测/人脸识别/local_save/model_2022-02-02-05-23-17_step_169.pth", 89 | # default="/root/cv/pycharm/人脸检测/人脸识别/face_verify-model_ir_se-50.pth", # 老师训练模型 90 | # default="./local_save/model_2022-01-31-07-27-53_step_400.pth", 91 | type=str) 92 | # 人脸仓库中的人脸图像 93 | parser.add_argument("--facebank_images_path", help="facebank_images_path", 94 | # 
default="C:\\Users\86183\Desktop\人脸识别数据集\imgs\\", type=str) 95 | # default="/root/cv/dataset/人脸/datasets/insight_face/imgs", type=str) 96 | default="/root/cv/pycharm/人脸检测/人脸识别/mydataset2", type=str) 97 | 98 | # 人脸仓库 99 | parser.add_argument("--facebank_path", help="facebank_path", default="./facebank/", type=str) 100 | # 是否翻转 101 | parser.add_argument("-tta", "--tta", help="whether test time augmentation", default=False, type=bool) 102 | 103 | args = parser.parse_args() 104 | # 设备信息 105 | device_ = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 106 | # 模型选择 107 | model_ = Backbone(args.net_depth, 1., args.net_mode).to(device_) 108 | print('{}_{} model generated'.format(args.net_mode, args.net_depth)) 109 | # 加载预训练模型 110 | if os.access(args.finetune_backbone_model, os.F_OK): 111 | model_.load_state_dict(torch.load(args.finetune_backbone_model, map_location='cpu')) 112 | print("-------->>> load model : {}".format(args.finetune_backbone_model)) 113 | # 模型预测 114 | model_.eval() 115 | # 创建模型仓库 116 | targets, names = prepare_facebank(Path(args.facebank_images_path), args.facebank_path, model_, device_, 117 | tta=args.tta) 118 | -------------------------------------------------------------------------------- /resnet34 姿态检测/util/common_utils.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8-*- 2 | # date:2020-04-11 3 | # Author: Eric.Lee 4 | # function: common utils 5 | 6 | import os 7 | import shutil 8 | import cv2 9 | import numpy as np 10 | import json 11 | import random 12 | 13 | 14 | # 创建路径 15 | def mkdir_(path, flag_rm=False): 16 | # 存在路径时 17 | # 若flag_rm = True,则删除文件重新创建 18 | # 否则不修改 19 | # 若不存在路劲,则创建路径即可 20 | if os.path.exists(path): 21 | if flag_rm == True: 22 | shutil.rmtree(path) 23 | os.mkdir(path) 24 | print('remove {} done ~ '.format(path)) 25 | else: 26 | os.mkdir(path) 27 | 28 | 29 | def plot_box(bbox, img, color=None, label=None, line_thickness=None): 30 | tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 31 | color = color or [random.randint(0, 255) for _ in range(3)] 32 | c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])) 33 | cv2.rectangle(img, c1, c2, color, thickness=tl) # 目标的bbox 34 | if label: 35 | tf = max(tl - 2, 1) 36 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size 37 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # 字体的bbox 38 | cv2.rectangle(img, c1, c2, color, -1) # label 矩形填充 39 | # 文本绘制 40 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) 41 | 42 | 43 | class JSON_Encoder(json.JSONEncoder): 44 | def default(self, obj): 45 | if isinstance(obj, np.integer): 46 | return int(obj) 47 | elif isinstance(obj, np.floating): 48 | return float(obj) 49 | elif isinstance(obj, np.ndarray): 50 | return obj.tolist() 51 | else: 52 | return super(JSON_Encoder, self).default(obj) 53 | 54 | 55 | # 绘制关键点:将96个关键点绘制在图像上 56 | def draw_landmarks(img, output, draw_circle): 57 | img_width = img.shape[1] 58 | img_height = img.shape[0] 59 | dict_landmarks = {} 60 | for i in range(int(output.shape[0] / 2)): 61 | x = output[i * 2 + 0] * float(img_width) 62 | y = output[i * 2 + 1] * float(img_height) 63 | if 41 >= i >= 33: 64 | if 'left_eyebrow' not in dict_landmarks.keys(): 65 | dict_landmarks['left_eyebrow'] = [] 66 | dict_landmarks['left_eyebrow'].append([int(x), int(y), (0, 255, 0)]) 67 | if draw_circle: 68 | cv2.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1) 69 | elif 50 >= i >= 42: 70 | if 
'right_eyebrow' not in dict_landmarks.keys(): 71 | dict_landmarks['right_eyebrow'] = [] 72 | dict_landmarks['right_eyebrow'].append([int(x), int(y), (0, 255, 0)]) 73 | if draw_circle: 74 | cv2.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1) 75 | elif 67 >= i >= 60: 76 | if 'left_eye' not in dict_landmarks.keys(): 77 | dict_landmarks['left_eye'] = [] 78 | dict_landmarks['left_eye'].append([int(x), int(y), (255, 0, 255)]) 79 | if draw_circle: 80 | cv2.circle(img, (int(x), int(y)), 2, (255, 0, 255), -1) 81 | elif 75 >= i >= 68: 82 | if 'right_eye' not in dict_landmarks.keys(): 83 | dict_landmarks['right_eye'] = [] 84 | dict_landmarks['right_eye'].append([int(x), int(y), (255, 0, 255)]) 85 | if draw_circle: 86 | cv2.circle(img, (int(x), int(y)), 2, (255, 0, 255), -1) 87 | elif 97 >= i >= 96: 88 | cv2.circle(img, (int(x), int(y)), 2, (0, 0, 255), -1) 89 | elif 54 >= i >= 51: 90 | if 'bridge_nose' not in dict_landmarks.keys(): 91 | dict_landmarks['bridge_nose'] = [] 92 | dict_landmarks['bridge_nose'].append([int(x), int(y), (0, 170, 255)]) 93 | if draw_circle: 94 | cv2.circle(img, (int(x), int(y)), 2, (0, 170, 255), -1) 95 | elif 32 >= i >= 0: 96 | if 'basin' not in dict_landmarks.keys(): 97 | dict_landmarks['basin'] = [] 98 | dict_landmarks['basin'].append([int(x), int(y), (255, 30, 30)]) 99 | if draw_circle: 100 | cv2.circle(img, (int(x), int(y)), 2, (255, 30, 30), -1) 101 | elif 59 >= i >= 55: 102 | if 'wing_nose' not in dict_landmarks.keys(): 103 | dict_landmarks['wing_nose'] = [] 104 | dict_landmarks['wing_nose'].append([int(x), int(y), (0, 255, 255)]) 105 | if draw_circle: 106 | cv2.circle(img, (int(x), int(y)), 2, (0, 255, 255), -1) 107 | elif 87 >= i >= 76: 108 | if 'out_lip' not in dict_landmarks.keys(): 109 | dict_landmarks['out_lip'] = [] 110 | dict_landmarks['out_lip'].append([int(x), int(y), (255, 255, 0)]) 111 | if draw_circle: 112 | cv2.circle(img, (int(x), int(y)), 2, (255, 255, 0), -1) 113 | elif 95 >= i >= 88: 114 | if 'in_lip' not in dict_landmarks.keys(): 115 | dict_landmarks['in_lip'] = [] 116 | dict_landmarks['in_lip'].append([int(x), int(y), (50, 220, 255)]) 117 | if draw_circle: 118 | cv2.circle(img, (int(x), int(y)), 2, (50, 220, 255), -1) 119 | else: 120 | if draw_circle: 121 | cv2.circle(img, (int(x), int(y)), 2, (255, 0, 255), -1) 122 | 123 | return dict_landmarks 124 | 125 | 126 | def draw_contour(image, dict): 127 | for key in dict.keys(): 128 | # print(key) 129 | _, _, color = dict[key][0] 130 | 131 | if 'basin' == key or 'wing_nose' == key: 132 | pts = np.array([[dict[key][i][0], dict[key][i][1]] for i in range(len(dict[key]))], np.int32) 133 | # print(pts) 134 | cv2.polylines(image, [pts], False, color) 135 | 136 | else: 137 | points_array = np.zeros((1, len(dict[key]), 2), dtype=np.int32) 138 | for i in range(len(dict[key])): 139 | x, y, _ = dict[key][i] 140 | points_array[0, i, 0] = x 141 | points_array[0, i, 1] = y 142 | 143 | # cv2.fillPoly(image, points_array, color) 144 | cv2.drawContours(image, points_array, -1, color, thickness=1) 145 | -------------------------------------------------------------------------------- /resnet 108人脸关键点/util/common_utils.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8-*- 2 | # date:2020-04-11 3 | # Author: Eric.Lee 4 | # function: common utils 5 | 6 | import os 7 | import shutil 8 | import cv2 9 | import numpy as np 10 | import json 11 | import random 12 | 13 | 14 | # 创建路径 15 | def mkdir_(path, flag_rm=False): 16 | # 存在路径时 17 | # 若flag_rm = True,则删除文件重新创建 18 | # 
否则不修改 19 | # 若不存在路劲,则创建路径即可 20 | if os.path.exists(path): 21 | if flag_rm == True: 22 | shutil.rmtree(path) 23 | os.mkdir(path) 24 | print('remove {} done ~ '.format(path)) 25 | else: 26 | os.mkdir(path) 27 | 28 | 29 | def plot_box(bbox, img, color=None, label=None, line_thickness=None): 30 | tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 31 | color = color or [random.randint(0, 255) for _ in range(3)] 32 | c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])) 33 | cv2.rectangle(img, c1, c2, color, thickness=tl) # 目标的bbox 34 | if label: 35 | tf = max(tl - 2, 1) 36 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size 37 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # 字体的bbox 38 | cv2.rectangle(img, c1, c2, color, -1) # label 矩形填充 39 | # 文本绘制 40 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) 41 | 42 | 43 | class JSON_Encoder(json.JSONEncoder): 44 | def default(self, obj): 45 | if isinstance(obj, np.integer): 46 | return int(obj) 47 | elif isinstance(obj, np.floating): 48 | return float(obj) 49 | elif isinstance(obj, np.ndarray): 50 | return obj.tolist() 51 | else: 52 | return super(JSON_Encoder, self).default(obj) 53 | 54 | 55 | # 绘制关键点:将96个关键点绘制在图像上 56 | def draw_landmarks(img, output, draw_circle): 57 | img_width = img.shape[1] 58 | img_height = img.shape[0] 59 | dict_landmarks = {} 60 | for i in range(int(output.shape[0] / 2)): 61 | x = output[i * 2 + 0] * float(img_width) 62 | y = output[i * 2 + 1] * float(img_height) 63 | if 41 >= i >= 33: 64 | if 'left_eyebrow' not in dict_landmarks.keys(): 65 | dict_landmarks['left_eyebrow'] = [] 66 | dict_landmarks['left_eyebrow'].append([int(x), int(y), (0, 255, 0)]) 67 | if draw_circle: 68 | cv2.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1) 69 | elif 50 >= i >= 42: 70 | if 'right_eyebrow' not in dict_landmarks.keys(): 71 | dict_landmarks['right_eyebrow'] = [] 72 | dict_landmarks['right_eyebrow'].append([int(x), int(y), (0, 255, 0)]) 73 | if draw_circle: 74 | cv2.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1) 75 | elif 67 >= i >= 60: 76 | if 'left_eye' not in dict_landmarks.keys(): 77 | dict_landmarks['left_eye'] = [] 78 | dict_landmarks['left_eye'].append([int(x), int(y), (255, 0, 255)]) 79 | if draw_circle: 80 | cv2.circle(img, (int(x), int(y)), 2, (255, 0, 255), -1) 81 | elif 75 >= i >= 68: 82 | if 'right_eye' not in dict_landmarks.keys(): 83 | dict_landmarks['right_eye'] = [] 84 | dict_landmarks['right_eye'].append([int(x), int(y), (255, 0, 255)]) 85 | if draw_circle: 86 | cv2.circle(img, (int(x), int(y)), 2, (255, 0, 255), -1) 87 | elif 97 >= i >= 96: 88 | cv2.circle(img, (int(x), int(y)), 2, (0, 0, 255), -1) 89 | elif 54 >= i >= 51: 90 | if 'bridge_nose' not in dict_landmarks.keys(): 91 | dict_landmarks['bridge_nose'] = [] 92 | dict_landmarks['bridge_nose'].append([int(x), int(y), (0, 170, 255)]) 93 | if draw_circle: 94 | cv2.circle(img, (int(x), int(y)), 2, (0, 170, 255), -1) 95 | elif 32 >= i >= 0: 96 | if 'basin' not in dict_landmarks.keys(): 97 | dict_landmarks['basin'] = [] 98 | dict_landmarks['basin'].append([int(x), int(y), (255, 30, 30)]) 99 | if draw_circle: 100 | cv2.circle(img, (int(x), int(y)), 2, (255, 30, 30), -1) 101 | elif 59 >= i >= 55: 102 | if 'wing_nose' not in dict_landmarks.keys(): 103 | dict_landmarks['wing_nose'] = [] 104 | dict_landmarks['wing_nose'].append([int(x), int(y), (0, 255, 255)]) 105 | if draw_circle: 106 | cv2.circle(img, (int(x), int(y)), 2, (0, 255, 255), -1) 107 | elif 87 >= 
i >= 76: 108 | if 'out_lip' not in dict_landmarks.keys(): 109 | dict_landmarks['out_lip'] = [] 110 | dict_landmarks['out_lip'].append([int(x), int(y), (255, 255, 0)]) 111 | if draw_circle: 112 | cv2.circle(img, (int(x), int(y)), 2, (255, 255, 0), -1) 113 | elif 95 >= i >= 88: 114 | if 'in_lip' not in dict_landmarks.keys(): 115 | dict_landmarks['in_lip'] = [] 116 | dict_landmarks['in_lip'].append([int(x), int(y), (50, 220, 255)]) 117 | if draw_circle: 118 | cv2.circle(img, (int(x), int(y)), 2, (50, 220, 255), -1) 119 | else: 120 | if draw_circle: 121 | cv2.circle(img, (int(x), int(y)), 2, (255, 0, 255), -1) 122 | 123 | return dict_landmarks 124 | 125 | 126 | def draw_contour(image, dict): 127 | for key in dict.keys(): 128 | # print(key) 129 | _, _, color = dict[key][0] 130 | 131 | if 'basin' == key or 'wing_nose' == key: 132 | pts = np.array([[dict[key][i][0], dict[key][i][1]] for i in range(len(dict[key]))], np.int32) 133 | # print(pts) 134 | cv2.polylines(image, [pts], False, color) 135 | 136 | else: 137 | points_array = np.zeros((1, len(dict[key]), 2), dtype=np.int32) 138 | for i in range(len(dict[key])): 139 | x, y, _ = dict[key][i] 140 | points_array[0, i, 0] = x 141 | points_array[0, i, 1] = y 142 | 143 | # cv2.fillPoly(image, points_array, color) 144 | cv2.drawContours(image, points_array, -1, color, thickness=1) 145 | -------------------------------------------------------------------------------- /resnet34 姿态检测/util/datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import cv2 5 | import numpy as np 6 | import torch 7 | from torch.utils.data import DataLoader, Dataset 8 | import json 9 | 10 | 11 | # from data_agu import * 12 | 13 | 14 | class LoadImagesAndLabels(Dataset): 15 | def __init__(self, ops, img_size=(224, 224), flag_agu=False): 16 | ''' 17 | 初始化函数 18 | :param ops:配置文件读取对象 19 | :param img_size:图像输入尺寸 20 | :param flag_agu:工具类..暂定 21 | ''' 22 | file_list = [] 23 | bboxes_list = [] 24 | angles_list = [] 25 | # 计数 26 | idx = 0 27 | # 获取图像路径 28 | images_path = ops.train_path + 'images/' 29 | for f_ in os.listdir(images_path): 30 | print('加载数据进度-->', idx*100 / len(os.listdir(images_path))) 31 | # 获取对应label路径 32 | label_path = (images_path + f_).replace("images", "labels").replace(".jpg", ".json") 33 | # 读取json 34 | f = open(label_path, encoding='utf8') 35 | dict = json.load(f) 36 | f.close() 37 | # 获取角度和bbox目标框 38 | angle = dict["euler_angle"] 39 | bbox = dict['bbox'] 40 | idx += 1 41 | print(" images : {}".format(idx), end="\r") 42 | file_list.append(images_path + f_) 43 | bboxes_list.append((int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))) 44 | angles_list.append((angle['yaw'], angle['pitch'], angle['roll'])) 45 | self.files = file_list 46 | self.bboxes = bboxes_list 47 | self.angles = angles_list 48 | self.img_size = img_size 49 | self.flag_agu = flag_agu 50 | 51 | def __len__(self): 52 | return len(self.files) 53 | 54 | def __getitem__(self, index): 55 | img_path = self.files[index] 56 | yaw, pitch, roll = self.angles[index] 57 | bbox = self.bboxes[index] 58 | # 去读图像 59 | img = cv2.imread(img_path) 60 | # 获取人脸范围 61 | xmin, ymin, xmax, ymax = bbox[0], bbox[1], bbox[2], bbox[3] 62 | face_w = xmax - xmin 63 | face_h = ymax - ymin 64 | # 随机扩展 65 | x_min = int(xmin - random.randint(-6, int(face_w * 3 / 5))) 66 | y_min = int(ymin - random.randint(-6, int(face_h * 2 / 3))) 67 | x_max = int(xmax + random.randint(-6, int(face_w * 3 / 5))) 68 | y_max = int(ymax + random.randint(-12, int(face_h * 
2 / 5)))
69 |         # clip the expanded box to the image bounds
70 |         x_min = np.clip(x_min, 0, img.shape[1] - 1)
71 |         x_max = np.clip(x_max, 0, img.shape[1] - 1)
72 |         y_min = np.clip(y_min, 0, img.shape[0] - 1)
73 |         y_max = np.clip(y_max, 0, img.shape[0] - 1)
74 |         try:
75 |             face_crop = img[y_min:y_max, x_min:x_max, :]
76 |         except:
77 |             face_crop = img[bbox[1]:bbox[3], bbox[0]:bbox[2], :]  # fall back to the original annotation box
78 | 
79 |         if random.random() >= 0.5:
80 |             face_crop = cv2.flip(face_crop, 1)
81 |             yaw = -yaw
82 |             roll = -roll  # a horizontal flip negates yaw and roll; pitch is unchanged
83 |         img_ = cv2.resize(face_crop, self.img_size, interpolation=random.randint(0, 4))  # random cv2 interpolation flag (0~4)
84 |         # colour augmentation
85 |         if self.flag_agu:
86 |             if random.random() > 0.7:
87 |                 img_hsv = cv2.cvtColor(img_, cv2.COLOR_BGR2HSV)
88 |                 # random hue shift
89 |                 hue_x = random.randint(-10, 10)
90 |                 # shift the H channel and clamp it to the valid range 0~180
91 |                 img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_x)
92 |                 img_hsv[:, :, 0] = np.maximum(img_hsv[:, :, 0], 0)
93 |                 img_hsv[:, :, 0] = np.minimum(img_hsv[:, :, 0], 180)
94 |                 img_ = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)
95 |         # normalisation
96 |         img_ = img_.astype(np.float32)[:, :, ::-1]  # BGR -> RGB
97 |         # scale pixel values to roughly [-0.5, 0.5)
98 |         img_ = (img_ - 128.) / 256.
99 |         img_ = img_.transpose(2, 0, 1)  # HWC -> CHW (PyTorch layout)
100 |         # normalise the angles to [-1, 1]
101 |         yaw = yaw / 90.
102 |         pitch = pitch / 90.
103 |         roll = roll / 90.
104 |         # pack the three angles into one flat array
105 |         angles_ = np.array([yaw, pitch, roll]).ravel()
106 |         return img_, angles_
107 | 
108 | 
109 | if __name__ == "__main__":
110 |     import argparse
111 | 
112 |     parser = argparse.ArgumentParser(description=' Project Face Euler Angle Train')
113 |     # yaw, pitch, roll
114 |     parser.add_argument('--num_classes', type=int, default=3,
115 |                         help='num_classes')
116 |     # training set annotations
117 |     parser.add_argument('--train_path', type=str,
118 |                         default='/root/cv/dataset/人脸/datasets/face_euler_angle_datasets_mini/', help='train_path')
119 |     # images per training batch
120 |     parser.add_argument('--batch_size', type=int, default=1,
121 |                         help='batch_size')
122 |     # number of data-loading workers
123 |     parser.add_argument('--num_workers', type=int, default=0,
124 |                         help='num_workers')
125 |     # model input image size
126 |     parser.add_argument('--img_size', type=tuple, default=(256, 256),
127 |                         help='img_size')
128 |     # whether to apply data augmentation
129 |     parser.add_argument('--flag_agu', type=bool, default=True,
130 |                         help='data_augmentation')
131 | 
132 |     import matplotlib.pyplot as plt
133 | 
134 |     # --------------------------------------------------------------------------
135 |     ops = parser.parse_args()  # parse the arguments
136 |     # build the dataset
137 |     dataset = LoadImagesAndLabels(ops=ops, img_size=ops.img_size, flag_agu=ops.flag_agu)
138 |     print('len train datasets : %s' % (dataset.__len__()))
139 |     # the DataLoader yields one batch at a time
140 |     dataloader = DataLoader(dataset,
141 |                             batch_size=ops.batch_size,
142 |                             num_workers=ops.num_workers,
143 |                             shuffle=True)
144 |     # iterate over the training batches
145 |     for i, (imgs_, angles_) in enumerate(dataloader):
146 |         # print the angle labels
147 |         print(angles_)
148 |         for j in range(ops.batch_size):
149 |             # visualisation: de-normalise, CHW -> HWC, cast, RGB -> BGR
150 |             # cv2.imshow('result', np.uint8(imgs_[j].permute(1, 2, 0)*256.0+128.0)[:, :, ::-1])
151 |             # cv2.waitKey(0)
152 |             if imgs_.shape[0] == 1:
153 |                 plt.imshow(np.uint8(imgs_[0].permute(1, 2, 0) * 256.0 + 128.0)[:, :, ::-1])
154 |             else:
155 |                 plt.imshow(np.uint8(imgs_[j].permute(1, 2, 0) * 256.0 + 128.0)[:, :, ::-1])  # index with j, the position inside the batch
156 | 
157 |         plt.show()
158 | 
159 |     cv2.destroyAllWindows()
160 | 
--------------------------------------------------------------------------------
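Since the labels above are yaw/pitch/roll divided by 90, any consumer of this dataset has to undo that mapping before interpreting predictions. A small illustrative sketch of the round trip (the helper names and the 90-degree constant mirror the conventions in `__getitem__` above; the names are mine, not from the repo):

```python
import numpy as np

ANGLE_SCALE = 90.0  # matches yaw / 90., pitch / 90., roll / 90. in __getitem__

def decode_angles(normalized):
    """Map network outputs in [-1, 1] back to degrees (yaw, pitch, roll)."""
    yaw, pitch, roll = np.asarray(normalized, dtype=np.float32) * ANGLE_SCALE
    return yaw, pitch, roll

def flip_angles(yaw, pitch, roll):
    """Label transform for a horizontal flip: yaw and roll change sign, pitch does not."""
    return -yaw, pitch, -roll

print(decode_angles([0.5, -0.1, 0.0]))  # -> (45.0, -9.0, 0.0)
print(flip_angles(45.0, -9.0, 0.0))     # -> (-45.0, -9.0, -0.0)
```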
/yoloV3 人脸检测/util/dataset_teacherCode.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import math
3 | import os
4 | import random
5 | import shutil
6 | from pathlib import Path
7 | from PIL import Image
8 | # import matplotlib.pyplot as plt
9 | from tqdm import tqdm
10 | import cv2
11 | import numpy as np
12 | import torch
13 | from torch.utils.data import Dataset
14 | from torch.utils.data import DataLoader
15 | # from utils import letterbox, random_affine, xywh2xyxy, xyxy2xywh
16 | import sys
17 | root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
18 | sys.path.append(root_path)
19 | from util.utils import *
20 | import os
21 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
22 | # import utils.letterbox as letterbox
23 | # import utils.random_affine as random_affine
24 | # import utils.xyxy2xywh as xyxy2xywh
25 | 
26 | # dataset: loads the images together with their labels
27 | class LoadImagesAndLabels(Dataset):
28 |     # 2.1 initialisation
29 |     def __init__(self, path, batch_size, img_size=416, augment=False, multi_scale=False, root_path=os.path.curdir):
30 |         # collect the image files
31 |         with open(path, 'r') as file:
32 |             img_files = file.read().splitlines()
33 |             img_files = list(filter(lambda x: len(x) > 0, img_files))
34 |         np.random.shuffle(img_files)
35 |         self.img_files = img_files
36 |         self.batch_size = batch_size
37 |         self.img_size = img_size
38 |         self.augment = augment
39 |         self.multi_scale = multi_scale
40 |         self.root_path = root_path
41 |         self.scale_index = 0
42 |         if self.multi_scale:
43 |             self.img_size = img_size
44 |         # label files live next to the images: images/xxx.jpg -> labels/xxx.txt
45 |         self.label_files = [x.replace('images', 'labels').replace('.jpg', '.txt') for x in self.img_files]
46 | 
47 |     # 2.2 dataset size
48 |     def __len__(self):
49 |         return len(self.img_files)
50 | 
51 |     # 2.3 image loading and augmentation
52 |     def __getitem__(self, index):
53 |         # step 1: multi-scale training - draw a new input size once per batch
54 |         if self.multi_scale and (self.scale_index % self.batch_size == 0) and self.scale_index != 0:
55 |             self.img_size = random.choice(range(11, 19)) * 32
56 |         if self.multi_scale:
57 |             self.scale_index += 1
58 |         # step 2: read the image
59 |         img_path = os.path.join(self.root_path, self.img_files[index][2:])
60 |         img = cv2.imread(img_path)
61 |         # step 3: colour augmentation (HSV jitter)
62 |         augment_hsv = random.random() < 0.5
63 |         if self.augment and augment_hsv:
64 |             fraction = 0.5
65 |             img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
66 |             S = img_hsv[:, :, 1].astype(np.float32)
67 |             V = img_hsv[:, :, 2].astype(np.float32)
68 |             a = (random.random() * 2 - 1) * fraction + 1
69 |             S *= a
70 |             if a > 1:
71 |                 np.clip(S, None, 255, out=S)
72 |             a = (random.random() * 2 - 1) * fraction + 1
73 |             V *= a
74 |             if a > 1:
75 |                 np.clip(V, None, 255, out=V)
76 |             img_hsv[:, :, 1] = S
77 |             img_hsv[:, :, 2] = V
78 |             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
79 |         # step 4: resize the image
80 |         # original width and height
81 |         h, w, _ = img.shape
82 |         # resize + pad into a square of side img_size
83 |         img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)
84 |         # label path
85 |         label_path = os.path.join(self.root_path, self.label_files[index][2:])
86 |         labels = []
87 |         # read the label file
88 |         if os.path.isfile(label_path):
89 |             with open(label_path, 'r') as file:
90 |                 lines = file.read().splitlines()
91 |             x = np.array([x.split() for x in lines], dtype=np.float32)
92 |             if x.size > 0:
93 |                 labels = x.copy()
94 |                 # map normalised xywh to pixel xyxy in the letterboxed image (columns 1-4 = x1, y1, x2, y2)
95 |                 labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
96 |                 labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
97 |                 labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
98 |                 labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
99 | 
100 |         # step 5: geometric augmentation, adjusting the labels to match
101 |         # random affine transform
102 |         if self.augment:
103 |             img, labels = random_affine(img, labels, degrees=(-20, 20), translate=(0.1, 0.1), scale=(0.9, 1.1))
104 |         # back to normalised xywh
105 |         nl = len(labels)
106 |         if 
nl: 107 | labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size 108 | # 翻转 109 | # 水平翻转 110 | if self.augment: 111 | lr_flip = True 112 | if lr_flip and random.random() > 0.5: 113 | img = np.fliplr(img) 114 | if nl: 115 | labels[:, 1] = 1 - labels[:, 1] 116 | 117 | # 第六步:获取图像和标注信息结果 118 | # 标注信息 119 | labels_out = torch.zeros((nl, 6)) 120 | if nl: 121 | labels_out[:, 1:] = torch.from_numpy(labels) 122 | # 图像处理 123 | # 1.通道 124 | img = img[:, :, ::-1].transpose(2, 0, 1) 125 | # 2.类型 126 | img = np.ascontiguousarray(img, dtype=np.float32) 127 | # 3.归一化 128 | img /= 255.0 129 | return torch.from_numpy(img), labels_out, img_path, (h, w) 130 | 131 | # 2.4 获取batch数据 132 | @staticmethod 133 | def collate_fn(batch): 134 | img, label, img_path, hw = list(zip(*batch)) 135 | for i, l in enumerate(label): 136 | l[:, 0] = i 137 | print([x[0].shape for x in batch]) 138 | return torch.stack(img, 0), torch.cat(label, 0), img_path, hw 139 | # 3 数据获取测试 140 | if __name__ == "__main__": 141 | # 参数 142 | train_path = "/Users/yaoxiaoying/Desktop/人脸支付/02.code/datasets/yolo_widerface_open_train/anno/train.txt" 143 | root_path = "/Users/yaoxiaoying/Desktop/人脸支付/02.code/datasets" 144 | batch_size = 2 145 | img_size = 416 146 | num_workers = 2 147 | # 创建数据对象 148 | dataset = LoadImagesAndLabels(train_path, batch_size, img_size=img_size, augment=False, multi_scale=False, 149 | root_path=root_path) 150 | print(dataset.__len__()) 151 | # dataloader来获取数据 152 | dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True, 153 | collate_fn=dataset.collate_fn) 154 | # 遍历loader 155 | for i, (imgs, targets, img_path_, _) in enumerate(dataloader): 156 | # 标注信息 157 | print('标注信息', targets) 158 | # 遍历imgs获取每一副图像进行展示 159 | for j in range(batch_size): 160 | # 对图像进行处理:反归一化,表示形式,通道,类型 161 | img_tmp = np.uint8(imgs[j].permute(1, 2, 0) * 255.0)[:, :, ::-1] 162 | # 显示 163 | cv2.imshow('result', img_tmp) 164 | cv2.waitKey(0) 165 | # 保存 166 | out_path = os.path.join("/Users/yaoxiaoying/Desktop/人脸支付/03.课堂代码/yolo_v3/result_aug", 167 | os.path.basename(img_path_[j])) 168 | cv2.imwrite(out_path,img_tmp) 169 | cv2.destroyAllWindows() 170 | continue 171 | -------------------------------------------------------------------------------- /ArcFace/face_verify.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | warnings.filterwarnings("ignore") 4 | import os 5 | import torch 6 | from model import Backbone 7 | import argparse 8 | from pathlib import Path 9 | import cv2 10 | from torchvision import transforms as trans 11 | from util.datasets import de_preprocess 12 | import torch 13 | from model import l2_norm 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | from PIL import Image, ImageDraw, ImageFont 17 | 18 | 19 | # 加载pth,npy文件中存储的特征 20 | def load_facebank(facebank_path): 21 | embeddings = torch.load(facebank_path + '/facebank.pth', 22 | map_location=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")) 23 | names = np.load(facebank_path + '/names.npy') 24 | return embeddings, names 25 | 26 | 27 | def infer(model, device, faces, target_embs, threshold=1.2, tta=False): 28 | ''' 29 | :param model: 进行预测的模型 30 | :param device: 设备信息 31 | :param faces: 要处理的人脸图像 32 | :param target_embs: 数据库中的人脸特征 33 | :param threshold: 阈值 34 | :param tta: 进行水平翻转的增强 35 | :return: 36 | ''' 37 | # 将类型转换和标准化合并在一起 38 | test_transform = trans.Compose([ 39 | trans.ToTensor(), 40 | trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 41 | ]) 42 | 43 | # 特征向量 
44 | embs = [] 45 | # 遍历人脸图像 46 | for img in faces: 47 | # 若进行翻转 48 | if tta: 49 | # 镜像翻转 50 | mirror = trans.functional.hflip(img) 51 | # 模型预测 52 | emb = model(test_transform(img).to(device).unsqueeze(0)) 53 | emb_mirror = model(test_transform(mirror).to(device).unsqueeze(0)) 54 | # 获取最终的特征向量 55 | embs.append(l2_norm(emb + emb_mirror)) 56 | else: 57 | with torch.no_grad(): 58 | # 未进行翻转时,进行预测 59 | embs.append(model(test_transform(img).to(device).unsqueeze(0))) 60 | # 将特征拼接在一起 61 | source_embs = torch.cat(embs) 62 | # 计算要检测的图像特征与目标特征之间的差异 63 | diff_1 = source_embs.unsqueeze(-1) - target_embs.transpose(1, 0).unsqueeze(0) 64 | dist_1 = torch.sum(torch.pow(diff_1, 2), dim=1) 65 | 66 | diff = torch.mm(source_embs, target_embs.transpose(1, 0)) 67 | dist = torch.pow(diff, 2) * 64 68 | 69 | # print('dist_1-->', dist_1) 70 | # print('dist-->', dist) 71 | 72 | # 获取差异最小值及对应的索引 73 | # minimum, min_idx = torch.min(dist.squeeze(), dim=0) #计算欧式距离 距离越小说明相似度越高 74 | minimum, min_idx = torch.max(dist.squeeze(), dim=0) # 计算cos(x)时,值越大说明夹角越小 75 | # 若没有匹配成功,将索引设置为-1 76 | # min_idx[minimum > threshold] = -1 77 | min_idx[minimum < threshold] = -1 78 | 79 | return min_idx, minimum.unsqueeze(0) 80 | 81 | 82 | if __name__ == '__main__': 83 | # 配置相关参数 84 | parser = argparse.ArgumentParser(description='make facebank') 85 | # 模型 86 | parser.add_argument("--net_mode", help="which network, [ir, ir_se]", default='ir_se', type=str) 87 | # 模型深度 88 | parser.add_argument("--net_depth", help="how many layers [50,100,152]", default=50, type=int) 89 | # 预训练模型 90 | parser.add_argument("--finetune_backbone_model", help="finetune_backbone_model", 91 | # default="./save/model_2022-01-31-05-27-40_step_2144.pth", 92 | # default="./save/model_2022-01-31-05-53-58_step_500.pth", 93 | # default="/root/cv/pycharm/人脸检测/人脸识别/save/model_2022-01-31-06-22-37_step_3752.pth", 94 | default="/root/cv/pycharm/人脸检测/人脸识别/save/model_2022-01-31-12-05-37_step_12864.pth", #自己训练标准训练集99epoch 95 | # default="/root/cv/pycharm/人脸检测/人脸识别/local_save/model_2022-02-02-05-23-17_step_169.pth", 96 | # default="face_verify-model_ir_se-50.pth", # 老师训练模型 97 | type=str) 98 | # 人脸仓库 99 | parser.add_argument("--facebank_path", help="facebank_path", 100 | default="./facebank", type=str) 101 | # 是否进行水平翻转 102 | parser.add_argument("-tta", "--tta", help="whether test time augmentation", default=False, type=bool) 103 | # 要进行识别的人脸 104 | # parser.add_argument("-example", help="example", 105 | # default="G://机器视觉//cv项目代码//人脸检测项目代码//facetoPay//facetoPay//insight_face//example//", 106 | # type=str) 107 | parser.add_argument("-example", help="example", default="/root/cv/pycharm/人脸检测/人脸识别/example/example2/", type=str) 108 | # parser.add_argument("-example", help="example", default="C:\\Users\86183\Desktop\example\\", type=str) 109 | # 参数解析 110 | args = parser.parse_args() 111 | # 设备信息 112 | device_ = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 113 | # 模型选择 114 | model_ = Backbone(args.net_depth, 1., args.net_mode).to(device_) 115 | print('{}_{} model generated'.format(args.net_mode, args.net_depth)) 116 | # 加载预训练模型 117 | if os.access(args.finetune_backbone_model, os.F_OK): 118 | model_.load_state_dict(torch.load(args.finetune_backbone_model, 119 | map_location=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))) 120 | print("-------->>> load model : {}".format(args.finetune_backbone_model)) 121 | # 模型前向传播 122 | model_.eval() 123 | # 加载人脸仓库中的人脸特征及对应的名称 124 | targets, names = load_facebank(args.facebank_path) 125 | # 打印结果 126 | print("names : 
{}".format(names)) 127 | print("targets size : {}".format(targets.size())) 128 | # 要识别的人脸 129 | faces_identify = [] 130 | idx = 0 131 | # 遍历要处理的图像 132 | for file in os.listdir(args.example): 133 | # 若非图片文件,进行下一次循环 134 | # if not file.endswith('png'): 135 | # continue 136 | # 读取图像数据 137 | img = cv2.imread(args.example + file) 138 | label = file.split('-')[0] 139 | if img is None: 140 | continue 141 | # 获取图像的宽高 142 | x, y = img.shape[0:2] 143 | # 送入网络中的图像必须是112*112 144 | if x != 112 or y != 112: 145 | img = cv2.resize(img, (112, 112)) 146 | # 将数据放入list中 147 | faces_identify.append(Image.fromarray(img)) 148 | # 进行检测,results是索引,face_dst是差异 149 | results, face_dst = infer(model_, device_, faces_identify, targets, threshold=2.7, tta=False) 150 | faces_identify.pop() 151 | # 将其转换numpy的格式 152 | face_dst = list(face_dst.cpu().detach().numpy()) 153 | # 获取姓名和差异的大小 154 | print("{}) recognize:{} ,dst : {}".format(idx + 1, names[results + 1], face_dst)) 155 | # 将检测结果绘制在图像上 156 | # cv2.putText(img, names[results[idx] + 1], (2, 13), cv2.FONT_HERSHEY_DUPLEX, 0.5, (55, 0, 220), 5) 157 | cv2.putText(img, 158 | label + '-' + names[results + 1], (2, 13), 159 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 50, 50), 1) 160 | plt.imshow(img[:, :, ::-1]) 161 | plt.show() 162 | # cv2.namedWindow("imag_face", 0) 163 | # cv2.imshow("imag_face", img) 164 | # cv2.waitKey(1) 165 | # 将结果写入到文件中 166 | # cv2.imwrite(args.example + "results/" + file, img) 167 | idx += 1 168 | cv2.destroyAllWindows() 169 | print() 170 | print('------------------------') 171 | -------------------------------------------------------------------------------- /yoloV3 人脸检测/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | from yoloV3 import Yolov3, Yolov3Tiny 3 | from util.parse_config import parse_data_cfg 4 | from util.torch_utils import select_device 5 | import torch 6 | from torch.utils.data import DataLoader 7 | from util.datasets import LoadImagesAndLabels 8 | from util.utils import * 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | from matplotlib.pyplot import MultipleLocator 12 | import time 13 | import sys 14 | 15 | root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 16 | sys.path.append(root_path) 17 | from util.utils import * 18 | 19 | 20 | def train(data_config='cfg/face.data'): 21 | # 1.配置文件解析 22 | get_data_cfg = parse_data_cfg(data_config) 23 | gpus = get_data_cfg['gpus'] 24 | num_workers = int(get_data_cfg['num_workers']) 25 | cfg_model = get_data_cfg['cfg_model'] 26 | train_path = get_data_cfg['train'] 27 | num_classes = int(get_data_cfg['classes']) 28 | finetune_model = int(get_data_cfg['batch_size']) 29 | batch_size = int(get_data_cfg['batch_size']) 30 | img_size = int(get_data_cfg['img_size']) 31 | multi_scale = get_data_cfg['multi_scale'] 32 | epochs = int(get_data_cfg['epochs']) 33 | lr_step = str(get_data_cfg['lr_step']) 34 | lr0 = float(get_data_cfg['lr0']) 35 | # root_path = str(get_data_cfg['root_path']) 36 | os.environ['CUDA_VISIBLE_DEVICES'] = gpus 37 | device = select_device() 38 | if multi_scale == 'True': 39 | multi_scale = True 40 | else: 41 | multi_scale = False 42 | 43 | # 2 模型加载 44 | if '-tiny' in cfg_model: 45 | model = Yolov3Tiny(num_classes) 46 | print('nimi') 47 | weights = './weights-yolov3-face-tiny4/' 48 | else: 49 | model = Yolov3(num_classes) 50 | weights = './weights-yolov3-face3-heat' 51 | model = model.to(device) 52 | # 设置模型训练位置 53 | if not os.path.exists(weights): 54 | os.mkdir(weights) 55 | latest = weights + 
/yoloV3 人脸检测/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | from yoloV3 import Yolov3, Yolov3Tiny
3 | from util.parse_config import parse_data_cfg
4 | from util.torch_utils import select_device
5 | import torch
6 | from torch.utils.data import DataLoader
7 | from util.datasets import LoadImagesAndLabels
8 | from util.utils import *
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | from matplotlib.pyplot import MultipleLocator
12 | import time
13 | import sys
14 | 
15 | root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
16 | sys.path.append(root_path)
17 | from util.utils import *
18 | 
19 | 
20 | def train(data_config='cfg/face.data'):
21 |     # 1. parse the config file
22 |     get_data_cfg = parse_data_cfg(data_config)
23 |     gpus = get_data_cfg['gpus']
24 |     num_workers = int(get_data_cfg['num_workers'])
25 |     cfg_model = get_data_cfg['cfg_model']
26 |     train_path = get_data_cfg['train']
27 |     num_classes = int(get_data_cfg['classes'])
28 |     finetune_model = get_data_cfg.get('finetune_model')  # optional pretrained-weights entry (unused below)
29 |     batch_size = int(get_data_cfg['batch_size'])
30 |     img_size = int(get_data_cfg['img_size'])
31 |     multi_scale = get_data_cfg['multi_scale']
32 |     epochs = int(get_data_cfg['epochs'])
33 |     lr_step = str(get_data_cfg['lr_step'])
34 |     lr0 = float(get_data_cfg['lr0'])
35 |     # root_path = str(get_data_cfg['root_path'])
36 |     os.environ['CUDA_VISIBLE_DEVICES'] = gpus
37 |     device = select_device()
38 |     if multi_scale == 'True':
39 |         multi_scale = True
40 |     else:
41 |         multi_scale = False
42 | 
43 |     # 2. build the model
44 |     if '-tiny' in cfg_model:
45 |         model = Yolov3Tiny(num_classes)
46 |         print('tiny')
47 |         weights = './weights-yolov3-face-tiny4/'
48 |     else:
49 |         model = Yolov3(num_classes)
50 |         weights = './weights-yolov3-face3-heat'
51 |     model = model.to(device)
52 |     # directory for checkpoints
53 |     if not os.path.exists(weights):
54 |         os.mkdir(weights)
55 |     latest = weights + 'latest_{}.pt'.format(img_size)
56 |     # optimizer and learning-rate decay schedule
57 |     optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)
58 |     milestones = [int(i) for i in lr_step.split(',')]
59 |     print(milestones, 'learning-rate decay milestones')
60 |     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1, last_epoch=-1)
61 | 
62 |     # data loading
63 | 
64 |     dataset = LoadImagesAndLabels(train_path, batch_size=batch_size, img_size=img_size, augment=True,
65 |                                   multi_scale=multi_scale)
66 |     dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers,
67 |                             collate_fn=dataset.collate_fn, drop_last=True)
68 |     flag_start = False
69 |     xy_loss = []
70 |     wh_loss = []
71 |     conf_loss = []
72 |     cls_loss = []
73 |     total_loss = []
74 |     # training loop over the dataset
75 |     for epoch in range(0, epochs):
76 |         model.train()
77 |         if flag_start:  # skip scheduler.step() on the first epoch so the LR stays at lr0
78 |             scheduler.step()
79 |         flag_start = True
80 |         mloss = defaultdict(float)  # running mean of each loss term
81 |         # training
82 |         t = time.time()
83 |         nb = len(dataloader)
84 |         print('nb-->', nb)
85 |         n_burnin = min(round(nb / 5 + 1), 1000)
86 |         print(n_burnin)
87 |         # iterate over the batches
88 |         for i, (imgs, targets, img_path, _) in enumerate(dataloader):
89 |             multi_size = imgs.size()
90 |             imgs = imgs.to(device)
91 |             targets = targets.to(device)
92 |             nt = len(targets)
93 |             if nt == 0:
94 |                 # skip batches without labels
95 |                 continue
96 |             # learning-rate warm-up
97 |             # if epoch > 0:
98 |             if epoch == 0 and i <= n_burnin:
99 |                 lr = lr0 * (i / n_burnin) ** 4  # ramp the LR up from near zero during the first epoch
100 |                 # lr = lr0 * epoch * i*8 / n_burnin
101 |                 print(lr)
102 |                 for x in optimizer.param_groups:
103 |                     x['lr'] = lr  # push the warm-up LR into the optimizer
104 |             # forward pass
105 |             pred = model(imgs)
106 |             target_list = build_targets(model, targets)
107 |             # loss
108 |             loss, loss_dict = compute_loss(pred, target_list, nt, batch_size)
109 |             # xy/wh use MSE, objectness uses BCEWithLogitsLoss, classification uses CrossEntropyLoss
110 |             # the usual three steps
111 |             loss.backward()        # backprop: compute the gradients
112 |             optimizer.step()       # update the weights
113 |             optimizer.zero_grad()  # clear the gradients
114 |             # print the running mean losses
115 |             for key, value in loss_dict.items():
116 |                 mloss[key] = (mloss[key] * i + value) / (i + 1)
117 |             print(
118 |                 ' Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.3f}, wh {:.3f}, '
119 |                 'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'
120 |                 .format(epoch, epochs - 1, i, nb - 1,
121 |                         multi_size[2], multi_size[3]
122 |                         , nt, scheduler.get_lr()[0],
123 |                         mloss['xy'], mloss['wh'],
124 |                         mloss['conf'], mloss['cls'],
125 |                         mloss['total'], time.time() - t), end='\n')
126 |             xy_loss.append(mloss['xy'])
127 |             wh_loss.append(mloss['wh'])
128 |             conf_loss.append(mloss['conf'])
129 |             cls_loss.append(mloss['cls'])
130 |             total_loss.append(mloss['total'])
131 |             t = time.time()
132 |         # save a checkpoint every epoch
133 |         chkpt = {
134 |             'epoch': epoch,
135 |             'model': model.module.state_dict() if type(
136 |                 model) is nn.parallel.DistributedDataParallel else model.state_dict()
137 |         }
138 |         torch.save(chkpt, weights + '/yolov3_last_{}_epoch_{}.pt'.format(img_size, epoch))
139 |         # first figure
140 |         plt.figure(0)
141 | 
142 |         # xy-loss curve
143 |         plt.plot(xy_loss, label="xy Loss")
144 |         # wh-loss curve, in red
145 |         plt.plot(wh_loss, color="red", label="wh Loss")
146 |         # confidence-loss curve, in green
147 |         plt.plot(conf_loss, color="green", label="conf Loss")
148 |         # classification-loss curve, in yellow
149 |         plt.plot(cls_loss, color="yellow", label="cls Loss")
150 |         # total-loss curve, in blue
151 |         plt.plot(total_loss, color="blue", label="sum Loss")
152 |         # legend in the upper left
153 |         # plt.legend(loc='upper left')
154 |         # 
保存图片 155 | plt.savefig(weights + '/yolov3_last_{}_epoch_{}_loss.png'.format(img_size, epoch)) 156 | del chkpt 157 | 158 | # 创建第一张画布 159 | plt.figure(0) 160 | 161 | # 绘制坐标损失曲线 162 | plt.plot(xy_loss, label="xy Loss") 163 | # 绘制宽高损失曲线 , 颜色为红色 164 | plt.plot(wh_loss, color="red", label="wh Loss") 165 | # 绘制置信度损失曲线 , 颜色为绿色 166 | plt.plot(conf_loss, color="green", label="conf Loss") 167 | # 绘制分类损失曲线 , 颜色为绿色 168 | plt.plot(cls_loss, color="yellow", label="cls Loss") 169 | # 绘制总损失曲线 , 颜色为蓝色 170 | plt.plot(total_loss, color="blue", label="sum Loss") 171 | # 曲线说明在左上方 172 | plt.legend(loc='upper left') 173 | # 保存图片 174 | plt.savefig(weights+"./loss.png") 175 | 176 | 177 | if __name__ == '__main__': 178 | train(data_config='cfg/face.data') 179 | print('完成') 180 | # 181 | 182 | 183 | # train=/www/dataset/yolo_helmet_train/yolo_helmet_train/anno/train.txt 184 | # valid=/www/dataset/yolo_helmet_train/yolo_helmet_train/anno/train.txt 185 | -------------------------------------------------------------------------------- /yoloV3 人脸检测/train2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from yoloV3 import Yolov3, Yolov3Tiny 3 | from util.parse_config import parse_data_cfg 4 | from util.torch_utils import select_device 5 | import torch 6 | from torch.utils.data import DataLoader 7 | from util.datasets import LoadImagesAndLabels 8 | from util.utils import * 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | from matplotlib.pyplot import MultipleLocator 12 | import time 13 | import sys 14 | 15 | root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 16 | sys.path.append(root_path) 17 | from util.utils import * 18 | 19 | 20 | def train(data_config='cfg/face.data'): 21 | # 1.配置文件解析 22 | get_data_cfg = parse_data_cfg(data_config) 23 | gpus = get_data_cfg['gpus'] 24 | num_workers = int(get_data_cfg['num_workers']) 25 | cfg_model = get_data_cfg['cfg_model'] 26 | train_path = get_data_cfg['train'] 27 | num_classes = int(get_data_cfg['classes']) 28 | finetune_model = int(get_data_cfg['batch_size']) 29 | batch_size = int(get_data_cfg['batch_size']) 30 | img_size = int(get_data_cfg['img_size']) 31 | multi_scale = get_data_cfg['multi_scale'] 32 | epochs = int(get_data_cfg['epochs']) 33 | lr_step = str(get_data_cfg['lr_step']) 34 | lr0 = float(get_data_cfg['lr0']) 35 | # root_path = str(get_data_cfg['root_path']) 36 | os.environ['CUDA_VISIBLE_DEVICES'] = gpus 37 | device = select_device() 38 | if multi_scale == 'True': 39 | multi_scale = True 40 | else: 41 | multi_scale = False 42 | 43 | # 2 模型加载 44 | # if '-tiny' in cfg_model: 45 | # model = Yolov3Tiny(num_classes) 46 | # print('nimi') 47 | # weights = './weights-yolov3-face-tiny5/' 48 | # else: 49 | # model = Yolov3(num_classes) 50 | # weights = './weights-yolov3-face-heat/' 51 | model = Yolov3(num_classes) 52 | weights = './weights-yolov3-face4e-10/' 53 | modelpath = '/root/cv/pycharm/人脸检测/yolo3-人脸检测/model-19/yolov3_last_416_epoch_19.pt' 54 | # 第二步:加载模型训练结果 55 | device = select_device() 56 | if os.access(modelpath, os.F_OK): 57 | model.load_state_dict(torch.load(modelpath, map_location=device)['model']) 58 | else: 59 | return False 60 | model.to(device) 61 | use_cuda = torch.cuda.is_available() 62 | # 设置模型训练位置 63 | if not os.path.exists(weights): 64 | os.mkdir(weights) 65 | latest = weights + 'latest_{}.pt'.format(img_size) 66 | # 设置优化器和学习率衰减策略 67 | optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005) 68 | milestones = [int(i) for i in 
lr_step.split(',')] 69 | print(milestones, '动态学习率变换') 70 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1) 71 | 72 | # 数据加载 73 | 74 | dataset = LoadImagesAndLabels(train_path, batch_size=batch_size, img_size=img_size, augment=True, 75 | multi_scale=multi_scale) 76 | dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, 77 | collate_fn=dataset.collate_fn, drop_last=True) 78 | flag_start = False 79 | xy_loss = [] 80 | wh_loss = [] 81 | conf_loss = [] 82 | cls_loss = [] 83 | total_loss = [] 84 | # 遍历数据集开始训练 85 | for epoch in range(0, epochs): 86 | model.train() 87 | if flag_start: # 第一次循环 学习率不变化 88 | scheduler.step() 89 | flag_start = True 90 | mloss = defaultdict(float) # loss初始化,定义一个字典 91 | # 模型训练 92 | t = time.time() 93 | nb = len(dataloader) 94 | print('nb-->', nb) 95 | n_burnin = min(round(nb / 5 + 1), 1000) 96 | print(n_burnin) 97 | # 遍历每一个batchsize的数据 98 | for i, (imgs, taegets, img_path, _) in enumerate(dataloader): 99 | multi_size = imgs.size() 100 | # print(multi_size) 101 | imgs = imgs.to(device) 102 | taegets = taegets.to(device) 103 | nt = len(taegets) 104 | if nt == 0: 105 | # 如果没有标签就跳过 106 | continue 107 | # 学习率预热 108 | if epoch == 0 and i <= n_burnin: 109 | lr = lr0 * (i / n_burnin) ** 4 # 当起始step比较小时,并且是第一个epoch时,用比较小的学习率 110 | # lr = lr0 * epoch * i*8 / n_burnin # 当起始step比较小时,并且是第一个epoch时,用比较小的学习率 111 | print(lr) 112 | for x in optimizer.param_groups: 113 | x['lr'] = lr # 将lr更新到优化器中 114 | # 模型预测 115 | pred = model(imgs) 116 | target_list = build_targets(model, taegets) 117 | # loss 118 | loss, loss_dict = compute_loss(pred, target_list) 119 | # xywh mse 置信度用BCEWithLogitsLoss多分类损失 cls用二分类交叉熵损失CrossEntropyLoss 120 | # 老三样 121 | loss.backward() # 反向传播 计算题都 122 | optimizer.step() # 梯度更新 123 | optimizer.zero_grad() # 梯度清零 124 | # 打印 平均损失 125 | for key, value in loss_dict.items(): 126 | mloss[key] = (mloss[key] * i + value) / (i + 1) 127 | print( 128 | ' Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.25f}, loss: xy {:.3f}, wh {:.3f}, ' 129 | 'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s' 130 | .format(epoch, epochs - 1, i, nb - 1, 131 | multi_size[2], multi_size[3] 132 | , nt, scheduler.get_lr()[0], 133 | mloss['xy'], mloss['wh'], 134 | mloss['conf'], mloss['cls'], 135 | mloss['total'], time.time() - t), end='\n') 136 | xy_loss.append(mloss['xy']) 137 | wh_loss.append(mloss['wh']) 138 | conf_loss.append(mloss['conf']) 139 | cls_loss.append(mloss['cls']) 140 | total_loss.append(mloss['total']) 141 | t = time.time() 142 | # 模型保存 143 | chkpt = { 144 | 'epoch': epoch, 145 | 'model': model.module.state_dict() if type( 146 | model) is nn.parallel.DistributedDataParallel else model.state_dict() 147 | } 148 | torch.save(chkpt, weights + '/yolov3_19+{}_epoch_{}.pt'.format(img_size, epoch)) 149 | # 创建第一张画布 150 | plt.figure(0) 151 | 152 | # 绘制坐标损失曲线 153 | plt.plot(xy_loss, label="xy Loss") 154 | # 绘制宽高损失曲线 , 颜色为红色 155 | plt.plot(wh_loss, color="red", label="wh Loss") 156 | # 绘制置信度损失曲线 , 颜色为绿色 157 | plt.plot(conf_loss, color="green", label="conf Loss") 158 | # 绘制分类损失曲线 , 颜色为绿色 159 | plt.plot(cls_loss, color="yellow", label="cls Loss") 160 | # 绘制总损失曲线 , 颜色为蓝色 161 | plt.plot(total_loss, color="blue", label="sum Loss") 162 | # 曲线说明在左上方 163 | plt.legend(loc='upper left') 164 | # 保存图片 165 | plt.savefig(weights + '/yolov3_last_{}_epoch_{}_loss.png'.format(img_size, epoch)) 166 | del chkpt 167 | 168 | # 创建第一张画布 169 | plt.figure(0) 170 | 171 | # 绘制坐标损失曲线 172 | plt.plot(xy_loss, 
label="xy Loss") 173 | # 绘制宽高损失曲线 , 颜色为红色 174 | plt.plot(wh_loss, color="red", label="wh Loss") 175 | # 绘制置信度损失曲线 , 颜色为绿色 176 | plt.plot(conf_loss, color="green", label="conf Loss") 177 | # 绘制分类损失曲线 , 颜色为绿色 178 | plt.plot(cls_loss, color="yellow", label="cls Loss") 179 | # 绘制总损失曲线 , 颜色为蓝色 180 | plt.plot(total_loss, color="blue", label="sum Loss") 181 | # 曲线说明在左上方 182 | plt.legend(loc='upper left') 183 | # 保存图片 184 | plt.savefig("./loss.png") 185 | 186 | 187 | if __name__ == '__main__': 188 | train(data_config='cfg/face.data') 189 | print('完成') 190 | -------------------------------------------------------------------------------- /resnet 108人脸关键点/util/data_agu.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8-*- 2 | # date:2019-05-20 3 | # Author: Eric.Lee 4 | # function: face rot imageaug 5 | 6 | import cv2 7 | import numpy as np 8 | import random 9 | 10 | # 图像翻转后关键点的位置变换(98个关键点) 11 | flip_landmarks_dict = { 12 | 0: 32, 1: 31, 2: 30, 3: 29, 4: 28, 5: 27, 6: 26, 7: 25, 8: 24, 9: 23, 10: 22, 11: 21, 12: 20, 13: 19, 14: 18, 13 | 15: 17, 14 | 16: 16, 17: 15, 18: 14, 19: 13, 20: 12, 21: 11, 22: 10, 23: 9, 24: 8, 25: 7, 26: 6, 27: 5, 28: 4, 29: 3, 30: 2, 15 | 31: 1, 32: 0, 16 | 33: 46, 34: 45, 35: 44, 36: 43, 37: 42, 38: 50, 39: 49, 40: 48, 41: 47, 17 | 46: 33, 45: 34, 44: 35, 43: 36, 42: 37, 50: 38, 49: 39, 48: 40, 47: 41, 18 | 60: 72, 61: 71, 62: 70, 63: 69, 64: 68, 65: 75, 66: 74, 67: 73, 19 | 72: 60, 71: 61, 70: 62, 69: 63, 68: 64, 75: 65, 74: 66, 73: 67, 20 | 96: 97, 97: 96, 21 | 51: 51, 52: 52, 53: 53, 54: 54, 22 | 55: 59, 56: 58, 57: 57, 58: 56, 59: 55, 23 | 76: 82, 77: 81, 78: 80, 79: 79, 80: 78, 81: 77, 82: 76, 24 | 87: 83, 86: 84, 85: 85, 84: 86, 83: 87, 25 | 88: 92, 89: 91, 90: 90, 91: 89, 92: 88, 26 | 95: 93, 94: 94, 93: 95 27 | } 28 | 29 | 30 | # 非形变处理:将图像按长宽比resize,然后进行pad 31 | def letterbox(img_, img_size=256, mean_rgb=(128, 128, 128)): 32 | shape_ = img_.shape[:2] # shape = [height, width] 33 | ratio = float(img_size) / max(shape_) # ratio = old / new 34 | new_shape_ = (round(shape_[1] * ratio), round(shape_[0] * ratio)) 35 | dw_ = (img_size - new_shape_[0]) / 2 # width padding 36 | dh_ = (img_size - new_shape_[1]) / 2 # height padding 37 | top_, bottom_ = round(dh_ - 0.1), round(dh_ + 0.1) 38 | left_, right_ = round(dw_ - 0.1), round(dw_ + 0.1) 39 | # resize img 40 | img_a = cv2.resize(img_, new_shape_, interpolation=cv2.INTER_LINEAR) 41 | 42 | img_a = cv2.copyMakeBorder(img_a, top_, bottom_, left_, right_, cv2.BORDER_CONSTANT, 43 | value=mean_rgb) # padded square 44 | 45 | return img_a 46 | 47 | 48 | def img_agu_channel_same(img_): 49 | """ 50 | 将RGB图像转换为灰度图后,将灰度图的结果赋值给每一通道 51 | :param img_: 52 | :return: 53 | """ 54 | img_a = np.zeros(img_.shape, dtype=np.uint8) 55 | gray = cv2.cvtColor(img_, cv2.COLOR_RGB2GRAY) 56 | img_a[:, :, 0] = gray 57 | img_a[:, :, 1] = gray 58 | img_a[:, :, 2] = gray 59 | 60 | return img_a 61 | 62 | 63 | # 图像旋转 64 | def face_random_rotate(image, pts, angle, Eye_Left, Eye_Right, fix_res=False, img_size=(256, 256), vis=False): 65 | """ 66 | :param image: 要处理的图像 67 | :param pts: 关键点信息 68 | :param angle: 旋转的角度 69 | :param Eye_Left: 左眼关键点 70 | :param Eye_Right: 右眼关键点 71 | :param fix_res: 分辨率不变 72 | :param img_size: 图像的大小 73 | :param vis: 是否显示图像 74 | :return: 75 | """ 76 | # 获取左眼和右眼的中心点坐标 77 | cx, cy = (Eye_Left[0] + Eye_Right[0]) / 2, (Eye_Left[1] + Eye_Right[1]) / 2 78 | # 获取图像的宽高 79 | (h, w) = image.shape[:2] 80 | h = h 81 | w = w 82 | # 以两眼的中心为旋转中心,旋转角度为angle,缩放比例为1生成旋转矩阵M 83 | M = 
cv2.getRotationMatrix2D((cx, cy), angle, 1.0) 84 | # 获取旋转角度的余弦和正弦值 85 | cos = np.abs(M[0, 0]) 86 | sin = np.abs(M[0, 1]) 87 | 88 | # 计算新图像的大小 89 | nW = int((h * sin) + (w * cos)) 90 | nH = int((h * cos) + (w * sin)) 91 | # 计算中心点的平移距离 92 | M[0, 2] += int(0.5 * nW) - cx 93 | M[1, 2] += int(0.5 * nH) - cy 94 | # 进行插值时使用的插值方法 95 | resize_model = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_NEAREST, cv2.INTER_AREA, cv2.INTER_LANCZOS4] 96 | # 按照生成的旋转矩阵对图像进行仿射变换,完成图像旋转 97 | img_rot = cv2.warpAffine(image, M, (nW, nH), flags=resize_model[random.randint(0, 4)]) 98 | # 获取旋转之后的关键点信息 99 | pts_r = [] 100 | # 遍历当前所有的关键点 101 | for pt in pts: 102 | # 获取关键点的坐标 103 | x = float(pt[0]) 104 | y = float(pt[1]) 105 | # 获取仿射变换后的关键点坐标 106 | x_r = (x * M[0][0] + y * M[0][1] + M[0][2]) 107 | y_r = (x * M[1][0] + y * M[1][1] + M[1][2]) 108 | # 将变换后的关键点坐标添加到列表pts_r中 109 | pts_r.append([x_r, y_r]) 110 | # 获取当前的关键点坐标 111 | x = [pt[0] for pt in pts_r] 112 | y = [pt[1] for pt in pts_r] 113 | # 获取关键点区域的x,y坐标的最大值和最小值 114 | x1, y1, x2, y2 = np.min(x), np.min(y), np.max(x), np.max(y) 115 | 116 | # 随机裁剪 117 | translation_pixels = 50 118 | # 生成随机裁剪的左上角坐标和右下角坐标 119 | scaling = 0.3 120 | x1 += random.randint(-int(max((x2 - x1) * scaling, translation_pixels)), int((x2 - x1) * 0.15)) 121 | y1 += random.randint(-int(max((y2 - y1) * scaling, translation_pixels)), int((y2 - y1) * 0.15)) 122 | x2 += random.randint(-int((x2 - x1) * 0.15), int(max((x2 - x1) * scaling, translation_pixels))) 123 | y2 += random.randint(-int((y2 - y1) * 0.15), int(max((y2 - y1) * scaling, translation_pixels))) 124 | # 对超出图像区域的坐标进行clip 125 | x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) 126 | x1 = int(max(0, x1)) 127 | y1 = int(max(0, y1)) 128 | x2 = int(min(x2, img_rot.shape[1] - 1)) 129 | y2 = int(min(y2, img_rot.shape[0] - 1)) 130 | # 裁剪图像获取旋转之后的人脸图像 131 | crop_rot = img_rot[y1:y2, x1:x2, :] 132 | # 初始化,存储裁剪之后的关键点 133 | crop_pts = [] 134 | # 获取宽高 135 | width_crop = float(x2 - x1) 136 | height_crop = float(y2 - y1) 137 | # 遍历所有的关键点 138 | for pt in pts_r: 139 | # 获取x,y坐标 140 | x = pt[0] 141 | y = pt[1] 142 | # 获取裁剪后关键点坐标,并进行归一化 143 | crop_pts.append([float(x - x1) / width_crop, float(y - y1) / height_crop]) 144 | 145 | # 随机镜像,镜像的概率是50% 146 | if random.random() >= 0.5: 147 | # 对人脸图像进行水平翻转 148 | crop_rot = cv2.flip(crop_rot, 1) 149 | # 翻转后的关键点 150 | crop_pts_flip = [] 151 | for i in range(len(crop_pts)): 152 | # 翻转后x坐标发生变化,y坐标不变 153 | x = 1. 
- crop_pts[flip_landmarks_dict[i]][0] 154 | y = crop_pts[flip_landmarks_dict[i]][1] 155 | crop_pts_flip.append([x, y]) 156 | # 获取关键点 157 | crop_pts = crop_pts_flip 158 | # 显示,有可视化设备 159 | if vis: 160 | # 对关键点进行计数 161 | idx = 0 162 | # 眼睛的8个关键点 163 | points_array_left_eye = np.zeros((1, 8, 2), dtype=np.int32) 164 | points_array_right_eye = np.zeros((1, 8, 2), dtype=np.int32) 165 | # 遍历所有的关键点 166 | for pt in crop_pts: 167 | # 获取x,y坐标 168 | x = int(pt[0] * width_crop) 169 | y = int(pt[1] * height_crop) 170 | # 在每一个关键点处绘制圆点 171 | cv2.circle(crop_rot, (int(x), int(y)), 2, (255, 0, 255), -1) 172 | # 眼睛的关键点,存储起来 173 | if 67 >= idx >= 60: 174 | points_array_left_eye[0, idx - 60, 0] = int(x) 175 | points_array_left_eye[0, idx - 60, 1] = int(y) 176 | cv2.circle(crop_rot, (int(x), int(y)), 2, (0, 0, 255), -1) 177 | elif 75 >= idx >= 68: 178 | points_array_right_eye[0, idx - 68, 0] = int(x) 179 | points_array_right_eye[0, idx - 68, 1] = int(y) 180 | cv2.circle(crop_rot, (int(x), int(y)), 2, (0, 255, 0), -1) 181 | idx += 1 182 | # 绘制眼睛的等高线 183 | cv2.drawContours(crop_rot, points_array_left_eye, -1, (0, 155, 255), thickness=2) 184 | cv2.drawContours(crop_rot, points_array_right_eye, -1, (0, 255, 155), thickness=2) 185 | # 宽高比不变时,进行填充 186 | if fix_res: 187 | crop_rot = letterbox(crop_rot, img_size=img_size[0], mean_rgb=(128, 128, 128)) 188 | # 直接进行resize 189 | else: 190 | crop_rot = cv2.resize(crop_rot, img_size, interpolation=resize_model[random.randint(0, 4)]) 191 | # 返回图像和对应的关键点 192 | return crop_rot, crop_pts 193 | 194 | # 图像白化 195 | def prewhiten(x): 196 | mean = np.mean(x) 197 | std = np.std(x) 198 | std_adj = np.maximum(std, 1.0 / np.sqrt(x.size)) 199 | y = np.multiply(np.subtract(x, mean), 1 / std_adj) 200 | return y 201 | 202 | 203 | # 图像亮度、对比度增强 204 | def contrast_img(img, c, b): # 亮度就是每个像素所有通道都加上b 205 | rows, cols, channels = img.shape 206 | # 新建全零(黑色)图片数组:np.zeros(img1.shape, dtype=uint8) 207 | blank = np.zeros([rows, cols, channels], img.dtype) 208 | dst = cv2.addWeighted(img, c, blank, 1 - c, b) 209 | return dst 210 | -------------------------------------------------------------------------------- /resnet34 姿态检测/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | import sys 7 | from util.datasets import * 8 | from util.loss import * 9 | from model import resnet50, resnet34, resnet18 10 | import cv2 11 | import time 12 | import json 13 | from datetime import datetime 14 | import random 15 | # 导入制图工具包 16 | import matplotlib.pyplot as plt 17 | from matplotlib.pyplot import MultipleLocator 18 | from util.common_utils import mkdir_ 19 | 20 | import torch.multiprocessing 21 | torch.multiprocessing.set_sharing_strategy('file_system') 22 | 23 | def trainer(ops): 24 | # 设备信息 25 | os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS 26 | # 构建模型 27 | # 2.模型加载 28 | # 第一步:模型结构初始化 29 | if ops.model == 'resnet_50': 30 | model_ = resnet50(pretrained=False, num_classes=ops.num_classes, img_size=ops.img_size[0], 31 | dropout_factor=ops.dropout) 32 | elif ops.model == 'resnet_34': 33 | model_ = resnet34(pretrained=False, num_classes=ops.num_classes, img_size=ops.img_size[0], 34 | dropout_factor=ops.dropout) 35 | elif ops.model == 'resnet_18': 36 | model_ = resnet18(pretrained=False, num_classes=ops.num_classes, img_size=ops.img_size[0], 37 | dropout_factor=ops.dropout) 38 | # GPU 39 | use_cuda = torch.cuda.is_available() 40 | device = torch.device("cuda:0" if 
use_cuda else "cpu")
41 |     model_ = model_.to(device)
42 |     # step 2: load pretrained weights
43 |     if os.access(ops.fintune_model, os.F_OK):  # checkpoint
44 |         chkpt = torch.load(ops.fintune_model, map_location=device)
45 |         model_.load_state_dict(chkpt)
46 | 
47 |     # 3. data loading
48 |     dataset = LoadImagesAndLabels(ops=ops, img_size=ops.img_size, flag_agu=ops.flag_agu)
49 |     dataloader = DataLoader(dataset, batch_size=ops.batch_size, num_workers=ops.num_workers, shuffle=True)
50 | 
51 |     # 4. training
52 |     # step 1: training setup
53 |     # optimizer
54 |     optimizer = torch.optim.Adam(model_.parameters(), lr=ops.init_lr, betas=(0.9, 0.99), weight_decay=ops.weight_decay)
55 | 
56 |     # loss function
57 |     if ops.loss_define != "wing_loss":
58 |         criterion = nn.MSELoss(reduction='mean')
59 |     # bookkeeping
60 |     loss_list = []
61 |     # step 2: loop over the epochs
62 |     for epoch in range(0, ops.epochs):
63 |         model_.train()
64 |         loss_mean = 0
65 |         loss_idx = 0
66 |         # step 3: loop over the batches and predict
67 |         for i, (imgs_, angles_) in enumerate(dataloader):
68 |             if use_cuda:
69 |                 imgs_ = imgs_.cuda()
70 |                 angles_ = angles_.cuda()
71 |             output_angles = model_(imgs_.float())
72 | 
73 |             # compute the loss
74 |             if ops.loss_define == "wing_loss":
75 |                 loss_angles = got_total_wing_loss(output_angles, angles_.float())
76 |             else:
77 |                 loss_angles = criterion(output_angles, angles_.float())
78 |             loss = loss_angles
79 |             loss_list.append(loss.item())  # store a float, not the tensor, so plotting works and graphs are freed
80 |             loss_mean += loss.item()
81 |             loss_idx += 1
82 |             # progress report
83 |             # print once per batch
84 |             if i % 1 == 0:
85 |                 loc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
86 |                 print(' %s - %s - epoch [%s/%s] (%s/%s):' % (
87 |                     loc_time, ops.model, epoch, ops.epochs, i, int(dataset.__len__() / ops.batch_size)), \
88 |                     'Mean Loss : %.6f - Loss: %.6f' % (loss_mean / loss_idx, loss.item()), ' bs:', ops.batch_size, \
89 |                     ' img_size: %s x %s' % (ops.img_size[0], ops.img_size[1]))
90 | 
91 |             # backprop and weight update
92 |             loss.backward()
93 |             optimizer.step()
94 |             optimizer.zero_grad()
95 | 
96 |         # step 4: save a checkpoint every 5 epochs
97 |         if epoch % 5 == 0:
98 |             torch.save(model_.state_dict(),
99 |                        ops.model_exp + '{}_imgsize_{}-epoch-{}.pth'.format(ops.model, ops.img_size[0], epoch))
100 | 
101 |     # step 5: plot the loss curve
102 |     plt.figure(0)
103 |     plt.plot(loss_list, color="blue", label="loss")
104 |     plt.savefig('./loss.png')
105 | 
106 | 
107 | # 1. configuration
108 | if __name__ == "__main__":
109 |     parser = argparse.ArgumentParser(description=' Project Face Euler Angle Train')
110 |     # output directory for models
111 |     parser.add_argument('--model_exp', type=str, default='./model_exp',
112 |                         help='model_exp')
113 |     # model type
114 |     parser.add_argument('--model', type=str, default='resnet_18',
115 |                         help='model : resnet_18,resnet_34,resnet_50')
116 |     # yaw, pitch, roll
117 |     parser.add_argument('--num_classes', type=int, default=3,
118 |                         help='num_classes')
119 |     # GPU selection
120 |     parser.add_argument('--GPUS', type=str, default='0',
121 |                         help='GPUS')
122 |     # training set annotations
123 |     parser.add_argument('--train_path', type=str,
124 |                         default='/root/cv/dataset/人脸/datasets/face_euler_angle_datasets_mini/',
125 |                         help='train_path')
126 |     # whether to use a pretrained model
127 |     parser.add_argument('--pretrained', type=bool, default=True,
128 |                         help='imageNet_Pretrain')
129 |     # path of the pretrained model
130 |     parser.add_argument('--fintune_model', type=str,
131 |                         default='/www/model/resnet-预训练模型/resnet18/resnet18-5c106cde.pth',
132 |                         help='fintune_model')
133 |     # loss function
134 |     parser.add_argument('--loss_define', type=str, default='wing_loss',
135 |                         help='define_loss')
136 |     # initial learning rate
137 |     parser.add_argument('--init_lr', type=float, default=1e-3,
138 |                         help='init_learningRate')
139 |     # optimizer weight decay (L2 regularisation)
140 | 
parser.add_argument('--weight_decay', type=float, default=5e-4, 141 | help='weight_decay') 142 | # 优化器动量 143 | parser.add_argument('--momentum', type=float, default=0.9, 144 | help='momentum') 145 | # 训练每批次图像数量 146 | parser.add_argument('--batch_size', type=int, default=32, 147 | help='batch_size') 148 | # dropout 149 | parser.add_argument('--dropout', type=float, default=0.5, 150 | help='dropout') 151 | # 训练周期 152 | parser.add_argument('--epochs', type=int, default=10, 153 | help='epochs') 154 | # 训练线程数 155 | parser.add_argument('--num_workers', type=int, default=200, 156 | help='num_workers') 157 | # 输入模型图片尺寸 158 | parser.add_argument('--img_size', type=tuple, default=(256, 256), 159 | help='img_size') 160 | # 是否进行数据增强 161 | parser.add_argument('--flag_agu', type=bool, default=True, 162 | help='data_augmentation') 163 | # 模型输出文件夹是否进行清除 164 | parser.add_argument('--clear_model_exp', type=bool, default=False, 165 | help='clear_model_exp') 166 | 167 | # -------------------------------------------------------------------------- 168 | args = parser.parse_args() # 解析添加参数 169 | # -------------------------------------------------------------------------- 170 | # 根据配置信息创建训练结果保存的根目录 171 | # mkdir_的功能是: 172 | # 存在路径时 173 | # 若flag_rm = True,则删除文件重新创建 174 | # 否则不修改 175 | # 若不存在路劲,则创建路径即可 176 | mkdir_(args.model_exp, flag_rm=args.clear_model_exp) 177 | loc_time = time.localtime() 178 | args.model_exp = args.model_exp + '/' + time.strftime("%Y-%m-%d_%H-%M-%S", loc_time) + '/' 179 | # 根据训练时间创建保存结果的路经 180 | mkdir_(args.model_exp, flag_rm=args.clear_model_exp) 181 | # parse_args()方法的返回值为namespace,用vars()内建函数化为字典 182 | unparsed = vars(args) 183 | # 打印参数结果 184 | for key in unparsed.keys(): 185 | print('{} : {}'.format(key, unparsed[key])) 186 | # 当前时间 187 | unparsed['time'] = time.strftime("%Y-%m-%d %H:%M:%S", loc_time) 188 | # 将配置信息写入到文件中 189 | fs = open(args.model_exp + 'train_ops.json', "w", encoding='utf-8') 190 | json.dump(unparsed, fs, ensure_ascii=False, indent=1) 191 | fs.close() 192 | # 模型训练 193 | trainer(ops=args) 194 | print('well done : {}'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) 195 | -------------------------------------------------------------------------------- /resnet 108人脸关键点/util/datasets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | import json 5 | from tqdm import tqdm 6 | import os 7 | import random 8 | import math 9 | import glob 10 | from torch.utils.data import Dataset 11 | import sys 12 | # from data_agu import * 13 | root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 14 | sys.path.append(root_path) 15 | from util.data_agu import * 16 | 17 | # root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 18 | # sys.path.append(root_path) 19 | # from data_agu import * 20 | # print(root_path) 21 | 22 | class LoadImagesAndLabels(Dataset): 23 | def __init__(self, ops, img_size=(224, 224), flag_agu=False): 24 | ''' 25 | 数据初始化 26 | :param ops:配置信息对象 27 | :param img_size: 图像尺寸 28 | :param flag_agu: 标记位 29 | ''' 30 | max_age = 0 # 年龄最大值在0以上 31 | min_age = 65535 # 年龄最小值在65535以下 32 | file_list = [] # 图像文件list 33 | landmarks_list = [] # 关键点list 34 | age_list = [] # 年龄list 35 | gender_list = [] # 性别list 36 | idx = 0 # 下标图像计数 37 | for f_ in os.listdir(ops.train_path): 38 | # 读取json解析 39 | f = open(ops.train_path + f_, encoding='utf8') 40 | dict = json.load(f) 41 | f.close() 42 | if dict['age'] > 100. 
or dict['age'] < 1.: 43 | continue # 年龄数据错误 44 | idx += 1 45 | img_path_ = (ops.train_path + f_).replace('label_new', 'image').replace('.json', '.jpg') 46 | # img = cv2.imread(img_path_) 47 | file_list.append(img_path_) # 这为啥还存 48 | pts = [] # 存储关键点 49 | for pt_ in dict['landmarks']: 50 | x, y = pt_ 51 | pts.append([x, y]) 52 | landmarks_list.append(pts) 53 | if dict['gender'] == 'male': # 存储性别 54 | gender_list.append(1) 55 | else: 56 | gender_list.append(0) 57 | age_list.append(dict['age']) # 存储年龄 58 | if max_age < dict['age']: 59 | max_age = dict['age'] 60 | if min_age > dict['age']: 61 | min_age = dict['age'] 62 | self.files = file_list 63 | self.landmarks = landmarks_list 64 | self.genders = gender_list 65 | self.img_size = img_size 66 | self.flag_agu = flag_agu 67 | self.ages = age_list 68 | 69 | def __len__(self): 70 | # 获取图像的个数 71 | return len(self.files) 72 | 73 | def __getitem__(self, index): 74 | img_path = self.files[index] 75 | pts = self.landmarks[index] # 关键点 76 | gender = self.genders[index] # 性别 77 | age = self.ages[index] # 年龄 78 | 79 | # 读取图像 80 | img = cv2.imread(img_path) 81 | if self.flag_agu and random.random() > 0.35: # 如果进行图像增强,进行图像旋转 82 | angle_random = random.randint(-33, 33) # 随机生成旋转角度 83 | left_eye = np.average(pts[60:68], axis=0) # 获取左眼和右眼的关键点均值,用于计算旋转中心 84 | right_eye = np.average(pts[68:76], axis=0) 85 | # 返回旋转后的crop图和归一化的关键点 86 | img_, landmarks_ = face_random_rotate(img, pts, angle_random, left_eye, right_eye, img_size=self.img_size) 87 | else: 88 | # 对人脸区域进行裁剪,并归一化 89 | # 人脸区域裁剪 90 | x_max = -65535 91 | y_max = -65535 92 | x_min = 65535 93 | y_min = 65535 94 | for pt_ in pts: 95 | # 获取关键点区域左上角坐标和右下角坐标 96 | x_, y_ = int(pt_[0]), int(pt_[1]) 97 | x_min = x_ if x_min > x_ else x_min 98 | y_min = y_ if y_min > y_ else y_min 99 | x_max = x_ if x_max < x_ else x_max 100 | y_max = y_ if y_max < y_ else y_max 101 | # 获取人脸区域的宽高 102 | face_w = x_max - x_min 103 | face_h = y_max - y_min 104 | # 人脸区域进行扩展并进行裁剪 105 | # 对人脸区域进行随机的扩展 106 | x_min = int(x_min - random.randint(-6, int(face_w / 10))) 107 | y_min = int(y_min - random.randint(-6, int(face_h / 10))) 108 | x_max = int(x_max + random.randint(-6, int(face_w / 10))) 109 | y_max = int(y_max + random.randint(-6, int(face_h / 10))) 110 | # 确保坐标在图像范围内 111 | x_min = np.clip(x_min, 0, img.shape[1] - 1) 112 | x_max = np.clip(x_max, 0, img.shape[1] - 1) 113 | y_min = np.clip(y_min, 0, img.shape[0] - 1) 114 | y_max = np.clip(y_max, 0, img.shape[0] - 1) 115 | # 修正后的坐标 再次获取人脸区域的宽高 116 | face_w = x_max - x_min 117 | face_h = y_max - y_min 118 | face_cut = img[y_min:y_max, x_min:x_max, :] # 裁切 119 | landmarks_ = [] # 关键点 120 | for pt_ in pts: 121 | x_, y_ = int(pt_[0]) - x_min, int(pt_[1]) - y_min # 获取关键点左上角,右下角相对于裁切后的坐标 122 | landmarks_.append([float(x_) / float(face_w), float(y_) / float(face_h)]) # 归一化处理 123 | img_ = cv2.resize(face_cut, self.img_size, interpolation=random.randint(0, 4)) # 图像缩放 124 | # 第三步:图像增强 125 | # 颜色增强 126 | if self.flag_agu: 127 | # 颜色增强 70%的概率 128 | if random.random() > 0.7: 129 | # 颜色空间转换 130 | img_hsv = cv2.cvtColor(img_, cv2.COLOR_BGR2HSV) 131 | hue_x = random.randint(-10, 10) 132 | # 对H通道进行增强 133 | img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_x) 134 | # 对取值进行修正 135 | img_hsv[:, :, 0] = np.maximum(img_hsv[:, :, 0], 0) 136 | img_hsv[:, :, 0] = np.minimum(img_hsv[:, :, 0], 180) 137 | # 将色彩空间转换为BGR 138 | img_ = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR) 139 | # 对数据进行归一化,类型处理 140 | img_ = img_.astype(np.float32)[:, :, ::-1] # BGR->RGB 141 | # 数值归一化 142 | img_ = (img_ - 128.0) / 256. 
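# note: the (x - 128.0) / 256. mapping above is the normalisation convention used throughout
# this repo; visualisation code inverts it with img * 256.0 + 128.0, as in the __main__ demo below.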
143 |         img_ = img_.transpose(2, 0, 1)  # HWC -> CHW (PyTorch layout)
144 |         # flatten the keypoints
145 |         landmarks_ = np.array(landmarks_).ravel()  # [98, 2] -> [196]
146 |         age = np.expand_dims(np.array((age - 50.) / 100.), axis=0)  # normalise the age and add an axis so the regression target is not a bare scalar
147 |         # () -> (1,)
148 |         return img_, landmarks_, gender, age
149 | 
150 | 
151 | if __name__ == '__main__':
152 |     import argparse
153 |     import matplotlib.pyplot as plt
154 |     from torch.utils.data import DataLoader
155 | 
156 |     parser = argparse.ArgumentParser(description='Project Mult task train')
157 |     # training set annotations
158 |     parser.add_argument('--train_path', type=str,
159 |                         default='/root/cv/dataset/人脸/datasets/wiki_crop_face_multi_task/label_new/', help='train_path')
160 |     parser.add_argument('--img_size', type=tuple, default=(256, 256),
161 |                         help='img_size')  # model input image size
162 |     parser.add_argument('--flag_agu', type=bool, default=False,
163 |                         help='data_augmentation')  # whether the training loader augments the data
164 |     ops = parser.parse_args()
165 |     dataset = LoadImagesAndLabels(ops, img_size=ops.img_size, flag_agu=ops.flag_agu)
166 |     print(dataset.__len__())
167 |     dataloader = DataLoader(dataset, batch_size=2, num_workers=0, shuffle=True, drop_last=True)
168 |     for (img_, pts_, gender_, age_) in dataloader:
169 |         for j in range(2):
170 |             img = np.uint8(img_[j].permute(1, 2, 0) * 256.0 + 128.0)[:, :, ::-1]
171 |             img = cv2.UMat(img).get()
172 |             # draw the de-normalised age
173 |             cv2.putText(img, 'age:{:.2f}'.format(age_[j][0] * 100.0 + 50.0), (2, 20), cv2.FONT_HERSHEY_COMPLEX, 0.8,
174 |                         (0, 255, 0), 2)
175 |             if gender_[j] == 1:
176 |                 cv2.putText(img, 'gender:{}'.format("male"), (2, 40),
177 |                             cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2)
178 |             else:
179 |                 cv2.putText(img, 'gender:{}'.format("female"), (2, 40),
180 |                             cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2)
181 |             # keypoints
182 |             pts = pts_[j].reshape((-1, 2))
183 |             for pt in pts:
184 |                 x_, y_ = int(pt[0] * 256), int(pt[1] * 256)
185 |                 cv2.circle(img, (x_, y_), 2, (0, 255, 0), -1)
186 |             # cv2.imshow("result", img)
187 |             # cv2.waitKey(0)
188 |             # plt.imshow(img[:, :, ::-1])
189 |             # plt.show()
190 |     cv2.destroyAllWindows()
191 | 
--------------------------------------------------------------------------------
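The targets produced by `__getitem__` above are all normalised (keypoints relative to the crop, age mapped by (age - 50) / 100). A small sketch of the inverse decoding; the helper name and the 256-pixel crop size are illustrative assumptions matching the demo code above:

```python
import numpy as np

def decode_targets(landmarks_flat, age_norm, crop_size=256):
    """Undo the encoding from __getitem__ above:
    [196] normalised keypoints -> [98, 2] pixel coords; age_norm -> years."""
    pts = np.asarray(landmarks_flat).reshape(-1, 2) * crop_size
    age = float(np.asarray(age_norm).ravel()[0]) * 100.0 + 50.0
    return pts, age

# toy usage: a fake 98-point vector at the crop centre, age target 0.0 -> 50 years
pts, age = decode_targets(np.full(196, 0.5), np.array([0.0]))
print(pts.shape, age)  # (98, 2) 50.0
```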
/resnet 108人脸关键点/inference.py:
--------------------------------------------------------------------------------
1 | # -*-coding:utf-8-*-
2 | # date:2020-04-25
3 | # Author: Eric.Lee
4 | # function: inference
5 | 
6 | import os
7 | import argparse
8 | import torch
9 | import torch.nn as nn
10 | import numpy as np
11 | 
12 | import time
13 | import datetime
14 | import os
15 | import math
16 | from datetime import datetime
17 | import cv2
18 | import torch.nn.functional as F
19 | 
20 | from model import resnet50, resnet34, resnet34_2
21 | from util.common_utils import *
22 | import copy
23 | import matplotlib.pyplot as plt
24 | 
25 | 
26 | # undo the normalisation
27 | def de_preprocess(tensor):
28 |     return tensor * 0.5 + 0.5
29 | 
30 | 
31 | if __name__ == "__main__":
32 |     # 1. parse the configuration
33 |     parser = argparse.ArgumentParser(description=' Project Landmarks Test')
34 |     # model path
35 |     parser.add_argument('--test_model', type=str,
36 |                         # default='/root/cv/pycharm/人脸检测/人脸多任务/model_exp/2022-01-28_09-14-08/resnet_34_epoch-8.pth',
37 |                         # default='/root/cv/pycharm/人脸检测/人脸多任务/model_exp/2022-01-28_08-39-03/resnet_34_epoch-9.pth',
38 |                         # default='/root/cv/dataset/人脸/facetoPay/face_multi_task/face_multitask-resnet_34_imgsize-256-20210425.pth',
39 |                         # default='/root/cv/pycharm/人脸检测/人脸多任务/model_exp/2022-01-28_22-04-11/resnet_34_epoch-80.pth',
40 |                         # baseline
41 |                         # default='/root/cv/pycharm/人脸检测/人脸多任务/model_exp/2022-01-28_22-42-34/resnet_34_epoch-22.pth',
42 |                         default='/root/cv/pycharm/人脸检测/人脸多任务/model_exp/2022-01-29_01-38-27/resnet_34_epoch-98.pth',  # trained from scratch with data augmentation enabled, no pretrained weights
43 |                         # default='/root/cv/pycharm/人脸检测/人脸多任务/model_exp/2022-01-28_11-23-49/resnet_34_epoch-1.pth',
44 | 
45 |                         help='test_model')
46 |     # model type
47 |     parser.add_argument('--model', type=str, default='resnet_34',
48 |                         help='model : resnet_50')
49 |     # number of outputs (keypoint coordinates)
50 |     parser.add_argument('--num_classes', type=int, default=196,
51 |                         help='num_classes')
52 |     # GPU selection
53 |     parser.add_argument('--GPUS', type=str, default='0',
54 |                         help='GPUS')
55 |     # test set path
56 |     parser.add_argument('--test_path', type=str,
57 |                         default='/root/cv/dataset/人脸/facetoPay/face_multi_task/img/',
58 |                         help='test_path')
59 |     # model input image size
60 |     parser.add_argument('--img_size', type=tuple, default=(256, 256),
61 |                         help='img_size')
62 |     # whether to display the images
63 |     parser.add_argument('--vis', type=bool, default=True,
64 |                         help='vis')
65 |     ops = parser.parse_args()
66 |     # parse_args() returns a namespace; vars() turns it into a dict
67 |     unparsed = vars(ops)
68 |     for key in unparsed.keys():
69 |         print('{} : {}'.format(key, unparsed[key]))
70 |     # device
71 |     os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS
72 |     # test image directory
73 |     test_path = ops.test_path
74 | 
75 |     # 2. model loading
76 |     # step 1: build the model
77 |     # select the backbone
78 |     if ops.model == 'resnet_50':
79 |         model_ = resnet50(landmarks_num=ops.num_classes, img_size=ops.img_size[0])
80 |     elif ops.model == 'resnet_34':
81 |         model_ = resnet34(landmarks_num=ops.num_classes, img_size=ops.img_size[0])
82 |     elif ops.model == 'resnet_34_2':
83 |         model_ = resnet34_2(landmarks_num=ops.num_classes, img_size=ops.img_size[0])
84 | 
85 |     # step 2: pick the device and move the model onto it
86 |     # device setup
87 |     use_cuda = False  # torch.cuda.is_available()
88 |     device = torch.device("cuda:0" if use_cuda else "cpu")
89 |     model_ = model_.to(device)
90 |     # switch to inference mode
91 |     model_.eval()
92 | 
93 |     # step 3: load the trained weights
94 |     # load the trained model
95 |     if os.access(ops.test_model, os.F_OK):
96 |         chkpt = torch.load(ops.test_model, map_location=device)
97 |         model_.load_state_dict(chkpt)
98 |         print('load test model : {}'.format(ops.test_model))
99 |     # predict the images
100 |     font = cv2.FONT_HERSHEY_SIMPLEX
101 |     # no gradient tracking
102 |     with torch.no_grad():
103 |         # 3. data loading
104 |         # count the processed images
105 |         idx = 0
106 |         # walk the test directory
107 |         for file in os.listdir(ops.test_path):
108 |             # skip files that are not .jpg
109 |             if '.jpg' not in file:
110 |                 continue
111 |             idx += 1
112 |             print('{}) image : {}'.format(idx, file))
113 |             # read the image
114 |             img = cv2.imread(ops.test_path + file)
115 |             # original width and height
116 |             img_width = img.shape[1]
117 |             img_height = img.shape[0]
118 |             # preprocessing: resize to the model input size
119 |             img_ = cv2.resize(img, (ops.img_size[1], ops.img_size[0]))
120 |             # cast and normalise
121 |             img_ = img_.astype(np.float32)
122 |             img_ = (img_ - 128.) / 256. 
123 |             # channel order
124 |             img_ = img_.transpose(2, 0, 1)
125 |             img_ = torch.from_numpy(img_)
126 |             # add the batch dimension
127 |             img_ = img_.unsqueeze_(0)
128 |             # 4. model prediction
129 |             # move the data onto the device
130 |             if use_cuda:
131 |                 img_ = img_.cuda()
132 |             # forward pass
133 |             if ops.model == 'resnet_34':
134 |                 output_landmarks, output_gender, output_age, output_face_ol = model_(img_.float())
135 |             elif ops.model == 'resnet_34_2':
136 |                 output_landmarks, output_gender, output_age = model_(img_.float())
137 | 
138 |             # output_landmarks, output_gender, output_age, output_face_ol = model_(img_.float())
139 |             # landmark predictions
140 |             output_landmarks = output_landmarks.cpu().detach().numpy()
141 |             # drop the batch dimension
142 |             output_landmarks = np.squeeze(output_landmarks)
143 |             # group the keypoints into a dict, without drawing a circle for each point
144 |             dict_landmarks = draw_landmarks(img, output_landmarks, draw_circle=False)
145 |             # draw the keypoint contours
146 |             draw_contour(img, dict_landmarks)
147 |             # gender prediction
148 |             output_gender = F.softmax(output_gender, dim=1)
149 |             output_gender = output_gender[0]
150 |             output_gender = output_gender.cpu().detach().numpy()
151 |             output_gender = np.array(output_gender)
152 |             # index of the most probable class, i.e. the predicted gender
153 |             gender_max_index = np.argmax(output_gender)
154 |             # its probability
155 |             score_gender = output_gender[gender_max_index]
156 |             print(gender_max_index, score_gender)
157 | 
158 |             # age prediction
159 |             output_age = output_age.cpu().detach().numpy()[0][0]
160 |             output_age = (output_age * 100. + 50.)
161 |             img = cv2.resize(img, (ops.img_size[1], ops.img_size[0]))
162 | 
163 |             # draw the predictions on the image
164 |             if gender_max_index == 1.:
165 |                 cv2.putText(img, 'gender:{}'.format("male"), (2, 20),
166 |                             cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2)
167 |                 cv2.putText(img, 'gender:{}'.format("male"), (2, 20),
168 |                             cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1)
169 |             else:
170 |                 cv2.putText(img, 'gender:{}'.format("female"), (2, 20),
171 |                             cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2)
172 |                 cv2.putText(img, 'gender:{}'.format("female"), (2, 20),
173 |                             cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1)
174 |             cv2.putText(img, 'age:{:.2f}'.format(output_age), (2, 50),
175 |                         cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2)
176 |             cv2.putText(img, 'age:{:.2f}'.format(output_age), (2, 50),
177 |                         cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1)
178 |             # cv2.putText(img, "yaw:{:.1f},pitch:{:.1f},roll:{:.1f}".format(output_face_ol[0][0], output_face_ol[0][1],
179 |             #                                                               output_face_ol[0][2]), (1, 80),
180 |             #             cv2.FONT_HERSHEY_COMPLEX, 0.8,
181 |             #             (55, 0, 220), 1)
182 |             # display or save the result
183 |             if ops.vis:
184 |                 # cv2.namedWindow('image', 0)
185 |                 # cv2.imshow('image', img)
186 |                 # cv2.waitKey(0)
187 |                 # if cv2.waitKey(1) == 27:
188 |                 #     break
189 |                 # cv2.imwrite("./result2/" + file, img)
190 |                 plt.imshow(img[:, :, ::-1])
191 |                 plt.show()
192 |     cv2.destroyAllWindows()
193 | 
194 |     print('well done ')
195 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Face-payment
2 | Source code submission
3 | Demo: [https://www.bilibili.com/video/BV1bL4y1s7Fr?spm_id_from=333.999.0.0 ](https://space.bilibili.com/452931925?spm_id_from=333.788.0.0)
4 | # A payment-grade face project; this source code is for study and exchange only and must not be used commercially
5 | Project architecture
6 | - 1. Face detection to lock onto the target box
7 | - 2. 108-point facial landmark detection, providing face alignment, age prediction and gender prediction; it can also support liveness detection (e.g. tracking the aspect-ratio change of the eye keypoints)
8 | - 3. Face pose estimation: identity verification is refused when the Euler angles are outside the expected range
9 | - 4. Face recognition: 1:N matching, a false accept rate of 1 in 100,000 at a 50% pass rate
10 | 
11 | # Implementation details and pitfalls
12 | - 1. First of all, this project must not be open-sourced for commercial use, so it is not convenient to provide any trained models or data, and no datasets are provided either, since they involve personal privacy; for non-private data you can look for open datasets yourself
13 | 
14 | - 2. Pitfall one: if you try to train the provided yoloV3 code yourself, there is a ~90% chance the gradients will not go down, because it is not the standard Ultralytics implementation and lacks gradient accumulation, data augmentation, learning-rate annealing and similar optimisations;
15 | if your GPU does not have enough memory, do not use this code — the batch size will be too small to train anything (a gradient-accumulation sketch follows this list)
16 | 
17 | - 3. Pitfall two: do not try to train with the yolov3-tiny code provided here... prediction works, but the NMS implementation also seems to differ badly from the Ultralytics version; when integrating I directly adapted the NMS from the Ultralytics V3-SPP code.
18 | If you need to detect several people at the same time, leave it alone; if your data only ever contains one face, just take top-k = 1. NMS also has two other thresholds, to be tuned to personal preference
19 | 
20 | ![image](https://user-images.githubusercontent.com/65523997/162975652-47f50c12-f0ad-44b1-868d-b9dd12098bf1.png)
21 | 
22 | - 4. Pitfall three: both the landmark model and the pose model use a ResNet backbone, but the 98-keypoint output head is a three-way multi-task head (keypoints - age - gender) trained jointly with wing loss; I also tried adding the pose task
23 | into the joint wing-loss training, but the accuracy was not satisfactory — that code is included as well
24 | 
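The gradient-accumulation trick mentioned in pitfall one can be retrofitted with a few lines. A generic, hedged sketch in plain PyTorch (not code from this repo; `model`, `dataloader` and `compute_loss` stand in for the repo's objects): accumulating over `accum_steps` mini-batches approximates a batch `accum_steps` times larger, at the cost of fewer optimizer steps per epoch.

```python
import torch

# minimal sketch, assuming `model`, `dataloader` and `compute_loss` exist as in the repo
def train_with_grad_accum(model, dataloader, optimizer, compute_loss, accum_steps=4):
    model.train()
    optimizer.zero_grad()
    for i, (imgs, targets) in enumerate(dataloader):
        loss = compute_loss(model(imgs), targets)
        (loss / accum_steps).backward()   # scale so the accumulated gradient matches a big batch
        if (i + 1) % accum_steps == 0:
            optimizer.step()              # one update per accum_steps mini-batches
            optimizer.zero_grad()
```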
如果你的显卡设备显存不够大,请不要使用此代码:batch size 太小是训练不出来的
16 | 
17 | - 3.第二个坑 不要试图用本项目提供的 yolov3-tiny 代码尝试训练。。。预测可以,但 NMS 的实现貌似也跟 U 版有很大差距,自己联调的时候直接改用了 U 版 V3-SPP 的 NMS
18 | 如果有需求要同时检测多人就别改;仅检测一个人的数据,topk 取 1 就行。当然 NMS 还有其他两个阈值,也是看个人喜好进行更改
19 | 
20 | ![image](https://user-images.githubusercontent.com/65523997/162975652-47f50c12-f0ad-44b1-868d-b9dd12098bf1.png)
21 | 
22 | - 4.第三个坑 关键点检测与姿态识别的 backbone 都使用 resnet,但 96 关键点的输出头是三个多任务(关键点-年龄-性别),使用 wing loss 联合训练。个人尝试过将姿态也
23 | 加入 wing loss 中联合训练,但精度并不理想,相应代码实现中也有
24 | 
25 | 数据集说明:超过 68 个关键点的公开数据集比较少,其中比较有名的是 Wider Facial Landmark in the Wild(WFLW),它提供了 98 个关键点。WFLW 包含了 10000 张脸,其中 7500 张用于训练,2500 张用于测试。除了关键点之外,还有遮挡、姿态、妆容、光照、模糊和表情等信息的标注。
26 | 
27 | # 项目讲解
28 | # yoloV3
29 | 使用 U 版就行,没什么好说的
30 | 个人推荐链接,自带教程:https://github.com/WZMIAOMIAO/deep-learning-for-image-processing/tree/master/pytorch_object_detection/yolov3_spp
31 | 
32 | 数据集大家找找应该可以找到,大概名称是 yolo_widerface_open_train,我也不好乱发出来
33 | 
34 | 网络结构
35 | ![image](https://user-images.githubusercontent.com/65523997/162977659-c5081bb3-c4c1-46e4-b015-59ae34423b59.png)
36 | ![image](https://user-images.githubusercontent.com/65523997/162977702-f5380ed5-6cb5-479d-81cf-dc040013091f.png)
37 | 学习率预热与退火策略可视化
38 | ![image](https://user-images.githubusercontent.com/65523997/162977856-5110b03f-4b8f-4f17-ae8c-a97f42d5829c.png)
39 | 
40 | loss函数
41 | ![image](https://user-images.githubusercontent.com/65523997/162977917-0da9431a-bafd-4a68-aa24-84e445261d35.png)
42 | 
43 | 这里说一下:如果训练时难以拟合,建议在 yolo 的训练代码里适当增加 anchors 部分的 loss 权重;或者说哪个部分的 loss 降不下去,就增加对应公式项的权重
44 | 另外梯度累计与数据增强大家也要看一下。不多废话,想必学习这个项目的人都有这个基础了
45 | 
46 | # 人脸108关键点检测 多任务
47 | 
48 | 96关键点检测多任务实现
49 | 系统架构
50 | ![image](https://user-images.githubusercontent.com/65523997/162980530-5b0cb893-3522-4844-9219-daaf57fb747a.png)
51 | 
52 | 
53 | 
54 | ![image](https://user-images.githubusercontent.com/65523997/162979445-10280313-8778-4775-bd1c-476f27eacd72.png)
55 | 
56 | 这里是多任务所使用的 loss 函数
57 | 
58 | ![image](https://user-images.githubusercontent.com/65523997/162979465-95743e5f-85fd-4f4f-8c3c-3e99d248a6d5.png)
59 | 
60 | 当 |x| < w 时,主体是一个对数函数 w·ln(1 + |x|/ε),其中 w 和 ε 是两个控制参数;其他取值时是一个 L1 损失 |x| − C,其中 C = w − w·ln(1 + w/ε),用来保证分段处连续。要找合适的 w 和 ε 值需要调参,推荐的参数如下表所示(表后附了一段按公式写的示意代码):
61 | 
62 | 
63 | ![image](https://user-images.githubusercontent.com/65523997/162979534-1e56019d-883b-41a9-bc2a-6acf2d9b673a.png)
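
顺着上面的公式,补一段 wing loss 的最小示意(按论文公式手写的草稿,并不是本仓库 `loss.py` 的原文实现;`w`、`epsilon` 取论文推荐值 10 和 2,函数名为示意):

```python
import math
import torch

def wing_loss_sketch(pred, target, w=10.0, epsilon=2.0):
    # x:预测关键点与标注之间的偏差
    x = (pred - target).abs()
    # C 保证两段函数在 |x| = w 处连续:C = w - w*ln(1 + w/epsilon)
    C = w - w * math.log(1.0 + w / epsilon)
    # |x| < w 时走对数段,放大小误差的梯度;否则退化为 L1 - C
    loss = torch.where(x < w, w * torch.log(1.0 + x / epsilon), x - C)
    return loss.mean()
```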
64 | 
65 | 数据集的名称貌似是 wiki_crop_face_multi_task,格式如下
66 | 
67 | ![image](https://user-images.githubusercontent.com/65523997/162979733-2174fc9d-91c8-4d66-9b38-b30ada36faf6.png)
68 | 
69 | 
70 | 
71 | 
72 | 数据增强的逻辑
73 | 
74 | 送入网络中的图像如下图所示:(第一排是原始图像,第二排是送入网络中的图像)
75 | 
76 | 
77 | 
78 | ![image](https://user-images.githubusercontent.com/65523997/162980576-b18ccc39-53e7-4347-9b18-153129031a8e.png)
79 | 
80 | 
81 | 
82 | 架构很简单
83 | 
84 | ![image](https://user-images.githubusercontent.com/65523997/162980835-a9b1db8d-54b8-4b5e-9332-ce9800c52856.png)
85 | 
86 | 三任务联合训练时的 loss
87 | 
88 | ![image](https://user-images.githubusercontent.com/65523997/162980926-722dbaa6-2b4f-4bf5-b7e0-40b3bd3f9ebe.png)
89 | 
90 | 如果将姿态任务添加进去呢?
91 | 暴力添加。这里简单说一下:并没有很全的数据,所以直接借用 teacher forcing 的思想,用训练好的姿态网络作为老师来计算姿态分支的 loss
92 | 
93 | ![loss (2)](https://user-images.githubusercontent.com/65523997/162981190-9de7c23a-9d02-40c1-ada9-de8666c1bc5d.png)
94 | 
95 | 评估时效果很差,于是再次调整,将姿态任务的 loss 权重修改后:
96 | ![loss](https://user-images.githubusercontent.com/65523997/162981514-e60c1818-7cc7-4b05-a4fa-d035f79b4038.png)
97 | 
98 | 这里有不小的坑,反正自己比较菜,即使这样效果也不理想
99 | 
100 | 多任务训练时,训练步数一定的情况下,batch 越大拟合得越快;但如果开启数据增强,则拟合得更慢
101 | batch 越小,所需的训练步数越多。但是问题来了:
102 | 
103 | ![image](https://user-images.githubusercontent.com/65523997/162981826-cdd4f3db-d688-4355-8528-3f8190a48ae5.png)
104 | 
105 | 如果按照训练相对时间来看,batch size 未必越大越好:相同时间的情况下,batch 越大拟合得未必越快
106 | 
107 | ![image](https://user-images.githubusercontent.com/65523997/162981893-b488438a-1764-4f4c-8dfe-38e525aebccd.png)
108 | 
109 | 第三张图是学习量相同时的拟合速度,可以看到拟合最快的是 batch size 64。所以 batch size 未必越大越好,合适就行
110 | 
111 | ![image](https://user-images.githubusercontent.com/65523997/162982140-a36a9447-9959-4b50-8767-c8a6a575fbe4.png)
112 | 
113 | 效果图
114 | 
115 | ![image](https://user-images.githubusercontent.com/65523997/162982342-3b14c017-0615-43a4-a30b-27cb6f0cc2f9.png)
116 | 
117 | # 姿态检测
118 | 人脸姿态估计指的是根据一幅二维人脸图像,计算出其在实际三维空间中的面部朝向。输入是一张二维人脸图片,输出是表示方位的三个旋转角度 (pitch, yaw, roll)(欧拉角):pitch 表示俯仰角(绕 x 轴的旋转角度),yaw 表示偏航角(绕 y 轴的旋转角度),roll 表示翻滚角(绕 z 轴的旋转角度),分别对应抬头、摇头和转头,如下图所示(可以把人脸理解为一架向我们飞来的飞机)
119 | ![image](https://user-images.githubusercontent.com/65523997/162982455-5571f626-7c37-4cf7-ac6f-d48647285819.png)
120 | 
121 | 
122 | 数据集貌似叫 face_euler_angle_datasets,我是发不了,大家理解
123 | 格式是这样的,dataloader 中的 dataset 好像也写好了数据增强
124 | 
125 | ![image](https://user-images.githubusercontent.com/65523997/162982720-462ceb7d-5f17-48bb-8971-4dd5a7911c88.png)
126 | 
127 | 训练也很简单,就是直接回归欧拉角
128 | 
129 | ![image](https://user-images.githubusercontent.com/65523997/162982852-1e4ffbca-69f9-46a6-a1ec-3763f04289d0.png)
130 | 
131 | 
132 | # 重点-ArcFace 人脸识别 孪生网络
133 | 
134 | 先说评估
135 | 错误拒绝率(FRR)
136 | 相似度值范围内等分为若干档,得到若干个不同的阈值 S,计算不同阈值 S 的 FRR 如下:FRR(S) = 同人比对相似度中低于阈值 S 的数量 / 同人比对总数 × 100%;
137 | 
138 | 错误接受率(FAR)
139 | 相似度值范围内等分为若干档,得到若干个不同的阈值 S,计算不同阈值 S 的 FAR 如下:FAR(S) = 非同人比对相似度中不低于阈值 S 的数量 / 非同人比对总数 × 100%;(两张图后附了一段按该定义计算的示意代码)
140 | 
141 | ![image](https://user-images.githubusercontent.com/65523997/162983297-13c003b5-ecd0-4a3f-b05f-77839bd4139a.png)
142 | 
143 | ![image](https://user-images.githubusercontent.com/65523997/162983415-f5a2afb9-7e81-4554-bc25-26a92119ee59.png)
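
按上面的定义,阈值扫描可以写成下面这样(示意草稿:假设已经把同人比对得分和非同人比对得分分别收集成了 numpy 数组,且得分越大越相似;与本仓库 `效果评估.py` 的实现相互独立,变量名均为示意):

```python
import numpy as np

def frr_far_sweep(genuine_scores, impostor_scores, thresholds):
    # genuine_scores: 同人比对得分;impostor_scores: 非同人比对得分
    frr = np.array([(genuine_scores < s).mean() for s in thresholds])   # 同人得分低于阈值 S -> 被错误拒绝
    far = np.array([(impostor_scores >= s).mean() for s in thresholds]) # 非同人得分不低于阈值 S -> 被错误接受
    return frr * 100, far * 100  # 换算成百分比
```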
144 | 这个数据集更发不了了,因为是 1 对 N 的;想看效果就要自己去采集,没法发哈。这里填一下坑:数据采集时的思路
145 | 
146 | - 1.使用 yolov3-spp(第一个步骤)的代码直接录入视频数据,将生成的目标框作为我们数据集的数据;同时要适当扩大 anchors,因为要缩放。用 opencv 就行,最好用手机录,像素高;最后把每一帧的结果保存成图片就行。这部分代码有时间再搞上来吧,因为是联调时写的,跟这边的训练没太多关系
147 | 
148 | - 2.第一步获取到的原始数据并不能直接用,因为。。。你需要进行仿射变换与旋转:我们期望得到的人脸处于图像的正中心,并且是正对着观察者的
149 | 
150 | 
151 | 数据集搞定以后就需要搞模型了,这里上内容了
152 | ![image](https://user-images.githubusercontent.com/65523997/162984487-00ef3f3d-1110-42b7-b846-38d844f1fe2a.png)
153 | 
154 | 上图是利用孪生网络架构做人脸识别的例子。第一个子网络的输入是一幅图片,依次送入卷积层、池化层和全连接层,最后输出一个特征向量 h1,它是对输入图像 x1 的编码。然后向第二个子网络(与第一个子网络完全相同)输入图片 x2,做相同的处理得到编码 h2。为了比较图片 x1 和 x2,我们计算编码结果 h1 和 h2 之间的距离:如果它比某个阈值(一个超参数)小,则意味着两张图片是同一个人;否则,两张图片中不是同一个人
155 | 
156 | 网络训练架构
157 | 
158 | - 1.骨干网络(Backbone network):用于提取特征的网络
159 | 
160 | - 2.距离度量网络(Assembled network):拼接在骨干网络后、用于距离度量的网络
161 | 
162 | 也就是将提取出来的特征进行比较,然后进行身份校验
163 | 
164 | - 骨干网络
165 | ![image](https://user-images.githubusercontent.com/65523997/162985318-f7c783d8-cbb2-4165-89f3-7dedc0087a62.png)
166 | SeNet 通道注意力
167 | 大概是这样
168 | 
169 | ![image](https://user-images.githubusercontent.com/65523997/162985406-f3456582-9e74-49c9-aa77-7ea6f132c8f7.png)
170 | 
171 | 跟 resnet 差不多,就是多加了几个残差连接,并在通道维度上实现了注意力
172 | 不多说,看重点
173 | - 距离度量网络
174 | 这里作者使用特征向量的夹角余弦值作为相似度评判标准
175 | 
176 | ![image](https://user-images.githubusercontent.com/65523997/162986190-6d070c63-772b-4d9e-b3bd-4267a8c96df2.png)
177 | 
178 | ![image](https://user-images.githubusercontent.com/65523997/162987440-eabe2ed3-8a57-4e6e-844c-29ac096b428b.png)
179 | ![image](https://user-images.githubusercontent.com/65523997/162987479-1a7d7ae4-02b8-41f1-b926-11e5040a3bd2.png)
180 | ![image](https://user-images.githubusercontent.com/65523997/162987910-7cf17d6c-a28f-4e9d-be3a-9e9059d71cff.png)
181 | ![image](https://user-images.githubusercontent.com/65523997/162987953-01ed666b-25fc-413a-8be1-4dc9b3b405c1.png)
182 | 
183 | 本质就是一个多分类网络,屁股上加一个 softmax 和 argmax 就知道是谁,输出值还能得到置信度。很简单的思路,奥卡姆剃刀~
184 | 然后部署之前还缺一个流程:将用户数据输入、进行训练,得到每个用户的平均特征向量并保存,用于部署时的比较喽
185 | 
186 | - 开源开到底 讲下预测阶段的部署改进方法
187 | 
188 | 1.预测阶段原作者的写法是欧氏距离。。。阈值是一个很小的数,判断是否小于它;这样如果用 fp16 甚至 fp32,数值精度就很受影响了,毕竟支付级别的误识率要求是十万分之一
189 | 数据格式上的先天不足导致的指标下降得不偿失~ 自己理解原作者是这样考虑了部署的场景,但学术上没啥问题
190 | ![image](https://user-images.githubusercontent.com/65523997/162988993-b32ed497-f258-48f1-9c27-7b7b2262df36.png)
191 | 改进其实也挺简单,不过也有坑。直接遵循训练阶段的计算方式就好:把 W 直接与特征向量做矩阵相乘,得到的就是我们要的 1:N 夹角余弦值(相似度),然后送进 softmax 即可
192 | 得到两点好处:1.阈值判断变成"是否大于一个比较大的数",不再拘泥于数据格式;2.提高并行度,比如将一个用户的 20 帧视频作为一个 batch 输入(batch size 大了呗),输出时哪个人出现的频率最高就是谁,这种场景还是比较 nice;或者输入多个人的数据并行判断~(下面附了一段示意代码)
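
这个改进大概是下面这个意思(示意草稿,不是本仓库 `效果评估.py` 的原文:假设 `W` 是录入阶段得到、已按列做 L2 归一化的 `(512, N)` 用户平均特征矩阵,`emb` 是同样归一化过的待识别特征,`s=64` 沿用训练时的缩放习惯,阈值数值只是占位):

```python
import torch
import torch.nn.functional as F

def identify_1_vs_n(emb, W, names, threshold=0.5, s=64.0):
    # emb: (B, 512);W: (512, N)。矩阵相乘直接得到 1:N 的夹角余弦
    cos = emb @ W                     # (B, N),值越大越相似,阈值判断是"大于"而不是"小于"
    prob = F.softmax(cos * s, dim=1)  # 缩放后过 softmax,可作为置信度输出
    score, idx = cos.max(dim=1)       # 每帧取最相似的用户
    # 低于阈值视为陌生人;多帧同时输入时,可再对识别结果做投票
    return [(names[int(i)], float(p[int(i)])) if float(sc) >= threshold else (None, 0.0)
            for sc, i, p in zip(score, idx, prob)]
```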
193 | 2.改进:新增评估代码,自动调参。貌似论文作者没提供评估代码?自己依据第一点的改进、遵循训练时的方法写了一份。要想得到简单的混淆矩阵、分类评估的 AUC 曲线,没有现成代码,就面向 W 大矩阵编程了。公式就是上面那两个错误率,代码细节不讲了,有注释
194 | 
195 | ![frr-far](https://user-images.githubusercontent.com/65523997/162990540-aa1fc4d6-c25d-406b-b001-77a0d586582d.png)
196 | 
197 | 这是整条曲线~ 虽然面积不大吧,部署时调参也有依据了;并且还加了一个很朴素的准确率参数,这样你就可以知道当前参数下除了 FRR 与 FAR 之外,广义上的准确率是多少
198 | 因为我的数据集一共就 8 个人,当前做到十万分之一误识率的情况下有 50% 的通过准确率,大体可以接受。当然,数据集越大,相应的指标就越高
199 | ![image](https://user-images.githubusercontent.com/65523997/162992981-1257d1b8-194c-4f2b-b227-dc7168947c48.png)
200 | 
201 | # TODO
202 | 整理下部署代码搞上来。部署代码的功能早就实现了,简单说下:
203 | 
204 | 1.实现人脸自动录入,得到 arcface 所需的规整图像数据集
205 | 
206 | 2.实现多模块联调。就是解耦做得不好,python 还是没 java 方便,打个 jar 包直接 import 多好哈哈哈。这个部署代码仅仅是录入与检测功能,特征向量计算还是需要这些源码
207 | goodluck,都是实打实的经验哈,给我一键三联
208 | 
209 | 
210 | 
211 | 
212 | 
--------------------------------------------------------------------------------
/ArcFace/效果评估.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | warnings.filterwarnings("ignore")
4 | import os
5 | import torch
6 | from model import Backbone
7 | import argparse
8 | from pathlib import Path
9 | import cv2
10 | from torchvision import transforms as trans
11 | from util.datasets import de_preprocess
12 | # import torch  # 上面已导入,避免重复
13 | from model import l2_norm
14 | import numpy as np
15 | import matplotlib.pyplot as plt
16 | from PIL import Image, ImageDraw, ImageFont
17 | from collections import Counter
18 | # import matplotlib.pyplot as plt  # 上面已导入,避免重复
19 | 
20 | 
21 | # 加载pth,npy文件中存储的人脸库特征与对应人名
22 | def load_facebank(facebank_path):
23 | embeddings = torch.load(facebank_path + '/facebank.pth',
24 | map_location=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))
25 | names = np.load(facebank_path + '/names.npy')
26 | return embeddings, names
27 | 
28 | 
29 | def infer(model, device, faces, target_embs, threshold=1.2, tta=False, label=None, names=None):
30 | '''
31 | :param names: 人脸库中各用户的名称
32 | :param label: 每张待识别图片对应的真实身份(目标值)
33 | :param model: 进行预测的模型
34 | :param device: 设备信息
35 | :param faces: 要处理的人脸图像
36 | :param target_embs: 数据库中的人脸特征
37 | :param threshold: 阈值
38 | :param tta: 进行水平翻转的增强
39 | :return: 识别错误数、识别正确数、FRR统计字典、FAR统计字典
40 | '''
41 | rang_nums = 0  # 识别错误的样本数
42 | right_nums = 0  # 识别正确的样本数
43 | # 将类型转换和标准化合并在一起
44 | test_transform = trans.Compose([
45 | trans.ToTensor(),
46 | trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
47 | ])
48 | 
49 | # 特征向量
50 | embs = []
51 | # 遍历人脸图像
52 | for img in faces:
53 | # 若进行翻转
54 | if tta:
55 | # 镜像翻转
56 | mirror = trans.functional.hflip(img)
57 | # 模型预测
58 | emb = model(test_transform(img).to(device).unsqueeze(0))
59 | emb_mirror = model(test_transform(mirror).to(device).unsqueeze(0))
60 | # 获取最终的特征向量
61 | embs.append(l2_norm(emb + emb_mirror))
62 | else:
63 | with torch.no_grad():
64 | # 未进行翻转时,直接预测
65 | embs.append(model(test_transform(img).to(device).unsqueeze(0)))
66 | # 将特征拼接在一起
67 | source_embs = torch.cat(embs)
68 | # 计算要检测的图像特征与目标特征之间的差异
69 | # diff_1 = source_embs.unsqueeze(-1) - target_embs.transpose(1, 0).unsqueeze(0)
70 | # dist_1 = torch.sum(torch.pow(diff_1, 2), dim=1)
71 | 
72 | diff = torch.mm(source_embs, target_embs.transpose(1, 0))
73 | dist = torch.pow(diff, 2) * 64  # 夹角余弦的平方再放大(注意:平方会丢掉余弦的符号)
74 | 
75 | # print('dist_1-->', dist_1)
76 | # print('dist-->', dist)
77 | 
78 | # 获取相似度最大值及对应的索引
79 | # minimum, min_idx = torch.min(dist.squeeze(), dim=0)  # 计算欧式距离时,距离越小说明相似度越高
80 | minimum, min_idx = torch.max(dist.squeeze(), dim=1)  # 计算cos(x)时,值越大说明夹角越小
81 | # 若没有匹配成功,将索引设置为-1
82 | # min_idx[minimum > threshold] = -1
83 | min_idx[minimum < threshold] = 0
84 | # print(len(min_idx), len(minimum))
85 | dicts = {}  # 统计"自己与自己比却低于阈值"的次数(FRR 相关)
86 | dicts2 = {}  # 统计"与别人比却不低于阈值"的次数(FAR 相关)
87 | for i in range(source_embs.shape[0]):
88 | if dist[i][list(names).index(label[i + 1]) - 1] < threshold:
89 | # 自己与自己比低于阈值,也就是自己与自己比预测错 dist[i][list(names).index(label[i + 1])]
90 | if label[i + 1] not in dicts:
91 | dicts[label[i + 1]] = 1
92 | else:
93 | dicts[label[i + 1]] += 1
94 | # print('低于阈值')  # 自己与自己比的总数是自己类别的训练集元素数量
95 | if names[min_idx[i] + 1] != label[i + 1]:
96 | rang_nums += 1
97 | else:
98 | right_nums += 1
99 | # lists = dist[i][:list(names).index(label[i + 1]) - 1]  # 未使用的中间变量
100 | FAR_index = 0
101 | for j in dist[i]:
102 | if j == dist[i][list(names).index(label[i + 1]) - 1]:
103 | continue
104 | else:
105 | if j > threshold:
106 | if names[FAR_index + 1] not in dicts2:
107 | dicts2[names[FAR_index + 1]] = 1
108 | else:
109 | dicts2[names[FAR_index + 1]] += 1
110 | FAR_index += 1
111 | 
112 | # print(dicts, dicts2)
113 | return rang_nums, right_nums, dicts, dicts2
114 | 
115 | 
116 | if __name__ == '__main__':
117 | # 配置相关参数
118 | parser = argparse.ArgumentParser(description='make facebank')
119 | # 模型
120 | parser.add_argument("--net_mode", help="which network, [ir, ir_se]", default='ir_se', type=str)
121 | # 模型深度
122 | parser.add_argument("--net_depth", help="how many layers [50,100,152]", default=50, type=int)
123 | # 预训练模型
124 | parser.add_argument("--finetune_backbone_model", help="finetune_backbone_model",
125 | # default="./save/model_2022-01-31-05-27-40_step_2144.pth",
126 | # default="./save/model_2022-01-31-05-53-58_step_500.pth",
127 | # default="/root/cv/pycharm/人脸检测/人脸识别/save/model_2022-01-31-06-22-37_step_3752.pth",
128 | default="/root/cv/pycharm/人脸检测/人脸识别/save/model_2022-01-31-12-05-37_step_12864.pth",
129 | # 自己训练标准训练集99epoch
130 | # default="/root/cv/pycharm/人脸检测/人脸识别/local_save/model_2022-02-02-05-23-17_step_169.pth",
131 | # default="face_verify-model_ir_se-50.pth",  # 老师训练模型
132 | type=str)
133 | # 人脸仓库
134 | parser.add_argument("--facebank_path", help="facebank_path",
135 | default="./facebank", type=str)
136 | # 是否进行水平翻转
137 | parser.add_argument("-tta", "--tta", help="whether test time augmentation", default=False, type=bool)
138 | # 要进行识别的人脸
139 | # parser.add_argument("-example", help="example", 140
| # default="G://机器视觉//cv项目代码//人脸检测项目代码//facetoPay//facetoPay//insight_face//example//", 141 | # type=str) 142 | parser.add_argument("-example", help="example", default="/root/cv/pycharm/人脸检测/人脸识别/example/example2/", type=str) 143 | # parser.add_argument("-example", help="example", default="C:\\Users\86183\Desktop\example\\", type=str) 144 | # 参数解析 145 | args = parser.parse_args() 146 | # 设备信息 147 | device_ = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 148 | # 模型选择 149 | model_ = Backbone(args.net_depth, 1., args.net_mode).to(device_) 150 | print('{}_{} model generated'.format(args.net_mode, args.net_depth)) 151 | # 加载预训练模型 152 | if os.access(args.finetune_backbone_model, os.F_OK): 153 | model_.load_state_dict(torch.load(args.finetune_backbone_model, 154 | map_location=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))) 155 | print("-------->>> load model : {}".format(args.finetune_backbone_model)) 156 | # 模型前向传播 157 | model_.eval() 158 | # 加载人脸仓库中的人脸特征及对应的名称 159 | targets, names = load_facebank(args.facebank_path) 160 | # 打印结果 161 | print("names : {}".format(names)) 162 | print("targets size : {}".format(targets.size())) 163 | # 要识别的人脸 164 | faces_identify = [] 165 | label = ['null', ] 166 | # 遍历要处理的图像 167 | img_nums = 0 168 | for file in os.listdir(args.example): 169 | # 若非图片文件,进行下一次循环 170 | # if not file.endswith('png'): 171 | # continue 172 | label.append(file.split('-')[0]) 173 | # label.append(file) 174 | # 读取图像数据 175 | img = cv2.imread(args.example + file) 176 | if img is None: 177 | continue 178 | # 获取图像的宽高 179 | x, y = img.shape[0:2] 180 | # 送入网络中的图像必须是112*112 181 | if x != 112 or y != 112: 182 | img = cv2.resize(img, (112, 112)) 183 | # 将数据放入list中 184 | faces_identify.append(Image.fromarray(img)) 185 | img_nums += 1 186 | # 进行检测,results是索引,face_dst是差异 187 | 阈值 = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10, 10.5, 11, 12, 13, 14, 15, 16, 188 | 17, 18, 19, 20, 24, 29, 34, 39, 45, 49] 189 | yuzhi = np.arange(0.1, 100, 0.1) 190 | # yuzhi = [x for x in range(1, 50)] 191 | frr = [] 192 | far = [] 193 | for i in yuzhi: 194 | rang_nums, right_nums, dicts, dicts2, = infer(model_, device_, faces_identify, targets, threshold=i, 195 | tta=False, label=label, names=names) 196 | # print(rang_nums, right_nums) 197 | print('阈值-->', i, '正确率-->', right_nums / (rang_nums + right_nums)) 198 | result = Counter(label) 199 | # print(result) 200 | # print(dicts, dicts2) 201 | FRR = 0 # 错误拒绝率 自己不认自己 202 | index = 0 203 | FAR = 0 # 错误接受率, 别人认成自己 204 | for key in dicts.keys(): 205 | FRR += dicts[key] / result[key] 206 | index += 1 207 | # print(FRR, index) 208 | for key in dicts2.keys(): 209 | FAR += dicts2[key] 210 | if index == 0: 211 | FRR = 0 212 | else: 213 | FRR = FRR / index 214 | FAR = FAR / (len(targets) * img_nums) 215 | # print('targets', len(targets), 'img_nums', img_nums) 216 | print('FRR-->', FRR) 217 | print('FAR-->', FAR) 218 | frr.append(FRR * 100) 219 | far.append(FAR * 100) 220 | if FAR <= 0.00001: 221 | print('符合标准的情况', 'FAR->', FAR * 100, 'FRR->', FRR * 100) 222 | if FRR == 1 or FAR == 0: 223 | break 224 | 225 | plt.plot(frr, far, 'r-.p', label="plot figure") 226 | plt.show() 227 | plt.savefig('./frr-far2.png') 228 | -------------------------------------------------------------------------------- /ArcFace/model.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout2d, Dropout, AvgPool2d, \ 2 | 
MaxPool2d, AdaptiveAvgPool2d, Sequential, Module, Parameter
3 | import torch.nn.functional as F
4 | import torch
5 | from collections import namedtuple
6 | import math
7 | import pdb
8 | 
9 | 
10 | class Flatten(Module):
11 | def forward(self, input):
12 | '''
13 | 将特征图拍平成向量
14 | :param input:
15 | :return:
16 | '''
17 | return input.view(input.size(0), -1)
18 | 
19 | 
20 | def l2_norm(input, axis=1):
21 | '''
22 | 沿指定维度做 L2 归一化(单位化)
23 | :param input:
24 | :param axis:
25 | :return:
26 | '''
27 | norm = torch.norm(input, 2, axis, True)
28 | output = torch.div(input, norm)
29 | return output
30 | 
31 | 
32 | class SEModule(Module):
33 | def __init__(self, channels, reduction):
34 | '''
35 | :param channels:输入通道数
36 | :param reduction:通道压缩比例
37 | '''
38 | super(SEModule, self).__init__()
39 | self.avg_pool = AdaptiveAvgPool2d(1)
40 | self.fc1 = Conv2d(channels, channels // reduction, kernel_size=1, padding=0, bias=False)
41 | self.relu = ReLU(inplace=True)
42 | self.fc2 = Conv2d(channels // reduction, channels, kernel_size=1, padding=0, bias=False)
43 | self.sigmoid = Sigmoid()
44 | 
45 | def forward(self, x):
46 | module_input = x
47 | x = self.avg_pool(x)
48 | x = self.fc1(x)  # 1x1卷积实现:先将通道融合降维,再还原回去
49 | x = self.relu(x)
50 | x = self.fc2(x)
51 | x = self.sigmoid(x)
52 | return module_input * x  # 输入直接乘以通道权重
53 | 
54 | 
55 | class bottleneck_IR(Module):
56 | def __init__(self, in_channel, depth, stride):
57 | '''
58 | 改进型 IR 瓶颈残差块:调整了 BN 层、激活函数与步长的位置,一是为了降低参数量,二是希望步长为 1 的卷积更好地提取特征
59 | :param in_channel:输入通道数
60 | :param depth:输出通道数
61 | :param stride:步长
62 | '''
63 | super(bottleneck_IR, self).__init__()
64 | if in_channel == depth:
65 | # 短连接 输入通道数与输出通道数相同时直接使用池化操作,池化结果取决于步长
66 | # 步长为1 尺寸不变
67 | # 步长为2 下采样2倍,对应残差块第二个卷积步长为2时的尺度变换
68 | self.shortcut_layer = MaxPool2d(1, stride)
69 | else:
70 | # 不相等的情况就需要使用1x1卷积核去匹配指定维度
71 | self.shortcut_layer = Sequential(
72 | Conv2d(in_channel, depth, (1, 1), stride, bias=False),
73 | BatchNorm2d(depth)
74 | )
75 | # 残差块部分
76 | self.res_layer = Sequential(
77 | BatchNorm2d(in_channel),
78 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),  # 卷积核尺寸3x3 步长1x1 填充padding=1
79 | PReLU(depth),
80 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False),  # 卷积核尺寸3x3 步长stride(下采样时为2) padding=1;输出通道为depth,与下面的BN及短连接匹配
81 | BatchNorm2d(depth)
82 | )
83 | 
84 | def forward(self, x):
85 | shortcut = self.shortcut_layer(x)
86 | res = self.res_layer(x)
87 | return res + shortcut
88 | 
89 | 
90 | class bottleneck_IR_SE(Module):
91 | def __init__(self, in_channel, depth, stride):
92 | '''
93 | 在IR瓶颈模块中添加SE模块
94 | :param in_channel:输入通道
95 | :param depth: 输出通道
96 | :param stride: 步长
97 | '''
98 | super(bottleneck_IR_SE, self).__init__()
99 | # 短连接部分
100 | if in_channel == depth:
101 | self.shortcut_layer = MaxPool2d(1, stride)
102 | else:
103 | self.shortcut_layer = Sequential(
104 | Conv2d(in_channel, depth, (1, 1), stride, bias=False),
105 | BatchNorm2d(depth))
106 | # 残差部分加入se模块
107 | self.res_layer = Sequential(
108 | BatchNorm2d(in_channel),
109 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
110 | PReLU(depth),
111 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
112 | BatchNorm2d(depth),
113 | SEModule(depth, 16)
114 | )
115 | 
116 | def forward(self, x):
117 | shortcut = self.shortcut_layer(x)
118 | res = self.res_layer(x)
119 | return res + shortcut
120 | 
121 | 
122 | # 定义元组类型类
123 | class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
124 | '''A named tuple describing a ResNet block.'''
125 | 
126 | 
127 | def get_block(in_channel, depth, num_units, stride=2):
128 | return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)]
129 | 
130 | 
131 | def get_blocks(num_layers):
132 | if num_layers == 50:
133 | blocks = [
134 | get_block(in_channel=64, depth=64, num_units=3),
135 | get_block(in_channel=64, depth=64 * 2, num_units=4),
136 | get_block(in_channel=64 * 2, depth=64 * 4, num_units=14),
137 | get_block(in_channel=64 * 4, depth=64 * 8, num_units=3)
138 | ]
139 | elif num_layers == 100:
140 | blocks = [
141 | get_block(in_channel=64, depth=64, num_units=3),
142 | get_block(in_channel=64, depth=64 * 2, num_units=13),
143 | get_block(in_channel=64 * 2, depth=64 * 4, num_units=30),
144 | get_block(in_channel=64 * 4, depth=64 * 8, num_units=3),
145 | 
146 | ]
147 | elif num_layers == 152:
148 | blocks = [
149 | get_block(in_channel=64, depth=64, num_units=3),
150 | get_block(in_channel=64, depth=64 * 2, num_units=8),
151 | get_block(in_channel=64 * 2, depth=64 * 4, num_units=36),
152 | get_block(in_channel=64 * 4, depth=64 * 8, num_units=3),
153 | ]
154 | return blocks
155 | 
156 | 
157 | class Backbone(Module):
158 | def __init__(self, num_layers, drop_ratio, mode='ir'):
159 | '''
160 | 骨干网络
161 | :param num_layers:网络层数
162 | :param drop_ratio:随机失活概率
163 | :param mode:是否添加se模块
164 | '''
165 | super(Backbone, self).__init__()
166 | assert num_layers in [50, 100, 152], 'num_layer 必须是50,100,152其中之一,其他层数尚未支持'
167 | assert mode in ['ir', 'ir_se'], 'mode 必须为ir 或者ir_se,其他尚未支持'
168 | blocks = get_blocks(num_layers)
169 | if mode == 'ir':
170 | unit_module = bottleneck_IR
171 | elif mode == 'ir_se':
172 | unit_module = bottleneck_IR_SE
173 | # 输入层
174 | self.input_layer = Sequential(
175 | Conv2d(3, 64, (3, 3), 1, 1, bias=False),
176 | BatchNorm2d(64),
177 | PReLU(64)
178 | )
179 | # 输出层
180 | self.output_layer = Sequential(
181 | BatchNorm2d(512),
182 | Dropout(drop_ratio),
183 | Flatten(),
184 | Linear(512 * 7 * 7, 512),  # resnet 输出 7x7x512 的特征图,展平后接全连接
185 | BatchNorm1d(512)
186 | )
187 | # 残差模块部分
188 | modules = []
189 | for block in blocks:
190 | for bottleneck in block:
191 | modules.append(
192 | unit_module(
193 | bottleneck.in_channel,
194 | bottleneck.depth,
195 | bottleneck.stride
196 | )
197 | )
198 | self.body = Sequential(*modules)
199 | 
200 | def forward(self, x):
201 | x = self.input_layer(x)
202 | x = self.body(x)
203 | x = self.output_layer(x)
204 | return l2_norm(x)
205 | 
206 | 
207 | class Arcface(Module):
208 | def __init__(self, embedding_size=512, classnum=51332, s=64., m=0.5):
209 | '''
210 | :param embedding_size:人脸图像特征向量的维度
211 | :param classnum:人脸分类数,即人的个数
212 | :param s:缩放半径
213 | :param m:夹角间隔(margin)
214 | '''
215 | super(Arcface, self).__init__()
216 | self.classnum = classnum
217 | self.kernel = Parameter(torch.Tensor(embedding_size, classnum))
218 | self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)  # 权重w初始化并做范数规整
219 | self.m = m  # m2:加在角度上的间隔,默认0.5
220 | self.s = s  # 缩放倍数 默认64
221 | self.cos_m = math.cos(m)
222 | self.sin_m = math.sin(m)
223 | self.mm = self.sin_m * m
224 | # 阈值 防止角度超过 π
225 | self.threshold = math.cos(math.pi - m)
226 | 
227 | def forward(self, embbedings, label):
228 | # 权重规范化
229 | nB = len(embbedings)
230 | kernel_norm = l2_norm(self.kernel, axis=0)
231 | # 将特征向量与权重相乘 获取cos相似度
232 | cos_theta = torch.mm(embbedings, kernel_norm)  # (nB, 512) x (512, cls) -> (nB, cls)
233 | cos_theta = cos_theta.clamp(-1, 1)  # 将数值规范在 -1,1 之间,更加稳定
234 | cos_theta_2 = torch.pow(cos_theta, 2)  # 求平方
235 | sin_theta_2 = 1 - cos_theta_2  # sin(x)^2 = 1 - cos(x)^2
236 | sin_theta = torch.sqrt(sin_theta_2)
237 | # cos_theta_m 
是相似度向量添加类似l2惩罚项, 仅对目标类别生效 238 | cos_theta_m = (cos_theta * self.cos_m - sin_theta * self.sin_m) # cos(x+m)=cos(x) cos(m)- sin(x)sin(m) 239 | cond_v = cos_theta - self.threshold # cos(x)-m 240 | cond_mask = cond_v <= 0 # 所有超出阈值的类别会被标记为True 241 | # 大于0 说明theta在 m的左边,小于0说明theta在m的右边 242 | # 在右边说明超出阈值,非法,进行正则惩罚 243 | keep_val = (cos_theta - self.mm) 244 | cos_theta_m[cond_mask] = keep_val[cond_mask] # 将所有需要替换的类别使用m3惩罚项替换 245 | output = cos_theta * 1.0 # cos_theta 是目标类别与每个类别相似度向量 246 | # 获取类别索引值 247 | idx_ = torch.arange(0, nB, dtype=torch.long) # batch 中下标 而不是batch=1时数据类别的下标 248 | output[idx_, label] = cos_theta_m[idx_, label] 249 | output *= self.s 250 | return output 251 | -------------------------------------------------------------------------------- /resnet34 姿态检测/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import torch.utils.model_zoo as model_zoo 5 | 6 | 7 | def load_model(model, pretrained_state_dict): 8 | model_dict = model.state_dict() 9 | pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if 10 | k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()} 11 | model.load_state_dict(pretrained_dict, strict=False) 12 | if len(pretrained_dict) == 0: 13 | print('尚未加载参数') 14 | else: 15 | for k, v in pretrained_state_dict.items(): 16 | if k in pretrained_dict: 17 | print('->load{} {}'.format(k, v.size())) 18 | else: 19 | print('[info] skip {} {}'.format(k, v.size())) 20 | return model 21 | 22 | 23 | def conv3x3(in_planes, out_planes, stride=1): 24 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 25 | 26 | 27 | class BasicBlock(nn.Module): 28 | # 每一个残差块中的channel都是恒定的 所以倍数是1 29 | expansion = 1 30 | 31 | def __init__(self, inpulanes, planes, stride=1, downsample=None): 32 | ''' 33 | 基础模块初始化 34 | :param inpulanes: 输入 35 | :param planes: 卷积 36 | :param stride: 步长 37 | :param downsample:下采样倍数,用于模块与模块之间连接时尺度的计算 38 | ''' 39 | super(BasicBlock, self).__init__() 40 | self.conv1 = conv3x3(inpulanes, planes, stride) 41 | self.bn1 = nn.BatchNorm2d(planes) 42 | self.relu = nn.ReLU(inplace=True) # 节省内存操作 43 | self.conv2 = conv3x3(planes, planes) 44 | self.bn2 = nn.BatchNorm2d(planes) 45 | self.downsample = downsample 46 | self.stride = stride 47 | 48 | def forward(self, x): 49 | residual = x 50 | # CBL 51 | out = self.conv1(x) 52 | out = self.bn1(out) 53 | out = self.relu(out) 54 | # CB 55 | out = self.conv2(out) 56 | out = self.bn2(out) 57 | if self.downsample is not None: 58 | residual = self.downsample(x) 59 | out += residual 60 | out = self.relu(out) 61 | return out 62 | 63 | 64 | # 瓶颈模块 65 | class Bottleneck(nn.Module): 66 | expansion = 4 67 | 68 | # 每一个残差块中最后一个卷积核都会扩大 69 | # conv2x中 前两个通道数量是64 而最后一个卷积层的通道数是256channel 70 | def __init__(self, inplanes, planes, stride=1, downsample=None): 71 | ''' 72 | 瓶颈模块初始化用于 50-152restnet构建 降低参数量 73 | :param inplanes:输入尺寸 74 | :param planes:输出尺寸 75 | :param stride:步长 76 | :param downsample:下采样 77 | ''' 78 | super(Bottleneck, self).__init__() 79 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 80 | self.bn1 = nn.BatchNorm2d(planes) 81 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 82 | self.bn2 = nn.BatchNorm2d(planes) 83 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 84 | self.bn3 = nn.BatchNorm2d(planes * 4) 85 | self.relu = nn.ReLU(inplace=True) 86 | self.downsample = downsample 87 | 
self.stride = stride 88 | 89 | def forward(self, x): 90 | residual = x 91 | # 1x1 92 | out = self.conv1(x) 93 | out = self.bn1(out) 94 | out = self.relu(out) 95 | # 3x3 96 | out = self.conv2(out) 97 | out = self.bn2(out) 98 | out = self.relu(out) 99 | # 1x1 100 | out = self.conv3(out) 101 | out = self.bn3(out) 102 | # out = self.relu(out) 103 | if self.downsample is not None: 104 | residual = self.downsample(x) 105 | out += residual 106 | out = self.relu(out) 107 | return out 108 | 109 | 110 | class ResNet(nn.Module): 111 | def __init__(self, block, layers, num_classes=1000, img_size=224, dropout_factor=1.): 112 | ''' 113 | :param block:残差结构 114 | :param layers:残差结构的具体数量,例如res50[3,4,6,3]==16x3 +2 =50 115 | :param num_classes:网络输出的类别数量 116 | :param img_size:图像大小 117 | :param droput_factor:随机失活的概率 118 | ''' 119 | self.inplanes = 64 # channel定义为64 是因为maxpool后的channel变为64 120 | self.dropout_factor = dropout_factor 121 | super(ResNet, self).__init__() 122 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) # 3通道 64维度卷积核 卷积尺寸7x7 步长2 填充3 123 | self.bn1 = nn.BatchNorm2d(64) 124 | self.relu = nn.ReLU(inplace=True) 125 | 126 | # 构建conv2x,3x,4x,5x部分 127 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, 128 | ceil_mode=True) # ceil_mode 计算方式,如果数据不够填充 仍然进行剩余计算最大值 如果设置为False 则只计算够的相当于是否截断操作 129 | 130 | self.layer1 = self._make_layer(block, 64, layers[0]) 131 | self.layer2 = self._make_layer(block, 64 * 2, layers[1], stride=2) 132 | self.layer3 = self._make_layer(block, 64 * 4, layers[2], stride=2) 133 | self.layer4 = self._make_layer(block, 64 * 8, layers[3], stride=2) 134 | # fc 135 | assert img_size % 32 == 0 # 整个网络是32倍下采样,所以图像要允许可以被32整除 136 | pool_kernel = int(img_size / 32) 137 | self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True) 138 | # 全局平均池化 (n,c,1,1)的特征图 139 | self.dropout = nn.Dropout(self.dropout_factor) 140 | self.fc = nn.Linear(512 * block.expansion, num_classes) # res18/32 fc 维度512,res50/101/152 fc 维度2048 141 | # 网络参数初始化 142 | for m in self.modules(): 143 | if isinstance(m, nn.Conv2d): 144 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 145 | # 卷积核尺寸x 输出维度 146 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) # 根号下 矩阵节点数量分之一 147 | # 卷积层初始化 148 | elif isinstance(m,nn.BatchNorm2d): # bn层初始化 149 | m.weight.data.fill_(1) 150 | m.bias.data.zero_() 151 | 152 | def _make_layer(self, block, planes, blocks, stride=1): 153 | downsample = None 154 | # 1x1下采样 155 | if stride != 1 or self.inplanes != planes * block.expansion: 156 | downsample = nn.Sequential( 157 | nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), 158 | nn.BatchNorm2d(planes * block.expansion), 159 | ) 160 | layers = [] 161 | layers.append(block(self.inplanes, planes, stride, downsample)) 162 | # 第一个残差块通道数发生改变,修正 163 | self.inplanes = planes * block.expansion 164 | for i in range(1, blocks): 165 | layers.append(block(self.inplanes, planes)) 166 | return nn.Sequential(*layers) 167 | 168 | def forward(self, x): 169 | x = self.conv1(x) 170 | x = self.bn1(x) 171 | x = self.relu(x) 172 | x = self.maxpool(x) 173 | # 残差模块 174 | x = self.layer1(x) 175 | x = self.layer2(x) 176 | x = self.layer3(x) 177 | x = self.layer4(x) 178 | # 全局池化 179 | x = self.avgpool(x) 180 | x = x.view(x.size(0), -1) # flatten展平 181 | x = self.dropout(x) 182 | x = self.fc(x) 183 | return x 184 | 185 | 186 | # 2.4 构建不同层的网络 187 | def resnet18(pretrained=False, **kwargs): 188 | # 模型初始化 189 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 190 | if pretrained: 191 | # 加载预训练模型 192 | print("Load pretrained model from {}".format(model_urls['resnet18'])) 193 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet18']) 194 | model = load_model(model, pretrained_state_dict) 195 | return model 196 | 197 | 198 | def resnet34(pretrained=False, **kwargs): 199 | # 模型初始化 200 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 201 | if pretrained: 202 | # 加载预训练模型 203 | print("Load pretrained model from {}".format(model_urls['resnet34'])) 204 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet34']) 205 | model = load_model(model, pretrained_state_dict) 206 | return model 207 | 208 | 209 | def resnet50(pretrained=False, **kwargs): 210 | # 模型初始化 211 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 212 | if pretrained: 213 | # 加载预训练模型 214 | print("Load pretrained model from {}".format(model_urls['resnet50'])) 215 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet50']) 216 | model = load_model(model, pretrained_state_dict) 217 | return model 218 | 219 | 220 | def resnet101(pretrained=False, **kwargs): 221 | # 模型初始化 222 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 223 | if pretrained: 224 | # 加载预训练模型 225 | print("Load pretrained model from {}".format(model_urls['resnet101'])) 226 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet101']) 227 | model = load_model(model, pretrained_state_dict) 228 | return model 229 | 230 | 231 | def resnet152(pretrained=False, **kwargs): 232 | # 模型初始化 233 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 234 | if pretrained: 235 | # 加载预训练模型 236 | print("Load pretrained model from {}".format(model_urls['resnet152'])) 237 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet152']) 238 | model = load_model(model, pretrained_state_dict) 239 | return model 240 | 241 | model_urls = { 242 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 243 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 244 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 245 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 246 | 'resnet152': 
'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 247 | } 248 | 249 | # 3.模型测试 250 | if __name__ == "__main__": 251 | model = resnet34(False, num_classes=3, img_size=224) 252 | input = torch.randn(32, 3, 224, 224) 253 | output = model(input) 254 | print(output.size()) 255 | -------------------------------------------------------------------------------- /resnet 108人脸关键点/predict.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8-*- 2 | # date:2020-04-25 3 | # Author: Eric.Lee 4 | # function: inference 5 | 6 | import os 7 | import argparse 8 | import torch 9 | import torch.nn as nn 10 | import numpy as np 11 | 12 | import time 13 | import datetime 14 | import os 15 | import math 16 | from datetime import datetime 17 | import cv2 18 | import torch.nn.functional as F 19 | 20 | from model import resnet50, resnet34, resnet34_2, resnet152 21 | from util.common_utils import * 22 | import copy 23 | 24 | 25 | def predict(model, ops, img): 26 | # 不进行梯度更新 27 | with torch.no_grad(): 28 | # 读取图像数据 29 | # img = cv2.imread(file_img) 30 | # 获取图像的宽高 31 | img_width = img.shape[1] 32 | img_height = img.shape[0] 33 | # 输入图片预处理,修正图像的大小 34 | img_ = cv2.resize(img, (ops.img_size[1], ops.img_size[0])) 35 | # 类型转换和归一化 36 | img_ = img_.astype(np.float32) 37 | img_ = (img_ - 128.) / 256. 38 | # 通道调整 39 | img_ = img_.transpose(2, 0, 1) 40 | img_ = torch.from_numpy(img_) 41 | # 增加batch维 42 | img_ = img_.unsqueeze_(0) 43 | # 4.模型预测 44 | # 将数据写入设备中 45 | if use_cuda: 46 | img_ = img_.cuda() 47 | # 模型预测 48 | output_landmarks, output_gender, output_age, output_face_ol = model_(img_.float()) 49 | # 获取关键点预测结果 50 | output_landmarks = output_landmarks.cpu().detach().numpy() 51 | # 去除batch维 52 | output_landmarks = np.squeeze(output_landmarks) 53 | # 获取关键点,以字典的形式输出,每个关键点不绘制圆形 54 | dict_landmarks = draw_landmarks(img, output_landmarks, draw_circle=False) 55 | # 绘制关键点 56 | draw_contour(img, dict_landmarks) 57 | # 性别输出结果 58 | output_gender = F.softmax(output_gender, dim=1) 59 | output_gender = output_gender[0] 60 | output_gender = output_gender.cpu().detach().numpy() 61 | output_gender = np.array(output_gender) 62 | # 概率最大类别索引,获取对应的性别 63 | gender_max_index = np.argmax(output_gender) 64 | # 最大概率 65 | score_gender = output_gender[gender_max_index] 66 | print(gender_max_index, score_gender) 67 | 68 | # 年龄输出结果 69 | output_age = output_age.cpu().detach().numpy()[0][0] 70 | output_age = (output_age * 100. + 50.) 
71 | # 将预测结果绘制图像上 72 | if gender_max_index == 1.: 73 | cv2.putText(img, 'gender:{}'.format("male"), (2, 20), 74 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2) 75 | cv2.putText(img, 'gender:{}'.format("male"), (2, 20), 76 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1) 77 | else: 78 | cv2.putText(img, 'gender:{}'.format("female"), (2, 20), 79 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2) 80 | cv2.putText(img, 'gender:{}'.format("female"), (2, 20), 81 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1) 82 | cv2.putText(img, 'age:{:.2f}'.format(output_age), (2, 50), 83 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2) 84 | cv2.putText(img, 'age:{:.2f}'.format(output_age), (2, 50), 85 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1) 86 | 87 | cv2.putText(img, "yaw:{:.1f},pitch:{:.1f},roll:{:.1f}".format(output_face_ol[0][0], output_face_ol[0][1], 88 | output_face_ol[0][2]), (1, 80), 89 | cv2.FONT_HERSHEY_DUPLEX, 1, 90 | (55, 0, 220), 5) 91 | # 将预测结果展示出来或保存下来 92 | return img 93 | 94 | 95 | def predict2(model, ops, img): 96 | # 不进行梯度更新 97 | with torch.no_grad(): 98 | # 读取图像数据 99 | # img = cv2.imread(file_img) 100 | # 获取图像的宽高 101 | img_width = img.shape[1] 102 | img_height = img.shape[0] 103 | # 输入图片预处理,修正图像的大小 104 | img_ = cv2.resize(img, (ops.img_size[1], ops.img_size[0])) 105 | # 类型转换和归一化 106 | img_ = img_.astype(np.float32) 107 | img_ = (img_ - 128.) / 256. 108 | # 通道调整 109 | img_ = img_.transpose(2, 0, 1) 110 | img_ = torch.from_numpy(img_) 111 | # 增加batch维 112 | img_ = img_.unsqueeze_(0) 113 | # 4.模型预测 114 | # 将数据写入设备中 115 | if use_cuda: 116 | img_ = img_.cuda() 117 | # 模型预测 118 | output_landmarks, output_gender, output_age = model_(img_.float()) 119 | # 获取关键点预测结果 120 | output_landmarks = output_landmarks.cpu().detach().numpy() 121 | # 去除batch维 122 | output_landmarks = np.squeeze(output_landmarks) 123 | # 获取关键点,以字典的形式输出,每个关键点不绘制圆形 124 | dict_landmarks = draw_landmarks(img, output_landmarks, draw_circle=False) 125 | # 绘制关键点 126 | draw_contour(img, dict_landmarks) 127 | # 性别输出结果 128 | output_gender = F.softmax(output_gender, dim=1) 129 | output_gender = output_gender[0] 130 | output_gender = output_gender.cpu().detach().numpy() 131 | output_gender = np.array(output_gender) 132 | # 概率最大类别索引,获取对应的性别 133 | gender_max_index = np.argmax(output_gender) 134 | # 最大概率 135 | score_gender = output_gender[gender_max_index] 136 | print(gender_max_index, score_gender) 137 | 138 | # 年龄输出结果 139 | output_age = output_age.cpu().detach().numpy()[0][0] 140 | output_age = (output_age * 100. + 50.) 
141 | # 将预测结果绘制图像上 142 | if gender_max_index == 1.: 143 | cv2.putText(img, 'gender:{}'.format("male"), (2, 20), 144 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2) 145 | cv2.putText(img, 'gender:{}'.format("male"), (2, 20), 146 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1) 147 | else: 148 | cv2.putText(img, 'gender:{}'.format("female"), (2, 20), 149 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2) 150 | cv2.putText(img, 'gender:{}'.format("female"), (2, 20), 151 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1) 152 | cv2.putText(img, 'age:{:.2f}'.format(output_age), (2, 50), 153 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 0), 2) 154 | cv2.putText(img, 'age:{:.2f}'.format(output_age), (2, 50), 155 | cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 20, 0), 1) 156 | 157 | # 将预测结果展示出来或保存下来 158 | return img 159 | 160 | 161 | def 实时目标检测(model, ops): 162 | cap = cv2.VideoCapture(0) 163 | # 获取属性 164 | frame_width = int(cap.get(3)) 165 | frame_height = int(cap.get(4)) 166 | while cap.isOpened(): 167 | ret, frame = cap.read() # 获取每一帧图像 168 | if ret: 169 | img = predict(model, ops, frame) 170 | cv2.imshow('result', img) 171 | if cv2.waitKey(25) & 0xFF == ord("q"): 172 | break 173 | cap.release() 174 | cv2.destroyAllWindows() 175 | 176 | 177 | def test_model(model, ops, img): 178 | img = predict(model, ops, img) 179 | return img 180 | 181 | 182 | if __name__ == "__main__": 183 | # 1.配置信息解析 184 | parser = argparse.ArgumentParser(description=' Project Landmarks Test') 185 | # 模型路径 186 | parser.add_argument('--test_model', type=str, 187 | default='model_exp/2022-01-29_04-23-13/resnet_34_epoch-51.pth', 188 | # default='predict_多任务/resnet_34_epoch-9.pth', 189 | # default='predict_多任务/face_multitask-resnet_34_imgsize-256-20210425.pth', 190 | help='test_model') 191 | # 模型类型 192 | parser.add_argument('--model', type=str, default='resnet_34', 193 | help='model : resnet_152') 194 | # 输出数据(关键点)的个数 195 | parser.add_argument('--num_classes', type=int, default=196, 196 | help='num_classes') 197 | # GPU选择 198 | parser.add_argument('--GPUS', type=str, default='0', 199 | help='GPUS') 200 | # 测试集路径 201 | parser.add_argument('--test_path', type=str, 202 | default='/Users/yaoxiaoying/Desktop/人脸支付/03.课堂代码/face_multi_task/img/', 203 | help='test_path') 204 | # 输入模型图片尺寸 205 | parser.add_argument('--img_size', type=tuple, default=(256, 256), 206 | help='img_size') 207 | # 是否可视化图片 208 | parser.add_argument('--vis', type=bool, default=True, 209 | help='vis') 210 | ops = parser.parse_args() # 解析添加参数 211 | # parse_args()方法的返回值为namespace,用vars()内建函数化为字典 212 | unparsed = vars(ops) 213 | for key in unparsed.keys(): 214 | print('{} : {}'.format(key, unparsed[key])) 215 | # 设备信息 216 | os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS 217 | # 测试图片文件夹路径 218 | test_path = ops.test_path 219 | 220 | # 2.模型加载 221 | # 第一步:构建模型 222 | # 加载模型 223 | if ops.model == 'resnet_50': 224 | model_ = resnet50(landmarks_num=ops.num_classes, img_size=ops.img_size[0]) 225 | elif ops.model == 'resnet_34': 226 | model_ = resnet34(landmarks_num=ops.num_classes, img_size=ops.img_size[0]) 227 | elif ops.model == 'resnet152': 228 | model_ = resnet152(landmarks_num=ops.num_classes, img_size=ops.img_size[0]) 229 | # 第二步:获取设备信息,并将模型写入设备中 230 | # 设备设置 231 | use_cuda = False 232 | # use_cuda = torch.cuda.is_available() 233 | # device = torch.device("cuda:0" if use_cuda else "cpu") 234 | device = torch.device("cpu") 235 | # model_ = model_.to(device) 236 | # 设置为前向推断模式 237 | model_.eval() 238 | 239 | # 第三步:加载预训练模型 240 | # 加载训练好的模型 241 | if os.access(ops.test_model, os.F_OK): 242 | 
# chkpt = torch.load(ops.test_model, map_location=device) 243 | chkpt = torch.load(ops.test_model) 244 | model_.load_state_dict(chkpt) 245 | print('load test model : {}'.format(ops.test_model)) 246 | # 预测图片 247 | font = cv2.FONT_HERSHEY_SIMPLEX 248 | # 实时目标检测(model_, ops) 249 | img = cv2.imread('C:\\Users\\86183\\Desktop\\YZ.jpg') 250 | img = test_model(model_, ops, img) 251 | cv2.imshow('result', img) 252 | cv2.waitKey(0) 253 | -------------------------------------------------------------------------------- /yoloV3 人脸检测/util/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import math 3 | import os 4 | import random 5 | import shutil 6 | from pathlib import Path 7 | from tqdm import tqdm 8 | import cv2 9 | import numpy as np 10 | import torch 11 | from torch.utils.data import DataLoader, Dataset 12 | import sys 13 | 14 | # from utils import letterbox, random_affine, xywh2xyxy, xyxy2xywh 15 | 16 | root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(root_path) 18 | from util.utils import * 19 | import os # 引用OS 20 | 21 | 22 | class LoadImagesAndLabels(Dataset): 23 | # 初始化 24 | def __init__(self, path, batch_size, img_size=416, augment=False, multi_scale=False, root_path=os.path.curdir): 25 | ''' 26 | :param path: 27 | :param batch_size: 28 | :param img_size: 29 | :param augment: 30 | :param multi_scale: 31 | :param root_path: 32 | ''' 33 | with open(path, 'r',encoding='utf-8') as file: 34 | img_files = file.read().splitlines() 35 | img_files = list(filter(lambda x: len(x) > 0, img_files)) 36 | # 读取所有图片文件 37 | np.random.shuffle(img_files) # 乱序处理数据 38 | self.img_files = img_files 39 | self.batch_size = batch_size 40 | self.img_size = img_size 41 | self.augment = augment 42 | self.multi_scale = multi_scale 43 | self.root_path = root_path 44 | self.scale_index = 0 45 | if self.multi_scale: 46 | self.img_size = img_size 47 | self.label_file = [ 48 | x.replace('images', 'labels').replace('./', '/root/cv/dataset/人脸/datasets/').replace('.jpg', '.txt') for x 49 | # x.replace('images', 'labels').replace('./', '/www/dataset/yolo_helmet_train/').replace('.jpg', '.txt') for x 50 | in self.img_files] 51 | 52 | def __len__(self): 53 | ''' 54 | 数据量 55 | :return: 56 | ''' 57 | return len(self.img_files) 58 | 59 | def __getitem__(self, index): 60 | ''' 61 | 图像读取与数据增强 62 | :param index: 63 | :return: 64 | ''' 65 | # 是否进行多尺度训练 66 | if self.multi_scale and (self.scale_index % self.batch_size == 0) and self.scale_index != 0: 67 | # batch必须能整除才行... 
68 | self.img_size = random.choice(range(11, 19)) * 32
69 | # 尺寸从11x32到18x32,必须是32的倍数
70 | if self.multi_scale:
71 | self.scale_index += 1
72 | # 图像读取
73 | # img_path = os.path.join(self.img_files[index].replace('./', '/www/dataset/yolo_helmet_train/'))
74 | img_path = os.path.join(self.img_files[index].replace('./', '/root/cv/dataset/人脸/datasets/'))
75 | # print(img_path, '-------')
76 | img = cv2.imread(img_path)
77 | # print(img.shape)
78 | # print(img)
79 | # 颜色增强 在HSV色彩空间处理:色调(H),饱和度(S),明度(V),在不改变色调的基础上更改饱和度和亮度
80 | augment_hsv = random.random() < 0.5  # 50%的概率触发颜色增强
81 | if self.augment and augment_hsv:
82 | fraction = 0.5
83 | img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
84 | S = img_hsv[:, :, 1].astype(np.float32)
85 | V = img_hsv[:, :, 2].astype(np.float32)
86 | # 生成随机数a 在[0.5, 1.5]之间,对S通道进行缩放
87 | a = (random.random() * 2 - 1) * fraction + 1
88 | S *= a
89 | if a > 1:
90 | np.clip(S, None, 255, out=S)
91 | # 截取函数:将S中的元素截取到指定范围内,大于255的取255;out参数将结果写回原数组,保持shape一致
92 | # 生成随机数a 在[0.5, 1.5]之间,对V通道进行缩放
93 | a = (random.random() * 2 - 1) * fraction + 1
94 | V *= a
95 | if a > 1:
96 | np.clip(V, None, 255, out=V)  # V通道做相同操作
97 | # 赋值给原图像
98 | img_hsv[:, :, 1] = S
99 | img_hsv[:, :, 2] = V
100 | # 颜色空间转换回BGR,完成图像增强
101 | cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
102 | # 将图像尺度调整为正方形,不能直接resize,那会破坏图像比例导致失真
103 | h, w, _ = img.shape
104 | # resize+pad填充
105 | img, ratio, padw, padh = letterbox(img, height=self.img_size, augment=self.augment)
106 | # 获取图像标签
107 | label_path = os.path.join(self.label_file[index])
108 | labels = []
109 | # 读取标签文件
110 | if os.path.isfile(label_path):
111 | with open(label_path, 'r') as file:
112 | lines = file.read().splitlines()
113 | x = np.array([x.split() for x in lines], dtype=np.float32)
114 | if x.size > 0:
115 | labels = x.copy()
116 | # 将归一化xywh换算成letterbox后的像素xyxy:左上角坐标存入第1,2列,右下角坐标存入第3,4列
117 | labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
118 | labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
119 | labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
120 | labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
121 | # 几何变换增强,并处理label值
122 | # 仿射变换
123 | if self.augment:
124 | img, labels = random_affine(img, labels, degrees=(-20, 20), translate=(0.1, 0.1), scale=(0.9, 1.1))
125 | # xywh变换
126 | nl = len(labels)
127 | if nl:
128 | # 不为空就计算:将xyxy转回xywh,并归一化
129 | labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size  # labels每行为[cls, x, y, w, h]
130 | # 翻转
131 | if self.augment:
132 | lr_flip = True
133 | if lr_flip and random.random() > 0.5:
134 | img = np.fliplr(img)
135 | if nl:
136 | labels[:, 1] = 1 - labels[:, 1]  # 水平翻转时,中心 x 变为 1-x
137 | 
138 | # 获取图像和标注信息结果
139 | # 标注
140 | labels_out = torch.zeros((nl, 6))  # 全零数组存储label值,第0列留给batch内图片下标
141 | if nl:
142 | labels_out[:, 1:] = torch.from_numpy(labels)  # 若标签不为空,将其填充进全零数组
143 | 
144 | # 图像处理
145 | # 通道更改
146 | img = img[:, :, ::-1].transpose(2, 0, 1)  # 通道倒序排列后更改维度顺序
147 | # 通道BGR to RGB,表示形式转换为3x416x416(CHW)
148 | # 类型
149 | img = np.ascontiguousarray(img, dtype=np.float32)  # 更改数据类型
150 | # 将内存不连续存储的数组转换为连续存储,使运行速度更快
151 | # 归一化
152 | img /= 255.0
153 | return torch.from_numpy(img), labels_out, img_path, (h, w)
154 | 
155 | @staticmethod  # 静态方法
156 | def collate_fn(batch):
157 | '''
158 | 静态方法不需要实例对象也可调用,这里是为了实现自定义的batch拼接
159 | :param batch:
160 | :return:
161 | '''
162 | img, label, img_path, hw = list(zip(*batch))
163 | for i, l in enumerate(label):
164 | l[:, 0] = i  # 第0列记录该标签属于batch中第i张图片
165 | return torch.stack(img, 0), torch.cat(label, 0), img_path, hw
166 | 
167 | 
168 | if __name__ == '__main__':
169 | # 测试
170 | # 指定文件路径
171 | # 数据路径
172 | 
173 | root_path = '/root/cv/dataset/人脸/datasets'
174 | # txt文件的路径
175 | path = '/www/dataset/yolo_helmet_train/yolo_helmet_train/anno/train.txt'
176 | # 要检测的类别
177 | path_voc_names = './cfg/face.names'
178 | batch_size = 2
179 | img_size = 416
180 | num_workers = 2
181 | dataset = LoadImagesAndLabels(path, batch_size, img_size=img_size, augment=False, multi_scale=False,
182 | root_path=root_path)
183 | # print(dataset.__len__())
184 | dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True,
185 | collate_fn=dataset.collate_fn)
186 | # for a in enumerate(dataloader):
187 | #     pass
188 | # for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
189 | #     print('标注信息', len(targets), targets)
190 | #     print(imgs.size, '===========================')
191 | #     for j in range(batch_size):
192 | #         img_tmp = np.uint8(imgs[j].permute(1, 2, 0) * 255.0)[:, :, ::-1]
193 | #         img_tmp = np.ascontiguousarray(img_tmp)
194 | #         out_path = os.path.join("/root/demo_img/", os.path.basename(img_path_[j]))
195 | #         for k in range(len(targets)):
196 | #             anno = targets[k][1::]
197 | #             # print(anno, 'anno')
198 | #             label = int(anno[0])
199 | #
200 | #             # 获取框的坐标值,左上角坐标和右下角坐标
201 | #             x1 = int((float(anno[1]) - float(anno[3]) / 2) * img_size)
202 | #             y1 = int((float(anno[2]) - float(anno[4]) / 2) * img_size)
203 | #
204 | #             x2 = int((float(anno[1]) + float(anno[3]) / 2) * img_size)
205 | #             y2 = int((float(anno[2]) + float(anno[4]) / 2) * img_size)
206 | #
207 | #             # 将标注框绘制在图像上
208 | #             cv2.rectangle(img_tmp, (x1, y1), (x2, y2), (255, 30, 30), 2)
209 | #             # 将标注类别绘制在图像上
210 | #             # cv2.putText(img_tmp, ("%s" % (str('face'))), (x1, y1), cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 255, 55), 6)
211 | #             cv2.putText(img_tmp, ("%s" % (str("face"))), (x1, y1), cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 55, 255), 2)
212 | #         cv2.imwrite(out_path, img_tmp)
213 | #         print(out_path)
214 | #         # continue
215 | #     # print('结束')
216 | #     break
217 | # 第一步:指定文件路径
218 | path = '/www/dataset/yolo_helmet_train/yolo_helmet_train/anno/train.txt'
219 | path_voc_names = '../cfg/face.names'
220 | # 第二步:获取目标类别
221 | with open(path_voc_names, 'r', encoding='utf-8') as f:
222 | lable_map = f.readlines()
223 | for i in range(len(lable_map)):
224 | lable_map[i] = lable_map[i].strip()
225 | print(i, lable_map[i])
226 | # 第三步:获取图像数据和标注信息
227 | with open(path, 'r', encoding='utf-8') as file:
228 | img_files = file.readlines()
229 | for i in range(len(img_files)):
230 | img_files[i] = img_files[i].strip()
231 | print(img_files[i])
232 | 
233 | label_files = [x.replace('images', 'labels').replace('.jpg', '.txt') for x in img_files]
234 | print(label_files)
235 | # 第四步:将标注信息绘制在图像上
236 | for i in range(len(label_files)):
237 | # 获取图像文件
238 | img_file = os.path.join('/www/dataset/yolo_helmet_train/', img_files[i][2:])
239 | out_path = os.path.join("/root/demo_img/", os.path.basename(img_file))
240 | 
241 | img = cv2.imread(img_file)
242 | w = img.shape[1]
243 | h = img.shape[0]
244 | # 标注文件
245 | lable_path = os.path.join('/www/dataset/yolo_helmet_train/', label_files[i][2:])
246 | # 读取标注绘制在图像上
247 | if os.path.isfile(lable_path):
248 | with open(lable_path, 'r') as file:
249 | lines = file.read().splitlines()
250 | x = np.array([x.split() for x in lines], dtype=np.float32)
251 | for k in range(len(x)):
252 | anno = x[k]
253 | label = int(anno[0])
254 | x1 = int((float(anno[1]) - float(anno[3]) / 2) * w)
255 | y1 = int((float(anno[2]) - float(anno[4]) /
2) * h) 256 | 257 | x2 = int((float(anno[1]) + float(anno[3]) / 2) * w) 258 | y2 = int((float(anno[2]) + float(anno[4]) / 2) * h) 259 | 260 | cv2.rectangle(img, (x1, y1), (x2, y2), (255, 20, 20), 2) 261 | cv2.putText(img, ("%s" % (str(lable_map[label]))), (x1, y1), cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 255, 0), 262 | 6) 263 | cv2.imwrite(out_path, img) 264 | cv2.destroyAllWindows() 265 | -------------------------------------------------------------------------------- /yoloV3 人脸检测/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from collections import OrderedDict 4 | import torch 5 | import torch.nn.functional as F 6 | import torch.nn as nn 7 | 8 | # reference: 9 | # https://github.com/ultralytics/yolov3/blob/master/models.py 10 | # https://github.com/TencentYoutuResearch/ObjectDetection-OneStageDet/blob/master/yolo/vedanet/network/backbone/brick/darknet53.py 11 | # True 查看相关的网络结构 12 | flag_yolo_structure = False 13 | 14 | 15 | # 构建CBL模块 16 | class Conv2dBatchLeaky(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size, stride, leaky_slope=0.1): 18 | ''' 19 | :param in_channels: 输入特征图的通道数 20 | :param out_channels: 输出特征图的通道数,即卷积核个数 21 | :param kernel_size: 卷积核大小 22 | :param stride: 步长 23 | :param leaky_slope: leak_relu的系数 24 | ''' 25 | super(Conv2dBatchLeaky, self).__init__() 26 | 27 | self.in_channels = in_channels 28 | self.out_channels = out_channels 29 | self.kernel_size = kernel_size 30 | self.stride = stride 31 | if isinstance(kernel_size, (list, tuple)): 32 | self.padding = [int(ii / 2) for ii in kernel_size] 33 | if flag_yolo_structure: 34 | print('------------------->>>> Conv2dBatchLeaky isinstance') 35 | else: 36 | self.padding = int(kernel_size / 2) 37 | 38 | self.leaky_slope = leaky_slope 39 | # Layer 40 | # LeakyReLU : y = max(0, x) + leaky_slope*min(0,x) 41 | self.layers = nn.Sequential( 42 | nn.Conv2d(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.padding, bias=False), 43 | nn.BatchNorm2d(self.out_channels), 44 | nn.LeakyReLU(self.leaky_slope, inplace=True) 45 | ) 46 | 47 | def forward(self, x): 48 | x = self.layers(x) 49 | return x 50 | 51 | 52 | # 构建Resunit模块 53 | class ResBlockSum(nn.Module): 54 | def __init__(self, nchannels): 55 | super().__init__() 56 | self.block = nn.Sequential( 57 | Conv2dBatchLeaky(nchannels, int(nchannels / 2), 1, 1), 58 | Conv2dBatchLeaky(int(nchannels / 2), nchannels, 3, 1) 59 | ) 60 | 61 | def forward(self, x): 62 | return x + self.block(x) 63 | 64 | 65 | # 构建头部分 66 | class HeadBody(nn.Module): 67 | def __init__(self, in_channels, out_channels): 68 | super(HeadBody, self).__init__() 69 | 70 | self.layer = nn.Sequential( 71 | Conv2dBatchLeaky(in_channels, out_channels, 1, 1), 72 | Conv2dBatchLeaky(out_channels, out_channels * 2, 3, 1), 73 | Conv2dBatchLeaky(out_channels * 2, out_channels, 1, 1), 74 | Conv2dBatchLeaky(out_channels, out_channels * 2, 3, 1), 75 | Conv2dBatchLeaky(out_channels * 2, out_channels, 1, 1) 76 | ) 77 | 78 | def forward(self, x): 79 | x = self.layer(x) 80 | return x 81 | 82 | 83 | # 上采样 84 | class Upsample(nn.Module): 85 | # Custom Upsample layer (nn.Upsample gives deprecated warning message) 86 | 87 | def __init__(self, scale_factor=1, mode='nearest'): 88 | super(Upsample, self).__init__() 89 | self.scale_factor = scale_factor 90 | self.mode = mode 91 | 92 | def forward(self, x): 93 | return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) 94 | 95 | 96 | # 网络的输出层,若进行预测返回预测结果 97 | # 
default anchors=[(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)] 98 | class YOLOLayer(nn.Module): 99 | def __init__(self, anchors, nC): 100 | print('input anchors', anchors) 101 | """ 102 | :param anchors: 103 | :param nC: 104 | """ 105 | super(YOLOLayer, self).__init__() 106 | 107 | self.anchors = torch.FloatTensor(anchors) 108 | self.nA = len(anchors) # number of anchors (3) 109 | self.nC = nC # number of classes 110 | self.img_size = 0 111 | if flag_yolo_structure: 112 | print('init YOLOLayer ------ >>> ') 113 | print('anchors : ', self.anchors) 114 | print('nA : ', self.nA) 115 | print('nC : ', self.nC) 116 | print('img_size : ', self.img_size) 117 | 118 | def forward(self, p, img_size, var=None): # p : feature map 119 | bs, nG = p.shape[0], p.shape[-1] # batch_size , grid 120 | if flag_yolo_structure: 121 | print('bs, nG --->>> ', bs, nG) 122 | if self.img_size != img_size: 123 | create_grids(self, img_size, nG, p.device) 124 | 125 | # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, xywh + confidence + classes) 126 | p = p.view(bs, self.nA, self.nC + 5, nG, nG).permute(0, 1, 3, 4, 2).contiguous() # prediction 127 | 128 | if self.training: 129 | return p 130 | else: # inference 131 | io = p.clone() # inference output 132 | io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy 133 | io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method 134 | io[..., 4:] = torch.sigmoid(io[..., 4:]) # p_conf, p_cls 135 | io[..., :4] *= self.stride 136 | if self.nC == 1: 137 | io[..., 5] = 1 # single-class model 138 | # flatten prediction, reshape from [bs, nA, nG, nG, nC] to [bs, nA * nG * nG, nC] 139 | return io.view(bs, -1, 5 + self.nC), p 140 | 141 | 142 | # 若图像尺寸不是416,调整anchor的生成,输出特征图 143 | def create_grids(self, img_size, nG, device='cpu'): 144 | # self.nA : len(anchors) # number of anchors (3) 145 | # self.nC : nC # number of classes 146 | # nG : 输出特征图的大小,与输入图像大小有关 147 | self.img_size = img_size 148 | self.stride = img_size / nG 149 | if flag_yolo_structure: 150 | print('create_grids stride : ', self.stride) 151 | 152 | # build xy offsets 153 | grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float() 154 | grid_y = grid_x.permute(0, 1, 3, 2) 155 | self.grid_xy = torch.stack((grid_x, grid_y), 4).to(device) 156 | if flag_yolo_structure: 157 | print('grid_x : ', grid_x.size(), grid_x) 158 | print('grid_y : ', grid_y.size(), grid_y) 159 | print('grid_xy : ', self.grid_xy.size(), self.grid_xy) 160 | 161 | # build wh gains 162 | print(self.anchors, 'anchors') 163 | self.anchor_vec = self.anchors.to(device) / self.stride # 基于 stride 的归一化 164 | # print('self.anchor_vecself.anchor_vecself.anchor_vec:',self.anchor_vec) 165 | self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2).to(device) 166 | self.nG = torch.FloatTensor([nG]).to(device) 167 | 168 | 169 | def get_yolo_layer_index(module_list): 170 | yolo_layer_index = [] 171 | for index, l in enumerate(module_list): 172 | try: 173 | a = l[0].img_size and l[0].nG # only yolo layer need img_size and nG 174 | yolo_layer_index.append(index) 175 | except: 176 | pass 177 | assert len(yolo_layer_index) > 0, "can not find yolo layer" 178 | return yolo_layer_index 179 | 180 | 181 | # ----------------------yolov3------------------------ 182 | # 1.1 模型构建 183 | class Yolov3(nn.Module): 184 | def __init__(self, num_classes=80, 185 | anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198), 186 | (373, 326)]): 187 | 

# ----------------------yolov3------------------------
# 1.1 Model construction
class Yolov3(nn.Module):
    def __init__(self, num_classes=80,
                 anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119), (116, 90), (156, 198),
                          (373, 326)]):
        super().__init__()
        # split the anchors across the three output scales; for a 416x416 image:
        # anchor_mask1 = [6, 7, 8] (largest anchors, 13x13 head)
        # anchor_mask2 = [3, 4, 5] (medium anchors, 26x26 head)
        # anchor_mask3 = [0, 1, 2] (smallest anchors, 52x52 head)
        anchor_mask1 = [i for i in range(2 * len(anchors) // 3, len(anchors), 1)]
        anchor_mask2 = [i for i in range(len(anchors) // 3, 2 * len(anchors) // 3, 1)]
        anchor_mask3 = [i for i in range(0, len(anchors) // 3, 1)]

        layer_list = []
        # list 0: first part of the backbone, produces the 52*52 feature map
        layer_list.append(OrderedDict([
            # CBL
            ("0_stage1_conv", Conv2dBatchLeaky(3, 32, 3, 1)),
            # CBL + Resunit
            ("0_stage2_conv", Conv2dBatchLeaky(32, 64, 3, 2)),
            ('0_stage2_ressum1', ResBlockSum(64)),
            # CBL + Resunit * 2
            ("0_stage3_conv", Conv2dBatchLeaky(64, 128, 3, 2)),
            ('0_stage3_ressum1', ResBlockSum(128)),
            ('0_stage3_ressum2', ResBlockSum(128)),
            # CBL + Resunit * 8
            ("0_stage4_conv", Conv2dBatchLeaky(128, 256, 3, 2)),
            ('0_stage4_ressum1', ResBlockSum(256)),
            ('0_stage4_ressum2', ResBlockSum(256)),
            ('0_stage4_ressum3', ResBlockSum(256)),
            ('0_stage4_ressum4', ResBlockSum(256)),
            ('0_stage4_ressum5', ResBlockSum(256)),
            ('0_stage4_ressum6', ResBlockSum(256)),
            ('0_stage4_ressum7', ResBlockSum(256)),
            ('0_stage4_ressum8', ResBlockSum(256)),
        ]))

        # list 1: second part of the backbone, produces the 26*26 feature map
        layer_list.append(OrderedDict([
            # CBL + Resunit * 8
            ('1_stage5_conv', Conv2dBatchLeaky(256, 512, 3, 2)),
            ('1_stage5_ressum1', ResBlockSum(512)),
            ('1_stage5_ressum2', ResBlockSum(512)),
            ('1_stage5_ressum3', ResBlockSum(512)),
            ('1_stage5_ressum4', ResBlockSum(512)),
            ('1_stage5_ressum5', ResBlockSum(512)),
            ('1_stage5_ressum6', ResBlockSum(512)),
            ('1_stage5_ressum7', ResBlockSum(512)),
            ('1_stage5_ressum8', ResBlockSum(512)),
        ]))

        # list 2: produces the 13*13 feature map and feeds the output branch
        layer_list.append(OrderedDict([
            # CBL + Resunit * 4
            ('2_stage6_conv', Conv2dBatchLeaky(512, 1024, 3, 2)),
            ('2_stage6_ressum1', ResBlockSum(1024)),
            ('2_stage6_ressum2', ResBlockSum(1024)),
            ('2_stage6_ressum3', ResBlockSum(1024)),
            ('2_stage6_ressum4', ResBlockSum(1024)),
            # CBL * 5
            ('2_headbody1', HeadBody(1024, 512)),
        ]))

        # list 3: prediction on the 13*13 feature map, nA * (num_classes + 5) maps (13*13*255 for COCO)
        layer_list.append(OrderedDict([
            ('3_conv_1', Conv2dBatchLeaky(512, 1024, 3, 1)),
            ('3_conv_2', nn.Conv2d(in_channels=1024, out_channels=len(anchor_mask1) * (num_classes + 5), kernel_size=1,
                                   stride=1)),
        ]))

        # list 4: detections on the 13*13 feature map, 3 * ((x, y, w, h, confidence) + classes)
        layer_list.append(OrderedDict([
            ('4_yolo', YOLOLayer([anchors[i] for i in anchor_mask1], num_classes)),
        ]))
        # list 5: channel reduction + upsampling
        layer_list.append(OrderedDict([
            ('5_conv', Conv2dBatchLeaky(512, 256, 1, 1)),
            ('5_upsample', Upsample(scale_factor=2)),
        ]))

        # list 6: produces the 26*26 feature map and feeds the output branch
        layer_list.append(OrderedDict([
            ('6_head_body2', HeadBody(768, 256)),
        ]))

        # list 7: prediction on the 26*26 feature map (26*26*255 for COCO)
        layer_list.append(OrderedDict([
            ('7_conv_1', Conv2dBatchLeaky(256, 512, 3, 1)),
            ('7_conv_2', nn.Conv2d(in_channels=512, out_channels=len(anchor_mask2) * (num_classes + 5), kernel_size=1,
                                   stride=1)),
        ]))

        # list 8: detections on the 26*26 feature map, 3 * ((x, y, w, h, confidence) + classes)
        layer_list.append(OrderedDict([
            ('8_yolo', YOLOLayer([anchors[i] for i in anchor_mask2], num_classes)),
        ]))
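
        # [editor's note] Lists 9-12 below mirror lists 5-8 one scale up: a 1x1
        # CBL channel reduction plus 2x upsampling, concatenation with the
        # 52x52 backbone route (128 + 256 = 384 channels into HeadBody), then a
        # 3x3 CBL and a 1x1 conv emitting len(anchor_mask3) * (num_classes + 5)
        # output maps.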
        # list 9: channel reduction + upsampling
        layer_list.append(OrderedDict([
            ('9_conv', Conv2dBatchLeaky(256, 128, 1, 1)),
            ('9_upsample', Upsample(scale_factor=2)),
        ]))

        # list 10: produces the 52*52 feature map and feeds the output branch
        layer_list.append(OrderedDict([
            ('10_head_body3', HeadBody(384, 128)),
        ]))

        # list 11: prediction on the 52*52 feature map (52*52*255 for COCO)
        layer_list.append(OrderedDict([
            ('11_conv_1', Conv2dBatchLeaky(128, 256, 3, 1)),
            ('11_conv_2', nn.Conv2d(in_channels=256, out_channels=len(anchor_mask3) * (num_classes + 5), kernel_size=1,
                                    stride=1)),
        ]))

        # list 12: detections on the 52*52 feature map, 3 * ((x, y, w, h, confidence) + classes)
        layer_list.append(OrderedDict([
            ('12_yolo', YOLOLayer([anchors[i] for i in anchor_mask3], num_classes))
        ]))

        # nn.ModuleList behaves like a Python list, except that the layers it
        # holds are registered as submodules of the model
        self.module_list = nn.ModuleList([nn.Sequential(i) for i in layer_list])

        # indices of the output layers: list 4, list 8 and list 12
        self.yolo_layer_index = get_yolo_layer_index(self.module_list)

    def forward(self, x):
        # forward pass
        img_size = x.shape[-1]
        output = []
        # list 0
        x = self.module_list[0](x)
        x_route1 = x
        if flag_yolo_structure:
            print(x.shape)
        # list 1
        x = self.module_list[1](x)
        x_route2 = x
        if flag_yolo_structure:
            print(x.shape)
        # list 2
        x = self.module_list[2](x)
        if flag_yolo_structure:
            print(x.shape)
        # list 3
        yolo_head = self.module_list[3](x)
        # list 4
        yolo_head_out_13x13 = self.module_list[4][0](yolo_head, img_size)
        output.append(yolo_head_out_13x13)
        # list 5
        x = self.module_list[5](x)
        # fuse with the 26x26 backbone route
        x = torch.cat([x, x_route2], 1)
        # list 6
        x = self.module_list[6](x)
        # list 7
        yolo_head = self.module_list[7](x)
        # list 8
        yolo_head_out_26x26 = self.module_list[8][0](yolo_head, img_size)
        output.append(yolo_head_out_26x26)
        # list 9
        x = self.module_list[9](x)
        # fuse with the 52x52 backbone route
        x = torch.cat([x, x_route1], 1)
        # list 10
        x = self.module_list[10](x)
        # list 11
        yolo_head = self.module_list[11](x)
        # list 12
        yolo_head_out_52x52 = self.module_list[12][0](yolo_head, img_size)
        output.append(yolo_head_out_52x52)
        # collect the results
        if self.training:
            return output
        else:
            io, p = list(zip(*output))
            return torch.cat(io, 1), p
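
# ----------------------------------------------------------------------------
# [editor's addition] A hypothetical end-to-end shape check, not part of the
# original file: in eval mode the three decoded heads are concatenated, so a
# 416x416 input yields (13*13 + 26*26 + 52*52) * 3 = 10647 candidate boxes.
def _editor_full_model_demo():
    model = Yolov3(num_classes=20).eval()
    io, p = model(torch.randn(1, 3, 416, 416))
    print(io.shape)  # -> [1, 10647, 25]; each row is (x, y, w, h, conf, 20 class scores)
# ----------------------------------------------------------------------------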

# 1.2 Model smoke test
if __name__ == "__main__":
    # define a dummy input and instantiate the model
    dummy_input = torch.randn(1, 3, 416, 416)
    model = Yolov3(num_classes=20)
    # training mode: returns the three raw prediction feature maps
    model.train()
    reses = model(dummy_input)
    for res in reses:
        print(np.shape(res))
    # inference mode: returns (decoded predictions, raw feature maps)
    model.eval()
    infer_res, train_res = model(dummy_input)
    print('inference', np.shape(infer_res))
    for res in train_res:
        print("training", np.shape(res))

--------------------------------------------------------------------------------
/resnet 108人脸关键点/train.py:
--------------------------------------------------------------------------------
import os
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
import sys
from model import resnet50, resnet34, resnet18, resnet18_predict_face_ol_function, resnet152
from loss import got_total_wing_loss, wing_loss
import cv2
import time
import json
from datetime import datetime
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from util.common_utils import *
from util.datasets import *
from torch.utils.tensorboard import SummaryWriter

root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(root_path)
from util.data_agu import *

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def get_predict_face_ol_model(path):
    # auxiliary model that regresses the three face Euler angles (yaw, pitch, roll)
    model_ = resnet18_predict_face_ol_function(num_classes=3, img_size=256)
    model_.to(device)
    model_.eval()
    # load the pretrained weights if the checkpoint exists
    if os.access(path, os.F_OK):
        ckpt = torch.load(path, map_location=device)
        model_.load_state_dict(ckpt)
    return model_


def predict_face_ol(img, model):
    with torch.no_grad():
        pre_ = model(img)
    return pre_


def trainer(ops):
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter(comment=ops.name)
    # face_ol_model = get_predict_face_ol_model('./model/predict_face_ol/resnet_18_imgsize_256-epoch-15.pth')
    # device selection
    # os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS
    # os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    # 2. Model loading
    # Step 1: initialize the model structure
    if ops.model == 'resnet_50':
        model_ = resnet50(pretrained=False, landmarks_num=ops.num_classes, img_size=ops.img_size[0],
                          dropout_factor=ops.dropout)
    elif ops.model == 'resnet_34':
        model_ = resnet34(pretrained=False, landmarks_num=ops.num_classes, img_size=ops.img_size[0],
                          dropout_factor=ops.dropout)
    elif ops.model == 'resnet_18':
        model_ = resnet18(pretrained=False, landmarks_num=ops.num_classes, img_size=ops.img_size[0],
                          dropout_factor=ops.dropout)
    elif ops.model == 'resnet_152':
        model_ = resnet152(pretrained=False, landmarks_num=ops.num_classes, img_size=ops.img_size[0],
                          dropout_factor=ops.dropout)
    else:
        raise ValueError('unsupported model : {}'.format(ops.model))
    # use the GPU if one is available, otherwise the CPU
    use_cuda = torch.cuda.is_available()
    # move the network onto the device
    model_ = model_.to(device)

    # Step 2: load a fine-tune checkpoint if one is given
    if os.access(ops.fintune_model, os.F_OK):  # checkpoint
        chkpt = torch.load(ops.fintune_model, map_location=device)
        model_.load_state_dict(chkpt)
        print('load fintune model : {}'.format(ops.fintune_model))
        # model_dict = model_.state_dict()
        # pretrained_dict = {k: v for k, v in model_dict.items() if
        #                    (k in model_dict and 'fc' not in k)}  # simply skip loading the 'fc' layer weights
        # model_dict.update(pretrained_dict)  # update the weights
        # model_.load_state_dict(model_dict)

    # 3. Data loading
    dataset = LoadImagesAndLabels(ops=ops, img_size=ops.img_size, flag_agu=True)
    print('len train datasets : %s' % (dataset.__len__()))
    # the DataLoader yields the data batch by batch
    dataloader = DataLoader(dataset,
                            batch_size=ops.batch_size,
                            num_workers=ops.num_workers,
                            shuffle=True)

    # 4. Model training
    # Step 1: training setup
    # optimizer
    optimizer = torch.optim.Adam(model_.parameters(), lr=ops.init_lr, betas=(0.9, 0.99),
                                 weight_decay=ops.weight_decay)

    # regression loss, used for the age and landmark tasks
    if ops.loss_define != 'wing_loss':
        criterion = nn.MSELoss(reduction='mean')
    # cross-entropy loss (softmax + NLL), used for the gender task
    criterion_gender = nn.CrossEntropyLoss()
    # learning rate
    init_lr = ops.init_lr
    # per-task loss histories, collected for the training curves
    pts_loss = []
    gender_loss = []
    age_loss = []
    sum_loss = []
    face_ol_loss = []
    # Step 2: iterate over the epochs
    index_tensorboard_step = 0
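    # [editor's note] Worked example of the multi-task weighting used below,
    # loss = loss_pts + 0.3 * loss_age + 0.25 * loss_gender: with
    # loss_pts = 0.80, loss_age = 1.00 and loss_gender = 0.60 the total is
    # 0.80 + 0.30 + 0.15 = 1.25, so the (harder) landmark task dominates.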
    for epoch in range(0, ops.epochs):
        # switch to training mode
        model_.train()
        # running mean of the loss
        loss_mean = 0.
        # counter for the running mean
        loss_idx = 0.

        # Step 3: iterate over the batches and run the forward pass
        for i, (imgs_, pts_, gender_, age_) in enumerate(dataloader):
            # move the batch onto the device
            if use_cuda:
                imgs_ = imgs_.cuda(device=device)
                pts_ = pts_.cuda(device=device)
                gender_ = gender_.cuda(device=device)
                age_ = age_.cuda(device=device)
            # feed the images through the network
            output_landmarks, output_gender, output_age, output_ol = model_(imgs_.float())
            # result = predict_face_ol(imgs_.float(), face_ol_model)
            # landmark and age losses
            if ops.loss_define == 'wing_loss':
                loss_pts = got_total_wing_loss(output_landmarks, pts_.float())
                loss_age = got_total_wing_loss(output_age, age_.float())
                # loss_face_ol = got_total_wing_loss(output_ol, result.float())
            else:
                loss_pts = criterion(output_landmarks, pts_.float())
                loss_age = criterion(output_age, age_.float())
                # loss_face_ol = criterion(output_ol, result.float())

            # gender loss
            loss_gender = criterion_gender(output_gender, gender_)
            pts_loss.append(loss_pts.item())
            age_loss.append(loss_age.item())
            gender_loss.append(loss_gender.item())
            # face_ol_loss.append(loss_face_ol.item())
            # multi-task loss: the tasks are weighted differently, with the
            # harder tasks getting the larger weights
            loss = loss_pts + 0.3 * loss_age + 0.25 * loss_gender
            # loss = loss_pts + 0.3 * loss_age + 0.25 * loss_gender + 0.2 * loss_face_ol
            sum_loss.append(loss.item())
            # update the running mean of the loss
            loss_mean += loss.item()
            loss_idx += 1.
            # log every batch
            if i % 1 == 0:
                loc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                print(' %s - %s - epoch [%s/%s] (%s/%s):' % (
                    loc_time, ops.model, epoch, ops.epochs, i, int(dataset.__len__() / ops.batch_size)), \
                      'Mean Loss : %.6f - Loss: %.6f' % (loss_mean / loss_idx, loss.item()), \
                      " loss_pts:{:.4f},loss_age:{:.4f},loss_gender:{:.4f},loss_face_euler:{:.4f}".format(
                          loss_pts.item(),
                          loss_age.item(),
                          loss_gender.item(),
                          # loss_face_ol.item() when the Euler-angle branch is enabled
                          0.), \
                      ' lr : %.5f' % init_lr, ' bs:', ops.batch_size, \
                      ' img_size: %s x %s' % (ops.img_size[0], ops.img_size[1]))
            # backpropagate
            loss.backward()
            # let the optimizer update the model parameters
            optimizer.step()
            # reset the gradients
            optimizer.zero_grad()
            # write into tensorboard
            index_tensorboard_step += 1
| tb_writer.add_scalar("loss", loss.item(), index_tensorboard_step) 211 | tb_writer.add_scalar("loss_pts", loss_pts.item(), index_tensorboard_step) 212 | tb_writer.add_scalar("loss_age", loss_age.item(), index_tensorboard_step) 213 | tb_writer.add_scalar("loss_gender", loss_gender.item(), index_tensorboard_step) 214 | # tb_writer.add_scalar("loss_face_ol", loss_face_ol.item(), index_tensorboard_step) 215 | tb_writer.add_scalar("index_tensorboard_step", index_tensorboard_step, index_tensorboard_step) 216 | print('index_tensorboard_step-->', index_tensorboard_step) 217 | 218 | # 第四步:保存ckpt 219 | # 每3个epoch保存一次训练结果 220 | torch.save(model_.state_dict(), ops.model_exp + '{}_epoch-{}.pth'.format(ops.model, epoch)) 221 | 222 | # 第五步:损失变化的曲线 223 | # 创建第一张画布 224 | plt.figure(0) 225 | # 绘制pts损失曲线 226 | plt.plot(pts_loss, label="pts Loss") 227 | # 绘制性别损失曲线 , 颜色为红色 228 | plt.plot(gender_loss, color="red", label="gender Loss") 229 | # 绘制年龄损失曲线 , 颜色为绿色 230 | plt.plot(age_loss, color="green", label="age Loss") 231 | # 绘制欧拉角回归,颜色黄色 232 | plt.plot(face_ol_loss, color="yellow", label="face_ol_loss") 233 | # 绘制总损失曲线 , 颜色为蓝色 234 | plt.plot(sum_loss, color="blue", label="sum Loss") 235 | # 曲线说明在左上方 236 | plt.legend(loc='upper left') 237 | # 保存图片 238 | plt.savefig("./loss.png") 239 | 240 | 241 | # 1、配置信息设置 242 | if __name__ == "__main__": 243 | parser = argparse.ArgumentParser(description=' Project Multi Task Train') 244 | # 模型输出文件夹 245 | parser.add_argument('--model_exp', type=str, default='./model_exp', 246 | help='model_exp') 247 | # 模型类型 248 | parser.add_argument('--model', type=str, default='resnet_34', 249 | help='model : resnet_152') 250 | # landmarks 个数*2(每个关键点有x,y两个坐标) 251 | parser.add_argument('--num_classes', type=int, default=196, 252 | help='num_classes') 253 | # GPU选择 254 | parser.add_argument('--GPUS', type=str, default='0', 255 | help='GPUS') 256 | # 训练集标注信息 257 | parser.add_argument('--train_path', type=str, 258 | default='C:\\Users\\86183\\Desktop\\wiki_crop_face_multi_task\\label_new\\', help='train_path') 259 | # default = '/root/cv/dataset/人脸/datasets/wiki_crop_face_multi_task/label_new/', help = 'train_path') 260 | 261 | # 初始化学习率 262 | parser.add_argument('--pretrained', type=bool, default=True, 263 | help='imageNet_Pretrain') 264 | # 模型微调 265 | parser.add_argument('--fintune_model', type=str, 266 | # default='/www/model/resnet-预训练模型/resnet34/resnet34-333f7ec4.pth', 267 | default='none', 268 | # default='/root/cv/pycharm/人脸检测/人脸多任务/model_exp/2022-01-28_11-23-49/resnet_34_epoch-13', 269 | # default='/root/cv/pycharm/人脸检测/人脸多任务/model/predict_多任务/face_multitask-resnet_34_imgsize-256-20210425.pth', 270 | help='fintune_model') 271 | # 损失函数定义 272 | parser.add_argument('--loss_define', type=str, default='wing_loss', 273 | help='define_loss') 274 | # 初始化学习率 275 | parser.add_argument('--init_lr', type=float, default=2e-4, 276 | help='init_learningRate') 277 | # 优化器正则损失权重 278 | parser.add_argument('--weight_decay', type=float, default=5e-4, 279 | help='weight_decay') 280 | # 训练每批次图像数量 281 | parser.add_argument('--batch_size', type=int, default=64, 282 | help='batch_size') 283 | # dropout 284 | parser.add_argument('--dropout', type=float, default=0.5, 285 | help='dropout') 286 | # 训练周期 287 | parser.add_argument('--epochs', type=int, default=100, 288 | help='epochs') 289 | # 训练数据生成器线程数 290 | parser.add_argument('--num_workers', type=int, default=0, 291 | help='num_workers') 292 | # 输入模型图片尺寸 293 | parser.add_argument('--img_size', type=tuple, default=(256, 256), 294 | help='img_size') 295 | # 

# 1. Configuration
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=' Project Multi Task Train')
    # output folder for the trained models
    parser.add_argument('--model_exp', type=str, default='./model_exp',
                        help='model_exp')
    # model type
    parser.add_argument('--model', type=str, default='resnet_34',
                        help='model : resnet_152')
    # number of landmark outputs: 98 keypoints * 2 coordinates (x, y) each = 196
    parser.add_argument('--num_classes', type=int, default=196,
                        help='num_classes')
    # GPU selection
    parser.add_argument('--GPUS', type=str, default='0',
                        help='GPUS')
    # annotations of the training set
    parser.add_argument('--train_path', type=str,
                        default='C:\\Users\\86183\\Desktop\\wiki_crop_face_multi_task\\label_new\\', help='train_path')
    # default='/root/cv/dataset/人脸/datasets/wiki_crop_face_multi_task/label_new/', help='train_path')

    # whether to start from ImageNet-pretrained weights
    parser.add_argument('--pretrained', type=bool, default=True,
                        help='imageNet_Pretrain')
    # fine-tune checkpoint
    parser.add_argument('--fintune_model', type=str,
                        # default='/www/model/resnet-预训练模型/resnet34/resnet34-333f7ec4.pth',
                        default='none',
                        # default='/root/cv/pycharm/人脸检测/人脸多任务/model_exp/2022-01-28_11-23-49/resnet_34_epoch-13',
                        # default='/root/cv/pycharm/人脸检测/人脸多任务/model/predict_多任务/face_multitask-resnet_34_imgsize-256-20210425.pth',
                        help='fintune_model')
    # loss function for the regression tasks
    parser.add_argument('--loss_define', type=str, default='wing_loss',
                        help='define_loss')
    # initial learning rate
    parser.add_argument('--init_lr', type=float, default=2e-4,
                        help='init_learningRate')
    # weight of the optimizer's L2 regularization
    parser.add_argument('--weight_decay', type=float, default=5e-4,
                        help='weight_decay')
    # number of images per training batch
    parser.add_argument('--batch_size', type=int, default=64,
                        help='batch_size')
    # dropout
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='dropout')
    # number of training epochs
    parser.add_argument('--epochs', type=int, default=100,
                        help='epochs')
    # number of dataloader worker threads
    parser.add_argument('--num_workers', type=int, default=0,
                        help='num_workers')
    # input image size of the model (note: type=tuple only works through the default, not from the CLI)
    parser.add_argument('--img_size', type=tuple, default=(256, 256),
                        help='img_size')
    # whether the dataloader applies data augmentation
    parser.add_argument('--flag_agu', type=bool, default=False,
                        help='data_augmentation')
    # whether to wipe the model output folder first
    parser.add_argument('--clear_model_exp', type=bool, default=False,
                        help='clear_model_exp')
    parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')

    # --------------------------------------------------------------------------
    args = parser.parse_args()  # parse the arguments
    # --------------------------------------------------------------------------
    # create the root output directory from the configuration
    # mkdir_ behaves as follows:
    #   if the path exists and flag_rm=True, it is removed and recreated,
    #   otherwise it is left untouched;
    #   if the path does not exist, it is simply created
    mkdir_(args.model_exp, flag_rm=args.clear_model_exp)
    loc_time = time.localtime()
    args.model_exp = args.model_exp + '/' + time.strftime("%Y-%m-%d_%H-%M-%S", loc_time) + '/'
    # create a result directory named after the training start time
    mkdir_(args.model_exp, flag_rm=args.clear_model_exp)
    # parse_args() returns a namespace; the vars() builtin turns it into a dict
    unparsed = vars(args)
    # print the configuration
    for key in unparsed.keys():
        print('{} : {}'.format(key, unparsed[key]))
    # current time
    unparsed['time'] = time.strftime("%Y-%m-%d %H:%M:%S", loc_time)
    # dump the configuration to a json file
    fs = open(args.model_exp + 'train_ops.json', "w", encoding='utf-8')
    json.dump(unparsed, fs, ensure_ascii=False, indent=1)
    fs.close()
    # train the model
    trainer(ops=args)
    print('well done : {}'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
--------------------------------------------------------------------------------
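
# ------------------------------------------------------------------------------
# [editor's addition] A minimal sketch of reloading a checkpoint written by
# trainer() above for inference. The run-directory name is hypothetical
# (trainer() timestamps it at launch) and the hyperparameters mirror the
# argparse defaults.
import torch
from model import resnet34

ckpt_path = './model_exp/2022-01-28_11-23-49/resnet_34_epoch-99.pth'  # hypothetical run directory
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = resnet34(pretrained=False, landmarks_num=196, img_size=256, dropout_factor=0.5)
model.load_state_dict(torch.load(ckpt_path, map_location=device))
model = model.to(device).eval()
# a forward pass then returns (landmarks, gender_logits, age, euler_angles), as used in trainer()
# ------------------------------------------------------------------------------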