├── weights └── 这里放入模型 ├── requirements.txt ├── 启动和结束进程.py ├── 辅助功能.py ├── json ├── 名称_编号.json ├── 编号_名称.json ├── 名称_操作.json ├── 词_数表.json └── 数_词表.json ├── resnet_utils.py ├── Layers.py ├── 杂项.py ├── 运行辅助.py ├── config.py ├── README.md ├── Batch.py ├── 处理训练数据5.py ├── Embed.py ├── Sublayers.py ├── 训练状态判断模型A.py ├── 筛选事件特征图片.py ├── 训练X.py ├── 取训练数据.py ├── 状态标注.py ├── LICENSE ├── 训练数据截取_A.py └── 模型_策略梯度.py /weights/这里放入模型: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Flyingbirdd/ai-GAME/HEAD/requirements.txt -------------------------------------------------------------------------------- /启动和结束进程.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | os.system('taskkill /IM scrcpy.exe /F') 5 | os.system('taskkill /IM adb.exe /F') 6 | #os.system('adb connect 127.0.0.1:7555') 7 | os.system("scrcpy --max-size 960") -------------------------------------------------------------------------------- /辅助功能.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | def 状态信息综合(图片张量,操作序列,trg_mask): 3 | 状态={} 4 | 状态['图片张量']=图片张量[np.newaxis, :] 5 | 状态['操作序列'] = 操作序列 6 | 状态['trg_mask']=trg_mask 7 | return 状态 8 | -------------------------------------------------------------------------------- /json/名称_编号.json: -------------------------------------------------------------------------------- 1 | {"攻击":0,"补刀":1,"推塔":2,"一技能":3,"二技能":4,"三技能":5,"召唤师技能":6,"回城":7,"发起进攻":8,"发起撤退":9,"发起集合":10,"上移":11,"右移":12,"下移":13,"左移":14,"左上移":15,"左下移":16,"右下移":17,"右上移":18,"移动停":19,"无移动":20,"无动作":21,"恢复":21} -------------------------------------------------------------------------------- /json/编号_名称.json: -------------------------------------------------------------------------------- 1 | {"0": "攻击", "1": "补刀", "2": "推塔", "3": "一技能", "4": "二技能", "5": "三技能", "6": "召唤师技能", "7": "回城", "8": "发起进攻", "9": "发起撤退", "10": "发起集合", "11": "上移", "12": "右移", "13": "下移", "14": "左移", "15": "左上移", "16": "左下移", "17": "右下移", "18": "右上移", "19": "移动停", "20": "无移动", "21": "无动作", "22": "恢复"} -------------------------------------------------------------------------------- /resnet_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class myResnet(nn.Module): 6 | def __init__(self, resnet): 7 | super(myResnet, self).__init__() 8 | self.resnet = resnet 9 | 10 | def forward(self, img, att_size=6): 11 | x = img 12 | 13 | x = self.resnet.conv1(x) 14 | x = self.resnet.bn1(x) 15 | x = self.resnet.relu(x) 16 | x = self.resnet.maxpool(x) 17 | 18 | x = self.resnet.layer1(x) 19 | x = self.resnet.layer2(x) 20 | x = self.resnet.layer3(x) 21 | x = self.resnet.layer4(x) 22 | 23 | fc = x.mean(3).mean(2).squeeze() 24 | att = F.adaptive_avg_pool2d(x,[att_size,att_size]).squeeze().permute(1, 2, 0) 25 | 26 | return fc, att 27 | 28 | -------------------------------------------------------------------------------- /json/名称_操作.json: -------------------------------------------------------------------------------- 1 | {"攻击":"d 0 169 1982 100\nc\nu 0\nc\n","补刀":"d 0 106 1822 100\nc\nu 0\nc\n","推塔":"d 0 318 2067 100\nc\nu 0\nc\n","一技能":"d 0 133 1660 100\nc\nu 0\nc\n","二技能":"d 0 342 1782 100\nc\nu 0\nc\n","三技能":"d 0 455 1984 100\nc\nu 0\nc\n","召唤师技能":"d 0 117 1496 100\nc\nu 0\nc\n","回城":"d 0 108 1206 100\nc\nu 0\nc\n","发起进攻":"d 0 945 2110 100\nc\nu 0\nc\n","发起撤退":"d 0 851 2112 100\nc\nu 0\nc\n","发起集合":"d 0 765 2110 100\nc\nu 0\nc\n","上移":"d 1 237 321 300\nc\nm 1 349 321 100\nc\n","右移":"d 1 237 321 300\nc\nm 1 237 434 100\nc\n","下移":"d 1 237 321 300\nc\nm 1 180 321 100\nc\n","左移":"d 1 237 321 300\nc\nm 1 237 209 100\nc\n","左上移":"d 1 237 321 300\nc\nm 1 315 243 100\nc\n","左下移":"d 1 237 321 300\nc\nm 1 158 243 100\nc\n","右下移":"d 1 237 321 300\nc\nm 1 158 400 100\nc\n","右上移":"d 1 237 321 300\nc\nm 1 315 400 100\nc\n","移动停":"u 1\nc\n","恢复":"d 0 111 1345 100\nc\nu 0\nc\n"} -------------------------------------------------------------------------------- /Layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from Sublayers import FeedForward, MultiHeadAttention, Norm 4 | 5 | 6 | class DecoderLayer(nn.Module): 7 | def __init__(self, d_model, heads, dropout=0.1): 8 | super().__init__() 9 | self.norm_1 = Norm(d_model) 10 | self.norm_2 = Norm(d_model) 11 | self.norm_3 = Norm(d_model) 12 | 13 | self.dropout_1 = nn.Dropout(dropout) 14 | self.dropout_2 = nn.Dropout(dropout) 15 | self.dropout_3 = nn.Dropout(dropout) 16 | 17 | self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout) 18 | self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout) 19 | self.ff = FeedForward(d_model, dropout=dropout) 20 | 21 | def forward(self, x, trg_mask): 22 | x2 = self.norm_1(x) 23 | x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask)) 24 | x2 = self.norm_3(x) 25 | x2 = self.ff(x2) 26 | x = x + self.dropout_3(x2) 27 | return x -------------------------------------------------------------------------------- /杂项.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import torch 3 | import numpy as np 4 | def 打印抽样数据(数_词表,数据, 输出_分): 5 | 临 = 数据[0] 6 | 欲打印=[数_词表[str(临[i,0])] for i in range(0,临.shape[0])] 7 | 临 = 输出_分.cpu().numpy() 8 | 欲打印2 = [数_词表[str(临[i])] for i in range(0,临.shape[0])] 9 | print("抽样输出",欲打印) 10 | print("目标输出", 欲打印2) 11 | # for i in range(16): 12 | # print(数_词表[str(临[i, 0])]) 13 | 14 | def nopeak_mask(size, device): 15 | np_mask = np.triu(np.ones((1, size, size)), 16 | k=1).astype('uint8') 17 | np_mask = Variable(torch.from_numpy(np_mask) == 0) 18 | 19 | np_mask = np_mask.cuda(device) 20 | return np_mask 21 | def 打印测试数据(数_词表,数据, 输人_分,标签): 22 | 临 = 数据[0] 23 | 欲打印=[数_词表[str(临[i])] for i in range(临.size)] 24 | 打印="" 25 | for i in range(len(欲打印)): 26 | 打印=打印+欲打印[i] 27 | 28 | 29 | 30 | 临 = 输人_分.cpu().numpy()[0] 31 | 欲打印2 = [数_词表[str(临[i])]for i in range(输人_分.size(1))] 32 | # 欲打印2=str(欲打印2) 33 | # print("输入:", 欲打印2) 34 | if 标签==打印: 35 | return True 36 | else: 37 | print(打印) 38 | return False 39 | 40 | 41 | 42 | print("输出:",打印) 43 | 44 | # for i in range(16): 45 | # print(数_词表[str(临[i, 0])]) 46 | def 打印测试数据_A(数_词表,数据, 输人_分): 47 | if 数据.shape[0]!=0: 48 | 49 | 临 = 数据[0] 50 | 欲打印=[数_词表[str(临[i])] for i in range(临.size)] 51 | 打印="" 52 | for i in range(len(欲打印)): 53 | 打印=打印+欲打印[i] 54 | 55 | 56 | 57 | 临 = 输人_分.cpu().numpy()[0] 58 | 欲打印2 = [数_词表[str(临[i])]for i in range(输人_分.size(1))] 59 | 欲打印2=str(欲打印2) 60 | #print("输入:", 欲打印2) 61 | print("输出:",打印) 62 | 63 | -------------------------------------------------------------------------------- /运行辅助.py: -------------------------------------------------------------------------------- 1 | import win32gui, win32ui, win32con 2 | from PIL import Image 3 | from pyminitouch import MNTDevice 4 | 5 | 6 | class MyMNTDevice(MNTDevice): 7 | def __init__(self,ID): 8 | MNTDevice.__init__(self,ID) 9 | 10 | 11 | def 发送(self,内容): 12 | self.connection.send(内容) 13 | 14 | def 取图(窗口名称): 15 | # 获取后台窗口的句柄,注意后台窗口不能最小化 16 | hWnd = win32gui.FindWindow(0,窗口名称) # 窗口的类名可以用Visual Studio的SPY++工具获取 17 | # 获取句柄窗口的大小信息 18 | left, top, right, bot = win32gui.GetWindowRect(hWnd) 19 | width = right - left 20 | height = bot - top 21 | # 返回句柄窗口的设备环境,覆盖整个窗口,包括非客户区,标题栏,菜单,边框 22 | hWndDC = win32gui.GetWindowDC(hWnd) 23 | # 创建设备描述表 24 | mfcDC = win32ui.CreateDCFromHandle(hWndDC) 25 | # 创建内存设备描述表 26 | saveDC = mfcDC.CreateCompatibleDC() 27 | # 创建位图对象准备保存图片 28 | saveBitMap = win32ui.CreateBitmap() 29 | # 为bitmap开辟存储空间 30 | saveBitMap.CreateCompatibleBitmap(mfcDC, width, height) 31 | # 将截图保存到saveBitMap中 32 | saveDC.SelectObject(saveBitMap) 33 | # 保存bitmap到内存设备描述表 34 | saveDC.BitBlt((0, 0), (width, height), mfcDC, (0, 0), win32con.SRCCOPY) 35 | 36 | 37 | bmpinfo = saveBitMap.GetInfo() 38 | bmpstr = saveBitMap.GetBitmapBits(True) 39 | ###生成图像 40 | im_PIL = Image.frombuffer('RGB',(bmpinfo['bmWidth'],bmpinfo['bmHeight']),bmpstr,'raw','BGRX') 41 | #im_PIL= Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr) 42 | #im_PIL =Image.frombytes('RGB',(bmpinfo['bmWidth'],bmpinfo['bmHeight']),bmpstr) 43 | box = (8,31,968,511) 44 | im2 = im_PIL.crop(box) 45 | #im2.save('./dd2d.jpg') 46 | win32gui.DeleteObject(saveBitMap.GetHandle()) 47 | saveDC.DeleteDC() 48 | mfcDC.DeleteDC() 49 | win32gui.ReleaseDC(hWnd, hWndDC) 50 | return im2 51 | 52 | 53 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | 2 | class GPT2Config(object): 3 | def __init__( 4 | self, 5 | vocab_size_or_config_json_file=12491, 6 | n_positions=1024, 7 | n_ctx=512, 8 | n_embd=768, 9 | n_layer=6, 10 | n_head=6, 11 | layer_norm_epsilon=1e-5, 12 | initializer_range=0.02, 13 | ): 14 | self.vocab_size = vocab_size_or_config_json_file 15 | self.n_ctx = n_ctx 16 | self.n_positions = n_positions 17 | self.n_embd = n_embd 18 | self.n_layer = n_layer 19 | self.n_head = n_head 20 | self.layer_norm_epsilon = layer_norm_epsilon 21 | self.initializer_range = initializer_range 22 | 23 | class TransformerConfig(object): 24 | def __init__( 25 | self, 26 | d_model=768, 27 | n_layers=12, 28 | heads=12, 29 | dropout=0.0, 30 | load_weights='weights' 31 | ): 32 | self.d_model = d_model 33 | self.n_layers = n_layers 34 | self.heads = heads 35 | self.dropout = dropout 36 | self.load_weights = load_weights 37 | 38 | 39 | class GPT2Config(object): 40 | def __init__( 41 | self, 42 | vocab_size_or_config_json_file=12491, 43 | n_positions=1024, 44 | n_ctx=1024, 45 | n_embd=768, 46 | n_layer=12, 47 | n_head=12, 48 | layer_norm_epsilon=1e-5, 49 | initializer_range=0.02, 50 | ): 51 | self.vocab_size = vocab_size_or_config_json_file 52 | self.n_ctx = n_ctx 53 | self.n_positions = n_positions 54 | self.n_embd = n_embd 55 | self.n_layer = n_layer 56 | self.n_head = n_head 57 | self.layer_norm_epsilon = layer_norm_epsilon 58 | self.initializer_range = initializer_range -------------------------------------------------------------------------------- /json/词_数表.json: -------------------------------------------------------------------------------- 1 | {"上移_攻击": 0, "上移_补刀": 1, "上移_推塔": 2, "上移_一技能": 3, "上移_二技能": 4, "上移_三技能": 5, "上移_召唤师技能": 6, "上移_回城": 7, "上移_发起进攻": 8, "上移_发起撤退": 9, "上移_发起集合": 10, "上移_无动作": 11, "上移_恢复": 12, "右移_攻击": 13, "右移_补刀": 14, "右移_推塔": 15, "右移_一技能": 16, "右移_二技能": 17, "右移_三技能": 18, "右移_召唤师技能": 19, "右移_回城": 20, "右移_发起进攻": 21, "右移_发起撤退": 22, "右移_发起集合": 23, "右移_无动作": 24, "右移_恢复": 25, "下移_攻击": 26, "下移_补刀": 27, "下移_推塔": 28, "下移_一技能": 29, "下移_二技能": 30, "下移_三技能": 31, "下移_召唤师技能": 32, "下移_回城": 33, "下移_发起进攻": 34, "下移_发起撤退": 35, "下移_发起集合": 36, "下移_无动作": 37, "下移_恢复": 38, "左移_攻击": 39, "左移_补刀": 40, "左移_推塔": 41, "左移_一技能": 42, "左移_二技能": 43, "左移_三技能": 44, "左移_召唤师技能": 45, "左移_回城": 46, "左移_发起进攻": 47, "左移_发起撤退": 48, "左移_发起集合": 49, "左移_无动作": 50, "左移_恢复": 51, "左上移_攻击": 52, "左上移_补刀": 53, "左上移_推塔": 54, "左上移_一技能": 55, "左上移_二技能": 56, "左上移_三技能": 57, "左上移_召唤师技能": 58, "左上移_回城": 59, "左上移_发起进攻": 60, "左上移_发起撤退": 61, "左上移_发起集合": 62, "左上移_无动作": 63, "左上移_恢复": 64, "左下移_攻击": 65, "左下移_补刀": 66, "左下移_推塔": 67, "左下移_一技能": 68, "左下移_二技能": 69, "左下移_三技能": 70, "左下移_召唤师技能": 71, "左下移_回城": 72, "左下移_发起进攻": 73, "左下移_发起撤退": 74, "左下移_发起集合": 75, "左下移_无动作": 76, "左下移_恢复": 77, "右下移_攻击": 78, "右下移_补刀": 79, "右下移_推塔": 80, "右下移_一技能": 81, "右下移_二技能": 82, "右下移_三技能": 83, "右下移_召唤师技能": 84, "右下移_回城": 85, "右下移_发起进攻": 86, "右下移_发起撤退": 87, "右下移_发起集合": 88, "右下移_无动作": 89, "右下移_恢复": 90, "右上移_攻击": 91, "右上移_补刀": 92, "右上移_推塔": 93, "右上移_一技能": 94, "右上移_二技能": 95, "右上移_三技能": 96, "右上移_召唤师技能": 97, "右上移_回城": 98, "右上移_发起进攻": 99, "右上移_发起撤退": 100, "右上移_发起集合": 101, "右上移_无动作": 102, "右上移_恢复": 103, "移动停_攻击": 104, "移动停_补刀": 105, "移动停_推塔": 106, "移动停_一技能": 107, "移动停_二技能": 108, "移动停_三技能": 109, "移动停_召唤师技能": 110, "移动停_回城": 111, "移动停_发起进攻": 112, "移动停_发起撤退": 113, "移动停_发起集合": 114, "移动停_无动作": 115, "移动停_恢复": 116, "无移动_攻击": 117, "无移动_补刀": 118, "无移动_推塔": 119, "无移动_一技能": 120, "无移动_二技能": 121, "无移动_三技能": 122, "无移动_召唤师技能": 123, "无移动_回城": 124, "无移动_发起进攻": 125, "无移动_发起撤退": 126, "无移动_发起集合": 127, "无移动_无动作": 128, "无移动_恢复": 129} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI玩手机游戏 2 | ## 说明 3 | 一、运行环境win10;win7未测试,估计是可以,还需要添加 PyQt5模块用于截图参考(requirements.txt)。 4 | 环境配置参考视频 1 5 | 链接:https://pan.baidu.com/s/1fJRyX-scxbeOJ2lsddTLiA 6 | 提取码:msr5 7 | 二、需要1060或以上算力的显卡。 8 | 三、需要一台打开安卓调试并能玩王者荣耀的手机。 9 | 四、需要下载[scrcpy](https://github.com/Genymobile/scrcpy/blob/master/README.zh-Hans.md) 的windows版本。 把所有文件解压到项目根目录即可(这是我的笨办法) 。 10 | 五、pyminitouch库运行时会自动安装minitouch。如果无法自动安装则需要手动安装[minitouch](https://github.com/openstf/minitouch) ,比较麻烦。 11 | 还有,minitouch不支持Android10及以上系统 12 | 13 | ## 运行游戏AI 14 | 15 | 1. 下载预训练模型 16 | 你可以通过以下链接下载训练过的模型: 17 | 18 | Google 云盘 19 | 百度网盘 (提取码:oiar) 20 | 下载完成后,将模型文件放入 weights 文件夹。 21 | 22 | 注意: 如果需要加载不同的模型,请修改 模型_策略梯度.py 文件中的第 261 行。 23 | 2. 启动 scrcpy 24 | 运行脚本 “启动和结束进程.py” 启动 scrcpy。 25 | 3. 启动王者荣耀并进入5v5人机对战 26 | 启动 王者荣耀 游戏并进入 5v5 人机对战模式。 27 | 然后运行脚本 “训练数据截取_A.py” 开始收集训练数据。 28 | ## 生成训练数据(半自动) 29 | 运行 “训练数据截取_A.py” 这时就可以生成训练用的数据。 30 | 按"i"键则结束或则是重新运行 31 | 按键'w' 's ' 'a' 'd'控制方向 左、下、右箭头对应是1、2、3技能,上箭头长按则攻击。其它按键请参考源码。 32 | 注意!! 如果用按键控制则会记录按键操作数据,否则会记录AI玩游戏的数据。 33 | 根据我的经验,随着模型训练次数增加,手动干预的次数会越来越小。但总体来说训练数据的获取依然需要人为干预,因为游戏结束 34 | 后要重新开始需要手动操控(我并没有做自动化脚本)。 35 | 36 | # 如何训练主模型 37 | 一、下载状态判断模型 你可以从[google云盘](https://drive.google.com/file/d/1eqy-xX29sjEguuQI_1m8qaLEX3g4KAQ7/view?usp=sharing) 下载训练过的模型,也可以百度网盘下载 38 | 链接:https://pan.baidu.com/s/1-UCuPutZQck3Iawot9bGrw 39 | 提取码:545t 40 | 后放入weights文件夹下 41 | 二、数据预处理 42 | 将图片用resnet101预处理后再和对应操作数据一起处理后用numpy数组储存备用。 43 | 具体要做的就是运行 “处理训练数据5.py” 44 | 三、训练 45 | 预处理完成以后运行 “训练X.py”即可。 46 | 注意!模型保存路径 在 模型_策略梯度.py 295和296行更改。 47 | # 如何训练状态判断模型 48 | 状态判断模型概述 49 | 状态判断模型实际上是一个图像分类神经网络,结构与主模型基本相同,但参数有所不同。一、获取标注数据 50 | 标注数据是在游戏运行过程中进行的。请运行 状态标注.py 脚本进行标注。 51 | 标注规则: 52 | 53 | Key.left:击杀小兵或野怪,或推掉塔。 54 | Key.down:击杀敌方英雄。 55 | Key.right:被击塔攻击。 56 | Key.up:被击杀。 57 | 注意: 标注模型会自动参与数据标注,减轻人工标注的工作负担。 58 | 59 | 二、校正标注数据 60 | 由于前一步获取的标注数据可能不准确,需要手动进行校准。 61 | 运行 筛选事件特征图片.py 脚本进行校正。 62 | 具体操作参考代码中的第 68 至 81 行,注意:其中“过”表示认同原始标注。三、训练 63 | 运行 训练状态判断模型A.py 脚本开始训练状态判断模型。 64 | 65 | -------------------------------------------------------------------------------- /json/数_词表.json: -------------------------------------------------------------------------------- 1 | {"0": "上移_攻击", "1": "上移_补刀", "2": "上移_推塔", "3": "上移_一技能", "4": "上移_二技能", "5": "上移_三技能", "6": "上移_召唤师技能", "7": "上移_回城", "8": "上移_发起进攻", "9": "上移_发起撤退", "10": "上移_发起集合", "11": "上移_无动作", "12": "上移_恢复", "13": "右移_攻击", "14": "右移_补刀", "15": "右移_推塔", "16": "右移_一技能", "17": "右移_二技能", "18": "右移_三技能", "19": "右移_召唤师技能", "20": "右移_回城", "21": "右移_发起进攻", "22": "右移_发起撤退", "23": "右移_发起集合", "24": "右移_无动作", "25": "右移_恢复", "26": "下移_攻击", "27": "下移_补刀", "28": "下移_推塔", "29": "下移_一技能", "30": "下移_二技能", "31": "下移_三技能", "32": "下移_召唤师技能", "33": "下移_回城", "34": "下移_发起进攻", "35": "下移_发起撤退", "36": "下移_发起集合", "37": "下移_无动作", "38": "下移_恢复", "39": "左移_攻击", "40": "左移_补刀", "41": "左移_推塔", "42": "左移_一技能", "43": "左移_二技能", "44": "左移_三技能", "45": "左移_召唤师技能", "46": "左移_回城", "47": "左移_发起进攻", "48": "左移_发起撤退", "49": "左移_发起集合", "50": "左移_无动作", "51": "左移_恢复", "52": "左上移_攻击", "53": "左上移_补刀", "54": "左上移_推塔", "55": "左上移_一技能", "56": "左上移_二技能", "57": "左上移_三技能", "58": "左上移_召唤师技能", "59": "左上移_回城", "60": "左上移_发起进攻", "61": "左上移_发起撤退", "62": "左上移_发起集合", "63": "左上移_无动作", "64": "左上移_恢复", "65": "左下移_攻击", "66": "左下移_补刀", "67": "左下移_推塔", "68": "左下移_一技能", "69": "左下移_二技能", "70": "左下移_三技能", "71": "左下移_召唤师技能", "72": "左下移_回城", "73": "左下移_发起进攻", "74": "左下移_发起撤退", "75": "左下移_发起集合", "76": "左下移_无动作", "77": "左下移_恢复", "78": "右下移_攻击", "79": "右下移_补刀", "80": "右下移_推塔", "81": "右下移_一技能", "82": "右下移_二技能", "83": "右下移_三技能", "84": "右下移_召唤师技能", "85": "右下移_回城", "86": "右下移_发起进攻", "87": "右下移_发起撤退", "88": "右下移_发起集合", "89": "右下移_无动作", "90": "右下移_恢复", "91": "右上移_攻击", "92": "右上移_补刀", "93": "右上移_推塔", "94": "右上移_一技能", "95": "右上移_二技能", "96": "右上移_三技能", "97": "右上移_召唤师技能", "98": "右上移_回城", "99": "右上移_发起进攻", "100": "右上移_发起撤退", "101": "右上移_发起集合", "102": "右上移_无动作", "103": "右上移_恢复", "104": "移动停_攻击", "105": "移动停_补刀", "106": "移动停_推塔", "107": "移动停_一技能", "108": "移动停_二技能", "109": "移动停_三技能", "110": "移动停_召唤师技能", "111": "移动停_回城", "112": "移动停_发起进攻", "113": "移动停_发起撤退", "114": "移动停_发起集合", "115": "移动停_无动作", "116": "移动停_恢复", "117": "无移动_攻击", "118": "无移动_补刀", "119": "无移动_推塔", "120": "无移动_一技能", "121": "无移动_二技能", "122": "无移动_三技能", "123": "无移动_召唤师技能", "124": "无移动_回城", "125": "无移动_发起进攻", "126": "无移动_发起撤退", "127": "无移动_发起集合", "128": "无移动_无动作", "129": "无移动_恢复"} -------------------------------------------------------------------------------- /Batch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | #from torchtext import data 3 | import numpy as np 4 | from torch.autograd import Variable 5 | 6 | 7 | def nopeak_mask(size, device): 8 | np_mask = np.triu(np.ones((1, size, size)), 9 | k=1).astype('uint8') 10 | variable = Variable 11 | np_mask = variable(torch.from_numpy(np_mask) == 0) 12 | np_mask = np_mask.cuda(device) 13 | return np_mask 14 | 15 | def create_masks(src, trg, device): 16 | 17 | src_mask = (src != -1).unsqueeze(-2) 18 | 19 | if trg is not None: 20 | trg_mask = (trg != -1).unsqueeze(-2) 21 | trg_mask.cuda(device) 22 | size = trg.size(1) # get seq_len for matrix 23 | np_mask = nopeak_mask(size, device) 24 | trg_mask = trg_mask & np_mask 25 | else: 26 | trg_mask = None 27 | return src_mask, trg_mask 28 | 29 | # patch on Torchtext's batching process that makes it more efficient 30 | # from http://nlp.seas.harvard.edu/2018/04/03/attention.html#position-wise-feed-forward-networks 31 | 32 | # class MyIterator(data.Iterator): 33 | # def create_batches(self): 34 | # if self.train: 35 | # def pool(d, random_shuffler): 36 | # for p in data.batch(d, self.batch_size * 100): 37 | # p_batch = data.batch( 38 | # sorted(p, key=self.sort_key), 39 | # self.batch_size, self.batch_size_fn) 40 | # for b in random_shuffler(list(p_batch)): 41 | # yield b 42 | # self.batches = pool(self.data(), self.random_shuffler) 43 | # 44 | # else: 45 | # self.batches = [] 46 | # for b in data.batch(self.data(), self.batch_size, 47 | # self.batch_size_fn): 48 | # self.batches.append(sorted(b, key=self.sort_key)) 49 | 50 | global max_src_in_batch, max_tgt_in_batch 51 | 52 | def batch_size_fn(new, count, sofar): 53 | "Keep augmenting batch and calculate total number of tokens + padding." 54 | global max_src_in_batch, max_tgt_in_batch 55 | if count == 1: 56 | max_src_in_batch = 0 57 | max_tgt_in_batch = 0 58 | max_src_in_batch = max(max_src_in_batch, len(new.src)) 59 | max_tgt_in_batch = max(max_tgt_in_batch, len(new.trg) + 2) 60 | src_elements = count * max_src_in_batch 61 | tgt_elements = count * max_tgt_in_batch 62 | return max(src_elements, tgt_elements) 63 | -------------------------------------------------------------------------------- /处理训练数据5.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import numpy as np 4 | import os 5 | import json 6 | from PIL import Image 7 | from resnet_utils import myResnet 8 | 9 | 操作记录='../训练数据样本/未用' 10 | if not os.path.exists(操作记录): 11 | os.makedirs(操作记录) 12 | 13 | device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu") 14 | resnet101=torchvision.models.resnet101(pretrained=True).eval() 15 | resnet101=myResnet(resnet101).cuda(device).requires_grad_(False) 16 | 词数词典路径="./json/词_数表.json" 17 | 18 | with open(词数词典路径, encoding='utf8') as f: 19 | 词数词典=json.load(f) 20 | 21 | for root, dirs, files in os.walk(操作记录): 22 | if len(dirs)>0: 23 | break 24 | for 号 in dirs: 25 | 路径json = 操作记录+'/' + 号 + '/_操作数据.json' 26 | numpy数组路径= 操作记录+'/' + 号 + '/图片_操作预处理数据2.npz' 27 | if os.path.isfile(numpy数组路径): 28 | continue 29 | 30 | 图片张量 = torch.Tensor(0) 31 | 32 | # print(图片张量.shape[0]) 33 | 操作张量 = torch.Tensor(0) 34 | 35 | 伪词序列 = torch.from_numpy(np.ones((1, 60)).astype(np.int64)).cuda(device).unsqueeze(0) 36 | 37 | 操作序列 = np.ones((1, 1)) 38 | 结束序列 = np.ones((1, 1)) 39 | 计数 = 0 40 | print('正在处理{}'.format(号)) 41 | 数据列=[] 42 | with open(路径json, encoding='ansi') as f: 43 | 移动操作='无移动' 44 | while True: 45 | df = f.readline() 46 | df = df.replace('\'', '\"') 47 | 48 | 49 | if df == "": 50 | break 51 | df = json.loads(df) 52 | 数据列.append(df) 53 | # for i in range(len(数据列)): 54 | # if i>0 and 数据列[i]['动作操作']!='无动作' and 数据列[i-1]['动作操作']=='无动作' : 55 | # 数据列[i-1]['动作操作']=数据列[i]['动作操作'] 56 | # if i>1 and 数据列[i-2]['动作操作']=='无动作' : 57 | # 数据列[i - 2]['动作操作'] = 数据列[i]['动作操作'] 58 | 59 | 60 | 61 | with open(路径json, encoding='ansi') as f: 62 | 移动操作='无移动' 63 | for i in range(len(数据列)): 64 | df = 数据列[i] 65 | 66 | if 图片张量.shape[0] == 0: 67 | img = Image.open(操作记录+'/' + 号 + '/{}.jpg'.format(df["图片号"])) 68 | img2 = np.array(img) 69 | 70 | img2 = torch.from_numpy(img2).cuda(device).unsqueeze(0).permute(0, 3, 2, 1) / 255 71 | _,out = resnet101(img2) 72 | 图片张量 = out.reshape(1,6*6*2048) 73 | 移动操作a=df["移动操作"] 74 | if 移动操作a!='无移动': 75 | 移动操作=移动操作a 76 | 77 | 操作序列[0, 0] = 词数词典[移动操作 + "_" + df["动作操作"]] 78 | 结束序列[0, 0]=df["结束"] 79 | else: 80 | img = Image.open(操作记录+'/' + 号 + '/{}.jpg'.format(df["图片号"])) 81 | img2 = np.array(img) 82 | 83 | img2 = torch.from_numpy(img2).cuda(device).unsqueeze(0).permute(0, 3, 2, 1) / 255 84 | _,out= resnet101(img2) 85 | 86 | 图片张量 = torch.cat((图片张量, out.reshape(1,6*6*2048)), 0) 87 | 移动操作a=df["移动操作"] 88 | if 移动操作a!='无移动': 89 | 移动操作=移动操作a 90 | 操作序列=np.append(操作序列, 词数词典[移动操作 + "_" + df["动作操作"]]) 91 | 结束序列 = np.append(结束序列, df["结束"]) 92 | #操作序列[0, 0] = 词数词典[df["移动操作"] + "_" + df["动作操作"]] 93 | 94 | 图片张量np=图片张量.cpu().numpy() 95 | 操作序列=操作序列.astype(np.int64) 96 | np.savez(numpy数组路径, 图片张量np=图片张量np, 操作序列=操作序列,结束序列=结束序列) 97 | 98 | -------------------------------------------------------------------------------- /Embed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | import numpy as np 7 | 8 | 9 | class Embedder(nn.Module): 10 | def __init__(self, vocab_size, d_model): 11 | super().__init__() 12 | self.d_model = d_model 13 | self.embed = Embedder2(vocab_size, d_model) 14 | 15 | def forward(self, x): 16 | return self.embed(x) 17 | 18 | 19 | class PositionalEncoder(nn.Module): 20 | def __init__(self, d_model, max_seq_len=1024, dropout=0.1): 21 | super().__init__() 22 | self.d_model = d_model 23 | self.dropout = nn.Dropout(dropout) 24 | # create constant 'pe' matrix with values dependant on 25 | # pos and i 26 | pe = torch.zeros(max_seq_len, d_model) 27 | for pos in range(max_seq_len): 28 | for i in range(0, d_model, 2): 29 | pe[pos, i] = \ 30 | math.sin(pos / (10000 ** ((2 * i) / d_model))) 31 | pe[pos, i + 1] = \ 32 | math.cos(pos / (10000 ** ((2 * (i + 1)) / d_model))) 33 | pe = pe.unsqueeze(0) 34 | self.register_buffer('pe', pe) 35 | 36 | def forward(self, x): 37 | # make embeddings relatively larger 38 | x = x * math.sqrt(self.d_model) 39 | # add constant to embedding 40 | seq_len = x.size(1) 41 | pe = Variable(self.pe[:, :seq_len], requires_grad=False) 42 | if x.is_cuda: 43 | pe.cuda() 44 | x = x + pe 45 | x = self.dropout(x) 46 | return x 47 | 48 | 49 | class Embedder2(nn.Module): 50 | def __init__(self, num_embeddings, embedding_dim, padding_idx=None, 51 | max_norm=None, norm_type=2., scale_grad_by_freq=False, 52 | sparse=False, _weight=None): 53 | super(Embedder2, self).__init__() 54 | self.num_embeddings = num_embeddings 55 | self.embedding_dim = embedding_dim 56 | if padding_idx is not None: 57 | if padding_idx > 0: 58 | assert padding_idx < self.num_embeddings, 'Padding_idx must be within num_embeddings' 59 | elif padding_idx < 0: 60 | assert padding_idx >= -self.num_embeddings, 'Padding_idx must be within num_embeddings' 61 | padding_idx = self.num_embeddings + padding_idx 62 | self.padding_idx = padding_idx 63 | self.max_norm = max_norm 64 | self.norm_type = norm_type 65 | self.scale_grad_by_freq = scale_grad_by_freq 66 | if _weight is None: 67 | np.random.seed(1) 68 | np数 = np.random.uniform(0, 1, (num_embeddings, embedding_dim)) 69 | self.weight = nn.Parameter(torch.Tensor(np数)) 70 | # self.weight = nn.Parameter(torch.Tensor(num_embeddings, embedding_dim)) 71 | #self.reset_parameters() 72 | else: 73 | assert list(_weight.shape) == [num_embeddings, embedding_dim], \ 74 | 'Shape of weight does not match num_embeddings and embedding_dim' 75 | self.weight = nn.Parameter(_weight) 76 | self.sparse = sparse 77 | a = 0 78 | 79 | def reset_parameters(self): 80 | nn.init.normal_(self.weight) 81 | if self.padding_idx is not None: 82 | with torch.no_grad(): 83 | self.weight[self.padding_idx].fill_(0) 84 | 85 | def forward(self, input): 86 | return F.embedding( 87 | input, self.weight, self.padding_idx, self.max_norm, 88 | self.norm_type, self.scale_grad_by_freq, self.sparse) -------------------------------------------------------------------------------- /Sublayers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | import numpy as np 6 | def gelu(x): 7 | return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) 8 | class Norm(nn.Module): 9 | def __init__(self, d_model, eps = 1e-6): 10 | super().__init__() 11 | 12 | self.size = d_model 13 | 14 | # create two learnable parameters to calibrate normalisation 15 | self.alpha = nn.Parameter(torch.ones(self.size)) 16 | self.bias = nn.Parameter(torch.zeros(self.size)) 17 | 18 | self.eps = eps 19 | 20 | def forward(self, x): 21 | norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) \ 22 | / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias 23 | return norm 24 | 25 | def attention(q, k, v, d_k, mask=None, dropout=None): 26 | 27 | scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k) 28 | 29 | if mask is not None: 30 | mask = mask.unsqueeze(1) 31 | scores = scores.masked_fill(mask == 0, -1e9) 32 | 33 | scores = F.softmax(scores, dim=-1) 34 | 35 | if dropout is not None: 36 | scores = dropout(scores) 37 | 38 | output = torch.matmul(scores, v) 39 | return output 40 | 41 | class MultiHeadAttention(nn.Module): 42 | def __init__(self, heads, d_model, dropout = 0.1): 43 | super().__init__() 44 | 45 | self.d_model = d_model 46 | self.d_k = d_model // heads 47 | self.h = heads 48 | 49 | self.q_linear = 全连接层(d_model, d_model) 50 | self.v_linear = 全连接层(d_model, d_model) 51 | self.k_linear = 全连接层(d_model, d_model) 52 | 53 | self.dropout = nn.Dropout(dropout) 54 | self.out = 全连接层(d_model, d_model) 55 | 56 | def forward(self, q, k, v, mask=None): 57 | 58 | bs = q.size(0) 59 | 60 | # perform linear operation and split into N heads 61 | k = self.k_linear(k).view(bs, -1, self.h, self.d_k) 62 | q = self.q_linear(q).view(bs, -1, self.h, self.d_k) 63 | v = self.v_linear(v).view(bs, -1, self.h, self.d_k) 64 | 65 | # transpose to get dimensions bs * N * sl * d_model 66 | k = k.transpose(1,2) 67 | q = q.transpose(1,2) 68 | v = v.transpose(1,2) 69 | 70 | 71 | # calculate attention using function we will define next 72 | scores = attention(q, k, v, self.d_k, mask, self.dropout) 73 | # concatenate heads and put through final linear layer 74 | concat = scores.transpose(1,2).contiguous()\ 75 | .view(bs, -1, self.d_model) 76 | output = self.out(concat) 77 | 78 | return output 79 | 80 | class FeedForward(nn.Module): 81 | def __init__(self, d_model, d_ff=2048, dropout = 0.1): 82 | super().__init__() 83 | 84 | # We set d_ff as a default to 2048 85 | self.linear_1 = 全连接层(d_model, d_ff) 86 | self.dropout = nn.Dropout(dropout) 87 | self.linear_2 = 全连接层(d_ff, d_model) 88 | 89 | def forward(self, x): 90 | x = self.dropout(gelu(self.linear_1(x))) 91 | x = self.linear_2(x) 92 | return x 93 | class 全连接层(nn.Module): 94 | def __init__(self,输入_接口, 输出_接口): 95 | super().__init__() 96 | np.random.seed(1) 97 | self.weight = nn.Parameter(torch.FloatTensor(np.random.uniform(-1/np.sqrt(输入_接口), 1/np.sqrt(输入_接口), (输入_接口, 输出_接口)))) 98 | self.bias = nn.Parameter(torch.FloatTensor(np.random.uniform(-1/np.sqrt(输入_接口), 1/np.sqrt(输入_接口), 输出_接口))) 99 | 100 | 101 | def forward(self, x): 102 | 输出=torch.matmul(x,self.weight) 103 | 输出=输出+self.bias 104 | return 输出 -------------------------------------------------------------------------------- /训练状态判断模型A.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import json 4 | from PIL import Image 5 | from resnet_utils import myResnet 6 | import numpy as np 7 | import torch.nn as nn 8 | from Sublayers import Norm, 全连接层 9 | import math 10 | import torch.nn.functional as F 11 | from 模型_策略梯度 import Transformer 12 | from Batch import create_masks 13 | device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu") 14 | resnet101=torchvision.models.resnet101(pretrained=True).eval() 15 | resnet101=myResnet(resnet101).cuda(device).requires_grad_(False) 16 | from random import shuffle 17 | def gelu(x): 18 | return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) 19 | class 判断状态(nn.Module): 20 | def __init__(self, 种类数, 隐藏层尺寸, 输入层尺寸=2048,输入尺寸A=36): 21 | super().__init__() 22 | self.隐藏层尺寸=隐藏层尺寸 23 | self.输入层尺寸=输入层尺寸 24 | self.输入尺寸A = 输入尺寸A 25 | self.输入层 = 全连接层(输入层尺寸, 隐藏层尺寸) 26 | self.隐藏层 = 全连接层(隐藏层尺寸, 隐藏层尺寸) 27 | self.输出层 = 全连接层(隐藏层尺寸*输入尺寸A, 种类数) 28 | 29 | 30 | def forward(self, 图向量): 31 | 图向量 = 图向量.reshape((图向量.shape[0], self.输入尺寸A,self.输入层尺寸)) 32 | 中间量=gelu(self.输入层 (图向量)) 33 | 中间量=self.隐藏层 (中间量) 34 | 中间量=中间量.reshape((中间量.shape[0],self.隐藏层尺寸*self.输入尺寸A)) 35 | 结果=self.输出层 (中间量) 36 | return 结果 37 | 38 | def random_dic(dicts): 39 | dict_key_ls = list(dicts.keys()) 40 | shuffle(dict_key_ls) 41 | new_dic = {} 42 | for key in dict_key_ls: 43 | new_dic[key] = dicts.get(key) 44 | return new_dic 45 | #model_判断状态=判断状态(6,1024,2048).cuda(device) 46 | model_判断状态=Transformer(6,768,2,12,0.0,6*6*2048).cuda(device) 47 | #model_判断状态.load_state_dict(torch.load('weights/model_weights_判断状态C1')) 48 | optimizer = torch.optim.Adam(model_判断状态.parameters(), lr=6.25e-5, betas=(0.9, 0.98), eps=1e-9) 49 | 路径json='../判断数据样本/判断新.json' 50 | 51 | 52 | 全部数据={} 53 | 状态辞典={'击杀小兵或野怪或推掉塔': 0, '击杀敌方英雄': 1, '被击塔攻击': 2, '被击杀': 3, '死亡': 4, '普通': 5} 54 | 状态列表=[] 55 | for K in 状态辞典: 56 | 状态列表.append(K) 57 | with open(路径json, encoding='ansi') as f: 58 | while True: 59 | df = f.readline() 60 | df = df.replace('\'', '\"') 61 | 62 | if df == "": 63 | break 64 | 单元 = json.loads(df) 65 | for key in 单元: 66 | 全部数据[key]=单元[key] 67 | 68 | 69 | 状态 = np.ones((1, ), dtype='int64') 70 | for i in range(100): 71 | 72 | 打乱顺序=random_dic(全部数据) 73 | for key in 打乱顺序: 74 | 状态编号=状态辞典[全部数据[key]] 75 | 76 | 状态[0]=状态编号 77 | 目标输出=torch.from_numpy(状态).cuda(device) 78 | 79 | 80 | 81 | 图片路径 = '../判断数据样本/' + key + '.jpg' 82 | img = Image.open(图片路径) 83 | img2 = np.array(img) 84 | 85 | img2 = torch.from_numpy(img2).cuda(device).unsqueeze(0).permute(0, 3, 2, 1).float() / 255 86 | _, out = resnet101(img2) 87 | 图片张量 = out.reshape(1, 6 * 6 * 2048) 88 | 操作序列=np.ones((1,1)) 89 | 操作张量 = torch.from_numpy(操作序列.astype(np.int64)).cuda(device) 90 | src_mask, trg_mask = create_masks(操作张量.unsqueeze(0), 操作张量.unsqueeze(0), device) 91 | 实际输出,_=model_判断状态(图片张量.unsqueeze(0), 操作张量.unsqueeze(0),trg_mask) 92 | 93 | _, 抽样 = torch.topk(实际输出, k=1, dim=-1) 94 | 抽样np = 抽样.cpu().numpy() 95 | 96 | 97 | optimizer.zero_grad() 98 | 实际输出 = 实际输出.view(-1, 实际输出.size(-1)) 99 | loss = F.cross_entropy(实际输出, 目标输出.contiguous().view(-1), ignore_index=-1) 100 | print('轮', i, '实际输出', 状态列表[抽样np[0, 0, 0, 0]], '目标输出', 全部数据[key],loss) 101 | loss.backward() 102 | 103 | optimizer.step() 104 | torch.save(model_判断状态.state_dict(), 'weights/model_weights_判断状态L') 105 | 106 | torch.save(model_判断状态.state_dict(), 'weights/model_weights_判断状态L{}'.format(str(i))) 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /筛选事件特征图片.py: -------------------------------------------------------------------------------- 1 | import os 2 | # device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu") 3 | # print(device) 4 | import json 5 | import cv2 6 | import numpy as np 7 | import time 8 | from PIL import Image, ImageDraw, ImageFont 9 | import shutil 10 | 11 | from pynput.keyboard import Controller, Key, Listener 12 | from pynput import keyboard 13 | import threading 14 | 15 | 态='暂停' 16 | def cv2ImgAddText(img, text, left, top, textColor=(0, 255, 0), textSize=20): 17 | if (isinstance(img, np.ndarray)): # 判断是否OpenCV图片类型 18 | img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 19 | # 创建一个可以在给定图像上绘图的对象 20 | draw = ImageDraw.Draw(img) 21 | # 字体的格式 22 | fontStyle = ImageFont.truetype( 23 | 'C:/Windows/Fonts/STHUPO.TTF', textSize, encoding="utf-8") 24 | #"D:/python/辅助/锐字真言体.ttf" 25 | # 绘制文本 26 | draw.text((left, top), text, textColor, font=fontStyle) 27 | # 转换回OpenCV格式 28 | return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) 29 | def get_key_name(key): 30 | if isinstance(key, keyboard.KeyCode): 31 | 32 | 33 | return key.char 34 | else: 35 | 36 | return str(key) 37 | # 监听按压 38 | def on_press(key): 39 | global 态 40 | 41 | key_name=get_key_name(key) 42 | # print(key_name) 43 | # 操作='' 44 | # if key_name=='w': 45 | # W键按下=True 46 | # 47 | # elif key_name=='Key.left': 48 | # 操作='一技能' 49 | # elif key_name=='Key.down': 50 | # 操作='二技能' 51 | # elif key_name=='Key.right': 52 | # 操作='三技能' 53 | # elif key_name=='Key.up' : 54 | # 攻击态=True 55 | 56 | 57 | # 监听释放 58 | def on_release(key): 59 | global 态 60 | 61 | 62 | key_name=get_key_name(key) 63 | 64 | 65 | if key_name=='Key.up' : 66 | 67 | 态='弃' 68 | elif key_name=='Key.left': 69 | 态='普通' 70 | elif key_name=='Key.down': 71 | 态='过' 72 | elif key_name=='Key.right': 73 | 态='死亡' 74 | elif key_name=='a': 75 | 态='击杀敌方英雄' 76 | elif key_name=='s': 77 | 态='击杀小兵或野怪或推掉塔' 78 | elif key_name=='d': 79 | 态='被击杀' 80 | elif key_name == 'w': 81 | 态 = '被击塔攻击' 82 | 83 | #print("已经释放:", key_name) 84 | if key == Key.esc: 85 | # 停止监听 86 | return False 87 | 88 | # 开始监听 89 | def start_listen(): 90 | with Listener(on_press=on_press, on_release=on_release) as listener: 91 | listener.join() 92 | 93 | th = threading.Thread(target=start_listen,) 94 | th.start() 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | #筛选事件特征图片 106 | #1、进入目录打开引索 方法抄 107 | 路径json='../判断数据样本test/_判断数据.json' 108 | 路径新='../判断数据样本/' 109 | if not os.path.exists(路径新): 110 | os.makedirs(路径新) 111 | 路径新 = 路径新 +'判断新.json' 112 | 全部数据={} 113 | with open(路径json, encoding='ansi') as f: 114 | while True: 115 | df = f.readline() 116 | df = df.replace('\'', '\"') 117 | 118 | if df == "": 119 | break 120 | 单元 = json.loads(df) 121 | for key in 单元: 122 | 全部数据[key]=单元[key] 123 | 124 | #print(全部数据) 125 | 126 | for key in 全部数据: 127 | 记录文件 = open(路径新, 'a+') 128 | # print(key + ':' + 全部数据[key]) 129 | 图片路径 = '../判断数据样本test/' + key + '.jpg' 130 | 图片新路径 = '../判断数据样本/'+ key + '.jpg' 131 | 132 | 133 | # 截图 = cv2.imread(图片路径) 134 | 截图 = cv2.imdecode(np.fromfile(图片路径, dtype=np.uint8), -1) 135 | 截图 = cv2ImgAddText(截图, 全部数据[key], 0, 0, (000, 222, 111), 25) 136 | cv2.imshow('AAA', 截图) 137 | cv2.waitKey() 138 | 139 | while 态 == '暂停': 140 | time.sleep(0.02) 141 | 新输出={} 142 | 143 | 校准输出=全部数据[key] 144 | if 态=='过': 145 | 校准输出 = 全部数据[key] 146 | elif 态=='普通': 147 | 校准输出 = '普通' 148 | elif 态 == '死亡': 149 | 校准输出 = '死亡' 150 | elif 态 == '被击杀': 151 | 校准输出 = '被击杀' 152 | elif 态 == '击杀小兵或野怪或推掉塔': 153 | 校准输出 = '击杀小兵或野怪或推掉塔' 154 | elif 态 == '击杀敌方英雄': 155 | 校准输出 = '击杀敌方英雄' 156 | elif 态 == '被击塔攻击': 157 | 校准输出 = '被击塔攻击' 158 | elif 态 == '弃' and key!='162098566208': 159 | 态 = '暂停' 160 | continue 161 | else: 162 | print(1) 163 | print(key, 校准输出) 164 | 新输出[key]=校准输出 165 | json.dump(新输出, 记录文件, ensure_ascii=False) 166 | 记录文件.write('\n') 167 | shutil.copy(图片路径, 图片新路径) 168 | 169 | 态 = '暂停' 170 | 记录文件.close() 171 | 172 | # def CV信息显示(): 173 | # global 全部数据,态 174 | # 175 | # 176 | # 177 | # 178 | # CV信息= threading.Thread(target=CV信息显示) 179 | # CV信息.start() 180 | # d=666 181 | # for i in range(555): 182 | # 183 | # while 态=='暂停': 184 | # time.sleep(1) 185 | # print(d) 186 | # 态 = '暂停' 187 | # d=d+1 188 | -------------------------------------------------------------------------------- /训练X.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | from PIL import Image 4 | import numpy as np 5 | import time 6 | import json 7 | from config import GPT2Config, TransformerConfig 8 | from Batch import create_masks 9 | 10 | import torch.nn.functional as F 11 | from 取训练数据 import * 12 | from 杂项 import * 13 | import os 14 | import random 15 | from 模型_策略梯度 import Transformer 16 | from 模型_策略梯度 import 智能体 17 | 状态辞典B={'击杀小兵或野怪或推掉塔': 0, '击杀敌方英雄': 1, '被击塔攻击': 2, '被击杀': 3, '死亡': 4, '普通': 5} 18 | 状态辞典={'击杀小兵或野怪或推掉塔': 2, '击杀敌方英雄': 5, '被击塔攻击': -0.5, '被击杀': -2,'无状况':0.01, '死亡': 0.01, '其它': -0.003,'普通': 0.01} 19 | 状态列表=[] 20 | for K in 状态辞典B: 21 | 状态列表.append(K) 22 | 训练数据保存目录='../训练数据样本/未用' 23 | if not os.path.exists(训练数据保存目录): 24 | os.makedirs(训练数据保存目录) 25 | for root, dirs, files in os.walk('../训练数据样本/未用'): 26 | if len(dirs)>0: 27 | break 28 | 29 | 词数词典路径="./json/词_数表.json" 30 | 数_词表路径="./json/数_词表.json" 31 | if os.path.isfile(词数词典路径) and os.path.isfile(数_词表路径): 32 | 词_数表, 数_词表 = 读出引索(词数词典路径, 数_词表路径) 33 | with open(词数词典路径, encoding='utf8') as f: 34 | 词数词典=json.load(f) 35 | device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu") 36 | # 37 | # 38 | config = TransformerConfig() 39 | 模型路径 = 'model_weights_2021-05-7D' 40 | 41 | model_判断状态=Transformer(6,768,2,12,0.0,6*6*2048) 42 | model_判断状态.load_state_dict(torch.load('weights/model_weights_判断状态L')) 43 | model_判断状态.cuda(device).requires_grad_(False) 44 | N = 15000 # 运行N次后学习 45 | 条数 = 100 46 | 轮数 = 3 47 | 学习率 = 0.0003 48 | 智能体 = 智能体(动作数=7, 并行条目数=条数, 49 | 学习率=学习率, 轮数=轮数, 50 | 输入维度=6) 51 | 52 | 53 | 54 | 55 | 56 | 分块大小=600 57 | 游标大小=600 58 | 树枝=1 59 | 60 | 计数=0 61 | time_start=time.time() 62 | for j in range(100): 63 | #random.shuffle(dirs) 64 | for 号 in dirs: 65 | 预处理数据 = '../训练数据样本/未用/'+号+'/图片_操作预处理数据2.npz' 66 | if os.path.isfile(预处理数据): 67 | npz文件 = np.load(预处理数据, allow_pickle=True) 68 | 图片张量np, 操作序列 = npz文件["图片张量np"], npz文件["操作序列"] 69 | if 图片张量np.shape[0]<600: 70 | continue 71 | 循环=True 72 | 游标=0 73 | 操作序列=np.insert(操作序列,0,128) 74 | 75 | 操作_分_表 = [] 76 | 目标输出_分_表 = [] 77 | 图片_分_表 = [] 78 | 79 | while 循环: 80 | if 游标 + 分块大小 < 操作序列.shape[0]: 81 | 82 | 操作_分 = 操作序列[游标:游标 + 分块大小] 83 | 目标输出_分 = 操作序列[游标 + 1:游标 + 1 + 分块大小] 84 | 图片_分 = 图片张量np[游标:游标 + 分块大小, :] 85 | 操作_分_表.append(操作_分) 86 | 目标输出_分_表.append(目标输出_分) 87 | 图片_分_表.append(图片_分) 88 | 游标 = 游标 + 游标大小 89 | else: 90 | 操作_分 = 操作序列[-分块大小 - 1:-1] 91 | 目标输出_分 = 操作序列[-分块大小:] 92 | 93 | 图片_分 = 图片张量np[-分块大小:, :] 94 | 操作_分_表.append(操作_分) 95 | 目标输出_分_表.append(目标输出_分) 96 | 图片_分_表.append(图片_分) 97 | 循环 = False 98 | 99 | 循环=True 100 | i=0 101 | while 循环: 102 | if (i+1)*树枝 0: 331 | time.sleep(用时1) 332 | 333 | 334 | 计数 = 计数 + 1 335 | if 计数 % 10 == 0: 336 | print(用时1) 337 | 338 | 339 | if 继续 is False: 340 | 341 | print('学习中。。。。。。。。。。。。。。。。') 342 | #智能体.学习(device) 343 | print('分数', 1) 344 | #智能体.保存模型(学习次数) 345 | 分数记录 = [] 346 | 速度记录=[] 347 | print('学习完毕。。。。。。。。。。。。。。。。') 348 | #智能体.存硬盘('PPO训练数据/'+str(int(time.time()))) 349 | #智能体.保存模型(学习次数) 350 | 351 | time.sleep(1) 352 | print('继续',继续) 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | # 状态=状态_ 363 | # 延迟 = 0.22 - (time.time() - 计时开始) 364 | # if 延迟 > 0: 365 | # time.sleep(延迟) 366 | # 局内计数 = 局内计数 + 1 367 | # 368 | # 分数记录.append(分数) 369 | # 370 | # 平均分 = np.mean(分数记录[-500:]) 371 | # 平均速度 = np.mean(速度记录[-15000:]) 372 | # if 平均分 > 最高分: 373 | # 最高分 = 平均分 374 | # 375 | # print('步数', 步数, '平均分', 平均分,'最高分',最高分,'局数',i,'平均速度',平均速度) 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | #time.sleep(2) 387 | # while True: 388 | # 389 | # time.sleep(11) 390 | 391 | 392 | 393 | 394 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /训练数据截取_A.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torchvision 3 | from Batch import create_masks 4 | from 辅助功能 import 状态信息综合 5 | 6 | from 取训练数据 import * 7 | from 杂项 import * 8 | 9 | from resnet_utils import myResnet 10 | from 运行辅助 import * 11 | from pynput.keyboard import Controller, Key, Listener 12 | from pynput import keyboard 13 | import time, threading 14 | from 模型_策略梯度 import 智能体 15 | _DEVICE_ID = '68UDU17B14011947' 16 | 窗口名称="RNE-AL00" 17 | 18 | 训练数据保存目录='../训练数据样本/未用' 19 | if not os.path.exists(训练数据保存目录): 20 | os.makedirs(训练数据保存目录) 21 | lock=threading.Lock() 22 | start=time.time() 23 | end=time.time() 24 | fun_start=0 25 | time_interval=0 26 | index=0 27 | dict={'interval_times':0,'max_interval':0.,'interval_location':[]} 28 | count=0 29 | count_dict={'first_time':0.,'first_p_to_second_r':0.} 30 | keyBoard_dict={'Key.enter':'\n', 31 | 'Key.space':' ', 32 | "Key.tab":'\t'} 33 | 34 | W键按下=False 35 | S键按下=False 36 | A键按下=False 37 | D键按下=False 38 | Q键按下=False 39 | 攻击态=False 40 | 手动模式=False 41 | 攻击放开=True 42 | AI打开=True 43 | 操作列=[] 44 | 自动=0 45 | 46 | N = 15000 # 运行N次后学习 47 | 条数 = 100 48 | 轮数 = 3 49 | 学习率 = 0.0003 50 | 智能体 = 智能体(动作数=7, 并行条目数=条数, 51 | 学习率=学习率, 轮数=轮数, 52 | 输入维度=6) 53 | 54 | def get_key_name(key): 55 | if isinstance(key, keyboard.KeyCode): 56 | 57 | 58 | return key.char 59 | else: 60 | 61 | return str(key) 62 | # 监听按压 63 | def on_press(key): 64 | global fun_start,time_interval,index,dict,count,count_dict,W键按下,S键按下,A键按下,D键按下,手动模式,操作列,AI打开,攻击放开,Q键按下,攻击态 65 | 66 | key_name=get_key_name(key) 67 | 操作='' 68 | if key_name=='w': 69 | W键按下=True 70 | elif key_name=='a': 71 | A键按下=True 72 | elif key_name=='s': 73 | S键按下=True 74 | elif key_name=='d': 75 | D键按下=True 76 | elif key_name == 'q': 77 | Q键按下=True 78 | elif key_name == 'i': 79 | AI打开 = bool(1 - AI打开) 80 | 81 | elif key_name=='Key.space': 82 | 操作='召唤师技能' 83 | elif key_name=='Key.end': 84 | 操作='补刀' 85 | elif key_name=='Key.page_down': 86 | 操作='推塔' 87 | elif key_name=='j': 88 | 操作='一技能' 89 | elif key_name=='k': 90 | 操作='二技能' 91 | elif key_name=='l': 92 | 操作='三技能' 93 | elif key_name=='f': 94 | 操作='回城' 95 | elif key_name=='g': 96 | 操作='恢复' 97 | elif key_name=='h': 98 | 操作='召唤师技能' 99 | elif key_name=='Key.left': 100 | 操作='一技能' 101 | elif key_name=='Key.down': 102 | 操作='二技能' 103 | elif key_name=='Key.right': 104 | 操作='三技能' 105 | elif key_name=='Key.up' : 106 | 攻击态=True 107 | 108 | lock.acquire() 109 | if 操作!='': 110 | 操作列.append(操作) 111 | lock.release() 112 | #print("正在按压:", key_name) 113 | 114 | # 监听释放 115 | def on_release(key): 116 | global start,fun_start, time_interval, index,count,count_dict,W键按下,S键按下,A键按下,D键按下,攻击放开,Q键按下,攻击态 117 | 118 | 119 | key_name=get_key_name(key) 120 | if key_name=='w': 121 | W键按下=False 122 | elif key_name=='a': 123 | A键按下=False 124 | elif key_name=='s': 125 | S键按下=False 126 | elif key_name=='d': 127 | D键按下=False 128 | elif key_name == 'q': 129 | Q键按下 = False 130 | 131 | elif key_name=='Key.up' : 132 | 133 | 攻击态=False 134 | print("已经释放:", key_name) 135 | if key == Key.esc: 136 | # 停止监听 137 | return False 138 | 139 | # 开始监听 140 | def start_listen(): 141 | with Listener(on_press=on_press, on_release=on_release) as listener: 142 | listener.join() 143 | def 处理方向(): 144 | # W键按下 = False 145 | # S键按下 = False 146 | # A键按下 = False 147 | # D键按下 = False 148 | if Q键按下 == True: 149 | return ('移动停') 150 | elif W键按下 == True and S键按下 == False and A键按下 == False and D键按下 == False: 151 | return ('上移') 152 | elif W键按下 == False and S键按下 == True and A键按下 == False and D键按下 == False: 153 | return ('下移') 154 | elif W键按下 == False and S键按下 == False and A键按下 == True and D键按下 == False: 155 | return ('左移') 156 | elif W键按下 == False and S键按下 == False and A键按下 == False and D键按下 == True: 157 | return ('右移') 158 | elif W键按下 == True and S键按下 == False and A键按下 == True and D键按下 == False: 159 | return ('左上移') 160 | elif W键按下 == True and S键按下 == False and A键按下 == False and D键按下 == True: 161 | return ('右上移') 162 | elif W键按下 == False and S键按下 == True and A键按下 == True and D键按下 == False: 163 | return ('左下移') 164 | elif W键按下 == False and S键按下 == True and A键按下 == False and D键按下 == True: 165 | return ('右下移') 166 | else: 167 | return ('') 168 | 169 | 170 | 171 | 加三技能='d 0 552 1878 100\nc\nu 0\nc\n' 172 | 加二技能='d 0 446 1687 100\nc\nu 0\nc\n' 173 | 加一技能='d 0 241 1559 100\nc\nu 0\nc\n' 174 | 购买='d 0 651 207 100\nc\nu 0\nc\n' 175 | 词数词典路径="./json/词_数表.json" 176 | 数_词表路径="./json/数_词表.json" 177 | 操作查询路径="./json/名称_操作.json" 178 | 操作词典={"图片号":"0","移动操作":"无移动","动作操作":"无动作"} 179 | th = threading.Thread(target=start_listen,) 180 | th.start() #启动线程 181 | 182 | if os.path.isfile(词数词典路径) and os.path.isfile(数_词表路径): 183 | 词_数表, 数_词表 = 读出引索(词数词典路径, 数_词表路径) 184 | with open(词数词典路径, encoding='utf8') as f: 185 | 词数词典 = json.load(f) 186 | with open(操作查询路径, encoding='utf8') as f: 187 | 操作查询词典 = json.load(f) 188 | 189 | 方向表 = ['上移', '下移', '左移', '右移', '左上移', '左下移', '右上移', '右下移'] 190 | 191 | 192 | 设备 = MyMNTDevice(_DEVICE_ID) 193 | device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu") 194 | mod = torchvision.models.resnet101(pretrained=True).eval().cuda(device).requires_grad_(False) 195 | resnet101 = myResnet(mod) 196 | 197 | 198 | while True: 199 | if AI打开 : 200 | 201 | 202 | 203 | 204 | 205 | 图片路径=训练数据保存目录+'/{}/'.format(str(int( time.time())) ) 206 | os.mkdir(图片路径) 207 | 208 | 记录文件=open(图片路径+'_操作数据.json','w+') 209 | 210 | 211 | 212 | 图片张量 = torch.Tensor(0) 213 | 操作张量 = torch.Tensor(0) 214 | 215 | 伪词序列 = torch.from_numpy(np.ones((1, 60)).astype(np.int64)).cuda(device).unsqueeze(0) 216 | 217 | 指令延时=0 218 | 219 | 操作序列 = np.ones((1, )) 220 | 操作序列[0]=128 221 | 计数 = 0 222 | time_start=time.time() 223 | 旧指令='移动停' 224 | for i in range(1000000): 225 | if AI打开==False: 226 | break 227 | try: 228 | imgA = 取图(窗口名称) 229 | except: 230 | AI打开 = False 231 | print('取图失败') 232 | break 233 | 234 | 计时开始=time.time() 235 | 236 | if 图片张量.shape[0] == 0: 237 | 238 | 239 | img = np.array(imgA) 240 | 241 | img = torch.from_numpy(img).cuda(device).unsqueeze(0).permute(0, 3, 2, 1) / 255 242 | _,out = resnet101(img) 243 | 图片张量 = out.reshape(1,6*6*2048) 244 | 245 | elif 图片张量.shape[0] < 300: 246 | 247 | img = np.array(imgA) 248 | 249 | img = torch.from_numpy(img).cuda(device).unsqueeze(0).permute(0, 3, 2, 1) / 255 250 | _,out = resnet101(img) 251 | 图片张量 = torch.cat((图片张量, out.reshape(1,6*6*2048)), 0) 252 | 操作序列 = np.append(操作序列, 动作) 253 | 254 | else: 255 | 256 | 257 | img = np.array(imgA) 258 | 259 | img = torch.from_numpy(img).cuda(device).unsqueeze(0).permute(0, 3, 2, 1) / 255 260 | _,out = resnet101(img) 261 | 图片张量 = 图片张量[1:300, :] 262 | 操作序列=操作序列[1:300] 263 | 操作序列 = np.append(操作序列, 动作) 264 | 图片张量 = torch.cat((图片张量, out.reshape(1,6*6*2048)), 0) 265 | 266 | 267 | 操作张量 = torch.from_numpy(操作序列.astype(np.int64)).cuda(device) 268 | src_mask, trg_mask = create_masks(操作张量.unsqueeze(0), 操作张量.unsqueeze(0), device) 269 | 270 | 状态 = 状态信息综合(图片张量.cpu().numpy(), 操作序列, trg_mask) 271 | 272 | 动作, 动作可能性, 评价 = 智能体.选择动作(状态,device,1,False) 273 | LI = 操作张量.contiguous().view(-1) 274 | # LA=输出_实际_A.view(-1, 输出_实际_A.size(-1)) 275 | if 计数 % 50 == 0 and 计数!=0: 276 | 277 | 设备.发送(购买) 278 | 设备.发送(加三技能) 279 | 设备.发送(加二技能) 280 | 设备.发送(加一技能) 281 | 设备.发送(操作查询词典['移动停']) 282 | print(旧指令,'周期') 283 | time.sleep(0.02) 284 | 设备.发送(操作查询词典[旧指令]) 285 | 286 | 287 | if 计数 % 1 == 0: 288 | time_end = time.time() 289 | 290 | 291 | 292 | 293 | 指令=数_词表[str(动作)] 294 | 指令集=指令.split('_') 295 | 296 | #操作词典 = {"图片号": "0", "移动操作": "无移动", "动作操作": "无动作"} 297 | 操作词典['图片号']=str(i) 298 | 方向结果=处理方向() 299 | if 方向结果!='' or len(操作列)!=0 or 攻击态==True: 300 | if 方向结果 == '': 301 | 操作词典['移动操作'] = 指令集[0] 302 | else: 303 | 操作词典['移动操作']=方向结果 304 | 305 | 306 | if len(操作列)!=0: 307 | 操作词典['动作操作'] = 操作列[0] 308 | lock.acquire() 309 | del 操作列[0] 310 | lock.release() 311 | elif 攻击态==True: 312 | 操作词典['动作操作'] = '攻击' 313 | 314 | else: 315 | 操作词典['动作操作'] ='无动作' 316 | 317 | 318 | 路径_a = 图片路径 + '{}.jpg'.format(str(i)) 319 | imgA.save(路径_a) 320 | if 自动==0: 321 | 操作词典['结束']=1 322 | else: 323 | 操作词典['结束'] = 0 324 | 自动 = 1 325 | json.dump(操作词典, 记录文件, ensure_ascii=False) 326 | 记录文件.write('\n') 327 | 328 | 新指令 = 操作词典['移动操作'] 329 | if 新指令 != 旧指令 and 新指令 != '无移动': 330 | 旧指令 = 新指令 331 | # print(旧指令,操作查询词典[旧指令]) 332 | try: 333 | print('手动模式',旧指令) 334 | 335 | 设备.发送(操作查询词典[旧指令]) 336 | 337 | except: 338 | AI打开 = False 339 | print('发送失败') 340 | break 341 | 342 | time.sleep(0.01) 343 | 344 | if 操作词典['动作操作'] != '无动作' and 操作词典['动作操作'] != '发起集合' and 操作词典['动作操作'] != '发起进攻' and 操作词典['动作操作'] != '发起撤退': 345 | print('手动',指令集[1]) 346 | try: 347 | 设备.发送(操作查询词典[操作词典['动作操作']]) 348 | except: 349 | AI打开 = False 350 | print('发送失败') 351 | break 352 | else: 353 | 操作列=[] 354 | 操作词典['移动操作'] = 指令集[0] 355 | 操作词典['动作操作'] = 指令集[1] 356 | 357 | 新指令 = 指令集[0] 358 | if 新指令 != 旧指令 and 新指令 != '无移动': 359 | 旧指令 = 新指令 360 | # print(旧指令,操作查询词典[旧指令]) 361 | try: 362 | print(旧指令) 363 | 364 | 设备.发送(操作查询词典[旧指令]) 365 | 366 | except: 367 | AI打开 = False 368 | print('发送失败') 369 | break 370 | 371 | 372 | time.sleep(0.01) 373 | 路径_a = 图片路径 + '{}.jpg'.format(str(i)) 374 | imgA.save(路径_a) 375 | 自动 = 0 376 | 操作词典['结束'] = 0 377 | json.dump(操作词典, 记录文件, ensure_ascii=False) 378 | 记录文件.write('\n') 379 | 380 | 新指令 = 操作词典['移动操作'] 381 | if 指令集[1] != '无动作' and 指令集[1] != '发起集合' and 指令集[1] != '发起进攻' and 指令集[1] != '发起撤退': 382 | print(指令集[1]) 383 | try: 384 | 设备.发送(操作查询词典[指令集[1]]) 385 | except: 386 | AI打开 = False 387 | print('发送失败') 388 | break 389 | 用时1=0.22-(time.time()-计时开始) 390 | if 用时1>0: 391 | time.sleep(用时1) 392 | 393 | #print(用时1) 394 | 用时 = time_end - time_start 395 | #print("用时{} 第{}张 延时{}".format(用时, i,用时1),'A键按下', A键按下, 'W键按下', W键按下, 'S键按下', S键按下, 'D键按下', D键按下, '旧指令', 旧指令, 'AI打开', AI打开, '操作列', 操作列) 396 | 397 | 计数=计数+1 398 | if i%3000==0: 399 | # AI打开 = False 400 | #import pygame 401 | 402 | # pygame.mixer.init() 403 | # pygame.mixer.music.load('G:/AS.mp3') 404 | # pygame.mixer.music.set_volume(0.2) 405 | # pygame.mixer.music.play() 406 | print("此处可有音乐") 407 | time.sleep(1) 408 | 409 | 410 | 411 | 记录文件.close() 412 | time.sleep(1) 413 | print('AI打开',AI打开) 414 | 415 | -------------------------------------------------------------------------------- /模型_策略梯度.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch as T 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from torch.distributions.categorical import Categorical 7 | # distributions概率分布和采样函数 8 | import torch 9 | import torch.nn as nn 10 | from Layers import DecoderLayer 11 | from Embed import Embedder, PositionalEncoder 12 | from Sublayers import Norm, 全连接层 13 | import copy 14 | import os.path 15 | import torchvision 16 | from config import TransformerConfig 17 | import torch.nn.functional as F 18 | from Batch import create_masks 19 | from 杂项 import 打印抽样数据 20 | import pickle 21 | import gc 22 | 23 | def save_obj(obj, name ): 24 | with open(name + '.pkl', 'wb') as f: 25 | pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 26 | 27 | def load_obj(name ): 28 | with open(name , 'rb') as f: 29 | return pickle.load(f) 30 | 31 | def get_clones(module, N): 32 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 33 | 34 | 35 | class Decoder(nn.Module): 36 | def __init__(self, vocab_size, d_model, N, heads, dropout, 最大长度=1024): 37 | super().__init__() 38 | self.N = N 39 | self.embedX = Embedder(vocab_size, d_model) 40 | self.embedP = Embedder(最大长度, d_model) 41 | # self.pe = PositionalEncoder(d_model, dropout=dropout) 42 | self.layers = get_clones(DecoderLayer(d_model, heads, dropout), N) 43 | self.norm = Norm(d_model) 44 | def forward(self,图向量,操作 ,trg_mask): 45 | position = torch.arange(0, 图向量.size(1), dtype=torch.long, 46 | device=图向量.device) 47 | 48 | 49 | x = 图向量+self.embedP(position)+self.embedX(操作)*0 50 | 51 | 52 | 53 | for i in range(self.N): 54 | x = self.layers[i](x, trg_mask) 55 | return self.norm(x) 56 | 57 | class Transformer(nn.Module): 58 | def __init__(self, trg_vocab, d_model, N, heads, dropout,图向量尺寸=6*6*2048): 59 | super().__init__() 60 | self.图转= 全连接层(图向量尺寸,d_model) 61 | 62 | 63 | 64 | self.decoder = Decoder(trg_vocab, d_model, N, heads, dropout) 65 | self.outX = 全连接层(d_model, trg_vocab) 66 | 67 | self.评价 = 全连接层(d_model, 1) 68 | def forward(self, 图向量 ,操作, trg_mask): 69 | 图向量=self.图转(图向量) 70 | 71 | d_output = self.decoder(图向量,操作 , trg_mask) 72 | output = self.outX(d_output) 73 | 评价 = self.评价(d_output) 74 | return output,评价 75 | 76 | def get_model(opt, trg_vocab, model_weights='model_weights'): 77 | assert opt.d_model % opt.heads == 0 78 | assert opt.dropout < 1 79 | 80 | model = Transformer(trg_vocab, opt.d_model, opt.n_layers, opt.heads, opt.dropout) 81 | 82 | if opt.load_weights is not None and os.path.isfile(opt.load_weights + '/' + model_weights): 83 | print("loading pretrained weights...") 84 | model.load_state_dict(torch.load(f'{opt.load_weights}/' + model_weights)) 85 | else: 86 | 量 = 0 87 | for p in model.parameters(): 88 | if p.dim() > 1: 89 | # nn.init.xavier_uniform_(p) 90 | a = 0 91 | 长 = len(p.shape) 92 | 点数 = 1 93 | for j in range(长): 94 | 点数 = p.shape[j] * 点数 95 | 96 | 量 += 点数 97 | print('使用参数:{}百万'.format(量 / 1000000)) 98 | return model 99 | 100 | 101 | 102 | class PPO_数据集: 103 | def __init__(self, 并行条目数量): 104 | 105 | 106 | #self.状态集 = [] 107 | self.动作概率集 = [] 108 | self.评价集 = [] 109 | self.动作集 = [] 110 | self.回报集 = [] 111 | self.完结集 = [] 112 | 113 | self.并行条目数量 = 并行条目数量 114 | self.完整数据={} 115 | self.图片信息=np.ones([1,1000, 6*6*2048], dtype='float') 116 | self.操作信息 = np.ones((0,)) 117 | 118 | 119 | 120 | 121 | def 提取数据(self): 122 | 状态集_长度 = len(self.回报集) 123 | 条目_起始位 = np.arange(0, 状态集_长度-100, self.并行条目数量) 124 | 下标集 = np.arange(状态集_长度, dtype=np.int64) 125 | 126 | 条目集 = [下标集[i:i + self.并行条目数量] for i in 条目_起始位] 127 | 128 | return np.array(self.动作集),\ 129 | np.array(self.动作概率集), \ 130 | self.评价集, \ 131 | np.array(self.回报集),\ 132 | np.array(self.完结集), \ 133 | self.图片信息, \ 134 | self.操作信息 ,\ 135 | 条目集 136 | def 记录数据(self, 状态, 动作, 动作概率, 评价, 回报, 完结,计数): 137 | #self.状态集.append(状态) 138 | self.动作集.append(动作) 139 | self.动作概率集.append(动作概率) 140 | self.评价集.append(评价) 141 | self.回报集.append(回报) 142 | self.完结集.append(完结) 143 | self.图片信息[:,计数, :]=状态['图片张量'] 144 | self.操作信息=np.append(self.操作信息, 状态['操作序列']) 145 | 146 | 147 | def 清除数据(self): 148 | self.图片信息 = [] 149 | self.动作概率集 = [] 150 | self.动作集 = [] 151 | self.回报集 = [] 152 | self.完结集 = [] 153 | self.评价集 = [] 154 | self.完整数据={} 155 | # del self.状态集,self.动作概率集,self.评价集,self.动作集,self.回报集,self.完结集,self.完整数据 156 | # gc.collect() 157 | 158 | def 存硬盘(self,文件名): 159 | self.完整数据['图片信息']=self.图片信息[:,0:len(self.动作集),:] 160 | self.完整数据['动作概率集'] = self.动作概率集 161 | self.完整数据['动作集'] = self.动作集 162 | self.完整数据['回报集'] = self.回报集 163 | self.完整数据['完结集'] = self.完结集 164 | self.完整数据['评价集'] = self.评价集 165 | self.完整数据['操作信息'] =self.操作信息 166 | save_obj(self.完整数据,文件名) 167 | self.完整数据={} 168 | #self.图片信息 = [] 169 | self.动作概率集 = [] 170 | self.动作集 = [] 171 | self.回报集 = [] 172 | self.完结集 = [] 173 | self.评价集 = [] 174 | #self.操作信息=[] 175 | 176 | #del self.图片信息,self.动作概率集,self.评价集,self.动作集,self.回报集,self.完结集,self.完整数据 177 | #gc.collect() 178 | 179 | def 读硬盘(self,文件名): 180 | self.完整数据 = load_obj(文件名) 181 | self.图片信息=self.完整数据['图片信息'] 182 | self.动作概率集=self.完整数据['动作概率集'] 183 | self.动作集=self.完整数据['动作集'] 184 | self.回报集= self.完整数据['回报集'] 185 | self.完结集= self.完整数据['完结集'] 186 | self.评价集=self.完整数据['评价集'] 187 | self.操作信息 =self.完整数据 ['操作信息'] 188 | self.完整数据={} 189 | 190 | 191 | 192 | def 处理状态参数(状态组,device): 193 | 194 | 最长=0 195 | 状态组合={} 196 | 197 | # 操作序列 = np.ones((1,)) 198 | for 状态A in 状态组: 199 | if 状态A['图片张量'].shape[1]>最长: 200 | 最长=状态A['图片张量'].shape[1] 201 | for 状态 in 状态组: 202 | 状态A = 状态.copy() 203 | if 状态A['图片张量'].shape[1] == 最长: 204 | 单元=状态A 205 | 操作序列 = np.ones((最长,)) 206 | 遮罩序列 = torch.from_numpy(操作序列.astype(np.int64)).cuda(device).unsqueeze(0) 207 | 单元['遮罩序列']=遮罩序列 208 | 209 | else: 210 | 有效长度=状态A['图片张量'].shape[1] 211 | 差值=最长-有效长度 212 | 形状=状态A['图片张量'].shape 213 | 图片张量_拼接 = torch.zeros(形状[0],差值,形状[2],形状[3]).cuda(device).float() 214 | 图片张量_拼接 = 图片张量_拼接.cpu().numpy() 215 | 状态A['图片张量']=np.append(状态A['图片张量'],图片张量_拼接, axis=1) 216 | #状态A['图片张量'] = torch.cat((状态A['图片张量'], 图片张量_拼接), 1) 217 | 形状 = 状态A['角度集张量_序列'].shape 218 | 角度集张量_拼接=torch.zeros(形状[0],差值,形状[2]).cuda(device).float() 219 | 状态A['角度集张量_序列'] = torch.cat((状态A['角度集张量_序列'], 角度集张量_拼接), 1) 220 | 221 | 形状 = 状态A['位置张量_序列'].shape 222 | 位置张量_拼接=torch.zeros(形状[0],差值,形状[2]).cuda(device).float() 223 | 状态A['位置张量_序列'] = torch.cat((状态A['位置张量_序列'], 位置张量_拼接), 1) 224 | 225 | 形状 = 状态A['速度张量_序列'].shape 226 | 速度张量_拼接=torch.zeros(形状[0],差值,形状[2]).cuda(device).float() 227 | 状态A['速度张量_序列'] = torch.cat((状态A['速度张量_序列'], 速度张量_拼接), 1) 228 | 229 | 操作序列 = np.ones((有效长度,)) 230 | 遮罩序列 = torch.from_numpy(操作序列.astype(np.int64)).cuda(device).unsqueeze(0) 231 | 状态A['遮罩序列']=遮罩序列 232 | 操作序列 = np.ones((差值,))*-1 233 | 遮罩序列 = torch.from_numpy(操作序列.astype(np.int64)).cuda(device).unsqueeze(0) 234 | 状态A['遮罩序列'] = torch.cat((状态A['遮罩序列'], 遮罩序列), 1) 235 | 单元=状态A 236 | 237 | if 状态组合=={}: 238 | 状态组合=单元 239 | else: 240 | 状态组合['遮罩序列'] = torch.cat((状态组合['遮罩序列'], 单元['遮罩序列']), 0) 241 | 状态组合['速度张量_序列'] = torch.cat((状态组合['速度张量_序列'], 单元['速度张量_序列'],), 0) 242 | 状态组合['位置张量_序列'] = torch.cat((状态组合['位置张量_序列'], 单元['位置张量_序列']), 0) 243 | 状态组合['角度集张量_序列'] = torch.cat((状态组合['角度集张量_序列'], 单元['角度集张量_序列']), 0) 244 | #状态组合['图片张量'] = torch.cat((状态组合['图片张量'], 单元['图片张量']), 0) 245 | 状态组合['图片张量'] =np.append(状态组合['图片张量'], 单元['图片张量'], axis=0) 246 | src_mask, trg_mask = create_masks(状态组合['遮罩序列'], 状态组合['遮罩序列'], device) 247 | 状态组合['trg_mask']=trg_mask 248 | return 状态组合 249 | 250 | 251 | 252 | class 智能体: 253 | def __init__(self, 动作数, 输入维度, 优势估计参数G=0.9999, 学习率=0.0003, 泛化优势估计参数L=0.985, 254 | 策略裁剪幅度=0.2, 并行条目数=64, 轮数=10,熵系数=0.01): 255 | self.优势估计参数G = 优势估计参数G 256 | self.策略裁剪幅度 = 策略裁剪幅度 257 | self.轮数 = 轮数 258 | self.熵系数=熵系数 259 | self.泛化优势估计参数L = 泛化优势估计参数L 260 | device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu") 261 | 模型名称 = '模型_策略梯度_丙TA' 262 | 263 | config = TransformerConfig() 264 | model = get_model(config, 130, 模型名称) 265 | # model_dict = model.state_dict() 266 | # 267 | # pretrained_dict = torch.load('weights/model_weights_2021-05-7D11') 268 | # 269 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 270 | # 271 | # model_dict.update(pretrained_dict) 272 | # 273 | # model.load_state_dict(model_dict) 274 | 275 | 276 | model = model.cuda(device) 277 | self.动作 = model 278 | #torch.save(self.动作.state_dict(), 'weights/模型_动作ppo阶段停bZ1') 279 | self.优化函数 = torch.optim.Adam(self.动作.parameters(), lr=2e-5, betas=(0.9, 0.95), eps=1e-9) 280 | 281 | 282 | self.数据集 = PPO_数据集(并行条目数) 283 | self.文件名集=[] 284 | 285 | def 记录数据(self, 状态, 动作, 动作概率, 评价, 回报, 完结,计数): 286 | self.数据集.记录数据(状态, 动作, 动作概率, 评价, 回报, 完结,计数) 287 | def 存硬盘(self, 文件名): 288 | self.数据集.存硬盘(文件名) 289 | self.文件名集.append(文件名) 290 | def 读硬盘(self, 文件名): 291 | self.数据集.读硬盘(文件名) 292 | def 保存模型(self,轮号): 293 | print('... 保存模型 ...') 294 | 295 | torch.save(self.动作.state_dict(), 'weights/模型_策略梯度_丙N') 296 | torch.save(self.动作.state_dict(), 'weights/模型_策略梯度_丙N{}'.format(轮号)) 297 | #torch.save(self.评论.state_dict(), 'weights/模型_评论') 298 | 299 | #torch.save(self.评论.state_dict(), 'weights/模型_评论2') 300 | def 载入模型(self): 301 | print('... 载入模型 ...') 302 | self.动作.载入权重() 303 | #self.评价.载入权重() 304 | 305 | def 选择动作(self, 状态,device,传入动作,手动=False): 306 | 307 | 308 | # 分布,q_ = self.动作(状态) 309 | # r_, 价值 = self.评论(状态) 310 | self.动作.requires_grad_(False) 311 | 操作序列=torch.from_numpy(状态['操作序列'].astype(np.int64)).cuda(device) 312 | 图片张量=torch.from_numpy(状态['图片张量']).cuda(device) 313 | trg_mask=状态['trg_mask'] 314 | 分布, 价值 = self.动作(图片张量,操作序列,trg_mask) 315 | 价值 = 价值[:, - 1, :] 316 | 分布 = F.softmax(分布, dim=-1) 317 | 分布 = 分布[:, - 1, :] 318 | 分布 = Categorical(分布) 319 | if 手动: 320 | 动作 = 传入动作 321 | else: 322 | 323 | 动作 = 分布.sample() 324 | 325 | 动作概率 = T.squeeze(分布.log_prob(动作)).item() 326 | 327 | 动作 = T.squeeze(动作).item() 328 | 329 | 330 | return 动作, 动作概率, 价值 331 | def 选择动作批量(self, 状态,device,目标输出_分_torch,手动=False): 332 | 333 | 334 | # 分布,q_ = self.动作(状态) 335 | # r_, 价值 = self.评论(状态) 336 | self.动作.requires_grad_(False) 337 | 操作序列=torch.from_numpy(状态['操作序列'].astype(np.int64)).cuda(device) 338 | 图片张量=torch.from_numpy(状态['图片张量']).cuda(device) 339 | trg_mask=状态['trg_mask'] 340 | 分布, 价值 = self.动作(图片张量,操作序列,trg_mask) 341 | 分布 = F.softmax(分布, dim=-1) 342 | 分布 = Categorical(分布) 343 | if 手动: 344 | 动作 = 目标输出_分_torch 345 | else: 346 | 347 | 动作 = 分布.sample() 348 | 349 | 动作概率 = T.squeeze(分布.log_prob(动作)) 350 | 351 | 动作 = T.squeeze(动作) 352 | 353 | 354 | return 动作, 动作概率, 价值 355 | def 学习(self,device): 356 | for i in range(1): 357 | 358 | # for k, v in self.动作.named_parameters(): 359 | # 360 | # if k == '评价.weight' or k=='评价.bias': 361 | # v.requires_grad = True 362 | 363 | 364 | for _ in range(self.轮数): 365 | 动作集, 旧_动作概率集, 评价集, 回报集, 完结集,图片集合,动作数组, 条目集 = self.数据集.提取数据() 366 | print('回报集',回报集[0:10]) 367 | 价值 = 评价集 368 | 369 | 优势函数值 = np.zeros(len(回报集), dtype=np.float32) 370 | 371 | for t in range(len(回报集) - 1): 372 | 折扣率 = 1 373 | 优势值 = 0 374 | 折扣率 = self.优势估计参数G * self.泛化优势估计参数L 375 | 计数=0 376 | for k in range(t, len(回报集) - 1): 377 | 378 | 优势值 += pow(折扣率, abs(0-计数)) * (回报集[k] + self.优势估计参数G * 价值[k + 1] * (1 - int(完结集[k])) - 价值[k]) 379 | 计数=计数+1 380 | if (1 - int(完结集[k]))==0 or 计数>100: 381 | 382 | break 383 | 优势函数值[t] = 优势值 384 | # https://blog.csdn.net/zhkmxx930xperia/article/details/88257891 385 | # GAE的形式为多个价值估计的加权平均数 386 | 优势函数值 = T.tensor(优势函数值).to(device) 387 | 388 | 价值 = T.tensor(价值).to(device) 389 | for 条 in 条目集: 390 | 条末=条[-1:] 391 | 392 | 旧_动作概率s = T.tensor(旧_动作概率集[条末]).to(device) 393 | 动作s = T.tensor(动作集[条末]).to(device) 394 | 395 | 396 | self.动作.requires_grad_(True) 397 | 398 | 399 | 操作序列 = torch.from_numpy(动作数组[条].astype(np.int64)).cuda(device) 400 | 图片张量 = torch.from_numpy(图片集合[:, 条, :]).cuda(device).float() 401 | src_mask, trg_mask = create_masks(操作序列.unsqueeze(0), 操作序列.unsqueeze(0), device) 402 | 分布, 评价结果 = self.动作(图片张量,操作序列,trg_mask) 403 | 分布=分布[:,-1:,:] 404 | 评价结果 = 评价结果[:, -1:, :] 405 | 406 | 分布 = F.softmax(分布, dim=-1) 407 | # 分布 = 分布[:, - 1, :] 408 | # 评价结果 = 评价结果[:, - 1, :] 409 | 评价结果 = T.squeeze(评价结果) 410 | 分布 = Categorical(分布) 411 | 熵损失 = torch.mean(分布.entropy()) 412 | 新_动作概率s = 分布.log_prob(动作s) 413 | # 概率比 = 新_动作概率s.exp() / 旧_动作概率s.exp() 414 | # # prob_ratio = (new_probs - old_probs).exp() 415 | # 加权概率 = 优势函数值[条末] * 概率比 416 | # 加权_裁剪_概率 = T.clamp(概率比, 1 - self.策略裁剪幅度, 417 | # 1 + self.策略裁剪幅度) * 优势函数值[条末] 418 | # 动作损失 = -T.min(加权概率, 加权_裁剪_概率).mean() 419 | 420 | 总回报 = 优势函数值[条末] + 价值[条末] 421 | 动作损失 = -总回报 * 新_动作概率s 422 | 动作损失 = 动作损失.mean() 423 | 评价损失 = (总回报 - 评价结果) ** 2 424 | 评价损失 = 评价损失 .mean() 425 | 426 | 总损失 = 动作损失 + 0.5 * 评价损失-self.熵系数*熵损失 427 | #print(总损失) 428 | 429 | self.优化函数.zero_grad() 430 | # self.优化函数_评论.zero_grad() 431 | 总损失.backward() 432 | self.优化函数.step() 433 | # self.优化函数_评论.step() 434 | print('总损失',总损失) 435 | 436 | self.数据集.清除数据() 437 | self.文件名集=[] 438 | 439 | 440 | def 监督强化学习(self,device,状态,回报,动作,动作可能性,评价): 441 | #print(device,状态,回报,动作,动作可能性,评价) 442 | # for k, v in self.动作.named_parameters(): 443 | # 444 | # if k == '评价.weight' or k=='评价.bias': 445 | # v.requires_grad = True 446 | 回报集=回报 447 | 价值=评价.cpu().numpy()[0,:,0] 448 | 优势函数值 = np.zeros(回报集.shape[0], dtype=np.float32) 449 | for t in range(len(回报集) - 1): 450 | 折扣率 = 1 451 | 优势值 = 0 452 | 折扣率 = self.优势估计参数G * self.泛化优势估计参数L 453 | 计数 = 0 454 | for k in range(t, len(回报集) - 1): 455 | 456 | 优势值 += pow(折扣率, abs(0 - 计数)) * (回报集[k]) 457 | 计数 = 计数 + 1 458 | if 计数 > 200: 459 | break 460 | 优势函数值[t] = 优势值 461 | 462 | 价值 = T.tensor(价值).to(device) 463 | for i in range(3): 464 | 优势函数值 = T.tensor(优势函数值).to(device) 465 | 旧_动作概率s = T.tensor(动作可能性).to(device) 466 | 动作s = T.tensor(动作).to(device) 467 | 468 | self.动作.requires_grad_(True) 469 | 470 | 操作序列 = torch.from_numpy(状态['操作序列'].astype(np.int64)).cuda(device) 471 | 图片张量 = torch.from_numpy(状态['图片张量']).cuda(device).float() 472 | trg_mask = 状态['trg_mask'] 473 | 474 | 分布, 评价结果 = self.动作(图片张量, 操作序列, trg_mask) 475 | 476 | 分布 = F.softmax(分布, dim=-1) 477 | # 分布 = 分布[:, - 1, :] 478 | # 评价结果 = 评价结果[:, - 1, :] 479 | 评价结果 = T.squeeze(评价结果) 480 | 分布 = Categorical(分布) 481 | #熵损失 = torch.mean(分布.entropy()) 482 | 新_动作概率s = 分布.log_prob(动作s) 483 | # 旧_动作概率s=旧_动作概率s.exp() 484 | # 概率比 = 新_动作概率s / 旧_动作概率s 485 | # # prob_ratio = (new_probs - old_probs).exp() 486 | # 加权概率 = 优势函数值 * 概率比 487 | # 加权_裁剪_概率 = T.clamp(概率比, 1 - self.策略裁剪幅度, 488 | # 1 + self.策略裁剪幅度) * 优势函数值 489 | # 动作损失 = -T.min(加权概率, 加权_裁剪_概率).mean() 490 | #概率比2 = 新_动作概率s.mean() / 旧_动作概率s.mean() 491 | 总回报 = 优势函数值#+ 价值 492 | 动作损失 = -总回报 * 新_动作概率s 493 | 动作损失 = 动作损失.mean() 494 | #评价损失 = (总回报 - 评价结果) ** 2 495 | #评价损失 = 评价损失.mean() 496 | print(总回报[10:20],新_动作概率s[:,10:20].exp()) 497 | 498 | 总损失 = 动作损失# + 0.5 * 评价损失 - self.熵系数 * 熵损失 499 | # print(总损失) 500 | 501 | self.优化函数.zero_grad() 502 | # self.优化函数_评论.zero_grad() 503 | 总损失.backward() 504 | self.优化函数.step() 505 | # self.优化函数_评论.step() 506 | 507 | def 监督强化学习A(self,device,状态,回报,动作,动作可能性,评价,完结集): 508 | #print(device,状态,回报,动作,动作可能性,评价) 509 | # for k, v in self.动作.named_parameters(): 510 | # 511 | # if k == '评价.weight' or k=='评价.bias': 512 | # v.requires_grad = True 513 | 回报集=回报 514 | 价值=评价.cpu().numpy()[0,:,0] 515 | 优势函数值 = np.zeros(回报集.shape[0], dtype=np.float32) 516 | for t in range(len(回报集) - 1): 517 | 折扣率 = 1 518 | 优势值 = 0 519 | 折扣率 = self.优势估计参数G * self.泛化优势估计参数L 520 | 计数 = 0 521 | for k in range(t, len(回报集) - 1): 522 | 523 | 优势值 += pow(折扣率, abs(0 - 计数)) * (回报集[k]*(1-完结集[0,k]*0)) 524 | 计数 = 计数 + 1 525 | if 计数 > 200 or 完结集[0,k]==2111111: 526 | break 527 | 优势函数值[t] = 优势值 528 | 529 | 价值 = T.tensor(价值).to(device) 530 | for i in range(3): 531 | 优势函数值 = T.tensor(优势函数值).to(device) 532 | 旧_动作概率s = T.tensor(动作可能性).to(device) 533 | 动作s = T.tensor(动作).to(device) 534 | 535 | self.动作.requires_grad_(True) 536 | 537 | 操作序列 = torch.from_numpy(状态['操作序列'].astype(np.int64)).cuda(device) 538 | 图片张量 = torch.from_numpy(状态['图片张量']).cuda(device).float() 539 | trg_mask = 状态['trg_mask'] 540 | 541 | 分布, 评价结果 = self.动作(图片张量, 操作序列, trg_mask) 542 | 543 | 分布 = F.softmax(分布, dim=-1) 544 | # 分布 = 分布[:, - 1, :] 545 | # 评价结果 = 评价结果[:, - 1, :] 546 | 评价结果 = T.squeeze(评价结果) 547 | 分布 = Categorical(分布) 548 | #熵损失 = torch.mean(分布.entropy()) 549 | 新_动作概率s = 分布.log_prob(动作s) 550 | # 旧_动作概率s=旧_动作概率s.exp() 551 | # 概率比 = 新_动作概率s / 旧_动作概率s 552 | # # prob_ratio = (new_probs - old_probs).exp() 553 | # 加权概率 = 优势函数值 * 概率比 554 | # 加权_裁剪_概率 = T.clamp(概率比, 1 - self.策略裁剪幅度, 555 | # 1 + self.策略裁剪幅度) * 优势函数值 556 | # 动作损失 = -T.min(加权概率, 加权_裁剪_概率).mean() 557 | #概率比2 = 新_动作概率s.mean() / 旧_动作概率s.mean() 558 | 总回报 = 优势函数值#+ 价值 559 | 动作损失 = -总回报 * 新_动作概率s 560 | 动作损失 = 动作损失.mean() 561 | #评价损失 = (总回报 - 评价结果) ** 2 562 | #评价损失 = 评价损失.mean() 563 | print(总回报[10:20],新_动作概率s[:,10:20].exp()) 564 | 565 | 总损失 = 动作损失# + 0.5 * 评价损失 - self.熵系数 * 熵损失 566 | # print(总损失) 567 | 568 | self.优化函数.zero_grad() 569 | # self.优化函数_评论.zero_grad() 570 | 总损失.backward() 571 | self.优化函数.step() 572 | # self.优化函数_评论.step() 573 | def 监督学习(self, 状态,目标输出,打印,数_词表,操作_分_torch,device): 574 | 分布, 价值 = self.动作(状态,device) 575 | lin = 分布.view(-1, 分布.size(-1)) 576 | _, 抽样 = torch.topk(分布, k=1, dim=-1) 577 | 抽样np = 抽样.cpu().numpy() 578 | 579 | self.优化函数.zero_grad() 580 | loss = F.cross_entropy(lin, 目标输出.contiguous().view(-1), ignore_index=-1) 581 | if 打印: 582 | 583 | print(loss) 584 | 打印抽样数据(数_词表, 抽样np[0:1, :, :], 操作_分_torch[0, :]) 585 | loss.backward() 586 | 587 | self.优化函数.step() 588 | 589 | 590 | def 选择动作_old(self, 状态): 591 | 592 | 593 | # 分布,q_ = self.动作(状态) 594 | # r_, 价值 = self.评论(状态) 595 | 输出_实际_A, 价值 = self.动作(状态) 596 | 597 | 598 | 输出_实际_A = F.softmax(输出_实际_A, dim=-1) 599 | 输出_实际_A = 输出_实际_A[:, - 1, :] 600 | 抽样 = torch.multinomial(输出_实际_A, num_samples=1) 601 | 抽样np = 抽样.cpu().numpy() 602 | return 抽样np[0,-1] 603 | #item是得到一个元素张量里面的元素值 604 | #优势函数表达在状态s下,某动作a相对于平均而言的优势 605 | #GAE一般优势估计 606 | --------------------------------------------------------------------------------