├── 人体阴阳与电能.txt ├── README.md ├── code_10_CrossEntropy.py ├── code_13_pooling.py ├── code_09_BNdetail.py ├── code_20_GPT2Test.py ├── code_19_BERTTest.py ├── code_02_moons.py ├── code_18_pipline.py ├── code_05_L2.py ├── code_06_Dropout.py ├── code_08_BN.py ├── code_25_BERT_NoPUNC.py ├── code_03_moons_fun.py ├── code_12_CONV.py ├── code_24_BERT_PROPN.py ├── code_07_Multi-sampleDropout.py ├── code_27_spellgcn.py ├── code_04_use_module.py ├── code_22_TextCNNInterpret.py ├── code_01_subtraction.py ├── code_29_serving.py ├── code_15_rnnwordtest.py ├── code_21_BERT_CH.py ├── code_28_CDial.py ├── code_11_skip-gram.py ├── code_14_TextCNN.py ├── code_16_AttLSTMModel.py ├── code_23_GNN_BERT.py ├── code_26_RGCNDGL.py └── code_17_Transformer.py /人体阴阳与电能.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darvincy/Bert_based_book_code/HEAD/人体阴阳与电能.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 《基于BERT模型的自然语言处理实战》随书代码 2 | 3 | 随书数据资源可在官网下载:https://www.aianaconda.com/index/bert 4 | -------------------------------------------------------------------------------- /code_10_CrossEntropy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Mon Apr 8 22:19:48 2019 8 | """ 9 | 10 | import torch 11 | logits = torch.autograd.Variable(torch.tensor([[2, 0.5,6], [0.1,0, 3]])) 12 | labels = torch.autograd.Variable(torch.LongTensor([2,1])) 13 | print(logits) 14 | print(labels) 15 | print('Softmax:',torch.nn.Softmax(dim=1)(logits)) 16 | logsoftmax = torch.nn.LogSoftmax(dim=1)(logits) 17 | print('logsoftmax:',logsoftmax) 18 | output = torch.nn.NLLLoss()(logsoftmax, labels) 19 | print('NLLLoss:',output) 20 | print ( 'CrossEntropyLoss:', torch.nn.CrossEntropyLoss()(logits, labels) ) -------------------------------------------------------------------------------- /code_13_pooling.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Sat Apr 27 07:04:02 2019 8 | """ 9 | 10 | import torch 11 | 12 | img=torch.tensor([ [ [0.,0.,0.,0.],[1.,1.,1.,1.],[2.,2.,2.,2.],[3.,3.,3.,3.] ], 13 | [ [4.,4.,4.,4.],[5.,5.,5.,5.],[6.,6.,6.,6.],[7.,7.,7.,7.] 
] 14 | ]).reshape([1,2,4,4]) 15 | print(img) 16 | print(img[0][0]) 17 | print(img[0][1]) 18 | 19 | #torch.nn.functional.avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) 20 | pooling=torch.nn.functional.max_pool2d(img,kernel_size =2) 21 | print("pooling:\n",pooling) 22 | pooling1=torch.nn.functional.max_pool2d(img,kernel_size =2,stride=1) 23 | print("pooling1:\n",pooling1) 24 | pooling2=torch.nn.functional.avg_pool2d(img,kernel_size =4,stride=1,padding=1) 25 | print("pooling2:\n",pooling2) 26 | pooling3=torch.nn.functional.avg_pool2d(img,kernel_size =4) 27 | print("pooling3:\n",pooling3) 28 | 29 | m1 = img.mean(3) 30 | print("第1次平均值结果:\n",m1) 31 | print("第2次平均值结果:\n",m1.mean(2)) 32 | -------------------------------------------------------------------------------- /code_09_BNdetail.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Tue Jan 21 19:34:35 2020 8 | """ 9 | 10 | 11 | import torch 12 | import torch.nn as nn 13 | data=torch.randn(2,2,2,1) 14 | print(data) 15 | obn=nn.BatchNorm2d(2,affine=True) #实例化自适应BN对象 16 | output=obn(data) 17 | 18 | 19 | print(obn.weight) 20 | print(obn.bias) 21 | print(obn.eps) 22 | print(output,output.size()) 23 | 24 | 25 | print("第1通道的数据:",data[:,0]) 26 | 27 | #计算第1通道数据的均值和方差 28 | Mean=torch.Tensor.mean(data[:,0]) 29 | Var=torch.Tensor.var(data[:,0],False) #False表示不使用贝塞尔校正,方差按N而非N-1计算,与BN的统计方式一致 30 | print(Mean) 31 | print(Var) 32 | 33 | #计算第1通道中第一个数据的BN:(x-Mean)/sqrt(Var+eps),eps加在开方之内,与nn.BatchNorm2d的实现一致 34 | batchnorm=((data[0][0][0][0]-Mean)/torch.pow(Var+obn.eps,0.5))\ 35 | *obn.weight[0]+obn.bias[0] 36 | print(batchnorm) 37 | 38 | 39 | 40 | 41 | #附带的小例子:expand/repeat与multinomial的用法 42 | import torch 43 | data=torch.randn(1,1,1)#tensor([[[1.3868]]]) 44 | data.expand(1, 1, 2)#tensor([[[1.3868, 1.3868]]]) 45 | data.repeat(1,1,2) 46 | import torch 47 | data=torch.rand(2,4)#tensor([[0.2316, 0.3987, 0.6225, 0.5304], 48 | # [0.7686, 0.3504, 0.8837, 0.7697]]) 49 | torch.multinomial(data, 1)#tensor([[1], [2]]) 50 | torch.multinomial(data, 1)#tensor([[1], [0]]) -------------------------------------------------------------------------------- /code_20_GPT2Test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Mar 20 11:10:34 2020 4 | 5 | @author: ljh 6 | """ 7 | 8 | import torch 9 | from transformers import GPT2Tokenizer, GPT2LMHeadModel 10 | 11 | # 加载预训练模型(权重) 12 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 13 | 14 | 15 | #编码输入 16 | indexed_tokens = tokenizer.encode("Who is Li Jinhong ? 
Li Jinhong is a") 17 | 18 | print( tokenizer.decode(indexed_tokens)) 19 | 20 | tokens_tensor = torch.tensor([indexed_tokens])#转换为张量 21 | 22 | # 加载预训练模型(权重) 23 | model = GPT2LMHeadModel.from_pretrained('gpt2') 24 | 25 | #将模型设置为评估模式 26 | model.eval() 27 | 28 | tokens_tensor = tokens_tensor.to('cuda') 29 | model.to('cuda') 30 | 31 | # 预测所有标记 32 | with torch.no_grad(): 33 | outputs = model(tokens_tensor) 34 | predictions = outputs[0] 35 | 36 | # 得到预测的下一词 37 | predicted_index = torch.argmax(predictions[0, -1, :]).item() 38 | predicted_text = tokenizer.decode(indexed_tokens + [predicted_index]) 39 | print(predicted_text) 40 | 41 | 42 | #生成一段完整的话 43 | stopids = tokenizer.convert_tokens_to_ids(["."])[0] 44 | past = None 45 | for i in range(100): 46 | with torch.no_grad(): 47 | output, past = model(tokens_tensor, past=past) 48 | token = torch.argmax(output[..., -1, :]) 49 | 50 | indexed_tokens += [token.tolist()] 51 | 52 | if stopids== token.tolist(): 53 | break 54 | tokens_tensor = token.unsqueeze(0) 55 | 56 | sequence = tokenizer.decode(indexed_tokens) 57 | 58 | print(sequence) 59 | 60 | 61 | -------------------------------------------------------------------------------- /code_19_BERTTest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Mar 18 10:43:59 2020 4 | 5 | @author: ljh 6 | """ 7 | 8 | 9 | import torch 10 | from transformers import BertTokenizer, BertForMaskedLM 11 | 12 | #加载预训练模型 tokenizer (vocabulary) 13 | tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') 14 | 15 | #输入文本 16 | text = "[CLS] Who is Li Jinhong ? [SEP] Li Jinhong is a programmer [SEP]" 17 | tokenized_text = tokenizer.tokenize(text) 18 | print(tokenized_text) 19 | 20 | masked_index = 8 #掩码一个标记,用' BertForMaskedLM '预测回来 21 | tokenized_text[masked_index] = '[MASK]' 22 | print(tokenized_text) 23 | 24 | # 将标记转换为词汇表索引 25 | indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) 26 | # 将输入转换为PyTorch张量 27 | tokens_tensor = torch.tensor([indexed_tokens]) 28 | 29 | 30 | #指定设备 31 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 32 | print(device) 33 | 34 | # 加载预训练模型 (weights) 35 | model = BertForMaskedLM.from_pretrained('bert-base-uncased') 36 | model.eval() 37 | model.to(device) 38 | 39 | 40 | segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1] 41 | segments_tensors = torch.tensor([segments_ids]).to(device) 42 | 43 | tokens_tensor = tokens_tensor.to(device) 44 | # 预测所有的tokens 45 | with torch.no_grad(): 46 | outputs = model(tokens_tensor, token_type_ids=segments_tensors) 47 | 48 | predictions = outputs[0] #[1, 15, 30522] 49 | 50 | predicted_index = torch.argmax(predictions[0, masked_index]).item() 51 | predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] #转成单词 52 | print('Predicted token is:',predicted_token) 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /code_02_moons.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Fri Feb 1 00:07:25 2019 8 | """ 9 | 10 | import sklearn.datasets #引入数据集 11 | import torch 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from code_03_moons_fun import LogicNet,plot_losses,predict,plot_decision_boundary 15 | 16 | 17 | 18 | 
torch.manual_seed(0) 19 | torch.cuda.manual_seed_all(0) 20 | 21 | torch.backends.cudnn.deterministic = True 22 | torch.backends.cudnn.benchmark = False 23 | 24 | np.random.seed(0) #设置随机数种子 25 | X, Y = sklearn.datasets.make_moons(200,noise=0.2) #生成2组半圆形数据 26 | 27 | arg = np.squeeze(np.argwhere(Y==0),axis = 1) #获取第1组数据索引 28 | arg2 = np.squeeze(np.argwhere(Y==1),axis = 1)#获取第2组数据索引 29 | 30 | plt.title("moons data") 31 | plt.scatter(X[arg,0], X[arg,1], s=100,c='b',marker='+',label='data1') 32 | plt.scatter(X[arg2,0], X[arg2,1],s=40, c='r',marker='o',label='data2') 33 | plt.legend() 34 | plt.show() 35 | 36 | 37 | 38 | model = LogicNet(inputdim=2,hiddendim=3,outputdim=2)#初始化模型 39 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01)#定义优化器 40 | 41 | 42 | xt = torch.from_numpy(X).type(torch.FloatTensor)#将Numpy数据转化为张量 43 | yt = torch.from_numpy(Y).type(torch.LongTensor) 44 | epochs = 1000#定义迭代次数 45 | losses = []#定义列表,用于接收每一步的损失值 46 | for i in range(epochs): 47 | loss = model.getloss(xt,yt) 48 | losses.append(loss.item()) 49 | optimizer.zero_grad()#清空之前的梯度 50 | loss.backward()#反向传播损失值 51 | optimizer.step()#更新参数 52 | 53 | 54 | 55 | plot_losses(losses) 56 | 57 | 58 | from sklearn.metrics import accuracy_score 59 | print(accuracy_score(model.predict(xt),yt)) 60 | 61 | 62 | plot_decision_boundary(lambda x : predict(model,x) ,xt.numpy(), yt.numpy()) 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /code_18_pipline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Mar 30 10:22:55 2020 4 | 5 | @author: ljh 6 | """ 7 | 8 | from transformers import * 9 | 10 | nlp = pipeline("sentiment-analysis") 11 | print(nlp("I like this book!")) 12 | 13 | ##########################################feature-extraction 14 | import numpy as np 15 | nlp_features = pipeline('feature-extraction') 16 | output = nlp_features('Code Doctor Studio is a Chinese company based in BeiJing.') 17 | print(np.array(output).shape) # (Samples, Tokens, Vector Size)(1, 16, 768) 18 | 19 | 20 | ############################掩码语言建模 21 | nlp_fill = pipeline("fill-mask") 22 | print(nlp_fill.tokenizer.mask_token) 23 | print(nlp_fill(f"Li Jinhong wrote many {nlp_fill.tokenizer.mask_token} about artificial intelligence technology and helped many people.")) 24 | 25 | 26 | 27 | 28 | ############################抽取式问答 29 | 30 | 31 | nlp_qa = pipeline("question-answering") 32 | print(nlp_qa(context='Code Doctor Studio is a Chinese company based in BeiJing.', 33 | question='Where is Code Doctor Studio?') ) 34 | 35 | 36 | 37 | 38 | ###################################摘要 39 | 40 | TEXT_TO_SUMMARIZE = ''' 41 | In this notebook we will be using the transformer model, first introduced in this paper. Specifically, we will be using the BERT (Bidirectional Encoder Representations from Transformers) model from this paper. 42 | Transformer models are considerably larger than anything else covered in these tutorials. As such we are going to use the transformers library to get pre-trained transformers and use them as our embedding layers. We will freeze (not train) the transformer and only train the remainder of the model which learns from the representations produced by the transformer. In this case we will be using a multi-layer bi-directional GRU, however any model can learn from these representations. 
43 | ''' 44 | summarizer = pipeline('summarization') 45 | print(summarizer(TEXT_TO_SUMMARIZE)) 46 | 47 | 48 | # #################命名实体识别 49 | 50 | nlp_token_class = pipeline("ner") 51 | print(nlp_token_class( 52 | 'Code Doctor Studio is a Chinese company based in BeiJing.')) 53 | 54 | -------------------------------------------------------------------------------- /code_05_L2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Tue Apr 30 08:15:15 2019 8 | """ 9 | 10 | import sklearn.datasets #引入数据集 11 | import torch 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from code_03_moons_fun import LogicNet,moving_average,predict,plot_decision_boundary 15 | 16 | np.random.seed(0) #设置随机数种子 17 | X, Y = sklearn.datasets.make_moons(40,noise=0.2) #生成2组半圆形数据 18 | 19 | arg = np.squeeze(np.argwhere(Y==0),axis = 1) #获取第1组数据索引 20 | arg2 = np.squeeze(np.argwhere(Y==1),axis = 1)#获取第2组数据索引 21 | 22 | plt.title("train moons data") 23 | plt.scatter(X[arg,0], X[arg,1], s=100,c='b',marker='+',label='data1') 24 | plt.scatter(X[arg2,0], X[arg2,1],s=40, c='r',marker='o',label='data2') 25 | plt.legend() 26 | plt.show() 27 | 28 | 29 | model = LogicNet(inputdim=2,hiddendim=500,outputdim=2)#初始化模型 30 | #添加正则化处理 31 | weight_p, bias_p = [],[] 32 | for name, p in model.named_parameters(): 33 | if 'bias' in name: 34 | bias_p += [p] 35 | else: 36 | weight_p += [p] 37 | optimizer = torch.optim.Adam([{'params': weight_p, 'weight_decay':0.001}, 38 | {'params': bias_p, 'weight_decay':0}], 39 | lr=0.01) 40 | 41 | 42 | 43 | xt = torch.from_numpy(X).type(torch.FloatTensor)#将Numpy数据转化为张量 44 | yt = torch.from_numpy(Y).type(torch.LongTensor) 45 | epochs = 1000#定义迭代次数 46 | losses = []#定义列表,用于接收每一步的损失值 47 | for i in range(epochs): 48 | loss = model.getloss(xt,yt) 49 | losses.append(loss.item()) 50 | optimizer.zero_grad()#清空之前的梯度 51 | loss.backward()#反向传播损失值 52 | optimizer.step()#更新参数 53 | 54 | 55 | avgloss= moving_average(losses) #获得损失值的移动平均值 56 | plt.figure(1) 57 | plt.subplot(211) 58 | plt.plot(range(len(avgloss)), avgloss, 'b--') 59 | plt.xlabel('step number') 60 | plt.ylabel('Training loss') 61 | plt.title('step number vs. 
Training loss') 62 | plt.show() 63 | 64 | 65 | plot_decision_boundary(lambda x : predict(model,x) ,X, Y) 66 | from sklearn.metrics import accuracy_score 67 | print("训练时的准确率:",accuracy_score(model.predict(xt),yt)) 68 | 69 | Xtest, Ytest = sklearn.datasets.make_moons(80,noise=0.2) #生成2组半圆形数据 70 | plot_decision_boundary(lambda x : predict(model,x) ,Xtest, Ytest) 71 | Xtest_t = torch.from_numpy(Xtest).type(torch.FloatTensor)#将Numpy数据转化为张量 72 | Ytest_t = torch.from_numpy(Ytest).type(torch.LongTensor) 73 | print("测试时的准确率:",accuracy_score(model.predict(Xtest_t),Ytest_t)) 74 | -------------------------------------------------------------------------------- /code_06_Dropout.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Fri Feb 1 00:07:25 2019 8 | """ 9 | 10 | import sklearn.datasets #引入数据集 11 | import torch 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from code_03_moons_fun import LogicNet,moving_average,predict,plot_decision_boundary 15 | import torch.nn as nn 16 | 17 | 18 | 19 | 20 | np.random.seed(0) #设置随机数种子 21 | X, Y = sklearn.datasets.make_moons(40,noise=0.2) #生成2组半圆形数据 22 | 23 | arg = np.squeeze(np.argwhere(Y==0),axis = 1) #获取第1组数据索引 24 | arg2 = np.squeeze(np.argwhere(Y==1),axis = 1)#获取第2组数据索引 25 | 26 | plt.title("train moons data") 27 | plt.scatter(X[arg,0], X[arg,1], s=100,c='b',marker='+',label='data1') 28 | plt.scatter(X[arg2,0], X[arg2,1],s=40, c='r',marker='o',label='data2') 29 | plt.legend() 30 | plt.show() 31 | 32 | #继承LogicNet类,构建网络模型 33 | class Logic_Dropout_Net(LogicNet): 34 | def __init__(self,inputdim,hiddendim,outputdim):#初始化网络结构 35 | super(Logic_Dropout_Net,self).__init__(inputdim,hiddendim,outputdim) 36 | 37 | def forward(self,x): #搭建用两层全连接组成的网络模型 38 | x = self.Linear1(x)#将输入数据传入第1层 39 | x = torch.tanh(x)#对第一层的结果进行非线性变换 40 | x = nn.functional.dropout(x, p=0.07, training=self.training) 41 | x = self.Linear2(x)#再将数据传入第2层 42 | return x 43 | 44 | 45 | 46 | 47 | model = Logic_Dropout_Net(inputdim=2,hiddendim=500,outputdim=2)#初始化模型 48 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01)#定义优化器 49 | 50 | 51 | xt = torch.from_numpy(X).type(torch.FloatTensor)#将Numpy数据转化为张量 52 | yt = torch.from_numpy(Y).type(torch.LongTensor) 53 | epochs = 1000#定义迭代次数 54 | losses = []#定义列表,用于接收每一步的损失值 55 | for i in range(epochs): 56 | loss = model.getloss(xt,yt) 57 | losses.append(loss.item()) 58 | optimizer.zero_grad()#清空之前的梯度 59 | loss.backward()#反向传播损失值 60 | optimizer.step()#更新参数 61 | 62 | 63 | avgloss= moving_average(losses) #获得损失值的移动平均值 64 | plt.figure(1) 65 | plt.subplot(211) 66 | plt.plot(range(len(avgloss)), avgloss, 'b--') 67 | plt.xlabel('step number') 68 | plt.ylabel('Training loss') 69 | plt.title('step number vs. 
Training loss') 70 | plt.show() 71 | 72 | 73 | plot_decision_boundary(lambda x : predict(model,x) ,X, Y) 74 | from sklearn.metrics import accuracy_score 75 | print("训练时的准确率:",accuracy_score(model.predict(xt),yt)) 76 | 77 | Xtest, Ytest = sklearn.datasets.make_moons(80,noise=0.2) #生成2组半圆形数据 78 | plot_decision_boundary(lambda x : predict(model,x) ,Xtest, Ytest) 79 | Xtest_t = torch.from_numpy(Xtest).type(torch.FloatTensor)#将Numpy数据转化为张量 80 | Ytest_t = torch.from_numpy(Ytest).type(torch.LongTensor) 81 | print("测试时的准确率:",accuracy_score(model.predict(Xtest_t),Ytest_t)) 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /code_08_BN.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Fri Feb 1 00:07:25 2019 8 | """ 9 | 10 | import sklearn.datasets #引入数据集 11 | import torch 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from code_03_moons_fun import LogicNet,moving_average,predict,plot_decision_boundary 15 | import torch.nn as nn 16 | 17 | 18 | 19 | 20 | np.random.seed(0) #设置随机数种子 21 | X, Y = sklearn.datasets.make_moons(40,noise=0.2) #生成2组半圆形数据 22 | 23 | arg = np.squeeze(np.argwhere(Y==0),axis = 1) #获取第1组数据索引 24 | arg2 = np.squeeze(np.argwhere(Y==1),axis = 1)#获取第2组数据索引 25 | 26 | plt.title("train moons data") 27 | plt.scatter(X[arg,0], X[arg,1], s=100,c='b',marker='+',label='data1') 28 | plt.scatter(X[arg2,0], X[arg2,1],s=40, c='r',marker='o',label='data2') 29 | plt.legend() 30 | plt.show() 31 | 32 | #继承LogicNet类,构建网络模型 33 | class Logic_BN_Net(LogicNet): 34 | def __init__(self,inputdim,hiddendim,outputdim):#初始化网络结构 35 | super(Logic_BN_Net,self).__init__(inputdim,hiddendim,outputdim) 36 | self.BN = nn.BatchNorm1d(hiddendim) #定义BN层 37 | def forward(self,x): #搭建用两层全连接组成的网络模型 38 | x = self.Linear1(x)#将输入数据传入第1层 39 | x = torch.tanh(x)#对第一层的结果进行非线性变换 40 | x = self.BN(x)#将第一层的数据做BN处理 41 | x = self.Linear2(x)#再将数据传入第2层 42 | return x 43 | 44 | 45 | 46 | 47 | model = Logic_BN_Net(inputdim=2,hiddendim=500,outputdim=2)#初始化模型 48 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01)#定义优化器 49 | 50 | 51 | xt = torch.from_numpy(X).type(torch.FloatTensor)#将Numpy数据转化为张量 52 | yt = torch.from_numpy(Y).type(torch.LongTensor) 53 | epochs = 200#定义迭代次数 54 | losses = []#定义列表,用于接收每一步的损失值 55 | for i in range(epochs): 56 | loss = model.getloss(xt,yt) 57 | losses.append(loss.item()) 58 | optimizer.zero_grad()#清空之前的梯度 59 | loss.backward()#反向传播损失值 60 | optimizer.step()#更新参数 61 | 62 | 63 | avgloss= moving_average(losses) #获得损失值的移动平均值 64 | plt.figure(1) 65 | plt.subplot(211) 66 | plt.plot(range(len(avgloss)), avgloss, 'b--') 67 | plt.xlabel('step number') 68 | plt.ylabel('Training loss') 69 | plt.title('step number vs. Training loss') 70 | plt.show() 71 | 72 | 73 | plot_decision_boundary(lambda x : predict(model,x) ,X, Y) 74 | from sklearn.metrics import accuracy_score 75 | print("训练时的准确率:",accuracy_score(model.predict(xt),yt)) 76 | 77 | Xtest, Ytest = sklearn.datasets.make_moons(80,noise=0.2) #生成2组半圆形数据 78 | plot_decision_boundary(lambda x : predict(model,x) ,Xtest, Ytest) 79 | Xtest_t = torch.from_numpy(Xtest).type(torch.FloatTensor)#将Numpy数据转化为张量 80 | Ytest_t = torch.from_numpy(Ytest).type(torch.LongTensor) 81 | print("测试时的准确率:",accuracy_score(model.predict(Xtest_t),Ytest_t)) 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /code_25_BERT_NoPUNC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Apr 10 07:10:37 2020 4 | 5 | @author: ljh 6 | """ 7 | 8 | #没标点 9 | 10 | import re 11 | import pickle 12 | import torch 13 | from tqdm import tqdm 14 | 15 | from code_24_BERT_PROPN import (device,df_test,df_train_val, 16 | getmodel,insert_tag,tokenize) 17 | 18 | def clean_and_replace_target_name(row): #去掉标点符号 19 | text = row['TextClean'] 20 | text = re.sub("[^a-zA-Z]"," ",text) #只保留英文字符,去掉标点及数字 21 | A = re.sub("[^a-zA-Z]"," ",row['A']) #只保留英文字符 22 | B = re.sub("[^a-zA-Z]"," ",row['B']) #只保留英文字符 23 | 24 | # replace names # 先分词,再取第一个,Dehner--》 ['de', '##hner']--》de 确保不被分成2个词 25 | text = re.sub(str(A), tokenizer.tokenize(A)[0], text) #将名称只换做一个词Bob Suter--》bob 26 | text = re.sub(str(B), tokenizer.tokenize(B)[0], text) 27 | 28 | text = re.sub(r"THISISA", r"[THISISA]", text) 29 | text = re.sub(r"THISISB", r"[THISISB]", text) 30 | text = re.sub(r"THISISP", r"[THISISP]", text) 31 | 32 | text = re.sub(' +', ' ', text) #去掉多个空格 33 | return text 34 | 35 | 36 | def savepkl(df,prename=''): 37 | offsets_lst = [] 38 | tokens_lst = [] 39 | max_len=269 #设置处理文本的最大长度 40 | bert_prediction = [] 41 | for _, row in tqdm(df.iterrows(),total=len(df)): 42 | 43 | row.loc['TextClean'] = insert_tag(row,hasbrack= False)#插入标签,防止去标点时,一起被去掉 44 | text = clean_and_replace_target_name(row)#去除标点、空格,并压缩被指代的名词 45 | 46 | encode_rel= tokenizer.encode_plus(text,max_length=max_len,pad_to_max_length=True)#向量化 len=90 47 | 48 | tokens, offsets ,masks= tokenize(encode_rel['input_ids'] , 49 | tokenizer,encode_rel['attention_mask'])#获取标签偏移 50 | offsets_lst.append(offsets) 51 | tokens_lst.append(tokens) 52 | #验证代词位置 53 | # print( tokenizer.decode(tokens),len(tokens)) 54 | # print( tokenizer.decode(np.asarray(tokens)[list(offsets)])) 55 | token_tensor = torch.LongTensor([tokens]).to(device) 56 | masks_tensor = torch.LongTensor([masks]).to(device) 57 | #输入BERT模型 58 | bert_outputs,bert_last_outputs= model(token_tensor,attention_mask =masks_tensor) #[1, 107, 768] , [1, 768] 59 | bert_prediction.append(bert_outputs.cpu().numpy())#([1, 266, 768]) 60 | 61 | pickle.dump(offsets_lst, open(prename+'offsets_NoPUNC.pkl', "wb")) 62 | pickle.dump(tokens_lst, open(prename+'tokens_NoPUNC_padding.pkl', "wb")) 63 | pickle.dump(bert_prediction, open(prename+'bert_outputs_forNoPUNC.pkl', "wb")) 64 | 65 | if __name__ == '__main__': 66 | 67 | tokenizer,model = getmodel() 68 | model.to(device) 69 | torch.set_grad_enabled(False) 70 | 71 | savepkl(df_test, 'test_') 72 | savepkl(df_train_val, ) -------------------------------------------------------------------------------- /code_03_moons_fun.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 
""" 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Fri Feb 1 00:07:25 2019 8 | """ 9 | 10 | 11 | import torch 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | import torch.nn as nn 15 | 16 | #继承nn.Module类,构建网络模型 17 | class LogicNet(nn.Module): 18 | def __init__(self,inputdim,hiddendim,outputdim):#初始化网络结构 19 | super(LogicNet,self).__init__() 20 | self.Linear1 = nn.Linear(inputdim,hiddendim) #定义全连接层 21 | self.Linear2 = nn.Linear(hiddendim,outputdim)#定义全连接层 22 | self.criterion = nn.CrossEntropyLoss() #定义交叉熵函数 23 | 24 | def forward(self,x): #搭建用两层全连接组成的网络模型 25 | x = self.Linear1(x)#将输入数据传入第1层 26 | x = torch.tanh(x)#对第一层的结果进行非线性变换 27 | x = self.Linear2(x)#再将数据传入第2层 28 | # print("LogicNet") 29 | return x 30 | 31 | def predict(self,x):#实现LogicNet类的预测接口 32 | #调用自身网络模型,并对结果进行softmax处理,分别得出预测数据属于每一类的概率 33 | pred = torch.softmax(self.forward(x),dim=1) 34 | return torch.argmax(pred,dim=1) #返回每组预测概率中最大的索引 35 | 36 | def getloss(self,x,y): #实现LogicNet类的损失值计算接口 37 | y_pred = self.forward(x) 38 | loss = self.criterion(y_pred,y)#计算损失值得交叉熵 39 | return loss 40 | 41 | 42 | 43 | 44 | def moving_average(a, w=10):#定义函数计算移动平均损失值 45 | if len(a) < w: 46 | return a[:] 47 | return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)] 48 | 49 | def plot_losses(losses): 50 | avgloss= moving_average(losses) #获得损失值的移动平均值 51 | plt.figure(1) 52 | plt.subplot(211) 53 | plt.plot(range(len(avgloss)), avgloss, 'b--') 54 | plt.xlabel('step number') 55 | plt.ylabel('Training loss') 56 | plt.title('step number vs. Training loss') 57 | plt.show() 58 | 59 | def predict(model,x): #封装支持Numpy的预测接口 60 | x = torch.from_numpy(x).type(torch.FloatTensor) 61 | ans = model.predict(x) 62 | return ans.numpy() 63 | 64 | def plot_decision_boundary(pred_func,X,Y):#在直角坐标系中可视化模型能力 65 | #计算取值范围 66 | x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 67 | y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 68 | h = 0.01 69 | #在坐标系中采用数据,生成网格矩阵,用于输入模型 70 | xx,yy=np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 71 | #将数据输入并进行预测 72 | Z = pred_func(np.c_[xx.ravel(), yy.ravel()]) 73 | Z = Z.reshape(xx.shape) 74 | #将预测的结果可视化 75 | plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral) 76 | plt.title("Linear predict") 77 | arg = np.squeeze(np.argwhere(Y==0),axis = 1) 78 | arg2 = np.squeeze(np.argwhere(Y==1),axis = 1) 79 | plt.scatter(X[arg,0], X[arg,1], s=100,c='b',marker='+') 80 | plt.scatter(X[arg2,0], X[arg2,1],s=40, c='r',marker='o') 81 | plt.show() 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /code_12_CONV.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Thu Apr 25 15:18:57 2019 8 | """ 9 | 10 | import torch 11 | # [batch, in_channels, in_height, in_width] [训练时一个batch的图片数量, 图像通道数, 图片高度, 图片宽度] 12 | input1 = torch.ones([1, 1, 5, 5]) 13 | input2 = torch.ones([1, 2, 5, 5]) 14 | input3 = torch.ones([1, 1, 4, 4]) 15 | # [ out_channels, in_channels,filter_height, filter_width] [卷积核个数,图像通道数,卷积核的高度,卷积核的宽度] 16 | filter1 = torch.tensor([-1.0,0,0,-1]).reshape([2, 2, 1, 1]) 17 | filter2 = torch.tensor([-1.0,0,0,-1,-1.0,0,0,-1]).reshape([2,1,2, 2]) 18 | filter3 = torch.tensor([-1.0,0,0,-1,-1.0,0,0,-1,-1.0,0,0,-1]).reshape([3,1,2, 2]) 19 | filter4 = 
torch.tensor([-1.0,0,0,-1,-1.0,0,0,-1, 20 | -1.0,0,0,-1, 21 | -1.0,0,0,-1]).reshape([2, 2, 2, 2]) 22 | filter5 = torch.tensor([-1.0,0,0,-1,-1.0,0,0,-1]).reshape([1,2, 2, 2]) 23 | 24 | #class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True) 25 | #condv = torch.nn.Conv2d(1,1,kernel_size=1,padding=1, bias=False) 26 | #condv.weight = torch.nn.Parameter(torch.ones([1,1,1,1])) 27 | #padding1 = condv(input1) 28 | #print(padding1) 29 | 30 | #验证padding补0的规则 ——上下左右都补0 31 | padding1 = torch.nn.functional.conv2d(input1, torch.ones([1,1,1,1]), stride=1, padding=1) 32 | print(padding1) 33 | 34 | 35 | padding2 = torch.nn.functional.conv2d(input1, torch.ones([1,1,1,1]), stride=1, padding=(1,2)) 36 | print(padding2) 37 | 38 | ##1个通道输入,生成1个feature map 39 | #filter1 = torch.tensor([-1.0,0,0,-1]).reshape([1, 1, 2, 2]) 40 | #op1 = torch.nn.functional.conv2d(input1, filter1, stride=2, padding=1) 41 | #print('\n') 42 | #print(padding1) 43 | #print(filter1) 44 | #print(op1) 45 | 46 | #torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5) 47 | #torch.nn.functional.conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1) 48 | #torch.nn.functional.conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1) 49 | 50 | 51 | 52 | 53 | 54 | op1 = torch.nn.functional.conv2d(input1, filter1, stride=2, padding=1) #1个通道输入,生成1个feature map 55 | op2 = torch.nn.functional.conv2d(input1, filter2, stride=2, padding=1) #1个通道输入,生成2个feature map 56 | op3 = torch.nn.functional.conv2d(input1, filter3, stride=2, padding=1) #1个通道输入,生成3个feature map 57 | 58 | op4 = torch.nn.functional.conv2d(input2, filter4, stride=2, padding=1) # 2个通道输入,生成2个feature 59 | op5 = torch.nn.functional.conv2d(input2, filter5, stride=2, padding=1) # 2个通道输入,生成一个feature map 60 | 61 | op6 = torch.nn.functional.conv2d(input1, filter1, stride=2, padding=0) # 5*5 对于pading不同而不同 62 | 63 | 64 | print("op1:\n",op1,filter1)#1-1 后面补0 65 | print("------------------") 66 | 67 | print("op2:\n",op2,filter2) #1-2多卷积核 按列取 68 | print("op3:\n",op3,filter3) #1-3 69 | print("------------------") 70 | 71 | print("op4:\n",op4,filter4)#2-2 通道叠加 72 | print("op5:\n",op5,filter5)#2-1 73 | print("------------------") 74 | 75 | print("op1:\n",op1,filter1)#1-1 76 | print("op6:\n",op6,filter1) 77 | 78 | -------------------------------------------------------------------------------- /code_24_BERT_PROPN.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Apr 10 07:10:37 2020 4 | 5 | @author: ljh 6 | """ 7 | 8 | #提取代词特征 9 | 10 | import pandas as pd 11 | import pickle 12 | import torch 13 | from tqdm import tqdm 14 | from transformers import BertTokenizer,BertModel,BertConfig 15 | 16 | #指定设备 17 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 18 | print(device) 19 | 20 | #读取数据 21 | df_test = pd.read_csv("gap-development.tsv", delimiter="\t") 22 | df_train_val = pd.concat([ 23 | pd.read_csv("gap-test.tsv", delimiter="\t"), 24 | pd.read_csv("gap-validation.tsv", delimiter="\t") 25 | ], axis=0) 26 | 27 | 28 | def getmodel(): 29 | #加载词表文件tokenizer 30 | tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') 31 | 32 | #添加特殊词 33 | special_tokens_dict = {'additional_special_tokens': ["[THISISA]","[THISISB]","[THISISP]"]} 34 | tokenizer.add_special_tokens(special_tokens_dict) #添加特殊词 35 | print(tokenizer.additional_special_tokens,tokenizer.additional_special_tokens_ids) 36 | 37 | 38 | 
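    #说明:上面add_special_tokens会扩充tokenizer的词表,但下面的tokenize()
    #在送入BERT之前会把这些特殊词的id从序列中剥离(只记录其偏移位置),
    #因此这里不必调用model.resize_token_embeddings()扩充词嵌入矩阵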
model = BertModel.from_pretrained('bert-base-uncased')#加载模型 39 | return tokenizer,model 40 | 41 | 42 | 43 | 44 | ############################ 45 | 46 | 47 | def insert_tag(row,hasbrack=True):#按照插入的位置,从大到小排序[(383, ' THISISP '), (366, ' THISISB '), (352, ' THISISA ')] 48 | orgtag=[" [THISISA] "," [THISISB] "," [THISISP] "] 49 | if hasbrack==False: 50 | orgtag=[" THISISA "," THISISB "," THISISP "] 51 | 52 | to_be_inserted = sorted([ 53 | (row["A-offset"], orgtag[0]), 54 | (row["B-offset"], orgtag[1]), 55 | (row["Pronoun-offset"], orgtag[2])], key=lambda x: x[0], reverse=True) 56 | 57 | text = row["Text"]#len 443 58 | for offset, tag in to_be_inserted:#先插最后的,不会影响前面 59 | text = text[:offset] + tag + text[offset:]#(插到每个代词的前面) 60 | return text#len 470 (443+3*9) 61 | 62 | 63 | 64 | def tokenize(sequence_ind, tokenizer,sequence_mask= None):#将标签分离,并返回标签偏移位置 65 | entries = {} 66 | final_tokens=[] 67 | final_mask=[] 68 | 69 | for i,one in enumerate(sequence_ind): 70 | if one in tokenizer.additional_special_tokens_ids: 71 | tokenstr = tokenizer.convert_ids_to_tokens(one) 72 | entries[tokenstr] = len(final_tokens) 73 | continue 74 | final_tokens.append(one) 75 | if sequence_mask is not None: 76 | final_mask.append(sequence_mask[i]) 77 | return final_tokens, (entries["[THISISA]"], entries["[THISISB]"], entries["[THISISP]"]) ,final_mask 78 | 79 | 80 | 81 | def savepkl(df,name): 82 | bert_prediction = [] 83 | for _, row in tqdm(df.iterrows(),total=len(df)): 84 | #循环内部 85 | text = insert_tag(row)#插入标签 86 | sequence_ind = tokenizer.encode(text)#向量化 87 | tokens, offsets,_ = tokenize(sequence_ind, tokenizer)#获取标签偏移 88 | token_tensor = torch.LongTensor([tokens]).to(device) 89 | bert_outputs,bert_last_outputs= model(token_tensor) #[1, 107, 768] , [1, 768] 90 | extracted_outputs = bert_outputs[:,offsets,:]#根据偏移位置抽取特征向量 91 | bert_prediction.append(extracted_outputs.cpu().numpy()) 92 | pickle.dump(bert_prediction, open(name, "wb")) 93 | 94 | 95 | if __name__ == '__main__': 96 | 97 | tokenizer,model = getmodel() 98 | model.to(device) 99 | torch.set_grad_enabled(False) 100 | 101 | savepkl(df_test, 'test_bert_outputs_forPROPN.pkl') 102 | savepkl(df_train_val, 'bert_outputs_forPROPN.pkl') 103 | 104 | -------------------------------------------------------------------------------- /code_07_Multi-sampleDropout.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Nov 5 12:11:47 2020 4 | 5 | @author: ljh 6 | """ 7 | 8 | 9 | import sklearn.datasets #引入数据集 10 | import torch 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | from code_03_moons_fun import LogicNet,moving_average,predict,plot_decision_boundary 14 | import torch.nn as nn 15 | 16 | 17 | 18 | 19 | np.random.seed(0) #设置随机数种子 20 | X, Y = sklearn.datasets.make_moons(40,noise=0.2) #生成2组半圆形数据 21 | 22 | arg = np.squeeze(np.argwhere(Y==0),axis = 1) #获取第1组数据索引 23 | arg2 = np.squeeze(np.argwhere(Y==1),axis = 1)#获取第2组数据索引 24 | 25 | plt.title("train moons data") 26 | plt.scatter(X[arg,0], X[arg,1], s=100,c='b',marker='+',label='data1') 27 | plt.scatter(X[arg2,0], X[arg2,1],s=40, c='r',marker='o',label='data2') 28 | plt.legend() 29 | plt.show() 30 | 31 | #继承LogicNet类,构建网络模型 32 | class Logic_Dropout_Net(LogicNet): 33 | def __init__(self,inputdim,hiddendim,outputdim):#初始化网络结构 34 | super(Logic_Dropout_Net,self).__init__(inputdim,hiddendim,outputdim) 35 | 36 | self.drop = nn.Dropout(0.07, inplace=False) 37 | 38 | def forward(self,x): #搭建用两层全连接组成的网络模型 39 | x = 
self.Linear1(x)#将输入数据传入第1层 40 | x = torch.tanh(x)#对第一层的结果进行非线性变换 41 | # x = nn.functional.dropout(x, p=0.07, training=self.training) 42 | x = self.drop(x) 43 | x = self.Linear2(x)#再将数据传入第2层 44 | return x 45 | 46 | class Logic_TDropout_Net(LogicNet): 47 | def __init__(self,inputdim,hiddendim,outputdim, dropout_num=8,dropout_p=0.5):#初始化网络结构 48 | super(Logic_TDropout_Net,self).__init__(inputdim,hiddendim,outputdim) 49 | 50 | self.dropouts = nn.ModuleList([nn.Dropout(dropout_p, inplace=False) for _ in range(dropout_num)]) 51 | 52 | 53 | def forward(self,x): #搭建用两层全连接组成的网络模型 54 | x = self.Linear1(x)#将输入数据传入第1层 55 | x = torch.tanh(x)#对第一层的结果进行非线性变换 56 | 57 | if len(self.dropouts) == 0: 58 | return self.Linear2(x)#再将数据传入第2层 59 | else: 60 | for i,dropout in enumerate(self.dropouts): 61 | if i== 0: 62 | out = dropout(x) 63 | out = self.Linear2(out) 64 | else: 65 | temp_out = dropout(x) 66 | out =out+ self.Linear2(temp_out)#再将数据传入第2层 67 | return out 68 | 69 | #model = Logic_Dropout_Net(inputdim=2,hiddendim=500,outputdim=2)#初始化模型 70 | model = Logic_TDropout_Net(inputdim=2,hiddendim=500,outputdim=2,dropout_num=8,dropout_p=0.1)#初始化模型 71 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01)#定义优化器 72 | 73 | 74 | xt = torch.from_numpy(X).type(torch.FloatTensor)#将Numpy数据转化为张量 75 | yt = torch.from_numpy(Y).type(torch.LongTensor) 76 | epochs = 300#定义迭代次数 77 | losses = []#定义列表,用于接收每一步的损失值 78 | for i in range(epochs): 79 | loss = model.getloss(xt,yt) 80 | losses.append(loss.item()) 81 | optimizer.zero_grad()#清空之前的梯度 82 | loss.backward()#反向传播损失值 83 | optimizer.step()#更新参数 84 | 85 | 86 | avgloss= moving_average(losses) #获得损失值的移动平均值 87 | plt.figure(1) 88 | plt.subplot(211) 89 | plt.plot(range(len(avgloss)), avgloss, 'b--') 90 | plt.xlabel('step number') 91 | plt.ylabel('Training loss') 92 | plt.title('step number vs. 
Training loss') 93 | plt.show() 94 | 95 | 96 | plot_decision_boundary(lambda x : predict(model,x) ,X, Y) 97 | from sklearn.metrics import accuracy_score 98 | print("训练时的准确率:",accuracy_score(model.predict(xt),yt)) 99 | 100 | Xtest, Ytest = sklearn.datasets.make_moons(80,noise=0.2) #生成2组半圆形数据 101 | plot_decision_boundary(lambda x : predict(model,x) ,Xtest, Ytest) 102 | Xtest_t = torch.from_numpy(Xtest).type(torch.FloatTensor)#将Numpy数据转化为张量 103 | Ytest_t = torch.from_numpy(Ytest).type(torch.LongTensor) 104 | print("测试时的准确率:",accuracy_score(model.predict(Xtest_t),Ytest_t)) 105 | 106 | 107 | 108 | model.eval() -------------------------------------------------------------------------------- /code_27_spellgcn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Dec 1 20:29:59 2020 4 | 5 | @author: ljh 6 | """ 7 | from dgl.nn import GraphConv 8 | from transformers import BertTokenizer, BertModel, BertConfig,BertLMHeadModel 9 | import dgl 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | def load_graph(graph_dir): 15 | nodes_vocab = {} 16 | with open("%s/nodes_vocab.txt"%(graph_dir),encoding="UTF-8") as f: 17 | for i, line in enumerate(f): 18 | nodes_vocab.setdefault(line.strip(), i) 19 | 20 | node1s,node2s = [],[] 21 | with open("%s/spellGraphs.txt"%(graph_dir),encoding="UTF-8") as f: 22 | for i, line in enumerate(f): 23 | e1,e2, rel = line.strip().split("|") 24 | node1s.append(nodes_vocab[e1]) 25 | node2s.append(nodes_vocab[e2]) 26 | 27 | g1 = dgl.graph((node1s, node2s),num_nodes=len(nodes_vocab)) 28 | 29 | w2n = [] 30 | vocab = {} 31 | with open("%s/vocab.txt"%(graph_dir),encoding="UTF-8") as f: 32 | for i, line in enumerate(f): 33 | word = line.strip() 34 | vocab.setdefault(word, i) 35 | if word in nodes_vocab: 36 | w2n.append(nodes_vocab[word]) 37 | else: 38 | w2n.append(0) 39 | n2w = [] 40 | with open("%s/nodes_vocab.txt"%(graph_dir),encoding="UTF-8") as f: 41 | for i, line in enumerate(f): 42 | word = line.strip() 43 | if word in vocab: 44 | n2w.append(vocab[word]) 45 | else: 46 | n2w.append(0) 47 | return g1,w2n,n2w 48 | 49 | graph_dir = r'./gcn_graph' 50 | 51 | config = BertConfig.from_pretrained(r'./bert-base-chinese') 52 | config.is_decoder = True 53 | 54 | g1,w2n,n2w = load_graph(graph_dir) 55 | w2n=torch.tensor(w2n) 56 | n2w =torch.tensor(n2w) 57 | g = dgl.add_self_loop(g1) 58 | 59 | mask_nodes_ids = torch.where( w2n !=0)[0] #找到不为0的id 60 | maskbase = torch.zeros( (config.vocab_size,config.hidden_size) ) 61 | maskbase[mask_nodes_ids] =1. 
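#maskbase是与BERT词表对齐的0/1矩阵:出现在拼写图中的字符所在行为1,其余行为0。
#下面spellgcnBert.getgnnemb()用它把GCN精调后的字向量拼回原词嵌入表:
#  gcn_embedding = maskbase*expanded_node_embedding + (1-maskbase)*rest_embedding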
62 | 63 | class MGCNNet(nn.Module): 64 | def __init__(self): 65 | super(MGCNNet, self).__init__() 66 | self.gcn1 = GraphConv(config.hidden_size, config.hidden_size) 67 | self.dropout = nn.Dropout(0.1) 68 | self.gcn2 = GraphConv(config.hidden_size, config.hidden_size) 69 | 70 | def forward(self, g, features): 71 | gcn1out = self.gcn1(g, features) 72 | x = self.dropout(gcn1out) 73 | gcn2out = self.gcn2(g, x) 74 | return features+gcn1out+gcn2out 75 | 76 | class spellgcnBert(nn.Module): 77 | def __init__(self, MLbert): 78 | super(spellgcnBert, self).__init__() 79 | self.MLbert = MLbert 80 | self.gnnmodel = MGCNNet() 81 | 82 | 83 | def getgnnemb(self): 84 | feat = self.MLbert.bert.embeddings.word_embeddings( n2w )#( input_ids=torch.tensor([n2w]).to(device) ) 85 | node_embedding = self.gnnmodel(g, feat) #[4755, 768] 86 | expanded_node_embedding = node_embedding[w2n]#21128, 768 87 | rest_embedding = self.MLbert.bert.get_input_embeddings().weight#21128, 768] 88 | gcn_embedding = maskbase * expanded_node_embedding + (1 - maskbase) * rest_embedding 89 | return gcn_embedding 90 | 91 | 92 | def forward(self, input_ids, input_mask, segment_ids): 93 | gcn_embedding = self.getgnnemb() 94 | outputs = self.MLbert.bert(input_ids, input_mask, segment_ids) #outputs[0]为序列特征[batch_size, seq_len, hidden_size] 95 | sequence_output = outputs[0] 96 | hidden_states = self.MLbert.cls.predictions.transform(sequence_output) 97 | prediction_scores =F.linear(hidden_states, gcn_embedding, self.MLbert.cls.predictions.bias) 98 | return prediction_scores 99 | 100 | tokenizer = BertTokenizer.from_pretrained(r'./bert-base-chinese') 101 | MLbert = BertLMHeadModel.from_pretrained(r'./bert-base-chinese', config=config) 102 | spellgcnBertmodel = spellgcnBert(MLbert) 103 | -------------------------------------------------------------------------------- /code_04_use_module.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Wed Apr 3 06:12:15 2019 8 | """ 9 | 10 | 11 | import sklearn.datasets #引入数据集 12 | import torch 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | np.random.seed(0) #设置随机数种子 17 | X, Y = sklearn.datasets.make_moons(200,noise=0.2) #生成2组半圆形数据 18 | 19 | arg = np.squeeze(np.argwhere(Y==0),axis = 1) #获取第1组数据索引 20 | arg2 = np.squeeze(np.argwhere(Y==1),axis = 1)#获取第2组数据索引 21 | 22 | plt.title("moons data") 23 | plt.scatter(X[arg,0], X[arg,1], s=100,c='b',marker='+',label='data1') 24 | plt.scatter(X[arg2,0], X[arg2,1],s=40, c='r',marker='o',label='data2') 25 | plt.legend() 26 | plt.show() 27 | 28 | 29 | 30 | import torch.nn as nn 31 | 32 | #继承nn.Module类,构建网络模型 33 | class LogicNet(nn.Module): 34 | def __init__(self,inputdim,hiddendim,outputdim):#初始化网络结构 35 | super(LogicNet,self).__init__() 36 | # self.Linear1 = nn.Linear(inputdim,hiddendim) #定义全连接层 37 | # self.Linear2 = nn.Linear(hiddendim,outputdim)#定义全连接层 38 | self.add_module("Linear1", nn.Linear(inputdim,hiddendim))#定义全连接层 39 | self.add_module("Linear2", nn.Linear(hiddendim,outputdim))#定义全连接层 40 | self.criterion = nn.CrossEntropyLoss() #定义交叉熵函数 41 | 42 | def forward(self,x): #搭建用两层全连接组成的网络模型 43 | x = self.Linear1(x)#将输入数据传入第1层 44 | x = torch.tanh(x)#对第一层的结果进行非线性变换 45 | x = self.Linear2(x)#再将数据传入第2层 46 | return x 47 | 48 | def predict(self,x):#实现LogicNet类的预测接口 49 | #调用自身网络模型,并对结果进行softmax处理,分别得出预测数据属于每一类的概率 50 | pred = torch.softmax(self.forward(x),dim=1) 51 | return 
torch.argmax(pred,dim=1) #返回每组预测概率中最大的索引 52 | 53 | def getloss(self,x,y): #实现LogicNet类的损失值计算接口 54 | y_pred = self.forward(x) 55 | loss = self.criterion(y_pred,y)#计算损失值得交叉熵 56 | return loss 57 | 58 | 59 | 60 | 61 | model = LogicNet(inputdim=2,hiddendim=3,outputdim=2)#初始化模型 62 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01)#定义优化器 63 | #model = model.cuda() 64 | for sub_module in model.children(): 65 | print(sub_module) 66 | 67 | 68 | for name, module in model.named_children(): 69 | print(name,"is:",module) 70 | 71 | 72 | for module in model.modules(): 73 | print(module) 74 | 75 | for param in model.parameters(): 76 | print(type(param.data), param.size()) 77 | 78 | for name,param in model.named_parameters(): 79 | print(type(param.data), param.size(),name) 80 | 81 | 82 | 83 | xt = torch.from_numpy(X).type(torch.FloatTensor)#将Numpy数据转化为张量 84 | yt = torch.from_numpy(Y).type(torch.LongTensor) 85 | epochs = 1000#定义迭代次数 86 | losses = []#定义列表,用于接收每一步的损失值 87 | for i in range(epochs): 88 | loss = model.getloss(xt,yt) 89 | losses.append(loss.item()) 90 | optimizer.zero_grad()#清空之前的梯度 91 | loss.backward()#反向传播损失值 92 | optimizer.step()#更新参数 93 | 94 | def moving_average(a, w=10):#定义函数计算移动平均损失值 95 | if len(a) < w: 96 | return a[:] 97 | return [val if idx < w else sum(a[(idx-w):idx])/w for idx, val in enumerate(a)] 98 | 99 | avgloss= moving_average(losses) #获得损失值的移动平均值 100 | plt.figure(1) 101 | plt.subplot(211) 102 | plt.plot(range(len(avgloss)), avgloss, 'b--') 103 | plt.xlabel('step number') 104 | plt.ylabel('Training loss') 105 | plt.title('step number vs. Training loss') 106 | plt.show() 107 | 108 | 109 | from sklearn.metrics import accuracy_score 110 | print(accuracy_score(model.predict(xt),yt)) 111 | 112 | 113 | def predict(x): #封装支持Numpy的预测接口 114 | x = torch.from_numpy(x).type(torch.FloatTensor) 115 | ans = model.predict(x) 116 | return ans.numpy() 117 | 118 | def plot_decision_boundary(pred_func,X,Y):#在直角坐标系中可视化模型能力 119 | #计算取值范围 120 | x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 121 | y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 122 | h = 0.01 123 | #在坐标系中采用数据,生成网格矩阵,用于输入模型 124 | xx,yy=np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 125 | #将数据输入并进行预测 126 | Z = pred_func(np.c_[xx.ravel(), yy.ravel()]) 127 | Z = Z.reshape(xx.shape) 128 | #将预测的结果可视化 129 | plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral) 130 | plt.title("Linear predict") 131 | arg = np.squeeze(np.argwhere(Y==0),axis = 1) 132 | arg2 = np.squeeze(np.argwhere(Y==1),axis = 1) 133 | plt.scatter(X[arg,0], X[arg,1], s=100,c='b',marker='+') 134 | plt.scatter(X[arg2,0], X[arg2,1],s=40, c='r',marker='o') 135 | 136 | 137 | plot_decision_boundary(lambda x : predict(x) ,xt.numpy(), yt.numpy()) -------------------------------------------------------------------------------- /code_22_TextCNNInterpret.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Nov 9 10:13:29 2019 4 | 5 | @author: ljh 6 | """ 7 | 8 | 9 | 10 | import spacy #引入分词库 11 | import torch#引入PyTorch库 12 | import torch.nn.functional as F 13 | #引入解释库 14 | from captum.attr import (IntegratedGradients,TokenReferenceBase,visualization, 15 | configure_interpretable_embedding_layer, remove_interpretable_embedding_layer) 16 | 17 | #引入本地代码库 18 | from code_14_TextCNN import TextCNN, TEXT,LABEL 19 | 20 | class TextCNNInterpret(TextCNN):#定义TextCNN的子类 21 | def __init__(self, *args,**kwargs):#透传参数 22 | super().__init__(*args,**kwargs) 23 | def forward(self, text): 
#重载模型处理方法 24 | embedded = self.embedding(text)#从词嵌入开始处理 25 | #后面的代码与TextCNN一样 26 | embedded = embedded.unsqueeze(1) 27 | conved = [self.mish(conv(embedded)).squeeze(3) for conv in self.convs] 28 | pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved] 29 | cat = self.dropout(torch.cat(pooled, dim = 1)) 30 | return self.fc(cat) 31 | 32 | ########################## 33 | #定义模型参数 34 | INPUT_DIM = len(TEXT.vocab)#25002 35 | EMBEDDING_DIM = TEXT.vocab.vectors.size()[1] #100 36 | N_FILTERS = 100 37 | FILTER_SIZES = [3,4,5] 38 | OUTPUT_DIM = 1 39 | DROPOUT = 0.5 40 | PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] 41 | #实例化模型 42 | model = TextCNNInterpret(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX) 43 | 44 | #加载模型权重 45 | model.load_state_dict(torch.load('textcnn-model.pt') ) 46 | print('Vocabulary Size: ', len(TEXT.vocab)) 47 | #对嵌入层进行封装并提取 48 | interpretable_embedding = configure_interpretable_embedding_layer (model, 'embedding') 49 | 50 | ########################## 51 | 52 | 53 | 54 | ig = IntegratedGradients(model)#创建梯度积分算法对象 55 | 56 | #定义列表,存放可视化记录 57 | vis_data_records_ig = [] 58 | 59 | nlp = spacy.load('en') #为分词库加载英文语言包 60 | 61 | 62 | #定义函数对句子进行可解释性分析 63 | def interpret_sentence(model, sentence, min_len = 7, label = 0): 64 | 65 | sentence=sentence.lower() #将句子转为小写 66 | 67 | model.eval() 68 | #分词处理 69 | text = [tok.text for tok in nlp.tokenizer(sentence)] 70 | if len(text) < min_len: #对小于指定长度的句子进行 填充 71 | text += [TEXT.pad_token] * (min_len - len(text)) 72 | #将句子中的单词转为索引 73 | indexed = [TEXT.vocab.stoi[t] for t in text] 74 | 75 | model.zero_grad() #将模型中的梯度清0 76 | 77 | input_indices = torch.LongTensor(indexed) #转为张量 78 | input_indices = input_indices.unsqueeze(0) #增加维度 79 | 80 | #转为词嵌入 81 | input_embedding = interpretable_embedding.indices_to_embeddings(input_indices) 82 | 83 | #将词嵌入输入模型,进行预测 84 | pred = torch.sigmoid(model(input_embedding)).item() 85 | pred_ind = round(pred) #计算输出结果 86 | 87 | #创建梯度积分的初始输入值 88 | PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] #获得填充字符的索引 89 | token_reference = TokenReferenceBase(reference_token_idx=PAD_IDX) 90 | #制作初始输入索引:复制指定长度个token_reference,并扩展维度 91 | reference_indices = token_reference.generate_reference(len(indexed), device='cpu').unsqueeze(0) 92 | print("reference_indices",reference_indices) 93 | #将制作好的输入索引转成词嵌入 94 | reference_embedding = interpretable_embedding.indices_to_embeddings(reference_indices) 95 | 96 | 97 | #用梯度积分的方法计算可解释性 98 | attributions_ig, delta = ig.attribute(input_embedding, reference_embedding, n_steps=500, return_convergence_delta=True) 99 | #输出可解释性结果 100 | print('attributions_ig, delta',attributions_ig.size(), delta.size()) 101 | print('pred: ', LABEL.vocab.itos[pred_ind], '(', '%.2f'%pred, ')', ', delta: ', abs(delta)) 102 | #加入可视化记录中 103 | add_attributions_to_visualizer(attributions_ig, text, pred, pred_ind, label, delta, vis_data_records_ig) 104 | 105 | #定义函数,将解释性结果放入可视化记录中 106 | def add_attributions_to_visualizer(attributions, text, pred, pred_ind, label, delta, vis_data_records): 107 | attributions = attributions.sum(dim=2).squeeze(0) 108 | attributions = attributions / torch.norm(attributions) 109 | attributions = attributions.detach().numpy() 110 | 111 | # storing couple samples in an array for visualization purposes 112 | vis_data_records.append(visualization.VisualizationDataRecord( 113 | attributions, 114 | pred, 115 | LABEL.vocab.itos[pred_ind], 116 | LABEL.vocab.itos[label], 117 | LABEL.vocab.itos[1], 118 | attributions.sum(), 119 | text[:len(attributions)], 120 | delta)) 121 | 
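#对几条示例影评运行积分梯度归因:每次调用都会向vis_data_records_ig追加一条记录,
#最后由visualize_text()渲染成按词着色的HTML热力图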
122 | interpret_sentence(model, 'It was a fantastic performance !', label=1) 123 | 124 | interpret_sentence(model, 'The film is very good!', label=1) 125 | 126 | interpret_sentence(model, 'I think this film is not very bad!', label=1) 127 | 128 | 129 | #根据可视化记录生成网页 130 | visualization.visualize_text(vis_data_records_ig) 131 | 132 | #还原模型的词嵌入层 133 | remove_interpretable_embedding_layer(model, interpretable_embedding) 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /code_01_subtraction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: 代码医生工作室 4 | @公众号:xiangyuejiqiren (内有更多优秀文章及学习资料) 5 | @来源: 配套代码 6 | @配套代码技术支持:bbs.aianaconda.com 7 | Created on Thu Mar 30 09:43:58 2017 8 | """ 9 | 10 | import copy, numpy as np 11 | np.random.seed(0) #随机数生成器的种子,可以每次得到一样的值 12 | # compute sigmoid nonlinearity 13 | def sigmoid(x): #激活函数 14 | output = 1/(1+np.exp(-x)) 15 | return output 16 | # convert output of sigmoid function to its derivative 17 | def sigmoid_output_to_derivative(output):#激活函数的导数 18 | return output*(1-output) 19 | 20 | 21 | int2binary = {} #整数到其二进制表示的映射 22 | binary_dim = 8 #暂时制作256以内的减法 23 | ## 计算0-256的二进制表示 24 | largest_number = pow(2,binary_dim) 25 | binary = np.unpackbits( 26 | np.array([range(largest_number)],dtype=np.uint8).T,axis=1) 27 | for i in range(largest_number): 28 | int2binary[i] = binary[i] 29 | 30 | # input variables 31 | alpha = 0.9 #学习速率 32 | input_dim = 2 #输入的维度是2 33 | hidden_dim = 16 34 | output_dim = 1 #输出维度为1 35 | 36 | # initialize neural network weights 37 | synapse_0 = (2*np.random.random((input_dim,hidden_dim)) - 1)*0.05 #维度为2*16, 2是输入维度,16是隐藏层维度 38 | synapse_1 = (2*np.random.random((hidden_dim,output_dim)) - 1)*0.05 39 | synapse_h = (2*np.random.random((hidden_dim,hidden_dim)) - 1)*0.05 40 | # => [-0.05, 0.05), 41 | 42 | # 用于存放反向传播的权重更新值 43 | synapse_0_update = np.zeros_like(synapse_0) 44 | synapse_1_update = np.zeros_like(synapse_1) 45 | synapse_h_update = np.zeros_like(synapse_h) 46 | 47 | # training 48 | for j in range(10000): 49 | 50 | #生成一个数字a 51 | a_int = np.random.randint(largest_number) 52 | #生成一个数字b,b的最大值取的是largest_number/2,作为被减数,让它小一点。 53 | b_int = np.random.randint(largest_number/2) 54 | #如果生成的b大了,那么交换一下 55 | if a_int