├── .gitignore ├── LICENSE ├── README.md ├── exp1 ├── fig │ ├── activation_pane.png │ ├── activations.png │ ├── depth_1.png │ ├── depth_10.png │ ├── depth_2.png │ ├── depth_3.png │ ├── depth_5.png │ ├── depth_pane.png │ ├── depths_and_widths.png │ ├── elu.png │ ├── leakyrelu.png │ ├── learning_rates.png │ ├── lr1e-1.5.png │ ├── lr1e-1.png │ ├── lr1e-2.5.png │ ├── lr1e-2.png │ ├── lr1e-3.5.png │ ├── lr1e-3.png │ ├── lr1e-4.png │ ├── lr_pane.png │ ├── prelu.png │ ├── relu.png │ ├── sigmoid.png │ ├── softplus.png │ └── tanh.png ├── requirements.txt ├── src │ ├── main.py │ ├── model.py │ └── results.py ├── 实验一:前馈神经网络.md └── 实验一:前馈神经网络.pdf ├── exp2 ├── requirements.txt ├── src │ ├── data.py │ ├── logs1.txt │ ├── logs2.txt │ ├── logs3.txt │ ├── main.py │ ├── model.py │ └── test.py ├── 实验二:卷积神经网络.md └── 实验二:卷积神经网络.pdf ├── exp3 ├── ReadMe.md ├── img.png ├── requirements.txt ├── src │ ├── config.py │ ├── data.py │ ├── main.py │ ├── model.py │ └── utils.py ├── 实验三:循环神经网络.md ├── 实验三:循环神经网络.pdf └── 实验三:循环神经网络 │ ├── image-20210513172134556.png │ ├── image-20210513172216587.png │ ├── image-20210513172230085.png │ ├── image-20210513172244821.png │ ├── image-20210513172303845.png │ ├── image-20210513172317856.png │ ├── image-20210513172733303.png │ ├── image-20210513172747548.png │ ├── image-20210513172839199.png │ ├── image-20210513172852144.png │ ├── image-20210513172947008.png │ ├── image-20210513173012590.png │ ├── image-20210513173151864.png │ └── image-20210513173355442.png ├── exp4 ├── ReadMe.md ├── img.png ├── requirements.txt ├── src │ ├── config.py │ ├── data.py │ ├── main.py │ ├── model.py │ └── utils.py ├── 实验四:BERT.md ├── 实验四:BERT.pdf └── 实验四:BERT │ ├── image-20210605154503374.png │ ├── image-20210605154926294.png │ ├── image-20210605154945286.png │ ├── image-20210605160746073.png │ ├── image-20210605163538200.png │ ├── image-20210605163557321.png │ ├── image-20210605163641264.png │ └── image-20210605163743489.png └── exp5 ├── requirements.txt ├── result.csv ├── src 
├── config.py ├── data.py ├── main.py ├── model.py └── utils.py ├── 实验五:图卷积神经网络.md ├── 实验五:图卷积神经网络.pdf └── 实验五:图卷积神经网络 ├── image-20210621105946421.png ├── image-20210621110023351.png ├── image-20210621110050539-1624244452275.png ├── image-20210621110050539.png ├── image-20210621110122757.png ├── image-20210621110149625.png ├── image-20210621110240262.png ├── image-20210621110306141.png ├── image-20210621110337194.png ├── image-20210621110419877.png ├── image-20210621110436272.png ├── image-20210621110603182.png ├── image-20210621110619585-1624244780237.png ├── image-20210621110619585.png ├── image-20210621110647970.png ├── image-20210621110714808.png ├── image-20210621110733499.png ├── image-20210621110823719.png ├── image-20210621110938485.png ├── image-20210621110951936-1624244992867.png ├── image-20210621110951936.png ├── image-20210621111247829-1624245168536.png ├── image-20210621111247829.png ├── image-20210621111339615.png ├── image-20210621111544917.png ├── image-20210621111633343.png ├── image-20210621111948461.png ├── image-20210621112110697.png ├── image-20210621112125882.png └── image-20210621112457787.png /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | *.zip 4 | tiny-imagenet-200 5 | aclImdb 6 | checkpoint 7 | data 8 | glove.6B 9 | expx 10 | GNN -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 liuwei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do 
so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # USTC_2021_Spring_Deep_Learning_labs 2 | 3 | USTC-2021春季学期深度学习导论课程实验: 4 | 5 | + 前馈神经网络(FNN) 6 | + 卷积神经网络(CNN) 7 | + 循环神经网络(RNN, LSTM) 8 | + BERT 9 | + 图卷积神经网络(GCN) 10 | -------------------------------------------------------------------------------- /exp1/fig/activation_pane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/activation_pane.png -------------------------------------------------------------------------------- /exp1/fig/activations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/activations.png -------------------------------------------------------------------------------- /exp1/fig/depth_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_1.png -------------------------------------------------------------------------------- /exp1/fig/depth_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_10.png -------------------------------------------------------------------------------- /exp1/fig/depth_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_2.png -------------------------------------------------------------------------------- /exp1/fig/depth_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_3.png -------------------------------------------------------------------------------- /exp1/fig/depth_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_5.png -------------------------------------------------------------------------------- /exp1/fig/depth_pane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_pane.png -------------------------------------------------------------------------------- /exp1/fig/depths_and_widths.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depths_and_widths.png -------------------------------------------------------------------------------- /exp1/fig/elu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/elu.png -------------------------------------------------------------------------------- /exp1/fig/leakyrelu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/leakyrelu.png -------------------------------------------------------------------------------- /exp1/fig/learning_rates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/learning_rates.png -------------------------------------------------------------------------------- /exp1/fig/lr1e-1.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-1.5.png -------------------------------------------------------------------------------- /exp1/fig/lr1e-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-1.png 
-------------------------------------------------------------------------------- /exp1/fig/lr1e-2.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-2.5.png -------------------------------------------------------------------------------- /exp1/fig/lr1e-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-2.png -------------------------------------------------------------------------------- /exp1/fig/lr1e-3.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-3.5.png -------------------------------------------------------------------------------- /exp1/fig/lr1e-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-3.png -------------------------------------------------------------------------------- /exp1/fig/lr1e-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-4.png -------------------------------------------------------------------------------- /exp1/fig/lr_pane.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr_pane.png -------------------------------------------------------------------------------- /exp1/fig/prelu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/prelu.png -------------------------------------------------------------------------------- /exp1/fig/relu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/relu.png -------------------------------------------------------------------------------- /exp1/fig/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/sigmoid.png -------------------------------------------------------------------------------- /exp1/fig/softplus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/softplus.png -------------------------------------------------------------------------------- /exp1/fig/tanh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/tanh.png -------------------------------------------------------------------------------- /exp1/requirements.txt: 
class FuncFitter(object):
    """Fit a one-dimensional function ``y = f(x)`` with a PyTorch model.

    The fitter samples ``f`` on an evenly spaced grid over ``x_range``,
    splits the samples randomly into train/test sets, and offers a simple
    mini-batch training loop and a test routine with an optional scatter plot.
    """

    def __init__(self, to_fit: str or callable, x_range: tuple):
        """
        Args:
            to_fit: Either a callable applied to a NumPy column vector, or the
                name of a NumPy function (e.g. ``'sin'`` or a dotted name such
                as ``'linalg.norm'``) that is looked up on the ``np`` module.
            x_range: ``(low, high)`` bounds of the sampling interval.

        Raises:
            AttributeError: If ``to_fit`` is a string that does not name an
                attribute reachable from ``np``.
        """
        if isinstance(to_fit, str):
            # Resolve the name by attribute lookup instead of eval(): same
            # result for valid names, but arbitrary code in the string is
            # never executed.
            target = np
            for part in to_fit.split('.'):
                target = getattr(target, part)
            self.to_fit = target
        else:
            self.to_fit = to_fit
        self.x_range = x_range

    def gen_data(self, data_size, train_ratio, random_state):
        """Sample the target function and split the samples into train/test.

        Args:
            data_size: Total number of grid points (train + test).
            train_ratio: Fraction of points used for training; the train set
                holds ``round(train_ratio * data_size)`` points.
            random_state: Seed passed to ``np.random.seed`` (this reseeds
                NumPy's global generator) so the split is reproducible.

        Returns:
            ``(train_data, test_data)``, each an ``(x, y)`` tuple of tensors.
            The same tuples are stored on ``self.train_data`` / ``self.test_data``.
        """
        np.random.seed(random_state)
        # linspace includes both end points; the new axis makes column vectors.
        x = np.linspace(self.x_range[0], self.x_range[1], data_size)[:, np.newaxis]
        y = self.to_fit(x)
        indices = np.random.permutation(data_size)
        ids_train, ids_test = np.split(indices, [round(train_ratio * data_size)])
        x_train, y_train = x[ids_train], y[ids_train]
        x_test, y_test = x[ids_test], y[ids_test]
        self.train_data = (t.Tensor(x_train), t.Tensor(y_train))
        self.test_data = (t.Tensor(x_test), t.Tensor(y_test))
        return self.train_data, self.test_data

    def train(self, model, optim, criterion, epochs, batch_size, pbar='batch'):
        """Train ``model`` on the data produced by :meth:`gen_data`.

        Args:
            model: Module to optimise (updated in place).
            optim: Optimiser bound to ``model``'s parameters.
            criterion: Loss callable ``criterion(prediction, target)``.
            epochs: Number of passes over the training set.
            batch_size: Mini-batch size.
            pbar: ``'batch'`` shows one progress bar per epoch, ``'epoch'``
                shows a single bar over all epochs, any other value disables
                progress bars.

        Returns:
            The trained ``model`` (same object that was passed in).
        """
        dataset = TensorDataset(*self.train_data)
        dataloader = DataLoader(dataset, batch_size, shuffle=True)
        show_epoch_bar = pbar == 'epoch'
        show_batch_bar = pbar == 'batch'

        pbar_epoch = range(epochs)
        if show_epoch_bar:
            pbar_epoch = tqdm(pbar_epoch, desc='Epochs', unit='epoch',
                              bar_format='{desc:<7.7}{percentage:3.0f}%|{bar:30}{r_bar}')
        for i in pbar_epoch:
            loss = None
            pbar_batch = dataloader
            if show_batch_bar:
                pbar_batch = tqdm(pbar_batch, desc=f'[Epoch {i + 1}/{epochs}]', unit='batch',
                                  bar_format='{desc:<15.15}{percentage:3.0f}%|{bar:30}{r_bar}')
            for x, y in pbar_batch:
                y_pred = model(x)
                loss = criterion(y_pred, y)
                optim.zero_grad()
                loss.backward()
                optim.step()
                if show_batch_bar:
                    pbar_batch.set_postfix({'train_loss': loss.item()})
            # Only a tqdm bar has set_postfix(); in every other mode pbar_epoch
            # is a plain range, so the call must be guarded. `loss` stays None
            # when the loader yields no batch.
            if show_epoch_bar and loss is not None:
                pbar_epoch.set_postfix({'train_loss': loss.item()})
        return model

    def test(self, model, criterion, plot=True):
        """Evaluate ``model`` on the held-out test split.

        Args:
            model: Trained module.
            criterion: Loss callable ``criterion(prediction, target)``.
            plot: When true, show a scatter plot of true vs. predicted values.

        Returns:
            The test loss as a Python float.
        """
        x_test, y_test = self.test_data
        # Evaluation needs no gradients; skip building the autograd graph.
        with t.no_grad():
            y_pred = model(x_test)
            loss = criterion(y_pred, y_test).item()

        if plot:
            fig, ax = plt.subplots()
            ax.set_title('Function Fitter: {}\ntest loss: {}'.format(str(self.to_fit), loss))
            ax.set_xlabel('x')
            ax.set_ylabel('y')
            ax.set_xlim(self.x_range)
            ax.scatter(x_test.detach().numpy(), y_test.detach().numpy(), s=5, label='true')
            ax.scatter(x_test.detach().numpy(), y_pred.detach().numpy(), s=5, label='pred')
            ax.legend()
            plt.show()

        return loss


if __name__ == '__main__':
    # Fit sin(x) on [0, 4*pi] with a 2x20 tanh network, repeated num_rounds
    # times with fresh weights, and report the rounded test loss of each round.
    fitter = FuncFitter(np.sin, (0, 4 * np.pi))
    fitter.gen_data(data_size=10000, train_ratio=0.8, random_state=7)
    num_rounds = 10
    losses = []
    for i in range(num_rounds):
        fnn = FNN(neurons=[1, *[20] * 2, 1], activation='tanh')
        if i == 0:
            print('number of parameters: {}'.format(sum(p.numel() for p in fnn.parameters() if p.requires_grad)))
        optim = Adam(fnn.parameters(), lr=10 ** -3)
        criterion = nn.MSELoss()
        fitter.train(fnn, optim, criterion, epochs=10, batch_size=10, pbar='epoch')
        test_loss = fitter.test(fnn, criterion, plot=True)
        losses.append(round(test_loss, 6))
    print(losses)
class FNN(nn.Module):
    """Fully connected feed-forward network with a configurable activation.

    ``neurons`` lists the width of every layer including input and output, so
    ``len(neurons) - 1`` linear layers are created. The chosen activation
    follows every linear layer except the last one (the output stays linear).
    """

    def __init__(self, neurons: list, activation: str):
        super().__init__()
        factories = {
            'relu': nn.ReLU,
            'tanh': nn.Tanh,
            'sigmoid': nn.Sigmoid,
            'elu': nn.ELU,
            'leakyrelu': nn.LeakyReLU,
            'prelu': nn.PReLU,
            'softplus': nn.Softplus,
        }
        key = activation.lower()  # activation names are case-insensitive
        self.fc_layers = nn.ModuleList()
        self.activations = nn.ModuleList()
        output_idx = len(neurons) - 2  # index of the final (output) linear layer
        for idx, (fan_in, fan_out) in enumerate(zip(neurons[:-1], neurons[1:])):
            self.fc_layers.append(nn.Linear(fan_in, fan_out))
            if idx != output_idx:
                # The name is looked up only when an activation is needed.
                self.activations.append(factories[key]())

    def forward(self, x):
        """Apply linear layers in order, each followed by its activation if any."""
        n_act = len(self.activations)
        for idx, linear in enumerate(self.fc_layers):
            x = linear(x)
            if idx >= n_act:
                continue
            x = self.activations[idx](x)
        return x


class net(nn.Module):
    """Minimal 1 -> 10 -> 1 ReLU network used for a quick parameter count."""

    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([
            nn.Linear(1, 10),
            nn.ReLU(),
            nn.Linear(10, 1),
        ])

    def forward(self, x):
        """Feed ``x`` through every stored module in sequence."""
        out = x
        for module in self.layers:
            out = module(out)
        return out


if __name__ == '__main__':
    model = net()
    n_params = sum(p.numel() for p in model.parameters())
    print('number of parameters: {}'.format(n_params))
# Settings shared by every experiment whose results are recorded below.
default_params = {
    'to_fit': np.sin,
    'x_range': (0, 4*np.pi),
    'data_size': 10000,
    'train_ratio': 0.8,
    'random_state': 7,
    'epochs': 10,
    'batch_size': 10,
    'criterion': nn.MSELoss()
}


def _trimmed_means(raw):
    """Per-setup mean of the recorded losses after dropping the min and the max."""
    return {setup: np.mean(np.sort(losses)[1:-1]) for setup, losses in raw.items()}


# Comparison 1: depth and width
# The total number of parameters is kept at roughly 1000 and every hidden
# layer of a model has the same width; activation='tanh', lr=0.001.
# Five setups (depth counts hidden layers only):
#   setup1: neurons=[1, 333, 1]       (depth=1)   #(params)=1000
#   setup2: neurons=[1, *[30]*2, 1]   (depth=2)   #(params)=1021
#   setup3: neurons=[1, *[21]*3, 1]   (depth=3)   #(params)=988
#   setup4: neurons=[1, *[15]*5, 1]   (depth=5)   #(params)=1006
#   setup5: neurons=[1, *[10]*10, 1]  (depth=10)  #(params)=1021
# Each setup was run 10 times; every test loss is recorded.
result1_raw = {
    1: [0.09346, 0.095363, 0.12187, 0.111205, 0.096947, 0.107818, 0.093153, 0.09429, 0.095786, 0.103154],
    2: [0.029461, 0.05171, 0.1099, 0.083763, 0.040612, 0.003585, 0.065114, 0.030679, 0.042282, 0.078191],
    3: [0.000704, 0.000287, 0.027187, 0.007998, 0.000721, 0.001373, 0.011837, 0.002505, 0.001198, 0.001492],
    5: [0.000982, 0.000224, 0.000635, 0.000259, 0.002295, 0.000729, 0.0007, 0.001617, 0.000853, 0.000275],
    10: [0.013834, 0.001034, 0.017508, 0.015356, 0.000298, 0.032453, 0.001733, 0.000175, 0.000309, 0.001068]
}
result1_mean = _trimmed_means(result1_raw)
print('## Comparision 1: depth and width ##')
print(result1_mean)

fig, ax = plt.subplots()
ax.set_title('Comparision of depths(widths)')
ax.set_xlabel('depth')
ax.set_ylabel('log10(test loss)')
depth_log_losses = np.log10(list(result1_mean.values()))
ax.plot(result1_mean.keys(), depth_log_losses, marker='x')
plt.show()


# Comparison 2: activation function
# Model structure fixed to neurons=[1, 20, 20, 1], lr=0.001.
# Seven activation functions are compared:
#   sigmoid, tanh, relu, leakyrelu, prelu, elu, softplus
# Each activation was run 10 times.
result2_raw = {
    'sigmoid': [0.320537, 0.316262, 0.323674, 0.349383, 0.315778, 0.350341, 0.328564, 0.33681, 0.323076, 0.34205],
    'tanh': [0.095256, 0.085254, 0.077649, 0.136754, 0.068252, 0.065716, 0.116979, 0.07541, 0.054631, 0.047086],
    'relu': [0.09296, 0.032559, 0.013368, 0.109147, 0.04267, 0.02434, 0.027428, 0.061975, 0.065116, 0.022743],
    'leakyrelu': [0.030271, 0.069982, 0.066474, 0.048033, 0.024873, 0.028954, 0.069328, 0.063356, 0.021585, 0.063143],
    'prelu': [0.035761, 0.051486, 0.005814, 0.014586, 0.012871, 0.048049, 0.066837, 0.004329, 0.00277, 0.076942],
    'elu': [0.041305, 0.004289, 0.01048, 0.030719, 0.036398, 0.033117, 0.030902, 0.017483, 0.079652, 0.058757],
    'softplus': [0.11326, 0.093088, 0.088635, 0.081033, 0.080066, 0.08392, 0.094294, 0.079997, 0.058466, 0.070052]
}
result2_mean = _trimmed_means(result2_raw)
# Standard deviation over all ten runs (computed but not printed).
result2_std = {name: np.std(losses) for name, losses in result2_raw.items()}
print("## Comparision 2: activation function ##")
print(result2_mean)

fig, ax = plt.subplots()
ax.set_title('Comparision of activation functions')
ax.set_xlabel('activation function')
ax.set_ylabel('test loss')
tick_positions = np.arange(len(result2_mean))
ax.set_xticks(tick_positions)
ax.set_xticklabels(result2_mean.keys())
ax.scatter(tick_positions, list(result2_mean.values()), marker='x')
plt.show()


# Comparison 3: learning rate
# Model structure fixed to neurons=[1, 20, 20, 1], activation='tanh'.
# Seven learning-rate setups:
#   {10^k: k = -1, -1.5, -2, -2.5, -3, -3.5, -4}
# Each setup was run 10 times; every test loss is recorded.
result3_raw = {
    10**-1: [0.370254, 0.641283, 0.366993, 0.396108, 0.501332, 0.404945, 0.86187, 0.377093, 0.769633, 0.407323],
    10**-1.5: [0.022817, 0.021794, 0.027867, 0.034896, 0.067046, 0.131822, 0.027459, 0.042532, 0.01076, 0.019996],
    10**-2: [0.004666, 0.001577, 0.003348, 0.000552, 0.001009, 0.000547, 0.001086, 0.061021, 0.01173, 0.005232],
    10**-2.5: [0.002043, 0.003938, 0.002026, 9.1e-05, 0.001185, 0.000199, 0.000501, 0.000886, 0.000758, 0.004475],
    10**-3: [0.094007, 0.023405, 0.155973, 0.066218, 0.079838, 0.006589, 0.073311, 0.002712, 0.062652, 0.136194],
    10**-3.5: [0.3407, 0.352521, 0.328751, 0.330109, 0.347772, 0.318212, 0.10451, 0.135686, 0.130908, 0.120176],
    10**-4: [0.365907, 0.369544, 0.345024, 0.37749, 0.36385, 0.36186, 0.376415, 0.385564, 0.352906, 0.360354],
}
result3_mean = _trimmed_means(result3_raw)
result3_std = {rate: np.std(losses) for rate, losses in result3_raw.items()}
print("## Comparision 3: learning rate")
print(result3_mean)
print(result3_std)

fig, ax = plt.subplots()
ax.set_title('Comparision of learning rates')
ax.set_xlabel('-log10(lr)')
ax.set_ylabel('log10(test loss)')
neg_log_rates = -np.log10(list(result3_mean.keys()))
rate_log_losses = np.log10(list(result3_mean.values()))
ax.plot(neg_log_rates, rate_log_losses, marker='x')
plt.show()
连续并可导(允许少数点上不可导)的非线性函数,可导的激活函数可以直接利用数值优化的方法来学习网络参数 30 | * 激活函数及其导函数要尽可能简单,有利于提高网络计算效率 31 | * 激活函数的导函数的值域要在一个合适的区间内,不能太大也不能太小,否则会影响训练的效率和稳定性 32 | 33 | * 常用激活函数 34 | 35 | Sigmoid, Tanh, ReLU, LeakyReLU, PReLU, softplus, ELU 36 | 37 | * 神经网络的主要三个特性 38 | 39 | * 信息表示是分布式的 40 | * 记忆和知识是存储在单元之间的连接上的 41 | * 通过逐渐改变单元之间的连接强度来学习新知识 42 | 43 | * 网络结构 44 | 45 | * 神经网络设计的另一个关键点是确定它的结构:具有多少单元,以及这些单元应该如何连接。 46 | * 大多数神经网络被组织成层的单元组 47 | * 大多数神经网络架构将这些层布置成链式结构,其中每一层都是 48 | 前一层的函数 49 | * 神经网络设计在于选择网络的深度和每一层的宽度,更深层网络通常能在每一层使用更少的单元数和更少的参数,并且有更强的泛化能力。但是通常也更难以优化。 50 | 51 | ### 前馈神经网络 52 | 53 | ​ 前馈神经网络又称为多层感知机, 主要特点为: 54 | 55 | + 各神经元分别属于不同的层,层内无连接 56 | 57 | + 相邻两层之间的神经元全部两两连接 58 | 59 | + 整个网络中无反馈,信号从输入层向输出层单向传播 60 | 61 | ![image-20210331003916295](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331003916295.png) 62 | 63 | ![image-20210331004129942](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331004129942.png) 64 | 65 | **万能近似定理(Universal Approximation Theorem)** 66 | 67 | 一个前馈神经网络如果具有线性输出层和至少一层具有任何一种“挤压” 性质的激活函数(例如logistic sigmoid激活函数)的隐藏层,只要给予网络足够数量的隐藏单元,它可以以任意的精度来近似任何从一个有限维空间到另一个有限维空间的Borel 可测函数。 68 | 69 | 万能近似定理只说明神经网络表达能力强大到可以近似任意一个连续函数,却并没有给出如何找到这样的神经网络,以及是否是最优的。 70 | 71 | ### 网络参数学习:梯度下降 72 | 73 | + 梯度下降图示: 74 | 75 | ![image-20210331005040953](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331005040953.png) 76 | 77 | + 反向传播算法:针对前馈神经网络而设计的高效方法 78 | 79 | ![image-20210331005234604](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331005234604.png) 80 | 81 | ![image-20210331005247254](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331005247254.png) 82 | 83 | ![image-20210331005342486](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331005342486.png) 84 | 85 | + 前馈神经网络的训练过程可以分为以下三步: 86 | 87 | + 前向计算:每一层的状态和激活值,直到最后一层 88 | + 反向计算:每一层的参数的偏导数 89 | + 更新参数 90 | 91 | ## 实验内容 92 | 93 | 
使用`pytorch`或者`tensorflow`手写一个前馈神经网络,用于近似正弦函数$y=\sin(x),x\in[0,4\pi)$。研究网络深度、学习率、网络宽度、激活函数对模型性能的影响。 94 | 95 | 即回归任务,输入和输出均为一维。 96 | 97 | 98 | ## 实验结果 99 | 100 | 实验使用`pytorch`进行。 101 | 102 | 103 | ### 源码结构及说明 104 | 105 | **模型结构** 106 | 107 | 实现了可自由调整深度及各隐藏层的宽度的前馈神经网络,激活函数可以通过参数设置。 108 | 109 | 具体来说构建模型需要以下两个参数: 110 | 111 | * `neurons`: 列表类型,表示各网络层的神经元个数,包含输入层和输出层。在本实验中输入层和输出层神经元个数固定为 1。例如`neurons=[1, 20, 10, 1]`表示模型包含两个神经元个数分别为 20 和 10 的隐藏层。 112 | * `activation`: 字符串类型,指定要使用的激活函数,可供选择的有 `'sigmoid'`,`'tanh'`,`'relu'`,`'leakyrelu'`,`'prelu'`,`'elu'`,`'softplus'`。例如`activation='relu'`表示各隐藏层都将使用`relu`作为激活函数。因为是回归任务,输出层没有激活函数。 113 | 114 | **优化器及损失函数** 115 | 116 | 优化器固定使用`torch.optim.Adam`,但学习率`lr` 是可调节的参数。 117 | 118 | 训练的损失函数及测试的性能评估均固定使用`torch.nn.MSELoss` 119 | 120 | **数据生成** 121 | 122 | 使用`numpy`生成区间$[0,4\pi]$上的均匀样本点(`np.linspace`默认包含右端点)作为训练及测试数据,可以控制总样本点数目及训练样本的比例。具体来说有如下三个参数: 123 | 124 | + `data_size`: 整型,表示生成的样本点个数。包含训练及测试的样本。 125 | + `train_ratio`: 浮点数,表示训练集的比例,训练集的大小将为`round(data_size*train_ratio)`。训练集从生成的样本中随机选择。 126 | + `random_state`: 整型,随机种子,决定训练集测试集的划分。 127 | 128 | **训练及测试** 129 | 130 | 固定批大小为10,训练10轮。即`batch_size=10,epochs=10`. 
131 | 132 | 133 | ### 结果及分析 134 | 135 | 本实验固定的参数如下: 136 | 137 | ```python 138 | default_params = { 139 | 'data_size': 10000, 140 | 'train_ratio': 0.8, 141 | 'random_state': 7, 142 | 'epochs': 10, 143 | 'batch_size': 10 144 | } 145 | ``` 146 | 147 | 值得注意的是,这里的固定参数也就是说**以下实验的训练数据量及训练轮数都是一样的,而不是各自充分训练的结果。** 148 | 149 | 调节的参数有: 150 | 151 | + `neurons`: 研究不同网络深度及宽度对模型性能的影响。 152 | + `activation`: 研究不同激活函数对模型性能的影响。 153 | + `lr`: 研究不同学习率对模型性能的影响。 154 | 155 | #### 比较一:网络深度与宽度 156 | 157 | 固定参数 `activation='tanh'`, `lr=0.001`。 158 | 159 | 改变参数`neurons`设置五组对照,在固定网络参数保持在1000左右的情况下设置5个不同的深度,并保持各层宽度一致(相应地也是五个不同的宽度)。注意:这里深度为隐藏层的个数,不考虑输入输出层。具体设置及各设置下模型的参数个数如下: 160 | 161 | ```python 162 | # setup1: neurons=[1, 333, 1] (depths=1) #(params)=1000 163 | # setup2: neurons=[1, *[30]*2, 1] (depths=2) #(params)=1021 164 | # setup3: neurons=[1, *[21]*3, 1] (depths=3) #(params)=988 165 | # setup4: neurons=[1, *[15]*5, 1] (depths=5) #(params)=1006 166 | # setup5: neurons=[1, *[10]*10, 1] (depths=10) #(params)=1021 167 | ``` 168 | 169 | 对上述五种设置,独立地训练并测试10次得到10次测试的loss,为避免极端值的影响,去掉最高值和最低值后取平均,作为评价不同设置下模型性能的标准。结果为: 170 | 171 | ```python 172 | {1: 0.09975287499999999, 2: 0.052726499999999996, 3: 0.0034785, 5: 0.00075625, 10: 0.006392500000000001} # 键为深度,值为loss 173 | ``` 174 | 175 | 对loss取对数并作图如下:![](fig/depths_and_widths.png) 176 | 177 | 可以看到,在保持网络参数不变的情况下,随着网络深度的加深(对应地,网络宽度减小),同样的训练条件下,模型的性能可能会慢慢变好,但当网络过深时,模型的性能也会下降,这可能是由于梯度消失,优化过程比浅层的网络更难。 178 | 179 | 细节的拟合效果可以观察拟合图: 180 | 181 | ![depth_pane](fig/depth_pane.png) 182 | 183 | 可以注意到当深度较大时(depth=10),x较小的部分的拟合效果还不错,但当数值较大靠近尾部时,突然出现很大偏差,可能是数值太大时,激活函数的梯度很小,而深层网络会进一步扩大这个影响(也就是所谓的梯度消失问题),导致参数几乎无法更新。 184 | 185 | #### 比较二:激活函数 186 | 187 | 固定参数`neurons=[1, 20, 20, 1]`,`lr=0.001` 188 | 189 | 在 7 种激活函数的选择下,分别独立地训练并测试10次得到10次的测试loss,为避免极端值的影响,去掉最高值和最低值后取平均,作为评价不同设置下模型性能的标准。结果如下: 190 | 191 | ```python 192 | {'sigmoid': 0.33004449999999996, 'tanh': 0.07989337499999999, 'relu': 0.046223875, 'leakyrelu': 0.049304, 
'prelu': 0.029966624999999997, 'elu': 0.032395125, 'softplus': 0.08388562499999999} 193 | ``` 194 | 195 | ![](fig/activations.png) 196 | 197 | 可见`sigmoid`激活函数效果非常差,`tanh`和`softplus`效果较好但不如`relu`家族。而`relu`家族中`prelu`和`elu`效果最好。这可能是因为`prelu`包含额外的可学习参数(负半轴斜率),而`elu`虽然没有可学习参数(其`alpha`是固定的超参数),但在负半轴上是光滑的。 198 | 199 | 除了比较测试loss外,我们还可以观察拟合图,分析各激活函数的特点。![activation_pane](fig/activation_pane.png) 200 | 201 | 分析拟合图可以看出: 202 | 203 | + `sigmoid`出现了类似于深层网络所具有的梯度消失的问题,即当x较大时,模型性能急剧变差。一方面该激活函数确实存在饱和的问题,另一方面该激活函数是非零中心化的,会使得后一层的神经元的输入发生偏置偏移,使得梯度下降的收敛速度变慢。 204 | + `relu,leakyrelu,prelu` 的拟合图均出现**不光滑的现象**,这是由于激活函数本身不光滑导致的。而`elu`本身是光滑的,没有出现类似的现象。它们都不会像`sigmoid,tanh`那样有梯度饱和的问题,因而学习效率更高。 205 | 206 | 207 | #### 比较三:学习率 208 | 209 | 固定参数`neurons=[1, 20, 20, 1]`,`activation='tanh'` 210 | 211 | 设置七个不同的学习率:$10^{-k},k=1,1.5,2,2.5,3,3.5,4$,分别独立地训练并测试10次得到10次的测试loss,为避免极端值的影响,去掉最高值和最低值后取平均,作为评价不同设置下模型性能的标准。结果如下: 212 | 213 | ```python 214 | {0.1: 0.17341449586998198, 0.03162277660168379: 0.0337293358056455, 0.01: 0.01761509341899724, 0.0031622776601683794: 0.0014473155011952299, 0.001: 0.04821309684815943, 0.00031622776601683794: 0.1052767455255433, 0.0001: 0.011405165033439895} 215 | ``` 216 | 217 | ![learning_rates](fig/learning_rates.png) 218 | 219 | ![](fig/lr_pane.png) 220 | 221 | 可见随着学习率慢慢变大,模型性能先变好后变差。 222 | 223 | 具体来说,可以从拟合图上得到一些结论: 224 | 225 | + 当学习率太大时,参数更新步长较大,导致目标函数波动较大,会使得收敛速度较慢,并且在结果上出现类似阶跃函数输出值突变的现象。 226 | + 当学习率太小时,参数更新步长较小,导致目标函数减小得很慢,收敛速度较慢,需要更长的训练时间才能达到较好的收敛。 227 | 228 | 229 | ## 实验总结 230 | 231 | 本实验中,实现了前馈神经网络并用它来拟合初等函数。 232 | 233 | 通过三个比较实验,初步了解了网络深度和宽度,激活函数,学习率对前馈神经网络性能的影响。 234 | 235 | 对于网络深度和宽度而言,深层的网络收敛速度更快,但会引发梯度消失的问题; 236 | 237 | 对于激活函数而言,`relu`及其变种是很好的选择,但因为不光滑的特点会导致输出不光滑(`elu`不存在这样的问题)。 238 | 239 | 对于学习率而言,学习率太大和太小都不利于学习,针对特定的问题应该选择大小适中的学习率。 -------------------------------------------------------------------------------- /exp1/实验一:前馈神经网络.pdf: -------------------------------------------------------------------------------- 
class TinyImageNet(Dataset):
    """One split of Tiny-ImageNet-200, decoded eagerly and kept in memory.

    Args:
        data_dir: Root folder of the extracted dataset, i.e. the folder that
            holds ``wnids.txt``, ``train/`` and ``val/``. A trailing path
            separator is accepted but not required.
        data_type: ``'train'`` or ``'val'``.
        transform: Callable applied to each image inside ``__getitem__``.

    Raises:
        ValueError: If ``data_type`` is not one of the supported splits.
        FileNotFoundError: If an image named by the annotation files cannot
            be decoded.

    Images are stored exactly as ``cv2.imread`` returns them (OpenCV decodes
    to BGR channel order); no colour conversion is performed here.
    The arrays live in ``train_images``/``train_labels`` or
    ``val_images``/``val_labels`` depending on the split.
    """

    _SPLITS = ('train', 'val')

    def __init__(self, data_dir, data_type, transform):
        # Fail before touching the disk: an unknown split used to produce an
        # object whose __len__ returned None.
        if data_type not in self._SPLITS:
            raise ValueError(
                f'data_type must be one of {self._SPLITS}, got {data_type!r}')
        self.type = data_type
        self.transform = transform

        # wnids.txt lists one class id per line; its line index is the label.
        with open(os.path.join(data_dir, 'wnids.txt')) as fh:
            labels_t = fh.read().strip().split('\n')
        labels_map = {label_t: label for label, label_t in enumerate(labels_t)}

        images, labels = [], []
        if data_type == 'train':
            for i, label_t in tqdm(enumerate(labels_t), desc='[Load train images]'):
                class_dir = os.path.join(data_dir, 'train', label_t)
                # First tab-separated column of <wnid>_boxes.txt is the file name.
                with open(os.path.join(class_dir, f'{label_t}_boxes.txt')) as fh:
                    image_names = [line.split('\t')[0]
                                   for line in fh.read().strip().split('\n')]
                for image_name in image_names:
                    images.append(self._read_image(
                        os.path.join(class_dir, 'images', image_name)))
                    labels.append(i)
            self.train_images = np.array(images)
            self.train_labels = np.array(labels)
        else:
            val_dir = os.path.join(data_dir, 'val')
            with open(os.path.join(val_dir, 'val_annotations.txt')) as txt:
                for line in tqdm(txt, desc='[Load val images]'):
                    if not line.strip():
                        continue  # tolerate blank lines in the annotation file
                    image_name, label_t = line.strip('\n').split('\t')[:2]
                    images.append(self._read_image(
                        os.path.join(val_dir, 'images', image_name)))
                    labels.append(labels_map[label_t])
            self.val_images = np.array(images)
            self.val_labels = np.array(labels)

    @staticmethod
    def _read_image(image_path):
        """Decode one image, raising instead of silently storing ``None``."""
        img = cv2.imread(image_path)
        if img is None:  # cv2.imread signals failure by returning None
            raise FileNotFoundError(f'cannot read image: {image_path}')
        return img

    def __getitem__(self, index):
        """Return ``(transform(image), label)`` for the sample at ``index``."""
        image = getattr(self, f'{self.type}_images')[index]
        label = getattr(self, f'{self.type}_labels')[index]
        return self.transform(image), label

    def __len__(self):
        """Number of samples in the loaded split."""
        return getattr(self, f'{self.type}_labels').shape[0]


if __name__ == "__main__":
    # Smoke test: load both splits and inspect the first training batch.
    batch_size = 64
    train_dataset = TinyImageNet(
        './imagenet/tiny-imagenet-200/', 'train', transforms.Compose([transforms.ToTensor()]))
    val_dataset = TinyImageNet(
        './imagenet/tiny-imagenet-200/', 'val', transforms.Compose([transforms.ToTensor()]))
    train_dataloader = DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True)
    val_dataloader = DataLoader(
        dataset=val_dataset, batch_size=batch_size, shuffle=False)
    for batch_image, batch_label in train_dataloader:
        print(batch_image.shape)
        print(batch_label.shape)
        # The original line was `print(np.uni)`, which raises AttributeError;
        # presumably the distinct labels of the batch were meant.
        print(np.unique(batch_label.numpy()))
        exit()
'norm': True, 15 | 'res': True, 16 | 'restore_best_weights': True, 17 | 'top': [1, 5, 10], 18 | 'val_min_delta': 0.0, 19 | 'val_patience': 3} 20 | >>> set `block_sizes` to [(64, 64, 1), (64, 128, 2), (128, 256, 2), (256, 512, 2), (512, 1024, 2)] 21 | val_loss = 2.382565525686665, val_acc = [0.4432, 0.7116, 0.8041] 22 | >>> set `block_sizes` to [(64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1)] 23 | val_loss = 2.5370353900702898, val_acc = [0.3952, 0.6694, 0.7711] 24 | >>> set `block_sizes` to [(64, 64, 1), (64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1), (1024, 1024, 1)] 25 | val_loss = 3.336798567680796, val_acc = [0.2576, 0.5174, 0.6403] 26 | >>> set `norm` to False 27 | val_loss = 5.298343752599825, val_acc = [0.005, 0.025, 0.05] 28 | -------------------------------------------------------------------------------- /exp2/src/logs2.txt: -------------------------------------------------------------------------------- 1 | default parameters are: 2 | {'block_sizes': [(64, 64, 1), 3 | (64, 128, 2), 4 | (128, 256, 2), 5 | (256, 512, 2), 6 | (512, 1024, 2)], 7 | 'dropout': (0.2, 0.5), 8 | 'epochs': 40, 9 | 'lr_decay': 0.1, 10 | 'lr_init': 0.001, 11 | 'lr_min': 1e-05, 12 | 'lr_min_delta': 0.0, 13 | 'lr_patience': 1, 14 | 'norm': True, 15 | 'res': True, 16 | 'restore_best_weights': True, 17 | 'top': [1, 5, 10], 18 | 'val_min_delta': 0.0, 19 | 'val_patience': 3} 20 | >>> set `block_sizes` to [(64, 64, 1), (64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1), (1024, 1024, 1)] 21 | >>> set `res` to True 22 | val_loss = 3.299391880156888, val_acc = [0.257, 0.5236, 0.647] 23 | >>> set `block_sizes` to 
[(64, 64, 1), (64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1), (1024, 1024, 1)] 24 | >>> set `res` to False 25 | val_loss = 4.193392032270978, val_acc = [0.0827, 0.2677, 0.4042] 26 | -------------------------------------------------------------------------------- /exp2/src/logs3.txt: -------------------------------------------------------------------------------- 1 | default parameters are: 2 | {'block_sizes': [(64, 64, 1), 3 | (64, 128, 2), 4 | (128, 256, 2), 5 | (256, 512, 2), 6 | (512, 1024, 2)], 7 | 'dropout': (0.2, 0.5), 8 | 'epochs': 40, 9 | 'lr_decay': 0.1, 10 | 'lr_init': 0.001, 11 | 'lr_min': 1e-05, 12 | 'lr_min_delta': 0.0, 13 | 'lr_patience': 1, 14 | 'norm': True, 15 | 'res': True, 16 | 'restore_best_weights': True, 17 | 'top': [1, 5, 10], 18 | 'val_min_delta': 0.0, 19 | 'val_patience': 3} 20 | >>> set `dropout` to (0.0, 0.0) 21 | val_loss = 2.6277607641402323, val_acc = [0.4119, 0.675, 0.7767] 22 | >>> set `dropout` to (0.1, 0.3) 23 | val_loss = 2.534848990713715, val_acc = [0.4245, 0.691, 0.7916] 24 | >>> set `dropout` to (0.3, 0.7) 25 | val_loss = 2.4117816208274503, val_acc = [0.4233, 0.6981, 0.7954] 26 | >>> set `lr_decay` to 0.5 27 | val_loss = 2.4610023521314, val_acc = [0.4162, 0.6913, 0.7891] 28 | >>> set `lr_decay` to 0.99 29 | val_loss = 2.5230503021531803, val_acc = [0.4066, 0.6773, 0.7828] 30 | -------------------------------------------------------------------------------- /exp2/src/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch as t 3 | import torch.nn as nn 4 | from torch.optim import Adam 5 | from torch.optim.lr_scheduler import ReduceLROnPlateau 6 | from data import TinyImageNet 7 | from torch.utils.data import DataLoader 8 | from torchvision import transforms 9 | from model import CNN 10 | from tqdm import tqdm 
import os
from pprint import pprint
import random
from copy import deepcopy
import sys

try:
    from matplotlib import pyplot as plt
except ImportError:  # plotting is optional; training and evaluation do not need it
    plt = None


def set_seed(seed=123):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN kernels.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only inherited by child processes: the running interpreter fixed its
    # hash seed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    t.manual_seed(seed)
    t.cuda.manual_seed(seed)
    t.backends.cudnn.deterministic = True


def train(model, train_data, epochs,
          lr_init=1e-3, lr_min=1e-5, lr_decay=1., lr_min_delta=0., lr_patience=1,
          val_data=None, val_min_delta=0., val_patience=1,
          restore_best_weights=True, top=1, verbose=True, device='cpu'):
    """Train ``model`` with Adam, plateau-based LR decay and early stopping.

    Args:
        model: Network to optimise; it is wrapped in ``nn.DataParallel``.
        train_data: Iterable of ``(inputs, labels)`` batches.
        epochs: Maximum number of passes over ``train_data``.
        lr_init: Initial learning rate.
        lr_min: Lower bound for the decayed learning rate.
        lr_decay: Multiplicative decay factor. Values ``>= 1`` disable decay
            (``ReduceLROnPlateau`` itself rejects such factors).
        lr_min_delta: ``threshold`` handed to ``ReduceLROnPlateau``.
        lr_patience: ``patience`` handed to ``ReduceLROnPlateau``.
        val_data: Optional iterable of validation batches. Without it neither
            LR scheduling nor early stopping is performed.
        val_min_delta: Minimum decrease of the validation loss that counts as
            an improvement for early stopping.
        val_patience: Training stops once the number of consecutive
            non-improving epochs exceeds this value.
        restore_best_weights: On early stop, reload the weights of the epoch
            with the lowest validation loss.
        top: ``k`` or list of ``k`` values for top-k accuracy (see ``validate``).
        verbose: Show progress bars and per-epoch metrics.
        device: Device that batches and the model are moved to.

    Returns:
        ``(model, history)``: the ``nn.DataParallel``-wrapped model and a dict
        with the lists ``epoch``, ``train_loss``, ``train_acc`` and, when
        ``val_data`` is given, ``val_loss`` and ``val_acc`` (accuracy entries
        are for the first requested ``k``).
    """
    min_val_loss = np.inf
    wait = 0
    best_weights = None
    history = {}
    model = nn.DataParallel(model)
    model = model.to(device)
    optimizer = Adam(model.parameters(), lr=lr_init)
    # The signature's default lr_decay=1. means "no decay"; ReduceLROnPlateau
    # raises for factor >= 1, so no scheduler is created in that case.
    scheduler = None
    if lr_decay < 1.:
        # `verbose` is not forwarded: recent PyTorch releases deprecate it.
        # Learning-rate changes are reported manually below instead.
        scheduler = ReduceLROnPlateau(optimizer, 'min', factor=lr_decay, patience=lr_patience,
                                      threshold=lr_min_delta, min_lr=lr_min)
    loss_fc = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        pbar_batch = train_data
        if verbose:
            pbar_batch = tqdm(train_data, desc=f'[Epoch {epoch + 1}/{epochs}]', unit='batch',
                              ascii=True, bar_format='{desc:<13.13}{percentage:3.0f}%|{bar:10}{r_bar}')
        for inputs, labels in pbar_batch:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = loss_fc(outputs, labels.long())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if verbose:
                predicts = t.max(outputs, 1)[1]
                batch_total_num = labels.size(0)
                batch_correct_num = (predicts == labels.data).sum().item()
                pbar_batch.set_postfix({'train_loss': loss.item(),
                                        'train_acc': f'{batch_correct_num}/{batch_total_num}'})
        # Epoch-level training metrics come from a separate evaluation pass.
        train_loss, train_acc = validate(model, train_data, 'train', top, verbose, device)

        history.setdefault('epoch', []).append(epoch)
        history.setdefault('train_loss', []).append(train_loss)
        history.setdefault('train_acc', []).append(train_acc[0])
        if val_data is None:
            continue  # nothing to monitor: no LR scheduling, no early stopping

        val_loss, val_acc = validate(model, val_data, 'val', top, verbose, device)
        history.setdefault('val_loss', []).append(val_loss)
        history.setdefault('val_acc', []).append(val_acc[0])

        # schedule lr
        if scheduler is not None:
            lr_before = optimizer.param_groups[0]['lr']
            scheduler.step(val_loss)
            lr_after = optimizer.param_groups[0]['lr']
            if verbose and lr_after != lr_before:
                print(f'>>> Learning rate reduced to {lr_after:.4e}.')
        # control early stopping
        if val_loss < min_val_loss - val_min_delta:
            min_val_loss = val_loss
            wait = 0
            best_weights = deepcopy(model.state_dict())
        else:
            wait += 1
            if wait > val_patience:
                if verbose:
                    print('>>> Early Stopped.')
                # best_weights stays None if no epoch ever improved (e.g. NaN loss).
                if restore_best_weights and best_weights is not None:
                    model.load_state_dict(best_weights)
                break
    return model, history


def validate(model, dataloader, mode='val', top=1, verbose=True, device='cpu'):
    """Evaluate ``model`` and return its mean loss and top-k accuracies.

    Args:
        model: Module to evaluate. It is switched to eval mode for the pass
            and put back into whatever mode it was in before the call.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        mode: Prefix used in the printed metric names.
        top: ``k`` or list of ``k`` values for top-k accuracy.
        verbose: Print the metrics.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, acc_list)``: the cross-entropy averaged over samples (not
        over batches, so a short final batch is not over-weighted) and one
        accuracy per requested ``k``. Both are NaN for an empty dataloader.
    """
    tops = top if isinstance(top, list) else [top]
    was_training = model.training
    model.eval()
    loss_fc = nn.CrossEntropyLoss(reduction='sum')
    loss_sum = 0.
    total_num = 0
    correct_nums = [0] * len(tops)
    try:
        with t.no_grad():  # evaluation needs no autograd graph
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss_sum += loss_fc(outputs, labels.long()).item()
                total_num += labels.size(0)
                # Rank once for the largest k and slice for the smaller ones.
                k_max = min(max(tops), outputs.size(1))
                ranked = outputs.topk(k_max, dim=1)[1]
                hits = ranked == labels.data.unsqueeze(1)
                for i, k in enumerate(tops):
                    correct_nums[i] += hits[:, :k].sum().item()
    finally:
        model.train(was_training)

    if total_num:
        avg_loss = loss_sum / total_num
        acc_list = [correct_num / total_num for correct_num in correct_nums]
    else:
        avg_loss = float('nan')
        acc_list = [float('nan')] * len(tops)
    if verbose:
        print(f"{f'{mode}_loss':>11} = {avg_loss:<6.4f}, ", end='')
        print(', '.join([f"{f'{mode}_acc_top{top}':>15} = {acc:<6.4f}" for top, acc in zip(tops, acc_list)]))
    return avg_loss, acc_list


def plot_history(history):
    """Plot loss (left axis) and accuracy (right axis) per epoch to ``history.png``.

    Args:
        history: Dict produced by ``train``. Validation curves are drawn only
            when ``val_loss``/``val_acc`` are present.

    Raises:
        RuntimeError: If matplotlib is not installed.
    """
    if plt is None:
        raise RuntimeError('matplotlib is required to plot the training history')
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    x = history['epoch']
    ax1.set_title('Training history')
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss')
    ax2.set_ylabel('acc')
    lines = ax1.plot(x, history['train_loss'], label='train_loss')
    if 'val_loss' in history:
        lines += ax1.plot(x, history['val_loss'], label='val_loss')
    lines += ax2.plot(x, history['train_acc'], '-.', label='train_acc')
    if 'val_acc' in history:
        lines += ax2.plot(x, history['val_acc'], '-.', label='val_acc')
    labels = [line.get_label() for line in lines]
    plt.legend(lines, labels)
    plt.savefig('history.png')
    plt.close(fig)  # release the figure when called repeatedly


def run(params, plot=False, verbose=False):
    """Build a ``CNN`` from ``params``, train it and score it on the validation set.

    Relies on the module-level ``train_dataloader`` and ``val_dataloader``
    created by the script section below.

    Args:
        params: Hyper-parameter dict (same keys as ``default_params``). The
            optional ``'device'`` entry falls back to CUDA when available,
            otherwise CPU.
        plot: Save the training curves via ``plot_history``.
        verbose: Forwarded to ``train``.

    Returns:
        ``(loss, acc)`` as returned by ``validate`` on the validation loader.
    """
    device = params.get('device', 'cuda' if t.cuda.is_available() else 'cpu')
    cnn = CNN(params['block_sizes'], params['res'], params['norm'],
              params['dropout'][0], params['dropout'][1])
    if verbose:
        print('number of parameters: {}'.format(sum(p.numel() for p in cnn.parameters() if p.requires_grad)))
    cnn, history = train(
        model=cnn,
        train_data=train_dataloader,
        epochs=params['epochs'],
        lr_init=params['lr_init'],
        lr_min=params['lr_min'],
        lr_decay=params['lr_decay'],
        lr_min_delta=params['lr_min_delta'],
        lr_patience=params['lr_patience'],
        val_data=val_dataloader,
        val_min_delta=params['val_min_delta'],
        val_patience=params['val_patience'],
        restore_best_weights=params['restore_best_weights'],
        top=params['top'],
        verbose=verbose,
        device=device
    )
    if plot:
        plot_history(history)
    loss, acc = validate(cnn, val_dataloader, 'val', top=params['top'], verbose=False, device=device)
    return loss, acc


if __name__ == "__main__":
    data_root = '/home/liuwei/projects/DL_exps/exp2/tiny-imagenet-200/'
    batch_size = 256*3

    train_dataset = TinyImageNet(data_root, 'train', transforms.Compose([transforms.ToTensor()]))
    val_dataset = TinyImageNet(data_root, 'val', transforms.Compose([transforms.ToTensor()]))
    train_dataloader = DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_dataloader = DataLoader(
        dataset=val_dataset, batch_size=batch_size, shuffle=False)

    # (c_in, c_out, stride) triples of the stacked basic blocks.
    blocks_11 = [
        (64, 64, 1),
        (64, 128, 2),
        (128, 256, 2),
        (256, 512, 2),
        (512, 1024, 2),
    ]  # 11 conv layers
    blocks_21 = [
        (64, 64, 1),
        (64, 64, 1),
        (64, 128, 2),
        (128, 128, 1),
        (128, 256, 2),
        (256, 256, 1),
        (256, 512, 2),
        (512, 512, 1),
        (512, 1024, 2),
        (1024, 1024, 1)
    ]  # 21 conv layers
    blocks_31 = [
        (64, 64, 1),
        (64, 64, 1),
        (64, 64, 1),
        (64, 128, 2),
        (128, 128, 1),
        (128, 128, 1),
        (128, 256, 2),
        (256, 256, 1),
        (256, 256, 1),
        (256, 512, 2),
        (512, 512, 1),
        (512, 512, 1),
        (512, 1024, 2),
        (1024, 1024, 1),
        (1024, 1024, 1),
    ]  # 31 conv layers

    default_params = {
        'block_sizes': list(blocks_31),  # switch to blocks_11 for the shallow baseline
        'epochs': 80,
        'res': True,
        'norm': True,
        'dropout': (0.1, 0.5),
        'lr_init': 1e-3,
        'lr_min': 1e-5,
        'lr_decay': 0.5,
        'lr_min_delta': 0.,
        'lr_patience': 2,
        'val_min_delta': 0.,
        'val_patience': 30,
        'top': [1, 5, 10],
        'restore_best_weights': True
    }
    param_grids = {
        'block_sizes': [blocks_11, blocks_21, blocks_31],
        'res': [True, False],
        'norm': [True, False],
        'dropout': [(0., 0.), (0.1, 0.3), (0.2, 0.5), (0.3, 0.7)],
        'lr_decay': [0.1, 0.5, 0.99],
    }

    set_seed(17717)

    # Job id from the command line; 0 (train the default configuration) if absent.
    try:
        job = int(sys.argv[1])
    except IndexError:
        job = 0

    os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3'
    device = 'cuda'
    # devices = [0, 1, 2, 3]
    # device = f'cuda:{devices[job]}'

    logfile = f'logs{job}.txt'

    def log(message):
        """Print ``message`` and append it to the log file right away."""
        print(message)
        with open(logfile, 'a') as f:
            f.write(message + '\n')

    print("default parameters are:")
    pprint(default_params, width=40)
    with open(logfile, 'w') as file:
        file.write("default parameters are:\n")
        pprint(default_params, stream=file, width=40)

    # run() reads params['device']; it is added after the header was logged so
    # the log keeps the same shape as the existing logs*.txt files.
    default_params['device'] = device

    if job == 0:
        loss, acc_list = run(default_params, plot=True, verbose=True)
        log(f'val_loss = {loss}, val_acc = {acc_list}')
        exit()

    job_keys = {
        1: ['block_sizes', 'norm'],
        2: ['res'],
        3: ['dropout', 'lr_decay']
    }
    if job not in job_keys:
        raise SystemExit(f'unknown job {job}; expected 0 or one of {sorted(job_keys)}')

    for key in job_keys[job]:
        for v in param_grids[key]:
            # The default value of a key is covered by the run of another key,
            # except for the structural keys, which are always swept in full.
            if key not in ['block_sizes', 'res'] and v == default_params[key]:
                continue
            new_params = default_params.copy()
            if key == 'res':
                # The residual comparison is always done on the deepest structure.
                new_params.update({'block_sizes': param_grids['block_sizes'][-1]})
                info = f'>>> set `block_sizes` to {param_grids["block_sizes"][-1]}\n'
            else:
                info = ''
            new_params.update({key: v})
            info += f'>>> set `{key}` to {v}'
            log(info)
            loss, acc_list = run(new_params, verbose=True)
            log(f'val_loss = {loss}, val_acc = {acc_list}')

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> #
#   run following command in terminal   #
#   $ python main.py 1                  #
#   $ python main.py 2                  #
#   $ python main.py 3                  #
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> #
import torch as t
import torch.nn as nn
from torch.nn.modules.activation import ReLU


class BasicBlock(nn.Module):
    """Two 3x3 convolutions with optional batch norm, dropout and residual add.

    Args:
        c_in: Input channels.
        c_out: Output channels.
        stride: Stride of the first convolution (the second one uses 1).
        res: Add the (possibly projected) input to the output before the
            final ReLU.
        norm: Use ``BatchNorm2d`` after each convolution, otherwise identity.
        dropout: Drop probability applied after each ReLU.
    """

    def __init__(self, c_in: int, c_out: int, stride: int = 1, res: bool = False, norm: bool = True, dropout: float = 0.2):
        super().__init__()
        self.res = res
        self.conv1 = nn.Conv2d(c_in, c_out, 3, stride, 1)
        self.bn1 = nn.BatchNorm2d(c_out) if norm else nn.Identity()
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.conv2 = nn.Conv2d(c_out, c_out, 3, 1, 1)
        self.bn2 = nn.BatchNorm2d(c_out) if norm else nn.Identity()
        # A 1x1 projection is needed whenever the shortcut and the main path
        # disagree in shape: a spatial change (stride != 1) or a channel
        # change with stride 1, which previously made the residual add fail.
        if res and (stride != 1 or c_in != c_out):
            self.downsample = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride, 0),
                nn.BatchNorm2d(c_out) if norm else nn.Identity()
            )
        else:
            self.downsample = None
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the block to ``x`` and return the activated output."""
        out = self.dropout1(self.relu1(self.bn1(self.conv1(x))))
        out = self.bn2(self.conv2(out))

        if self.res:
            shortcut = x if self.downsample is None else self.downsample(x)
            out = out + shortcut
        return self.dropout2(self.relu2(out))


class _BlockStackNet(nn.Module):
    """Shared body of ``CNN`` and ``ResNet``: stem, basic blocks, pooled classifier.

    Attribute names (``conv1``, ``block_list``, ``avgpool``, ``dropout``,
    ``fc``) are kept so existing ``state_dict`` keys stay valid.
    """

    _STEM_CHANNELS = 64  # output channels of both stems
    _NUM_CLASSES = 200   # size of the final linear layer

    def __init__(self, stem, block_sizes, res, norm, conv_dropout, fc_dropout):
        super().__init__()
        self.conv1 = stem
        self.block_list = nn.ModuleList()
        for block_size in block_sizes:
            block = BasicBlock(*block_size, res=res, norm=norm, dropout=conv_dropout)
            self.block_list.append(block)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(fc_dropout)
        # With no blocks the classifier sits directly on the stem output.
        features = block_sizes[-1][1] if len(block_sizes) else self._STEM_CHANNELS
        self.fc = nn.Linear(features, self._NUM_CLASSES)

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        x = self.conv1(x)
        for block in self.block_list:
            x = block(x)
        x = self.avgpool(x)
        x = x.view(x.shape[0], -1)
        x = self.dropout(x)
        x = self.fc(x)
        return x


class CNN(_BlockStackNet):
    """Block-stack classifier with a 5x5 stride-1 stem followed by 2x2 max pooling.

    Args:
        block_sizes: Sequence of ``(c_in, c_out, stride)`` triples, one per
            ``BasicBlock``.
        res: Use residual connections in every block.
        norm: Use batch normalisation in the stem and every block.
        conv_dropout: Drop probability inside the blocks.
        fc_dropout: Drop probability before the linear classifier.
    """

    def __init__(self, block_sizes, res: bool = False, norm: bool = True, conv_dropout=0.2, fc_dropout=0.5):
        stem = nn.Sequential(
            nn.Conv2d(3, 64, 5, 1, 2),  # 64*64*64
            nn.BatchNorm2d(64) if norm else nn.Identity(),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)  # 64*32*32
        )
        super().__init__(stem, block_sizes, res, norm, conv_dropout, fc_dropout)


class ResNet(_BlockStackNet):
    """Block-stack classifier with a 7x7 stride-2 stem followed by 3x3 stride-2 max pooling.

    Takes the same arguments as ``CNN``.
    """

    def __init__(self, block_sizes, res: bool = False, norm: bool = True, conv_dropout=0.2, fc_dropout=0.5):
        stem = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3),  # 64*32*32
            nn.BatchNorm2d(64) if norm else nn.Identity(),
            nn.ReLU(),
            nn.MaxPool2d(3, 2, 1)  # 64*16*16
        )
        super().__init__(stem, block_sizes, res, norm, conv_dropout, fc_dropout)
def run(params, plot=False, verbose=False):
    """Build a ``ResNet`` from ``params``, train it and score it on the validation set.

    Relies on the module-level ``train_dataloader`` and ``val_dataloader``
    created by the script section below.

    Args:
        params: Hyper-parameter dict; see ``default_params`` for the keys read.
        plot: Save the training curves via ``plot_history``.
        verbose: Forwarded to ``train``.

    Returns:
        ``(loss, acc)`` as returned by ``validate`` on the validation loader.
    """
    cnn = ResNet(params['block_sizes'], params['res'], params['norm'],
                 params['dropout'][0], params['dropout'][1])
    if verbose:
        print('number of parameters: {}'.format(sum(p.numel() for p in cnn.parameters() if p.requires_grad)))
    cnn, history = train(
        model=cnn,
        train_data=train_dataloader,
        epochs=params['epochs'],
        lr_init=params['lr_init'],
        lr_min=params['lr_min'],
        lr_decay=params['lr_decay'],
        lr_min_delta=params['lr_min_delta'],
        lr_patience=params['lr_patience'],
        val_data=val_dataloader,
        val_min_delta=params['val_min_delta'],
        val_patience=params['val_patience'],
        restore_best_weights=params['restore_best_weights'],
        top=params['top'],
        verbose=verbose,
        device=params['device']
    )
    if plot:
        plot_history(history)
    loss, acc = validate(cnn, val_dataloader, 'val', top=params['top'], verbose=False, device=params['device'])
    return loss, acc


if __name__ == "__main__":
    data_root = '/home/liuwei/projects/DL_exps/exp2/tiny-imagenet-200/'
    batch_size = 256 * 3

    train_dataset = TinyImageNet(data_root, 'train', transforms.Compose([transforms.ToTensor()]))
    val_dataset = TinyImageNet(data_root, 'val', transforms.Compose([transforms.ToTensor()]))
    train_dataloader = DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_dataloader = DataLoader(
        dataset=val_dataset, batch_size=batch_size, shuffle=False)

    default_params = {
        'block_sizes':
            [
                (64, 64, 1),
                (64, 64, 1),
                (64, 64, 1),
                (64, 128, 2),
                (128, 128, 1),
                (128, 128, 1),
                (128, 128, 1),
                (128, 256, 2),
                (256, 256, 1),
                (256, 256, 1),
                (256, 256, 1),
                (256, 256, 1),
                (256, 256, 1),
                (256, 512, 2),
                (512, 512, 1),
                (512, 512, 1),
            ],  # 33 conv layers
        'epochs': 40,
        'res': True,
        'norm': True,
        'dropout': (0., 0.),
        'lr_init': 1e-3,
        'lr_min': 1e-5,
        'lr_decay': 0.5,
        'lr_min_delta': 0.,
        'lr_patience': 2,
        'val_min_delta': 0.,
        'val_patience': 30,
        'top': [1, 5, 10],
        'restore_best_weights': True,
        'device': 'cuda'
    }

    set_seed(17717)
    os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3'

    logfile = 'logs_test.txt'

    print("default parameters are:")
    pprint(default_params, width=40)
    with open(logfile, 'w') as file:
        file.write("default parameters are:\n")
        pprint(default_params, stream=file, width=40)

    loss, acc_list = run(default_params, plot=True, verbose=True)
    info = f'val_loss = {loss}, val_acc = {acc_list}'
    print(info)
    # Opened only for the write, so the handle cannot leak if run() raises.
    with open(logfile, 'a') as f:
        f.write(info + '\n')
    exit()
![image-20210428212519451](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210428212519451.png) 43 | 44 | ## 实验内容 45 | 46 | 使用`pytorch`或者`tensorflow`实现卷积神经网络,在`ImageNet`数据集上进行图片分类。研究dropout、 normalization、 learning rate decay、 residual connection、网络深度等超参数对分类性能的影响。 47 | 48 | 数据集:`tiny-imagenet-200` 49 | 50 | 51 | ## 实验结果 52 | 53 | 实验使用`pytorch`进行。 54 | 55 | ### 源码结构及说明 56 | 57 | **模型结构:** 58 | 59 | 模型的主体结构由基础块堆叠而成。基础块由两层`3 x 3`的卷积网络构成,可以通过参数设定该基础块的输入通道数,输出通道数,是否使用批量标准化,以及是否使用残差连接。除此之外还可以选择是否在第一层卷积网络中使用长度为2的步长来缩减特征图的大小,使用多大舍弃概率的dropout层。具体实现如下: 60 | 61 | ```python 62 | class BasicBlock(nn.Module): 63 | def __init__(self, c_in: int, c_out: int, stride: int = 1, res: bool = False, norm: bool = True, dropout: float = 0.2): 64 | super().__init__() 65 | self.res = res 66 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride, 1) 67 | self.bn1 = nn.BatchNorm2d(c_out) if norm else nn.Identity() 68 | self.relu1 = nn.ReLU() 69 | self.dropout1 = nn.Dropout(dropout) 70 | self.conv2 = nn.Conv2d(c_out, c_out, 3, 1, 1) 71 | self.bn2 = nn.BatchNorm2d(c_out) if norm else nn.Identity() 72 | if res and stride != 1: 73 | self.downsample = nn.Sequential( 74 | nn.Conv2d(c_in, c_out, 1, stride, 0), 75 | nn.BatchNorm2d(c_out) if norm else nn.Identity() 76 | ) 77 | else: 78 | self.downsample = None 79 | self.relu2 = nn.ReLU() 80 | self.dropout2 = nn.Dropout(dropout) 81 | 82 | def forward(self, x): 83 | identity = x 84 | out = self.conv1(x) 85 | out = self.bn1(out) 86 | out = self.relu1(out) 87 | out = self.dropout1(out) 88 | out = self.conv2(out) 89 | out = self.bn2(out) 90 | 91 | if self.downsample is not None: 92 | identity = self.downsample(identity) 93 | if self.res: 94 | out += identity 95 | out = self.relu2(out) 96 | out = self.dropout2(out) 97 | return out 98 | ``` 99 | 100 | 可见如果统一参数`res, norm, dropout`, 决定基础块结构的参数为`(c_in, c_out, stride)`,只需要确定这个三元组即可确定网络的结构。 101 | 102 | 固定网络的输入层和输出层,中间层使用若干基础块堆叠,可以自由调整网络的深度。传入各个基础块的三元组参数,就可以确定整个网络的结构。总的网络实现如下: 103 | 104 | ```python 105 
| class CNN(nn.Module): 106 | def __init__(self, block_sizes, res: bool = False, norm: bool = True, conv_dropout=0.2, fc_dropout=0.5): 107 | super().__init__() 108 | self.conv1 = nn.Sequential( 109 | nn.Conv2d(3, 64, 5, 1, 2), # 64*64*64 110 | nn.BatchNorm2d(64) if norm else nn.Identity(), 111 | nn.ReLU(), 112 | nn.MaxPool2d(2, 2) # 64*32*32 113 | ) 114 | self.block_list = nn.ModuleList() 115 | for block_size in block_sizes: 116 | block = BasicBlock(*block_size, res=res, norm=norm, dropout=conv_dropout) 117 | self.block_list.append(block) 118 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 119 | self.dropout = nn.Dropout(fc_dropout) 120 | self.fc = nn.Linear(block_sizes[-1][1], 200) 121 | 122 | def forward(self, x): 123 | x = self.conv1(x) 124 | for block in self.block_list: 125 | x = block(x) 126 | x = self.avgpool(x) 127 | x = x.view(x.shape[0], -1) 128 | x = self.dropout(x) 129 | x = self.fc(x) 130 | return x 131 | ``` 132 | 133 | 其中`block_sizes`就是各个基础块的三元组参数组成的列表。例如,本实验使用了如下三组`block_sizes`: 134 | 135 | ```python 136 | # 结构1: 共11层卷积(算上输入层),约20M参数 137 | [(64, 64, 1), (64, 128, 2), (128, 256, 2), (256, 512, 2), (512, 1024, 2)], 138 | # 结构2: 共21层卷积,约45M参数 139 | [(64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1)] 140 | # 结构3: 共31层卷积,约70M参数 141 | [(64, 64, 1), (64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1), (1024, 1024, 1)] 142 | ``` 143 | 144 | 145 | ### 结果及分析 146 | 147 | 本实验的默认参数如下: 148 | 149 | ```python 150 | batch_size = 64 151 | default_params = { 152 | 'block_sizes': 153 | [ 154 | (64, 64, 1), 155 | (64, 128, 2), 156 | (128, 256, 2), 157 | (256, 512, 2), 158 | (512, 1024, 2), 159 | ], # 堆叠的各个基础块的参数,即结构1 160 | 'epochs': 40, # 最大训练轮数 161 | 'res': True, # 是否在基础块中使用残差连接 162 | 'norm': True, # 是否在卷积层后使用批量标准化 163 | 'dropout': (0.2, 
0.5), # 分别为卷积层,全连接层后的drop概率 164 | 'lr_init': 1e-3, # 学习率的初始值 165 | 'lr_min': 1e-5, # 学习率的最小值 166 | 'lr_decay': 0.1, # 学习率的衰减倍率 167 | 'lr_min_delta': 0., # 认为验证集loss有明显降低的阈值,用于调整学习率 168 | 'lr_patience': 1, # 验证集loss没有明显降低的连续轮数,用于调整学习率 169 | 'val_min_delta': 0., # 认为验证集loss有明显降低的阈值, 用于控制早停 170 | 'val_patience': 3, # 验证集loss没有明显降低的连续轮数,用于控制早停 171 | 'top': [1, 5, 10], # top n 准确率 172 | 'restore_best_weights': True # 早停后,是否将模型权值恢复为验证集loss最低时的权值 173 | } 174 | ``` 175 | 176 | 将会进行调节并加以对比的参数包括: 177 | 178 | ```python 179 | 'block_sizes', 'res', 'norm', 'dropout', 'lr_decay' 180 | ``` 181 | 182 | 此外,本实验固定随机种子, 保证结果可复现: 183 | 184 | ```python 185 | import random 186 | import numpy as np 187 | import torch as t 188 | import os 189 | 190 | def set_seed(seed): 191 | random.seed(seed) 192 | os.environ['PYTHONHASHSEED'] = str(seed) 193 | np.random.seed(seed) 194 | t.manual_seed(seed) 195 | t.cuda.manual_seed(seed) 196 | t.backends.cudnn.deterministic = True 197 | set_seed(17717) 198 | ``` 199 | 200 | #### 对比一:Dropout 201 | 202 | 固定其他参数为默认参数,调整`dropout`为如下四组值: 203 | 204 | ```python 205 | 'dropout': [(0., 0.), (0.1, 0.3), (0.2, 0.5), (0.3, 0.7)], 206 | ``` 207 | 208 | 训练结束后,对所得模型在验证集上进行验证,得到的结果如下表: 209 | 210 | | `dropout` | loss | top 1 accuracy | top 5 accuracy | top 10 accuracy | 211 | | :----------: | :--------: | :------------: | :------------: | :-------------: | 212 | | `(0., 0.)` | 2.6278 | 0.4119 | 0.6750 | 0.7767 | 213 | | `(0.1, 0.3)` | 2.5348 | 0.4245 | 0.6910 | 0.7916 | 214 | | `(0.2, 0.5)` | **2.3826** | **0.4432** | **0.7116** | **0.8041** | 215 | | `(0.3, 0.7)` | 2.4118 | 0.4233 | 0.6981 | 0.7954 | 216 | 217 | **分析:**dropout概率太小或者不使用dropout技术,过拟合风险更大;dropout概率太大,会导致欠拟合。应该选择合适的dropout概率来降低过拟合风险,提高模型性能。 218 | 219 | #### 对比二:Normalization 220 | 221 | 固定其他参数为默认参数,调整`norm`分别为`True` 或者 `False`. 
222 | 223 | 训练结束后,对所得模型在验证集上进行验证,得到的结果如下表: 224 | 225 | | `norm` | loss | top 1 accuracy | top 5 accuracy | top 10 accuracy | 226 | | :-----: | :--------: | :------------: | :------------: | :-------------: | 227 | | `True` | **2.3826** | **0.4432** | **0.7116** | **0.8041** | 228 | | `False` | 5.2983 | 0.0050 | 0.0250 | 0.0500 | 229 | 230 | **分析:**如果不加标准化层,学不到东西。因此在使用卷积神经网络时标准化层是必不可少的。 231 | 232 | #### 对比三:Learning rate decay 233 | 234 | 固定其他参数为默认参数,调整`lr_decay`分别为`[0.1, 0.5, 0.99]`. 235 | 236 | 训练结束后,对所得模型在验证集上进行验证,得到的结果如下表: 237 | 238 | | `lr_decay` | loss | top 1 accuracy | top 5 accuracy | top 10 accuracy | 239 | | :--------: | :--------: | :------------: | :------------: | :-------------: | 240 | | `0.1` | **2.3826** | **0.4432** | **0.7116** | **0.8041** | 241 | | `0.5` | 2.4610 | 0.4162 | 0.6913 | 0.7891 | 242 | | `0.99`* | 2.5231 | 0.4066 | 0.6773 | 0.7828 | 243 | 244 | > *注:设置为`0.99`是因为`pytorch`的`ReduceLROnPlateau`只允许小于`1.0`的值,可以认为是不对学习率进行衰减。 245 | 246 | **分析:**在训练网络时,应该适当调节学习率。在刚开始训练时可以使用较大的学习率以加快收敛速度,在接近极小值时,如果不对学习率进行调整,会由于步长过大而越过极小值而无法收敛,这时应该减小学习率,以更好地收敛。 247 | 248 | #### 对比四:Residual connection 249 | 250 | 固定其他参数为默认参数,将`block_sizes`设置为**结构3**, 调整`res`分别为`True` 或者 `False`. 
251 | 252 | 训练结束后,对所得模型在验证集上进行验证,得到的结果如下表: 253 | 254 | | `res` | loss | top 1 accuracy | top 5 accuracy | top 10 accuracy | 255 | | :-----: | :--------: | :------------: | :------------: | :-------------: | 256 | | `True` | **3.2994** | **0.2570** | **0.5236** | **0.6470** | 257 | | `False` | 4.1934 | 0.0827 | 0.2677 | 0.4042 | 258 | 259 | **分析:**在**结构3**(31层卷积层)中,加残差连接相比不加有很大的性能提升,证实了残差连接能够极大改善深层网络难以优化的问题。 260 | 261 | #### 对比五:网络深度 262 | 263 | 固定其他参数为默认参数,将`block_sizes`分别设置为前述三种结构。 264 | 265 | 训练结束后,对所得模型在验证集上进行验证,得到的结果如下表: 266 | 267 | | `block_sizes` | loss | top 1 accuracy | top 5 accuracy | top 10 accuracy | 268 | | :---------------: | :--------: | :------------: | :------------: | :-------------: | 269 | | 结构1(11层卷积) | **2.3826** | **0.4432** | **0.7116** | **0.8041** | 270 | | 结构2(21层卷积) | 2.5370 | 0.3952 | 0.6694 | 0.7711 | 271 | | 结构3(31层卷积) | 3.3368 | 0.2576 | 0.5174 | 0.6403 | 272 | 273 | **分析:**性能随着网络深度的增加变差了,但理论上深层网络应该至少能和浅层网络一样好。在实践中,更深层的网络更难优化,优化地形更为复杂,更容易陷入局部极小值,即使使用了残差连接技术,如果网络结构不适合当前任务或训练方法不当,徒然增加网络深度仍很可能会导致更差的性能。 274 | 275 | 在何恺明等人的论文中,引入残差连接后,深层网络相比浅层网络的训练loss降得更快了,最终效果也更好。而在我的实验中,深层网络的收敛速度却慢得多, 最终性能也要差一些,调了很久的参数也没有改善。其中的具体原因有待进一步做实验来弄明白,在这次实验中没有更多的时间、精力和算力来继续研究下去了(DDL要到了:-)。 276 | 277 | ## 实验总结 278 | 279 | 因为卷积神经网络的结构及原理比较简单,目标也很明确,本次实验实现上来说并不难。但是由于算力有限,想要跑出一次结果都要等很久,参数调节起来颇有困难,花了很多时间也没有明显提升。因此研究得并不够细致,有一些地方没有达到理论上那么好的结果。今后有机会还得多尝试尝试。 280 | 281 | -------------------------------------------------------------------------------- /exp2/实验二:卷积神经网络.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp2/实验二:卷积神经网络.pdf -------------------------------------------------------------------------------- /exp3/ReadMe.md: -------------------------------------------------------------------------------- 1 | # 说明 2 | 3 | ## glove.6B.zip 4 | 5 | 
glove预训练词向量,也可以自己使用想用的词向量,使用方法参见https://blog.csdn.net/bqw18744018044/article/details/89575127 6 | 7 | ## aclImdb_v1.tar.gz 8 | 9 | 所使用的数据集 10 | 11 | 具体可见:Maas, A. L., Daly, R. E., Pham, P. T., Huang, D., Ng, A. Y., & Potts, C. (2011, June). Learning word vectors for sentiment analysis. In Proceedings of the 49th annual meeting of the association for computational linguistics: Human language technologies-volume 1 (pp. 142-150). Association for Computational Linguistics. 12 | 13 | ## IMDb.py 14 | 15 | 数据集的基本处理代码 16 | 17 | ## 模型评价指标 18 | 19 | 预测准确率 20 | 21 | ## SOTA方法准确率 22 | 23 | ![img.png](img.png) 24 | 25 | 可能有用的资料: 26 | 27 | https://arxiv.org/pdf/1905.05583v3.pdf -------------------------------------------------------------------------------- /exp3/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/img.png -------------------------------------------------------------------------------- /exp3/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.2 2 | spacy==3.0.6 3 | torch==1.8.1 4 | torchtext==0.9.1 5 | tqdm==4.56.0 6 | -------------------------------------------------------------------------------- /exp3/src/config.py: -------------------------------------------------------------------------------- 1 | raw_data_folder = './aclImdb/' 2 | 3 | vectors_folder = './glove.6B/' 4 | 5 | data_folder = './data/' 6 | 7 | SEED = 2077 8 | DEVICE = 'cuda:0' 9 | 10 | VAL_RATIO = 0.2 11 | 12 | VECTORS = 'glove.6B.100d' 13 | 14 | VOCAB_SIZE = 400000 15 | EMBEDDING_DIM = 100 16 | HIDDEN_DIM = 64 17 | N_LAYERS = 1 18 | BIDIRECTIONAL = False 19 | DROPOUT = 0. 
20 | BATCH_SIZE = 128 21 | N_EPOCHS = 10 22 | MODEL_BASE = 'RNN' 23 | -------------------------------------------------------------------------------- /exp3/src/data.py: -------------------------------------------------------------------------------- 1 | from torchtext.legacy import data 2 | import os 3 | import torch 4 | from tqdm import tqdm 5 | from config import * 6 | import spacy 7 | import pickle 8 | 9 | 10 | _nlp = spacy.load('en_core_web_sm') 11 | TEXT = data.Field(tokenize=lambda x: [t.text for t in _nlp(x)], 12 | include_lengths=True, lower=True) 13 | LABEL = data.LabelField(use_vocab=False) 14 | 15 | 16 | def _get_examples(which, fields=None): 17 | cache_file = os.path.join(data_folder, which + '.pkl') 18 | if os.path.exists(cache_file): 19 | with open(cache_file, 'rb') as f: 20 | examples = pickle.load(f) 21 | return examples 22 | 23 | examples = [] 24 | raw_folder = os.path.join(raw_data_folder, which) 25 | for subfolder in ['pos', 'neg']: 26 | folder_name = os.path.join(raw_folder, subfolder) 27 | for file in tqdm(os.listdir(folder_name), 28 | bar_format='{percentage:3.0f}%|{bar:20}{r_bar}'): 29 | with open(os.path.join(folder_name, file), 'rb') as f: 30 | text = f.read().decode('utf-8').replace('\n', '').lower() 31 | label = 1 if subfolder == 'pos' else 0 32 | examples.append(data.Example.fromlist([text, label], fields)) 33 | with open(cache_file, 'wb') as f: 34 | pickle.dump(examples, f) 35 | return examples 36 | 37 | 38 | def get_dataloader(text_field=TEXT, label_field=LABEL): 39 | 40 | fields = [('text', text_field), ('label', label_field)] 41 | 42 | train_data = _get_examples('train', fields) 43 | test_data = _get_examples('test', fields) 44 | train_data = data.Dataset(train_data, fields) 45 | test_data = data.Dataset(test_data, fields) 46 | val_data = None 47 | if VAL_RATIO: 48 | train_data, val_data = train_data.split(split_ratio=1 - VAL_RATIO) 49 | vectors = VECTORS.replace('.txt', '') 50 | text_field.build_vocab(train_data, 
max_size=VOCAB_SIZE, 51 | vectors=vectors, vectors_cache=vectors_folder) 52 | label_field.build_vocab(train_data) 53 | if VAL_RATIO: 54 | train_iterator, val_iterator, test_iterator = data.BucketIterator.splits( 55 | (train_data, val_data, test_data), 56 | batch_size=BATCH_SIZE, 57 | sort_key=lambda x: len(x.text), 58 | sort_within_batch=True, 59 | ) 60 | return train_iterator, val_iterator, test_iterator 61 | else: 62 | train_iterator, test_iterator = data.BucketIterator.splits( 63 | (train_data, test_data), 64 | batch_size=BATCH_SIZE, 65 | sort=False 66 | ) 67 | return train_iterator, test_iterator 68 | -------------------------------------------------------------------------------- /exp3/src/main.py: -------------------------------------------------------------------------------- 1 | from utils import binary_acc, set_seed 2 | from config import * 3 | import spacy 4 | import torch 5 | import torch.nn as nn 6 | from torch.optim import Adam 7 | from data import TEXT, get_dataloader 8 | import random 9 | import os 10 | from model import RNNClassifier 11 | from utils import binary_acc 12 | import numpy as np 13 | from tqdm import tqdm 14 | 15 | 16 | def train(model, iterator, optimizer, criterion): 17 | epoch_loss = 0 18 | epoch_acc = 0 19 | 20 | pbar = tqdm(iterator, unit='batch', ascii=True, 21 | bar_format='{percentage:3.0f}%|{bar:20}{r_bar}') 22 | model.train() 23 | for batch in pbar: 24 | optimizer.zero_grad() 25 | 26 | (text, text_lengths), label = batch.text, batch.label 27 | text = text.to(DEVICE) 28 | label = label.to(DEVICE) 29 | 30 | preds = model(text, text_lengths).squeeze() 31 | loss = criterion(preds, label.float()) 32 | loss.backward() 33 | optimizer.step() 34 | 35 | acc = binary_acc(preds, label) 36 | 37 | pbar.set_postfix({'train_loss': loss.item(), 38 | 'train_acc': acc.item()}) 39 | 40 | epoch_loss += loss.item() 41 | epoch_acc += acc.item() 42 | 43 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 44 | 45 | 46 | def evaluate(model, 
iterator, criterion): 47 | epoch_loss = 0 48 | epoch_acc = 0 49 | 50 | model.eval() 51 | with torch.no_grad(): 52 | for batch in iterator: 53 | (text, text_lengths), label = batch.text, batch.label 54 | text = text.to(DEVICE) 55 | label = label.to(DEVICE) 56 | 57 | preds = model(text, text_lengths).squeeze() 58 | loss = criterion(preds, label.float()) 59 | acc = binary_acc(preds, label) 60 | 61 | epoch_loss += loss.item() 62 | epoch_acc += acc.item() 63 | 64 | return epoch_loss / len(iterator), epoch_acc / len(iterator) 65 | 66 | 67 | if __name__ == '__main__': 68 | set_seed(SEED) 69 | train_iterator, val_iterator, test_iterator = get_dataloader() 70 | rnn = RNNClassifier( 71 | vocab_size=len(TEXT.vocab), 72 | embedding_dim=EMBEDDING_DIM, 73 | hidden_dim=HIDDEN_DIM, 74 | n_layers=N_LAYERS, 75 | bidirectional=BIDIRECTIONAL, 76 | dropout=DROPOUT, 77 | model_base=MODEL_BASE 78 | ) 79 | pretrained_embeddings = TEXT.vocab.vectors 80 | rnn.embedding.weight.data.copy_(pretrained_embeddings) 81 | optimizer = Adam(rnn.parameters()) 82 | criterion = nn.BCELoss() 83 | 84 | rnn = rnn.to(DEVICE) 85 | criterion = criterion.to(DEVICE) 86 | 87 | best_valid_loss = np.inf 88 | for epoch in range(N_EPOCHS): 89 | print(f">>> Epoch {epoch+1}/{N_EPOCHS}") 90 | 91 | train_loss, train_acc = train(rnn, train_iterator, optimizer, criterion) 92 | valid_loss, valid_acc = evaluate(rnn, val_iterator, criterion) 93 | 94 | if valid_loss < best_valid_loss: 95 | best_valid_loss = valid_loss 96 | torch.save(rnn.state_dict(), './checkpoint/best_weights.pt') 97 | 98 | print(f'\tTrain Loss: {train_loss:.4f} | Train Acc: {train_acc * 100:.2f}%') 99 | print(f'\tValid Loss: {valid_loss:.4f} | Valid Acc: {valid_acc * 100:.2f}%') 100 | print(">>> Testing...") 101 | rnn.load_state_dict(torch.load("./checkpoint/best_weights.pt")) 102 | test_loss, test_acc = evaluate(rnn, test_iterator, criterion) 103 | print(f'\tTest Loss: {test_loss:.4f} | Test Acc: {test_acc * 100:.2f}%') 104 | 
-------------------------------------------------------------------------------- /exp3/src/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.utils.rnn import pack_padded_sequence 4 | 5 | 6 | class RNNClassifier(nn.Module): 7 | def __init__(self, vocab_size, embedding_dim, hidden_dim, 8 | n_layers: int = 1, bidirectional: bool = False, 9 | dropout: float = 0., model_base: str = 'RNN'): 10 | super(RNNClassifier, self).__init__() 11 | self.bidirectional = bidirectional 12 | self.model_base = model_base.lower() 13 | if self.model_base == 'lstm': 14 | model = nn.LSTM 15 | else: 16 | model = nn.RNN 17 | 18 | self.embedding = nn.Embedding(vocab_size, embedding_dim) 19 | self.rnn = model(embedding_dim, 20 | hidden_dim, 21 | num_layers=n_layers, 22 | bidirectional=bidirectional, 23 | dropout=dropout) 24 | if self.bidirectional: 25 | hidden_dim *= 2 26 | self.fc = nn.Linear(hidden_dim, 1) 27 | self.act = nn.Sigmoid() 28 | 29 | def forward(self, x, x_len): 30 | x = self.embedding(x) 31 | x = pack_padded_sequence(x, x_len) 32 | if self.model_base == 'lstm': 33 | _, (h_n, _) = self.rnn(x) 34 | else: 35 | _, h_n = self.rnn(x) # h_n.shape = (num_layers * num_directions, batch, hidden_size) 36 | if self.bidirectional: 37 | hidden = torch.cat((h_n[-2], h_n[-1]), dim=1) # get last layer 38 | else: 39 | hidden = h_n[-1] 40 | logits = self.fc(hidden) 41 | output = self.act(logits) 42 | return output 43 | -------------------------------------------------------------------------------- /exp3/src/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import os 4 | import numpy as np 5 | 6 | 7 | def binary_acc(preds, label): 8 | preds = torch.round(preds) 9 | correct = torch.eq(preds, label).float() 10 | acc = correct.sum() / correct.shape[0] 11 | return acc 12 | 13 | 14 | def set_seed(seed=123): 15 | 
random.seed(seed) 16 | np.random.seed(seed) 17 | os.environ["PYTHONHASHSEED"] = str(seed) 18 | torch.manual_seed(seed) 19 | torch.cuda.manual_seed_all(seed) 20 | # torch.use_deterministic_algorithms(True) 21 | # torch.backends.cudnn.enabled = False 22 | torch.backends.cudnn.benchmark = False 23 | torch.backends.cudnn.deterministic = True 24 | os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2" 25 | -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络.md: -------------------------------------------------------------------------------- 1 | # 实验三:循环神经网络 2 | 3 | 姓名:刘威 4 | 5 | 学号:PB18010469 6 | 7 | 8 | ## 实验目的 9 | 10 | * 了解并熟悉循环神经网络的原理 11 | * 了解随时间反向传播算法(BPTT) 12 | * 学会使用循环神经网络完成文本分类任务 13 | 14 | 15 | ## 实验原理 16 | 17 | ![image-20210513172244821](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172244821.png) 18 | 19 | ![image-20210513172303845](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172303845.png) 20 | 21 | ![image-20210513172317856](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172317856.png) 22 | 23 | ![image-20210513172733303](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172733303.png) 24 | 25 | 26 | 27 | ![image-20210513172747548](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172747548.png) 28 | 29 | ![image-20210513172852144](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172852144.png) 30 | 31 | ![image-20210513172947008](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172947008.png) 32 | 33 | 34 | 35 | 
![image-20210513173012590](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513173012590.png) 36 | 37 | ![image-20210513173151864](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513173151864.png) 38 | 39 | 40 | 41 | ![image-20210513173355442](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513173355442.png) 42 | 43 | 44 | ## 实验内容 45 | 46 | 编写RNN的语言模型,并基于训练好的词向量,编写RNN模型用于文本分类 47 | 48 | 数据集:aclIMDB 49 | 50 | 预训练词向量:GloVe.6B 51 | 52 | ## 实验结果 53 | 54 | 实验使用`pytorch`进行 55 | 56 | 57 | ### 源码结构及说明 58 | 59 | #### 数据预处理部分 60 | 61 | 使用`torchtext`库处理文本;使用`spaCy`库进行分词。 62 | 63 | 将`train/`目录下的数据集划分为`train/validation`, 划分比例为`0.8/0.2`. 64 | 65 | #### 模型部分 66 | 67 | 由一个`Embedding`层和一个`RNN/LSTM`模块构成,后者可以调节层数和是否双向。 68 | 69 | `Embedding`层使用`GloVe`预训练词向量进行初始化。 70 | 71 | 模型定义如下: 72 | 73 | ```python 74 | class RNNClassifier(nn.Module): 75 | def __init__(self, vocab_size, embedding_dim, hidden_dim, 76 | n_layers: int = 1, bidirectional: bool = False, 77 | dropout: float = 0., model_base: str = 'RNN'): 78 | super(RNNClassifier, self).__init__() 79 | self.bidirectional = bidirectional 80 | self.model_base = model_base.lower() 81 | if self.model_base == 'lstm': 82 | model = nn.LSTM 83 | else: 84 | model = nn.RNN 85 | 86 | self.embedding = nn.Embedding(vocab_size, embedding_dim) 87 | self.rnn = model(embedding_dim, 88 | hidden_dim, 89 | num_layers=n_layers, 90 | bidirectional=bidirectional, 91 | dropout=dropout) 92 | if self.bidirectional: 93 | hidden_dim *= 2 94 | self.fc = nn.Linear(hidden_dim, 1) 95 | self.act = nn.Sigmoid() 96 | 97 | def forward(self, x, x_len): 98 | x = self.embedding(x) 99 | x = pack_padded_sequence(x, x_len) 100 | if self.model_base == 'lstm': 101 | _, (h_n, _) = self.rnn(x) 102 | else: 103 | _, h_n = self.rnn(x) # h_n.shape = (num_layers * num_directions, batch, hidden_size) 104 | if self.bidirectional: 105 
| hidden = torch.cat((h_n[-2], h_n[-1]), dim=1) # get last layer 106 | else: 107 | hidden = h_n[-1] 108 | logits = self.fc(hidden) 109 | output = self.act(logits) 110 | return output 111 | ``` 112 | 113 | ### 结果及分析 114 | 115 | 本实验的可选参数为 116 | 117 | ```python 118 | VOCAB_SIZE = 400000 119 | EMBEDDING_DIM = 100 120 | HIDDEN_DIM = 64 121 | N_LAYERS = 1 # RNN/LSTM 层数 122 | BIDIRECTIONAL = False # 是否双向 123 | DROPOUT = 0. 124 | BATCH_SIZE = 128 125 | N_EPOCHS = 10 126 | MODEL_BASE = 'RNN' # 使用`Elman RNN` 还是 `LSTM` 127 | ``` 128 | 129 | 此外,本实验固定随机种子: 130 | 131 | ```python 132 | import torch 133 | import random 134 | import os 135 | import numpy as np 136 | 137 | def set_seed(seed=123): 138 | random.seed(seed) 139 | np.random.seed(seed) 140 | os.environ["PYTHONHASHSEED"] = str(seed) 141 | torch.manual_seed(seed) 142 | torch.cuda.manual_seed_all(seed) 143 | # torch.use_deterministic_algorithms(True) 144 | # torch.backends.cudnn.enabled = False 145 | torch.backends.cudnn.benchmark = False 146 | torch.backends.cudnn.deterministic = True 147 | os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2" 148 | 149 | set_seed(2077) 150 | ``` 151 | 152 | **词向量维度:100维** 153 | 154 | 保存在验证集上`val_loss`最低的模型用于测试,得到的测试集准确率(%)如下表: 155 | 156 | |N_L-N_D|RNN| LSTM| 157 | |:---:|:---:|:---:| 158 | | 1-1 |77.22|85.57| 159 | | 1-2 |77.87| 85.64 | 160 | | 2-1 | 77.30 | 86.56 | 161 | | 2-2 | 76.41 | 85.65 | 162 | | 5-1 | 75.28 | 84.47 | 163 | | 5-2 | 74.14 | 84.56 | 164 | 165 | **注:**`N_L`代表`N_Layers`, 即循环神经网络的层数, `N_D`代表`N_Direction`, 当`bidirectional`设为`False`时为 `1`, 否则为 `2`. 
166 | 167 | 结果表明,`LSTM`明显优于`RNN`;设置双向在网络较浅时略有提升,在网络较深时反而有负面影响;简单地加深网络会使模型性能变差。 168 | 169 | **词向量维度:300维** 170 | 171 | 将词向量维度增加到300维,比较模型表现。 172 | 173 | | N_L-N_D | RNN | LSTM | 174 | | :-----: | :---: | :---: | 175 | | 1-1 | 76.29 | 86.20 | 176 | | 1-2 | 76.80 | 86.62 | 177 | 178 | RNN性能变差,LSTM性能变好。 179 | 180 | 181 | ## 实验总结 182 | 183 | 本实验的主要难点在于: 184 | 185 | + 认清并理解完成任务所需要的流程; 186 | + 文本处理的流程; 187 | + 词向量嵌入的原理和实践方法。 188 | 189 | 因此主要时间花在如何处理数据上。模型结构上相比前几次实验反而要简单一些。 -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络.pdf -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172134556.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172134556.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172216587.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172216587.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172230085.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172230085.png 
-------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172244821.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172244821.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172303845.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172303845.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172317856.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172317856.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172733303.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172733303.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172747548.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172747548.png -------------------------------------------------------------------------------- 
/exp3/实验三:循环神经网络/image-20210513172839199.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172839199.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172852144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172852144.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513172947008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513172947008.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513173012590.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513173012590.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513173151864.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513173151864.png -------------------------------------------------------------------------------- /exp3/实验三:循环神经网络/image-20210513173355442.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三:循环神经网络/image-20210513173355442.png -------------------------------------------------------------------------------- /exp4/ReadMe.md: -------------------------------------------------------------------------------- 1 | # 说明 2 | 3 | ## glove.6B.zip 4 | 5 | glove预训练词向量,也可以自己使用想用的词向量,使用方法参见https://blog.csdn.net/bqw18744018044/article/details/89575127 6 | 7 | ## aclImdb_v1.tar.gz 8 | 9 | 所使用的数据集 10 | 11 | 具体可见:Maas, A. L., Daly, R. E., Pham, P. T., Huang, D., Ng, A. Y., & Potts, C. (2011, June). Learning word vectors for sentiment analysis. In Proceedings of the 49th annual meeting of the association for computational linguistics: Human language technologies-volume 1 (pp. 142-150). Association for Computational Linguistics. 12 | 13 | ## IMDb.py 14 | 15 | 数据集的基本处理代码 16 | 17 | ## 模型评价指标 18 | 19 | 预测准确率 20 | 21 | ## SOTA方法准确率 22 | 23 | ![img.png](img.png) 24 | 25 | 可能有用的资料: 26 | 27 | https://arxiv.org/pdf/1905.05583v3.pdf -------------------------------------------------------------------------------- /exp4/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/img.png -------------------------------------------------------------------------------- /exp4/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.6.1 2 | tqdm==4.56.0 3 | torch==1.8.1 4 | numpy==1.19.2 5 | -------------------------------------------------------------------------------- /exp4/src/config.py: -------------------------------------------------------------------------------- 1 | raw_data_folder = './aclImdb/' 2 | vectors_folder = './glove.6B/' 3 | 
data_folder = './data/' 4 | 5 | 6 | SEED = 2077 7 | DEVICE = 'cuda:0' 8 | 9 | VAL_RATIO = 0.2 10 | 11 | HIDDEN_DIM = 32 12 | BATCH_SIZE = 8 13 | N_EPOCHS = 10 14 | MAX_LEN = 256 15 | LEARNING_RATE = 1e-5 16 | FREEZE_BERT = True 17 | JUST_TEST = False 18 | -------------------------------------------------------------------------------- /exp4/src/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from tqdm import tqdm 4 | from config import * 5 | import re 6 | import pickle 7 | from transformers import BertTokenizer 8 | import numpy as np 9 | from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler 10 | 11 | 12 | def get_raw_data(which): 13 | cache_file = os.path.join(data_folder, which + '_raw.pkl') 14 | if os.path.exists(cache_file): 15 | with open(cache_file, 'rb') as f: 16 | data = pickle.load(f) 17 | return data 18 | 19 | texts = [] 20 | labels = [] 21 | raw_folder = os.path.join(raw_data_folder, which) 22 | for subfolder in ['pos', 'neg']: 23 | folder_name = os.path.join(raw_folder, subfolder) 24 | for file in tqdm(os.listdir(folder_name), 25 | bar_format='{percentage:3.0f}%|{bar:20}{r_bar}'): 26 | with open(os.path.join(folder_name, file), 'rb') as f: 27 | text = f.read().decode('utf-8') 28 | label = 1 if subfolder == 'pos' else 0 29 | texts.append(text) 30 | labels.append(label) 31 | data = (texts, labels) 32 | with open(cache_file, 'wb') as f: 33 | pickle.dump(data, f) 34 | return data 35 | 36 | 37 | def text_preprocessing(text): 38 | # Remove '@name' 39 | text = re.sub(r'(@.*?)[\s]', ' ', text) 40 | 41 | # Replace '&' with '&' 42 | text = re.sub(r'&', '&', text) 43 | 44 | # Remove trailing whitespace 45 | text = re.sub(r'\s+', ' ', text).strip() 46 | 47 | return text 48 | 49 | 50 | tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) 51 | 52 | 53 | def preprocessing_for_bert(sentences): 54 | # Create empty lists to store 
outputs 55 | input_ids = [] 56 | attention_masks = [] 57 | 58 | for sent in tqdm(sentences): 59 | encoded_sent = tokenizer.encode_plus( 60 | text=text_preprocessing(sent), # Preprocess sentence 61 | add_special_tokens=True, # Add `[CLS]` and `[SEP]` 62 | max_length=MAX_LEN, # Max length to truncate/pad 63 | padding='max_length', # Pad sentence to max length 64 | truncation=True, 65 | # return_tensors='pt', # Return PyTorch tensor 66 | return_attention_mask=True # Return attention mask 67 | ) 68 | 69 | # Add the outputs to the lists 70 | input_ids.append(encoded_sent.get('input_ids')) 71 | attention_masks.append(encoded_sent.get('attention_mask')) 72 | 73 | # Convert lists to tensors 74 | input_ids = torch.tensor(input_ids) 75 | attention_masks = torch.tensor(attention_masks) 76 | 77 | return input_ids, attention_masks 78 | 79 | 80 | def get_dataloader(): 81 | dataset_cache_path = data_folder + 'train_data.pkl' 82 | dataset = dict() 83 | if not os.path.exists(dataset_cache_path): 84 | train_texts, train_labels = get_raw_data('train') 85 | test_texts, test_labels = get_raw_data('test') 86 | 87 | indices = np.random.permutation(len(train_texts)) 88 | val_indices, train_indices = np.split(indices, [round(len(train_texts) * VAL_RATIO)]) 89 | train_texts, train_labels = np.array(train_texts), np.array(train_labels) 90 | val_texts, val_labels = train_texts[val_indices], train_labels[val_indices] 91 | train_texts, train_labels = train_texts[train_indices], train_labels[train_indices] 92 | 93 | train_inputs, train_masks = preprocessing_for_bert(train_texts) 94 | val_inputs, val_masks = preprocessing_for_bert(val_texts) 95 | test_inputs, test_masks = preprocessing_for_bert(test_texts) 96 | 97 | # Convert other data types to torch.Tensor 98 | train_labels = torch.tensor(train_labels) 99 | val_labels = torch.tensor(val_labels) 100 | test_labels = torch.tensor(test_labels) 101 | 102 | dataset['train'] = TensorDataset(train_inputs, train_masks, train_labels) 103 | dataset['val'] 
= TensorDataset(val_inputs, val_masks, val_labels) 104 | dataset['test'] = TensorDataset(test_inputs, test_masks, test_labels) 105 | for which in ['train', 'val', 'test']: 106 | pickle.dump(dataset[which], open(os.path.join(data_folder, f'{which}_data.pkl'), 'wb')) 107 | else: 108 | for which in ['train', 'val', 'test']: 109 | dataset[which] = pickle.load(open(os.path.join(data_folder, f'{which}_data.pkl'), 'rb')) 110 | 111 | train_sampler = RandomSampler(dataset['train']) 112 | train_dataloader = DataLoader(dataset['train'], sampler=train_sampler, batch_size=BATCH_SIZE) 113 | 114 | val_sampler = SequentialSampler(dataset['val']) 115 | val_dataloader = DataLoader(dataset['val'], sampler=val_sampler, batch_size=BATCH_SIZE) 116 | 117 | test_sampler = SequentialSampler(dataset['test']) 118 | test_dataloader = DataLoader(dataset['test'], sampler=test_sampler, batch_size=BATCH_SIZE) 119 | 120 | return train_dataloader, val_dataloader, test_dataloader 121 | 122 | 123 | if __name__ == "__main__": 124 | get_dataloader() 125 | -------------------------------------------------------------------------------- /exp4/src/main.py: -------------------------------------------------------------------------------- 1 | from utils import set_seed 2 | from config import * 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim import Adam 6 | from data import get_dataloader 7 | import random 8 | import os 9 | import numpy as np 10 | from tqdm import tqdm 11 | from model import BertClassifier 12 | 13 | 14 | def train(model, dataloader, optimizer, criterion): 15 | epoch_loss = 0 16 | epoch_acc = 0 17 | 18 | pbar = tqdm(dataloader, unit='batch', ascii=True, 19 | bar_format='{percentage:3.0f}%|{bar:20}{r_bar}') 20 | model.train() 21 | for batch in pbar: 22 | optimizer.zero_grad() 23 | 24 | input_ids, attn_mask, labels = tuple(t.to(DEVICE) for t in batch) 25 | 26 | logits = model(input_ids, attn_mask) 27 | loss = criterion(logits, labels) 28 | loss.backward() 29 | optimizer.step() 30 
| 31 | # Get the predictions 32 | preds = torch.argmax(logits, dim=1).flatten() 33 | acc = (preds == labels).cpu().numpy().mean() 34 | 35 | pbar.set_postfix({'train_loss': loss.item(), 36 | 'train_acc': acc.item()}) 37 | 38 | epoch_loss += loss.item() 39 | epoch_acc += acc.item() 40 | 41 | return epoch_loss / len(dataloader), epoch_acc / len(dataloader) 42 | 43 | 44 | def evaluate(model, dataloader, criterion): 45 | epoch_loss = 0 46 | epoch_acc = 0 47 | 48 | model.eval() 49 | with torch.no_grad(): 50 | for batch in dataloader: 51 | input_ids, attn_mask, labels = tuple(t.to(DEVICE) for t in batch) 52 | 53 | logits = model(input_ids, attn_mask) 54 | loss = criterion(logits, labels) 55 | 56 | # Get the predictions 57 | preds = torch.argmax(logits, dim=1).flatten() 58 | acc = (preds == labels).cpu().numpy().mean() 59 | 60 | epoch_loss += loss.item() 61 | epoch_acc += acc.item() 62 | 63 | return epoch_loss / len(dataloader), epoch_acc / len(dataloader) 64 | 65 | 66 | if __name__ == '__main__': 67 | set_seed(SEED) 68 | train_dataloader, val_dataloader, test_dataloader = get_dataloader() 69 | bert = BertClassifier(freeze_bert=FREEZE_BERT) 70 | # pretrained_embeddings = TEXT.vocab.vectors 71 | # rnn.embedding.weight.data.copy_(pretrained_embeddings) 72 | optimizer = Adam(bert.parameters(), lr=LEARNING_RATE) 73 | criterion = nn.CrossEntropyLoss() 74 | 75 | bert = bert.to(DEVICE) 76 | criterion = criterion.to(DEVICE) 77 | if not JUST_TEST: 78 | best_valid_loss = np.inf 79 | for epoch in range(N_EPOCHS): 80 | print(f">>> Epoch {epoch+1}/{N_EPOCHS}") 81 | 82 | train_loss, train_acc = train(bert, train_dataloader, optimizer, criterion) 83 | valid_loss, valid_acc = evaluate(bert, val_dataloader, criterion) 84 | 85 | if valid_loss < best_valid_loss: 86 | best_valid_loss = valid_loss 87 | torch.save(bert.state_dict(), './checkpoint/best_weights.pt') 88 | 89 | print(f'\tTrain Loss: {train_loss:.4f} | Train Acc: {train_acc * 100:.2f}%') 90 | print(f'\tValid Loss: {valid_loss:.4f} | 
def get_attn_pad_mask(seq_q, seq_k):
    """Build the boolean padding mask used by self-attention.

    Token id ``0`` is treated as PAD. A key position holding PAD is marked
    ``True`` (i.e. "mask this out") for every query position.

    Args:
        seq_q: 2-D tensor of query token ids, ``[batch_size, len_q]``.
        seq_k: 2-D tensor of key token ids, ``[batch_size, len_k]``.

    Returns:
        Boolean tensor of shape ``[batch_size, len_q, len_k]``; it is an
        expanded (non-copied) view of the per-key PAD indicator.
    """
    _, len_q = seq_q.size()
    batch_size, len_k = seq_k.size()
    # [batch_size, len_k] -> [batch_size, 1, len_k], then broadcast over queries.
    key_is_pad = seq_k.data.eq(0)[:, None, :]
    return key_is_pad.expand(batch_size, len_q, len_k)
class Embedding(nn.Module):
    """BERT input embedding: token + position + segment, followed by LayerNorm.

    Args:
        d_model: embedding width.
        vocab_size: number of rows in the token embedding table.
        max_len: number of rows in the position embedding table (the longest
            sequence that can be embedded).
        n_segments: number of rows in the segment (token type) embedding table.
    """

    def __init__(self, d_model, vocab_size, max_len, n_segments):
        super(Embedding, self).__init__()
        self.tok_embed = nn.Embedding(vocab_size, d_model)  # token embedding
        self.pos_embed = nn.Embedding(max_len, d_model)  # position embedding
        self.seg_embed = nn.Embedding(n_segments, d_model)  # segment(token type) embedding
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x, seg):
        """Embed a batch of token ids.

        Args:
            x: token ids, ``[batch_size, seq_len]``.
            seg: segment ids, same shape as ``x``.

        Returns:
            Normalised embeddings, ``[batch_size, seq_len, d_model]``.
        """
        seq_len = x.size(1)
        # Create the position ids on the same device as the input; a CPU-only
        # index tensor breaks the lookup once the module lives on a GPU.
        pos = torch.arange(seq_len, dtype=torch.long, device=x.device)
        pos = pos.unsqueeze(0).expand_as(x)  # (seq_len,) -> (batch_size, seq_len)
        embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)
        return self.norm(embedding)
class MultiHeadAttention(nn.Module):
    """Multi-head attention with output projection, residual add and LayerNorm.

    The output projection (``W_O``) and the layer normalisation are created
    once here and registered as sub-modules. Building them inside ``forward``
    would give fresh random weights on every call that are never trained,
    never saved in ``state_dict`` and never moved by ``.to(device)``.

    Note: ``state_dict`` therefore contains ``W_O.*`` and ``layer_norm.*``
    entries in addition to ``W_Q``/``W_K``/``W_V``.

    Args:
        d_model: model (input/output) width.
        d_k: per-head width of queries and keys.
        d_v: per-head width of values.
        n_heads: number of attention heads.
    """

    def __init__(self, d_model, d_k, d_v, n_heads):
        super(MultiHeadAttention, self).__init__()
        self.d_model = d_model
        self.d_k = d_k
        self.d_v = d_v
        self.n_heads = n_heads
        self.W_Q = nn.Linear(d_model, d_k * n_heads)
        self.W_K = nn.Linear(d_model, d_k * n_heads)
        self.W_V = nn.Linear(d_model, d_v * n_heads)
        # Learnable output projection and post-residual normalisation.
        self.W_O = nn.Linear(n_heads * d_v, d_model)
        self.layer_norm = nn.LayerNorm(d_model)
        self.attention = ScaledDotProductAttention()

    def forward(self, Q, K, V, attn_mask):
        """Apply multi-head attention.

        Args:
            Q: ``[batch_size, len_q, d_model]``.
            K: ``[batch_size, len_k, d_model]``.
            V: ``[batch_size, len_k, d_model]``.
            attn_mask: boolean mask ``[batch_size, len_q, len_k]``; ``True``
                marks positions to be masked out.

        Returns:
            ``(output, attn)`` where ``output`` is
            ``[batch_size, len_q, d_model]`` and ``attn`` is
            ``[batch_size, n_heads, len_q, len_k]``.
        """
        residual, batch_size = Q, Q.size(0)

        # (B, S, D) -proj-> (B, S, H*W) -split-> (B, S, H, W) -trans-> (B, H, S, W)
        q_s = self.W_Q(Q).view(batch_size, -1, self.n_heads, self.d_k).transpose(1, 2)
        k_s = self.W_K(K).view(batch_size, -1, self.n_heads, self.d_k).transpose(1, 2)
        v_s = self.W_V(V).view(batch_size, -1, self.n_heads, self.d_v).transpose(1, 2)

        # Replicate the mask for every head: [batch_size, n_heads, len_q, len_k]
        attn_mask = attn_mask.unsqueeze(1).repeat(1, self.n_heads, 1, 1)

        # context: [batch_size, n_heads, len_q, d_v]
        context, attn = self.attention(q_s, k_s, v_s, attn_mask)
        # Merge heads back: [batch_size, len_q, n_heads * d_v]
        context = context.transpose(1, 2).contiguous().view(
            batch_size, -1, self.n_heads * self.d_v)
        output = self.W_O(context)

        return self.layer_norm(output + residual), attn
class EncoderLayer(nn.Module):
    """One encoder block: self-attention followed by a position-wise FFN.

    Args:
        d_model: model width.
        d_k: per-head query/key width passed to the attention module.
        d_v: per-head value width passed to the attention module.
        d_ff: hidden width passed to the feed-forward module.
        n_heads: number of attention heads.
    """

    def __init__(self, d_model, d_k, d_v, d_ff, n_heads):
        super().__init__()
        self.enc_self_attn = MultiHeadAttention(d_model, d_k, d_v, n_heads)
        self.pos_ffn = PoswiseFeedForwardNet(d_model, d_ff)

    def forward(self, enc_inputs, enc_self_attn_mask):
        """Run the block.

        Args:
            enc_inputs: layer input; used as query, key and value alike.
            enc_self_attn_mask: mask forwarded unchanged to the attention module.

        Returns:
            ``(enc_outputs, attn)``: the feed-forward output and the attention
            weights returned by the attention module.
        """
        hidden = enc_inputs
        # Self-attention: the same tensor serves as Q, K and V.
        attended, attn = self.enc_self_attn(hidden, hidden, hidden, enc_self_attn_mask)
        return self.pos_ffn(attended), attn
if __name__ == '__main__':
    # Smoke test: build a small BERT from scratch and push one hand-written
    # example through it, printing the argmax predictions of both heads.

    # --- BERT hyper-parameters ---
    vocab_size = 1000
    n_segments = 2
    max_len = 30  # maximum sequence length
    batch_size = 6
    n_layers = 6  # number of stacked encoder layers
    n_heads = 12  # number of heads in multi-head attention
    d_model = 768  # embedding size
    d_ff = 4 * d_model  # feed-forward dimension
    d_k = d_v = d_model // n_heads  # per-head dimension of K(=Q) and V

    model = BERT(d_model, d_k, d_v, d_ff, vocab_size, max_len, n_segments, n_heads, n_layers)

    # One sequence of 12 tokens; id 0 is padding.
    input_ids = torch.tensor([[1, 2, 3, 4, 5, 1, 2, 1, 2, 0, 0, 0]])
    segment_ids = torch.tensor([[0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0]])
    masked_pos = torch.tensor([[6, 0, 0, 0, 0]])

    logits_lm, logits_clf = model(input_ids, segment_ids, masked_pos)

    # Index of the highest-scoring vocabulary entry per masked position (first sample).
    _, lm_indices = logits_lm.data.max(2)
    logits_lm = lm_indices[0].data.numpy()
    # Index of the highest-scoring class for the first sample.
    _, clf_indices = logits_clf.data.max(1)
    logits_clf = clf_indices.data.numpy()[0]

    print(logits_lm)
    print(logits_clf)
def set_seed(seed=123):
    """Seed every random number generator the experiments rely on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and ``CUBLAS_WORKSPACE_CONFIG``, and puts cuDNN
    into deterministic, non-benchmarking mode.

    Args:
        seed: integer seed applied to all generators.
    """
    # Environment variables consulted by Python hashing and cuBLAS.
    os.environ.update({
        "PYTHONHASHSEED": str(seed),
        "CUBLAS_WORKSPACE_CONFIG": ":4096:2",
    })

    # Same seed for every generator.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)

    # Stricter alternatives, intentionally left disabled:
    # torch.use_deterministic_algorithms(True)
    # torch.backends.cudnn.enabled = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
53 | 54 | #### 模型部分 55 | 56 | 取bert模型的输出中`[CLS]`对应的隐藏层,作为二分类全连接的输入。 57 | 58 | 模型定义如下: 59 | 60 | ```python 61 | import torch 62 | import torch.nn as nn 63 | from transformers import BertModel 64 | 65 | class BertClassifier(nn.Module): 66 | def __init__(self, freeze_bert=False): 67 | super(BertClassifier, self).__init__() 68 | D_in, H, D_out = 768, HIDDEN_DIM, 2 69 | # Instantiate BERT model 70 | self.bert = BertModel.from_pretrained('bert-base-uncased') 71 | 72 | self.classifier = nn.Sequential( 73 | nn.Linear(D_in, H), 74 | nn.ReLU(), 75 | # nn.Dropout(0.5), 76 | nn.Linear(H, D_out) 77 | ) 78 | 79 | if freeze_bert: 80 | for param in self.bert.parameters(): 81 | param.requires_grad = False 82 | 83 | def forward(self, input_ids, attention_mask): 84 | # Feed input to BERT 85 | outputs = self.bert(input_ids=input_ids, 86 | attention_mask=attention_mask) 87 | 88 | # Extract the last hidden state of the token `[CLS]` for classification task 89 | last_hidden_state_cls = outputs[0][:, 0, :] 90 | 91 | # Feed input to classifier to compute logits 92 | logits = self.classifier(last_hidden_state_cls) 93 | return logits 94 | ``` 95 | 96 | ### 结果及分析 97 | 98 | 本实验的可选参数为 99 | 100 | ```python 101 | HIDDEN_DIM = 32 # 最后的全连接层的隐藏层维度 102 | BATCH_SIZE = 8 103 | N_EPOCHS = 10 104 | MAX_LEN = 256 # 序列的最大长度 105 | LEARNING_RATE = 1e-5 106 | FREEZE_BERT = False # 是否固定bert的参数 107 | ``` 108 | 109 | 此外,本实验固定随机种子: 110 | 111 | ```python 112 | import torch 113 | import random 114 | import os 115 | import numpy as np 116 | 117 | def set_seed(seed=123): 118 | random.seed(seed) 119 | np.random.seed(seed) 120 | os.environ["PYTHONHASHSEED"] = str(seed) 121 | torch.manual_seed(seed) 122 | torch.cuda.manual_seed_all(seed) 123 | # torch.use_deterministic_algorithms(True) 124 | # torch.backends.cudnn.enabled = False 125 | torch.backends.cudnn.benchmark = False 126 | torch.backends.cudnn.deterministic = True 127 | os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2" 128 | 129 | set_seed(2077) 130 | ``` 131 | 
132 | 设置`FREEZE_BERT`为 `True` 和 `False`, 分别训练模型。每轮训练结束时在验证集上验证,并保存`val_loss`最低的模型用于测试,得到的测试集准确率(%)如下表, RNN和LSTM的结果来自于上一次实验的最好结果: 133 | 134 | |模型|RNN| LSTM| BERT(Freeze) | BERT(no Freeze) | 135 | |:---:|:---:|:---:|:---:|:---:| 136 | | acc |77.87|86.62| 84.58 | 93.53 | 137 | 138 | 可以看到,如果固定BERT的参数,只训练最后的全连接层,结果与LSTM差异不大,但如果BERT的参数也进行微调,能够得到远好于 LSTM 的结果。 139 | 140 | 结果分析:当固定BERT的参数时,BERT 相当于是一个固定的 Embedding 层, 而且这个Embedding 是在大量的语料上训练得到的,可能无法准确地表达我们的任务情景下的语义。而当我们对 BERT的参数也进行微调时,可以使得这种语义的表达更加确切。 141 | 142 | ## 实验总结 143 | 144 | Transformer采用了相较于循环神经网络完全不同的机制:Attention, 不仅能有效地解决循环神经网络无法并行计算的问题,而且能够引入任意距离的依赖关系,在应用到文本这种序列化且有上下文依赖关系的数据时有很好的效果。BERT堆叠了若干transformer encoder结构,并使用完形填空,句子预测的预训练方式,在许多任务上都取得了 SOTA 结果。 145 | 146 | 本实验采用预训练的 BERT 模型进行情感分析的二分类任务,在提供的数据集上进行微调,取得了远好于LSTM的结果,充分展现了attention机制以及BERT预训练模型的强大。 -------------------------------------------------------------------------------- /exp4/实验四:BERT.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四:BERT.pdf -------------------------------------------------------------------------------- /exp4/实验四:BERT/image-20210605154503374.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四:BERT/image-20210605154503374.png -------------------------------------------------------------------------------- /exp4/实验四:BERT/image-20210605154926294.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四:BERT/image-20210605154926294.png -------------------------------------------------------------------------------- 
/exp4/实验四:BERT/image-20210605154945286.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四:BERT/image-20210605154945286.png -------------------------------------------------------------------------------- /exp4/实验四:BERT/image-20210605160746073.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四:BERT/image-20210605160746073.png -------------------------------------------------------------------------------- /exp4/实验四:BERT/image-20210605163538200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四:BERT/image-20210605163538200.png -------------------------------------------------------------------------------- /exp4/实验四:BERT/image-20210605163557321.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四:BERT/image-20210605163557321.png -------------------------------------------------------------------------------- /exp4/实验四:BERT/image-20210605163641264.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四:BERT/image-20210605163641264.png -------------------------------------------------------------------------------- /exp4/实验四:BERT/image-20210605163743489.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四:BERT/image-20210605163743489.png -------------------------------------------------------------------------------- /exp5/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.8.1 2 | numpy==1.19.2 3 | torch_geometric==1.7.1 4 | pandas==1.2.0 5 | -------------------------------------------------------------------------------- /exp5/result.csv: -------------------------------------------------------------------------------- 1 | data_name,add_self_loop,n_layers,drop_edge,pari_norm,activations,test_acc 2 | citeseer,True,1,0.0,True,relu,0.443 3 | citeseer,True,1,0.0,True,tanh,0.443 4 | citeseer,True,1,0.0,True,sigmoid,0.443 5 | citeseer,True,1,0.0,False,relu,0.68 6 | citeseer,True,1,0.0,False,tanh,0.68 7 | citeseer,True,1,0.0,False,sigmoid,0.68 8 | citeseer,True,1,0.1,True,relu,0.449 9 | citeseer,True,1,0.1,True,tanh,0.449 10 | citeseer,True,1,0.1,True,sigmoid,0.449 11 | citeseer,True,1,0.1,False,relu,0.68 12 | citeseer,True,1,0.1,False,tanh,0.68 13 | citeseer,True,1,0.1,False,sigmoid,0.68 14 | citeseer,True,1,0.2,True,relu,0.451 15 | citeseer,True,1,0.2,True,tanh,0.451 16 | citeseer,True,1,0.2,True,sigmoid,0.451 17 | citeseer,True,1,0.2,False,relu,0.653 18 | citeseer,True,1,0.2,False,tanh,0.653 19 | citeseer,True,1,0.2,False,sigmoid,0.653 20 | citeseer,True,1,0.3,True,relu,0.431 21 | citeseer,True,1,0.3,True,tanh,0.431 22 | citeseer,True,1,0.3,True,sigmoid,0.431 23 | citeseer,True,1,0.3,False,relu,0.661 24 | citeseer,True,1,0.3,False,tanh,0.661 25 | citeseer,True,1,0.3,False,sigmoid,0.661 26 | citeseer,True,1,0.5,True,relu,0.426 27 | citeseer,True,1,0.5,True,tanh,0.426 28 | citeseer,True,1,0.5,True,sigmoid,0.426 29 | citeseer,True,1,0.5,False,relu,0.62 30 | citeseer,True,1,0.5,False,tanh,0.62 31 | 
citeseer,True,1,0.5,False,sigmoid,0.62 32 | citeseer,True,2,0.0,True,relu,0.526 33 | citeseer,True,2,0.0,True,tanh,0.547 34 | citeseer,True,2,0.0,True,sigmoid,0.454 35 | citeseer,True,2,0.0,False,relu,0.685 36 | citeseer,True,2,0.0,False,tanh,0.683 37 | citeseer,True,2,0.0,False,sigmoid,0.207 38 | citeseer,True,2,0.1,True,relu,0.535 39 | citeseer,True,2,0.1,True,tanh,0.556 40 | citeseer,True,2,0.1,True,sigmoid,0.456 41 | citeseer,True,2,0.1,False,relu,0.683 42 | citeseer,True,2,0.1,False,tanh,0.681 43 | citeseer,True,2,0.1,False,sigmoid,0.207 44 | citeseer,True,2,0.2,True,relu,0.527 45 | citeseer,True,2,0.2,True,tanh,0.545 46 | citeseer,True,2,0.2,True,sigmoid,0.451 47 | citeseer,True,2,0.2,False,relu,0.671 48 | citeseer,True,2,0.2,False,tanh,0.676 49 | citeseer,True,2,0.2,False,sigmoid,0.207 50 | citeseer,True,2,0.3,True,relu,0.524 51 | citeseer,True,2,0.3,True,tanh,0.544 52 | citeseer,True,2,0.3,True,sigmoid,0.439 53 | citeseer,True,2,0.3,False,relu,0.658 54 | citeseer,True,2,0.3,False,tanh,0.666 55 | citeseer,True,2,0.3,False,sigmoid,0.207 56 | citeseer,True,2,0.5,True,relu,0.49 57 | citeseer,True,2,0.5,True,tanh,0.53 58 | citeseer,True,2,0.5,True,sigmoid,0.435 59 | citeseer,True,2,0.5,False,relu,0.626 60 | citeseer,True,2,0.5,False,tanh,0.63 61 | citeseer,True,2,0.5,False,sigmoid,0.207 62 | citeseer,True,3,0.0,True,relu,0.568 63 | citeseer,True,3,0.0,True,tanh,0.591 64 | citeseer,True,3,0.0,True,sigmoid,0.439 65 | citeseer,True,3,0.0,False,relu,0.645 66 | citeseer,True,3,0.0,False,tanh,0.667 67 | citeseer,True,3,0.0,False,sigmoid,0.207 68 | citeseer,True,3,0.1,True,relu,0.56 69 | citeseer,True,3,0.1,True,tanh,0.576 70 | citeseer,True,3,0.1,True,sigmoid,0.427 71 | citeseer,True,3,0.1,False,relu,0.671 72 | citeseer,True,3,0.1,False,tanh,0.66 73 | citeseer,True,3,0.1,False,sigmoid,0.202 74 | citeseer,True,3,0.2,True,relu,0.571 75 | citeseer,True,3,0.2,True,tanh,0.588 76 | citeseer,True,3,0.2,True,sigmoid,0.432 77 | citeseer,True,3,0.2,False,relu,0.655 78 | 
citeseer,True,3,0.2,False,tanh,0.647 79 | citeseer,True,3,0.2,False,sigmoid,0.208 80 | citeseer,True,3,0.3,True,relu,0.564 81 | citeseer,True,3,0.3,True,tanh,0.582 82 | citeseer,True,3,0.3,True,sigmoid,0.423 83 | citeseer,True,3,0.3,False,relu,0.663 84 | citeseer,True,3,0.3,False,tanh,0.623 85 | citeseer,True,3,0.3,False,sigmoid,0.198 86 | citeseer,True,3,0.5,True,relu,0.543 87 | citeseer,True,3,0.5,True,tanh,0.578 88 | citeseer,True,3,0.5,True,sigmoid,0.362 89 | citeseer,True,3,0.5,False,relu,0.609 90 | citeseer,True,3,0.5,False,tanh,0.647 91 | citeseer,True,3,0.5,False,sigmoid,0.193 92 | citeseer,True,5,0.0,True,relu,0.545 93 | citeseer,True,5,0.0,True,tanh,0.58 94 | citeseer,True,5,0.0,True,sigmoid,0.246 95 | citeseer,True,5,0.0,False,relu,0.522 96 | citeseer,True,5,0.0,False,tanh,0.588 97 | citeseer,True,5,0.0,False,sigmoid,0.207 98 | citeseer,True,5,0.1,True,relu,0.47 99 | citeseer,True,5,0.1,True,tanh,0.576 100 | citeseer,True,5,0.1,True,sigmoid,0.231 101 | citeseer,True,5,0.1,False,relu,0.351 102 | citeseer,True,5,0.1,False,tanh,0.613 103 | citeseer,True,5,0.1,False,sigmoid,0.207 104 | citeseer,True,5,0.2,True,relu,0.471 105 | citeseer,True,5,0.2,True,tanh,0.567 106 | citeseer,True,5,0.2,True,sigmoid,0.258 107 | citeseer,True,5,0.2,False,relu,0.182 108 | citeseer,True,5,0.2,False,tanh,0.6 109 | citeseer,True,5,0.2,False,sigmoid,0.207 110 | citeseer,True,5,0.3,True,relu,0.457 111 | citeseer,True,5,0.3,True,tanh,0.57 112 | citeseer,True,5,0.3,True,sigmoid,0.255 113 | citeseer,True,5,0.3,False,relu,0.201 114 | citeseer,True,5,0.3,False,tanh,0.567 115 | citeseer,True,5,0.3,False,sigmoid,0.207 116 | citeseer,True,5,0.5,True,relu,0.391 117 | citeseer,True,5,0.5,True,tanh,0.536 118 | citeseer,True,5,0.5,True,sigmoid,0.22 119 | citeseer,True,5,0.5,False,relu,0.188 120 | citeseer,True,5,0.5,False,tanh,0.542 121 | citeseer,True,5,0.5,False,sigmoid,0.207 122 | citeseer,True,10,0.0,True,relu,0.3 123 | citeseer,True,10,0.0,True,tanh,0.61 124 | 
citeseer,True,10,0.0,True,sigmoid,0.197 125 | citeseer,True,10,0.0,False,relu,0.176 126 | citeseer,True,10,0.0,False,tanh,0.472 127 | citeseer,True,10,0.0,False,sigmoid,0.195 128 | citeseer,True,10,0.1,True,relu,0.245 129 | citeseer,True,10,0.1,True,tanh,0.562 130 | citeseer,True,10,0.1,True,sigmoid,0.198 131 | citeseer,True,10,0.1,False,relu,0.18 132 | citeseer,True,10,0.1,False,tanh,0.207 133 | citeseer,True,10,0.1,False,sigmoid,0.146 134 | citeseer,True,10,0.2,True,relu,0.218 135 | citeseer,True,10,0.2,True,tanh,0.49 136 | citeseer,True,10,0.2,True,sigmoid,0.209 137 | citeseer,True,10,0.2,False,relu,0.207 138 | citeseer,True,10,0.2,False,tanh,0.228 139 | citeseer,True,10,0.2,False,sigmoid,0.143 140 | citeseer,True,10,0.3,True,relu,0.24 141 | citeseer,True,10,0.3,True,tanh,0.488 142 | citeseer,True,10,0.3,True,sigmoid,0.18 143 | citeseer,True,10,0.3,False,relu,0.182 144 | citeseer,True,10,0.3,False,tanh,0.2 145 | citeseer,True,10,0.3,False,sigmoid,0.195 146 | citeseer,True,10,0.5,True,relu,0.213 147 | citeseer,True,10,0.5,True,tanh,0.306 148 | citeseer,True,10,0.5,True,sigmoid,0.196 149 | citeseer,True,10,0.5,False,relu,0.182 150 | citeseer,True,10,0.5,False,tanh,0.2 151 | citeseer,True,10,0.5,False,sigmoid,0.207 152 | citeseer,False,1,0.0,True,relu,0.372 153 | citeseer,False,1,0.0,True,tanh,0.372 154 | citeseer,False,1,0.0,True,sigmoid,0.372 155 | citeseer,False,1,0.0,False,relu,0.619 156 | citeseer,False,1,0.0,False,tanh,0.619 157 | citeseer,False,1,0.0,False,sigmoid,0.619 158 | citeseer,False,1,0.1,True,relu,0.359 159 | citeseer,False,1,0.1,True,tanh,0.359 160 | citeseer,False,1,0.1,True,sigmoid,0.359 161 | citeseer,False,1,0.1,False,relu,0.583 162 | citeseer,False,1,0.1,False,tanh,0.583 163 | citeseer,False,1,0.1,False,sigmoid,0.583 164 | citeseer,False,1,0.2,True,relu,0.345 165 | citeseer,False,1,0.2,True,tanh,0.345 166 | citeseer,False,1,0.2,True,sigmoid,0.345 167 | citeseer,False,1,0.2,False,relu,0.552 168 | citeseer,False,1,0.2,False,tanh,0.552 169 | 
citeseer,False,1,0.2,False,sigmoid,0.552 170 | citeseer,False,1,0.3,True,relu,0.31 171 | citeseer,False,1,0.3,True,tanh,0.31 172 | citeseer,False,1,0.3,True,sigmoid,0.31 173 | citeseer,False,1,0.3,False,relu,0.53 174 | citeseer,False,1,0.3,False,tanh,0.53 175 | citeseer,False,1,0.3,False,sigmoid,0.53 176 | citeseer,False,1,0.5,True,relu,0.283 177 | citeseer,False,1,0.5,True,tanh,0.283 178 | citeseer,False,1,0.5,True,sigmoid,0.283 179 | citeseer,False,1,0.5,False,relu,0.437 180 | citeseer,False,1,0.5,False,tanh,0.437 181 | citeseer,False,1,0.5,False,sigmoid,0.437 182 | citeseer,False,2,0.0,True,relu,0.522 183 | citeseer,False,2,0.0,True,tanh,0.553 184 | citeseer,False,2,0.0,True,sigmoid,0.364 185 | citeseer,False,2,0.0,False,relu,0.667 186 | citeseer,False,2,0.0,False,tanh,0.665 187 | citeseer,False,2,0.0,False,sigmoid,0.207 188 | citeseer,False,2,0.1,True,relu,0.505 189 | citeseer,False,2,0.1,True,tanh,0.523 190 | citeseer,False,2,0.1,True,sigmoid,0.321 191 | citeseer,False,2,0.1,False,relu,0.645 192 | citeseer,False,2,0.1,False,tanh,0.634 193 | citeseer,False,2,0.1,False,sigmoid,0.205 194 | citeseer,False,2,0.2,True,relu,0.476 195 | citeseer,False,2,0.2,True,tanh,0.5 196 | citeseer,False,2,0.2,True,sigmoid,0.327 197 | citeseer,False,2,0.2,False,relu,0.594 198 | citeseer,False,2,0.2,False,tanh,0.605 199 | citeseer,False,2,0.2,False,sigmoid,0.209 200 | citeseer,False,2,0.3,True,relu,0.469 201 | citeseer,False,2,0.3,True,tanh,0.478 202 | citeseer,False,2,0.3,True,sigmoid,0.355 203 | citeseer,False,2,0.3,False,relu,0.583 204 | citeseer,False,2,0.3,False,tanh,0.557 205 | citeseer,False,2,0.3,False,sigmoid,0.211 206 | citeseer,False,2,0.5,True,relu,0.349 207 | citeseer,False,2,0.5,True,tanh,0.425 208 | citeseer,False,2,0.5,True,sigmoid,0.311 209 | citeseer,False,2,0.5,False,relu,0.324 210 | citeseer,False,2,0.5,False,tanh,0.466 211 | citeseer,False,2,0.5,False,sigmoid,0.137 212 | citeseer,False,3,0.0,True,relu,0.551 213 | citeseer,False,3,0.0,True,tanh,0.547 214 | 
citeseer,False,3,0.0,True,sigmoid,0.301 215 | citeseer,False,3,0.0,False,relu,0.628 216 | citeseer,False,3,0.0,False,tanh,0.63 217 | citeseer,False,3,0.0,False,sigmoid,0.187 218 | citeseer,False,3,0.1,True,relu,0.501 219 | citeseer,False,3,0.1,True,tanh,0.54 220 | citeseer,False,3,0.1,True,sigmoid,0.262 221 | citeseer,False,3,0.1,False,relu,0.571 222 | citeseer,False,3,0.1,False,tanh,0.602 223 | citeseer,False,3,0.1,False,sigmoid,0.188 224 | citeseer,False,3,0.2,True,relu,0.497 225 | citeseer,False,3,0.2,True,tanh,0.526 226 | citeseer,False,3,0.2,True,sigmoid,0.25 227 | citeseer,False,3,0.2,False,relu,0.574 228 | citeseer,False,3,0.2,False,tanh,0.577 229 | citeseer,False,3,0.2,False,sigmoid,0.188 230 | citeseer,False,3,0.3,True,relu,0.474 231 | citeseer,False,3,0.3,True,tanh,0.484 232 | citeseer,False,3,0.3,True,sigmoid,0.224 233 | citeseer,False,3,0.3,False,relu,0.505 234 | citeseer,False,3,0.3,False,tanh,0.53 235 | citeseer,False,3,0.3,False,sigmoid,0.208 236 | citeseer,False,3,0.5,True,relu,0.397 237 | citeseer,False,3,0.5,True,tanh,0.373 238 | citeseer,False,3,0.5,True,sigmoid,0.237 239 | citeseer,False,3,0.5,False,relu,0.313 240 | citeseer,False,3,0.5,False,tanh,0.419 241 | citeseer,False,3,0.5,False,sigmoid,0.198 242 | citeseer,False,5,0.0,True,relu,0.473 243 | citeseer,False,5,0.0,True,tanh,0.573 244 | citeseer,False,5,0.0,True,sigmoid,0.21 245 | citeseer,False,5,0.0,False,relu,0.513 246 | citeseer,False,5,0.0,False,tanh,0.576 247 | citeseer,False,5,0.0,False,sigmoid,0.207 248 | citeseer,False,5,0.1,True,relu,0.437 249 | citeseer,False,5,0.1,True,tanh,0.53 250 | citeseer,False,5,0.1,True,sigmoid,0.178 251 | citeseer,False,5,0.1,False,relu,0.326 252 | citeseer,False,5,0.1,False,tanh,0.533 253 | citeseer,False,5,0.1,False,sigmoid,0.206 254 | citeseer,False,5,0.2,True,relu,0.425 255 | citeseer,False,5,0.2,True,tanh,0.521 256 | citeseer,False,5,0.2,True,sigmoid,0.175 257 | citeseer,False,5,0.2,False,relu,0.169 258 | citeseer,False,5,0.2,False,tanh,0.477 259 | 
citeseer,False,5,0.2,False,sigmoid,0.204 260 | citeseer,False,5,0.3,True,relu,0.428 261 | citeseer,False,5,0.3,True,tanh,0.489 262 | citeseer,False,5,0.3,True,sigmoid,0.18 263 | citeseer,False,5,0.3,False,relu,0.161 264 | citeseer,False,5,0.3,False,tanh,0.506 265 | citeseer,False,5,0.3,False,sigmoid,0.194 266 | citeseer,False,5,0.5,True,relu,0.308 267 | citeseer,False,5,0.5,True,tanh,0.395 268 | citeseer,False,5,0.5,True,sigmoid,0.173 269 | citeseer,False,5,0.5,False,relu,0.188 270 | citeseer,False,5,0.5,False,tanh,0.399 271 | citeseer,False,5,0.5,False,sigmoid,0.183 272 | citeseer,False,10,0.0,True,relu,0.231 273 | citeseer,False,10,0.0,True,tanh,0.517 274 | citeseer,False,10,0.0,True,sigmoid,0.208 275 | citeseer,False,10,0.0,False,relu,0.183 276 | citeseer,False,10,0.0,False,tanh,0.416 277 | citeseer,False,10,0.0,False,sigmoid,0.172 278 | citeseer,False,10,0.1,True,relu,0.227 279 | citeseer,False,10,0.1,True,tanh,0.458 280 | citeseer,False,10,0.1,True,sigmoid,0.206 281 | citeseer,False,10,0.1,False,relu,0.201 282 | citeseer,False,10,0.1,False,tanh,0.207 283 | citeseer,False,10,0.1,False,sigmoid,0.151 284 | citeseer,False,10,0.2,True,relu,0.238 285 | citeseer,False,10,0.2,True,tanh,0.397 286 | citeseer,False,10,0.2,True,sigmoid,0.199 287 | citeseer,False,10,0.2,False,relu,0.192 288 | citeseer,False,10,0.2,False,tanh,0.207 289 | citeseer,False,10,0.2,False,sigmoid,0.155 290 | citeseer,False,10,0.3,True,relu,0.182 291 | citeseer,False,10,0.3,True,tanh,0.406 292 | citeseer,False,10,0.3,True,sigmoid,0.196 293 | citeseer,False,10,0.3,False,relu,0.158 294 | citeseer,False,10,0.3,False,tanh,0.193 295 | citeseer,False,10,0.3,False,sigmoid,0.169 296 | citeseer,False,10,0.5,True,relu,0.161 297 | citeseer,False,10,0.5,True,tanh,0.327 298 | citeseer,False,10,0.5,True,sigmoid,0.176 299 | citeseer,False,10,0.5,False,relu,0.189 300 | citeseer,False,10,0.5,False,tanh,0.231 301 | citeseer,False,10,0.5,False,sigmoid,0.198 302 | cora,True,1,0.0,True,relu,0.562 303 | 
cora,True,1,0.0,True,tanh,0.562 304 | cora,True,1,0.0,True,sigmoid,0.562 305 | cora,True,1,0.0,False,relu,0.72 306 | cora,True,1,0.0,False,tanh,0.72 307 | cora,True,1,0.0,False,sigmoid,0.72 308 | cora,True,1,0.1,True,relu,0.547 309 | cora,True,1,0.1,True,tanh,0.547 310 | cora,True,1,0.1,True,sigmoid,0.547 311 | cora,True,1,0.1,False,relu,0.711 312 | cora,True,1,0.1,False,tanh,0.711 313 | cora,True,1,0.1,False,sigmoid,0.711 314 | cora,True,1,0.2,True,relu,0.568 315 | cora,True,1,0.2,True,tanh,0.568 316 | cora,True,1,0.2,True,sigmoid,0.568 317 | cora,True,1,0.2,False,relu,0.696 318 | cora,True,1,0.2,False,tanh,0.696 319 | cora,True,1,0.2,False,sigmoid,0.696 320 | cora,True,1,0.3,True,relu,0.553 321 | cora,True,1,0.3,True,tanh,0.553 322 | cora,True,1,0.3,True,sigmoid,0.553 323 | cora,True,1,0.3,False,relu,0.693 324 | cora,True,1,0.3,False,tanh,0.693 325 | cora,True,1,0.3,False,sigmoid,0.693 326 | cora,True,1,0.5,True,relu,0.533 327 | cora,True,1,0.5,True,tanh,0.533 328 | cora,True,1,0.5,True,sigmoid,0.533 329 | cora,True,1,0.5,False,relu,0.672 330 | cora,True,1,0.5,False,tanh,0.672 331 | cora,True,1,0.5,False,sigmoid,0.672 332 | cora,True,2,0.0,True,relu,0.666 333 | cora,True,2,0.0,True,tanh,0.686 334 | cora,True,2,0.0,True,sigmoid,0.587 335 | cora,True,2,0.0,False,relu,0.797 336 | cora,True,2,0.0,False,tanh,0.792 337 | cora,True,2,0.0,False,sigmoid,0.1 338 | cora,True,2,0.1,True,relu,0.664 339 | cora,True,2,0.1,True,tanh,0.684 340 | cora,True,2,0.1,True,sigmoid,0.601 341 | cora,True,2,0.1,False,relu,0.796 342 | cora,True,2,0.1,False,tanh,0.789 343 | cora,True,2,0.1,False,sigmoid,0.1 344 | cora,True,2,0.2,True,relu,0.663 345 | cora,True,2,0.2,True,tanh,0.666 346 | cora,True,2,0.2,True,sigmoid,0.602 347 | cora,True,2,0.2,False,relu,0.773 348 | cora,True,2,0.2,False,tanh,0.776 349 | cora,True,2,0.2,False,sigmoid,0.1 350 | cora,True,2,0.3,True,relu,0.644 351 | cora,True,2,0.3,True,tanh,0.654 352 | cora,True,2,0.3,True,sigmoid,0.58 353 | cora,True,2,0.3,False,relu,0.764 
354 | cora,True,2,0.3,False,tanh,0.756 355 | cora,True,2,0.3,False,sigmoid,0.1 356 | cora,True,2,0.5,True,relu,0.617 357 | cora,True,2,0.5,True,tanh,0.626 358 | cora,True,2,0.5,True,sigmoid,0.511 359 | cora,True,2,0.5,False,relu,0.734 360 | cora,True,2,0.5,False,tanh,0.742 361 | cora,True,2,0.5,False,sigmoid,0.101 362 | cora,True,3,0.0,True,relu,0.698 363 | cora,True,3,0.0,True,tanh,0.691 364 | cora,True,3,0.0,True,sigmoid,0.472 365 | cora,True,3,0.0,False,relu,0.776 366 | cora,True,3,0.0,False,tanh,0.779 367 | cora,True,3,0.0,False,sigmoid,0.297 368 | cora,True,3,0.1,True,relu,0.687 369 | cora,True,3,0.1,True,tanh,0.683 370 | cora,True,3,0.1,True,sigmoid,0.428 371 | cora,True,3,0.1,False,relu,0.783 372 | cora,True,3,0.1,False,tanh,0.761 373 | cora,True,3,0.1,False,sigmoid,0.271 374 | cora,True,3,0.2,True,relu,0.685 375 | cora,True,3,0.2,True,tanh,0.687 376 | cora,True,3,0.2,True,sigmoid,0.36 377 | cora,True,3,0.2,False,relu,0.755 378 | cora,True,3,0.2,False,tanh,0.757 379 | cora,True,3,0.2,False,sigmoid,0.3 380 | cora,True,3,0.3,True,relu,0.663 381 | cora,True,3,0.3,True,tanh,0.671 382 | cora,True,3,0.3,True,sigmoid,0.352 383 | cora,True,3,0.3,False,relu,0.736 384 | cora,True,3,0.3,False,tanh,0.743 385 | cora,True,3,0.3,False,sigmoid,0.311 386 | cora,True,3,0.5,True,relu,0.639 387 | cora,True,3,0.5,True,tanh,0.622 388 | cora,True,3,0.5,True,sigmoid,0.365 389 | cora,True,3,0.5,False,relu,0.699 390 | cora,True,3,0.5,False,tanh,0.714 391 | cora,True,3,0.5,False,sigmoid,0.31 392 | cora,True,5,0.0,True,relu,0.711 393 | cora,True,5,0.0,True,tanh,0.711 394 | cora,True,5,0.0,True,sigmoid,0.24 395 | cora,True,5,0.0,False,relu,0.161 396 | cora,True,5,0.0,False,tanh,0.746 397 | cora,True,5,0.0,False,sigmoid,0.182 398 | cora,True,5,0.1,True,relu,0.688 399 | cora,True,5,0.1,True,tanh,0.682 400 | cora,True,5,0.1,True,sigmoid,0.225 401 | cora,True,5,0.1,False,relu,0.09 402 | cora,True,5,0.1,False,tanh,0.735 403 | cora,True,5,0.1,False,sigmoid,0.303 404 | 
cora,True,5,0.2,True,relu,0.694 405 | cora,True,5,0.2,True,tanh,0.683 406 | cora,True,5,0.2,True,sigmoid,0.213 407 | cora,True,5,0.2,False,relu,0.123 408 | cora,True,5,0.2,False,tanh,0.734 409 | cora,True,5,0.2,False,sigmoid,0.303 410 | cora,True,5,0.3,True,relu,0.677 411 | cora,True,5,0.3,True,tanh,0.637 412 | cora,True,5,0.3,True,sigmoid,0.209 413 | cora,True,5,0.3,False,relu,0.162 414 | cora,True,5,0.3,False,tanh,0.723 415 | cora,True,5,0.3,False,sigmoid,0.303 416 | cora,True,5,0.5,True,relu,0.621 417 | cora,True,5,0.5,True,tanh,0.622 418 | cora,True,5,0.5,True,sigmoid,0.197 419 | cora,True,5,0.5,False,relu,0.156 420 | cora,True,5,0.5,False,tanh,0.653 421 | cora,True,5,0.5,False,sigmoid,0.302 422 | cora,True,10,0.0,True,relu,0.603 423 | cora,True,10,0.0,True,tanh,0.607 424 | cora,True,10,0.0,True,sigmoid,0.069 425 | cora,True,10,0.0,False,relu,0.1 426 | cora,True,10,0.0,False,tanh,0.667 427 | cora,True,10,0.0,False,sigmoid,0.302 428 | cora,True,10,0.1,True,relu,0.396 429 | cora,True,10,0.1,True,tanh,0.622 430 | cora,True,10,0.1,True,sigmoid,0.075 431 | cora,True,10,0.1,False,relu,0.129 432 | cora,True,10,0.1,False,tanh,0.665 433 | cora,True,10,0.1,False,sigmoid,0.303 434 | cora,True,10,0.2,True,relu,0.373 435 | cora,True,10,0.2,True,tanh,0.633 436 | cora,True,10,0.2,True,sigmoid,0.072 437 | cora,True,10,0.2,False,relu,0.089 438 | cora,True,10,0.2,False,tanh,0.639 439 | cora,True,10,0.2,False,sigmoid,0.303 440 | cora,True,10,0.3,True,relu,0.218 441 | cora,True,10,0.3,True,tanh,0.453 442 | cora,True,10,0.3,True,sigmoid,0.077 443 | cora,True,10,0.3,False,relu,0.182 444 | cora,True,10,0.3,False,tanh,0.574 445 | cora,True,10,0.3,False,sigmoid,0.303 446 | cora,True,10,0.5,True,relu,0.114 447 | cora,True,10,0.5,True,tanh,0.299 448 | cora,True,10,0.5,True,sigmoid,0.067 449 | cora,True,10,0.5,False,relu,0.293 450 | cora,True,10,0.5,False,tanh,0.16 451 | cora,True,10,0.5,False,sigmoid,0.303 452 | cora,False,1,0.0,True,relu,0.523 453 | cora,False,1,0.0,True,tanh,0.523 454 
| cora,False,1,0.0,True,sigmoid,0.523 455 | cora,False,1,0.0,False,relu,0.664 456 | cora,False,1,0.0,False,tanh,0.664 457 | cora,False,1,0.0,False,sigmoid,0.664 458 | cora,False,1,0.1,True,relu,0.506 459 | cora,False,1,0.1,True,tanh,0.506 460 | cora,False,1,0.1,True,sigmoid,0.506 461 | cora,False,1,0.1,False,relu,0.659 462 | cora,False,1,0.1,False,tanh,0.659 463 | cora,False,1,0.1,False,sigmoid,0.659 464 | cora,False,1,0.2,True,relu,0.491 465 | cora,False,1,0.2,True,tanh,0.491 466 | cora,False,1,0.2,True,sigmoid,0.491 467 | cora,False,1,0.2,False,relu,0.633 468 | cora,False,1,0.2,False,tanh,0.633 469 | cora,False,1,0.2,False,sigmoid,0.633 470 | cora,False,1,0.3,True,relu,0.499 471 | cora,False,1,0.3,True,tanh,0.499 472 | cora,False,1,0.3,True,sigmoid,0.499 473 | cora,False,1,0.3,False,relu,0.628 474 | cora,False,1,0.3,False,tanh,0.628 475 | cora,False,1,0.3,False,sigmoid,0.628 476 | cora,False,1,0.5,True,relu,0.424 477 | cora,False,1,0.5,True,tanh,0.424 478 | cora,False,1,0.5,True,sigmoid,0.424 479 | cora,False,1,0.5,False,relu,0.524 480 | cora,False,1,0.5,False,tanh,0.524 481 | cora,False,1,0.5,False,sigmoid,0.524 482 | cora,False,2,0.0,True,relu,0.673 483 | cora,False,2,0.0,True,tanh,0.677 484 | cora,False,2,0.0,True,sigmoid,0.505 485 | cora,False,2,0.0,False,relu,0.781 486 | cora,False,2,0.0,False,tanh,0.78 487 | cora,False,2,0.0,False,sigmoid,0.103 488 | cora,False,2,0.1,True,relu,0.646 489 | cora,False,2,0.1,True,tanh,0.654 490 | cora,False,2,0.1,True,sigmoid,0.511 491 | cora,False,2,0.1,False,relu,0.761 492 | cora,False,2,0.1,False,tanh,0.756 493 | cora,False,2,0.1,False,sigmoid,0.1 494 | cora,False,2,0.2,True,relu,0.64 495 | cora,False,2,0.2,True,tanh,0.634 496 | cora,False,2,0.2,True,sigmoid,0.475 497 | cora,False,2,0.2,False,relu,0.749 498 | cora,False,2,0.2,False,tanh,0.751 499 | cora,False,2,0.2,False,sigmoid,0.11 500 | cora,False,2,0.3,True,relu,0.626 501 | cora,False,2,0.3,True,tanh,0.601 502 | cora,False,2,0.3,True,sigmoid,0.503 503 | 
cora,False,2,0.3,False,relu,0.696 504 | cora,False,2,0.3,False,tanh,0.697 505 | cora,False,2,0.3,False,sigmoid,0.104 506 | cora,False,2,0.5,True,relu,0.537 507 | cora,False,2,0.5,True,tanh,0.526 508 | cora,False,2,0.5,True,sigmoid,0.492 509 | cora,False,2,0.5,False,relu,0.633 510 | cora,False,2,0.5,False,tanh,0.631 511 | cora,False,2,0.5,False,sigmoid,0.103 512 | cora,False,3,0.0,True,relu,0.685 513 | cora,False,3,0.0,True,tanh,0.681 514 | cora,False,3,0.0,True,sigmoid,0.357 515 | cora,False,3,0.0,False,relu,0.761 516 | cora,False,3,0.0,False,tanh,0.757 517 | cora,False,3,0.0,False,sigmoid,0.129 518 | cora,False,3,0.1,True,relu,0.673 519 | cora,False,3,0.1,True,tanh,0.677 520 | cora,False,3,0.1,True,sigmoid,0.307 521 | cora,False,3,0.1,False,relu,0.733 522 | cora,False,3,0.1,False,tanh,0.743 523 | cora,False,3,0.1,False,sigmoid,0.142 524 | cora,False,3,0.2,True,relu,0.661 525 | cora,False,3,0.2,True,tanh,0.643 526 | cora,False,3,0.2,True,sigmoid,0.315 527 | cora,False,3,0.2,False,relu,0.699 528 | cora,False,3,0.2,False,tanh,0.715 529 | cora,False,3,0.2,False,sigmoid,0.145 530 | cora,False,3,0.3,True,relu,0.654 531 | cora,False,3,0.3,True,tanh,0.625 532 | cora,False,3,0.3,True,sigmoid,0.243 533 | cora,False,3,0.3,False,relu,0.68 534 | cora,False,3,0.3,False,tanh,0.695 535 | cora,False,3,0.3,False,sigmoid,0.159 536 | cora,False,3,0.5,True,relu,0.598 537 | cora,False,3,0.5,True,tanh,0.567 538 | cora,False,3,0.5,True,sigmoid,0.253 539 | cora,False,3,0.5,False,relu,0.587 540 | cora,False,3,0.5,False,tanh,0.619 541 | cora,False,3,0.5,False,sigmoid,0.172 542 | cora,False,5,0.0,True,relu,0.699 543 | cora,False,5,0.0,True,tanh,0.716 544 | cora,False,5,0.0,True,sigmoid,0.206 545 | cora,False,5,0.0,False,relu,0.095 546 | cora,False,5,0.0,False,tanh,0.735 547 | cora,False,5,0.0,False,sigmoid,0.208 548 | cora,False,5,0.1,True,relu,0.69 549 | cora,False,5,0.1,True,tanh,0.687 550 | cora,False,5,0.1,True,sigmoid,0.195 551 | cora,False,5,0.1,False,relu,0.092 552 | 
cora,False,5,0.1,False,tanh,0.718 553 | cora,False,5,0.1,False,sigmoid,0.123 554 | cora,False,5,0.2,True,relu,0.653 555 | cora,False,5,0.2,True,tanh,0.687 556 | cora,False,5,0.2,True,sigmoid,0.191 557 | cora,False,5,0.2,False,relu,0.167 558 | cora,False,5,0.2,False,tanh,0.694 559 | cora,False,5,0.2,False,sigmoid,0.131 560 | cora,False,5,0.3,True,relu,0.605 561 | cora,False,5,0.3,True,tanh,0.644 562 | cora,False,5,0.3,True,sigmoid,0.205 563 | cora,False,5,0.3,False,relu,0.157 564 | cora,False,5,0.3,False,tanh,0.082 565 | cora,False,5,0.3,False,sigmoid,0.142 566 | cora,False,5,0.5,True,relu,0.499 567 | cora,False,5,0.5,True,tanh,0.509 568 | cora,False,5,0.5,True,sigmoid,0.211 569 | cora,False,5,0.5,False,relu,0.149 570 | cora,False,5,0.5,False,tanh,0.06 571 | cora,False,5,0.5,False,sigmoid,0.175 572 | cora,False,10,0.0,True,relu,0.108 573 | cora,False,10,0.0,True,tanh,0.51 574 | cora,False,10,0.0,True,sigmoid,0.074 575 | cora,False,10,0.0,False,relu,0.129 576 | cora,False,10,0.0,False,tanh,0.162 577 | cora,False,10,0.0,False,sigmoid,0.275 578 | cora,False,10,0.1,True,relu,0.139 579 | cora,False,10,0.1,True,tanh,0.603 580 | cora,False,10,0.1,True,sigmoid,0.197 581 | cora,False,10,0.1,False,relu,0.108 582 | cora,False,10,0.1,False,tanh,0.723 583 | cora,False,10,0.1,False,sigmoid,0.251 584 | cora,False,10,0.2,True,relu,0.136 585 | cora,False,10,0.2,True,tanh,0.577 586 | cora,False,10,0.2,True,sigmoid,0.199 587 | cora,False,10,0.2,False,relu,0.125 588 | cora,False,10,0.2,False,tanh,0.082 589 | cora,False,10,0.2,False,sigmoid,0.221 590 | cora,False,10,0.3,True,relu,0.131 591 | cora,False,10,0.3,True,tanh,0.539 592 | cora,False,10,0.3,True,sigmoid,0.07 593 | cora,False,10,0.3,False,relu,0.17 594 | cora,False,10,0.3,False,tanh,0.567 595 | cora,False,10,0.3,False,sigmoid,0.219 596 | cora,False,10,0.5,True,relu,0.15 597 | cora,False,10,0.5,True,tanh,0.299 598 | cora,False,10,0.5,True,sigmoid,0.184 599 | cora,False,10,0.5,False,relu,0.24 600 | 
# Default experiment settings; every key becomes an attribute of ``Config``.
default_cfg = {
    'data_root': './../GNN/',       # root directory of the raw datasets
    'data_name': 'cora',            # 'citeseer' or 'cora'
    'num_train_per_class': 20,      # training nodes sampled per class
    'num_val': 500,                 # number of validation nodes
    'num_test': 1000,               # number of test nodes
    'seed': 114514,
    'device': 'cuda:0',
    'epochs': 1000,
    'patience': 5,                  # early-stopping patience (epochs)
    'lr': 5e-3,
    'weight_decay': 5e-4,
    'hidden_dim': 32,
    'n_layers': 2,
    'activations': 'relu',
    'dropout': 0.5,
    'drop_edge': 0.,
    'add_self_loop': True,
    'pair_norm': False,
    'test_only': False
}


class Config(object):
    """Mutable container of experiment options, initialised from ``default_cfg``."""

    def __init__(self, ):
        # Declare every known option (as None) before the defaults are loaded,
        # so the attribute set and its order mirror ``default_cfg``.
        for option in default_cfg:
            setattr(self, option, None)
        self.reset()

    def reset(self):
        """Restore every option to the value stored in ``default_cfg``."""
        for option in default_cfg:
            setattr(self, option, default_cfg[option])

    def update(self, new_cfg):
        """Set (or add) one attribute per ``key: value`` pair of ``new_cfg``."""
        for option, value in new_cfg.items():
            self.__setattr__(option, value)
'Neural_Networks', 'Probabilistic_Methods', 10 | 'Reinforcement_Learning', 'Rule_Learning', 'Theory'] 11 | } 12 | 13 | 14 | class NodeClsDataset(InMemoryDataset): 15 | def __init__(self, root, name, num_train_per_class: int = 20, 16 | num_val: int = 500, num_test: int = 1000, transform=None): 17 | self.name = name.lower() 18 | self.num_train_per_class = num_train_per_class 19 | self.num_val = num_val 20 | self.num_test = num_test 21 | super(NodeClsDataset, self).__init__(root, transform=transform) 22 | self.data, self.slices = torch.load(self.processed_paths[0]) 23 | 24 | @property 25 | def raw_dir(self) -> str: 26 | return os.path.join(self.root, self.name) 27 | 28 | @property 29 | def processed_dir(self) -> str: 30 | return os.path.join(self.root, 'processed') 31 | 32 | @property 33 | def raw_file_names(self): 34 | return [f'{self.name}.content', f'{self.name}.cites'] 35 | 36 | @property 37 | def processed_file_names(self): 38 | return [f'{self.name}.pt'] 39 | 40 | def download(self): 41 | pass 42 | 43 | def process(self): 44 | label2index = {label: i for i, label in enumerate(classes[f'{self.name}'])} 45 | id2index, x, y = read_content(self.raw_paths[0], label2index) 46 | edge_index = read_cites(self.raw_paths[1], id2index) 47 | data = Data(x=x, y=y, edge_index=edge_index) 48 | 49 | data.train_mask = torch.zeros(data.y.size(0), dtype=torch.bool) 50 | data.val_mask = torch.zeros(data.y.size(0), dtype=torch.bool) 51 | data.test_mask = torch.zeros(data.y.size(0), dtype=torch.bool) 52 | for c in range(len(label2index)): 53 | idx = (data.y == c).nonzero(as_tuple=False).view(-1) 54 | idx = idx[torch.randperm(idx.size(0))[:self.num_train_per_class]] 55 | data.train_mask[idx] = True 56 | 57 | remaining = (~data.train_mask).nonzero(as_tuple=False).view(-1) 58 | remaining = remaining[torch.randperm(remaining.size(0))] 59 | 60 | data.val_mask[remaining[:self.num_val]] = True 61 | data.test_mask[remaining[self.num_val:self.num_val + self.num_test]] = True 62 | 63 | data, 
def _read_records(path):
    """Return the tab-separated fields of every non-blank line in ``path``.

    ``splitlines()`` is used instead of ``strip().split('\\n')[:-1]``: after
    stripping there is no trailing empty element, so slicing off the last
    item silently discarded the final real record of the file.
    """
    with open(path, 'r') as f:
        return [line.strip().split('\t')
                for line in f.read().splitlines() if line.strip()]


def read_content(content_file, label2index):
    """Parse a ``<name>.content`` file into node ids, features and labels.

    Each line is ``paper_id <TAB> attr_1 ... attr_k <TAB> label``.

    Args:
        content_file: path of the tab-separated content file.
        label2index: mapping from label string to integer class index.

    Returns:
        ``(id2index, x, y)`` where ``id2index`` maps each paper id to its row
        number, ``x`` is a float tensor with one row of attributes per paper
        and ``y`` is a ``torch.long`` tensor of class indices.

    Raises:
        KeyError: if a label is missing from ``label2index``.
    """
    id2index = {}
    x = []
    y = []
    for i, fields in enumerate(_read_records(content_file)):
        paper_id, attr, label = fields[0], fields[1:-1], fields[-1]
        id2index[paper_id] = i
        x.append([float(e) for e in attr])
        y.append(label2index[label])
    return id2index, torch.tensor(x), torch.tensor(y, dtype=torch.long)


def read_cites(cites_file, id2index):
    """Parse a ``<name>.cites`` file into an undirected ``edge_index``.

    Each line is ``cited <TAB> citing``; edges whose endpoints are not in
    ``id2index`` are skipped.

    Args:
        cites_file: path of the tab-separated citation file.
        id2index: mapping from paper id to node index (see ``read_content``).

    Returns:
        A contiguous ``torch.long`` tensor of shape ``(2, num_edges)``.
    """
    edges = []
    for cited, citing in _read_records(cites_file):
        if citing not in id2index or cited not in id2index:
            continue
        edges.append([id2index[citing], id2index[cited]])

    # reshape(-1, 2) keeps an empty edge list two-dimensional; transpose to
    # (2, E) *before* to_undirected, which expects that layout.
    edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()
    return to_undirected(edge_index).contiguous()
def evaluate(model, data, loss_fc, mode='val'):
    """Score ``model`` on the nodes selected by ``data.<mode>_mask``.

    The model is switched to eval mode and run on the whole graph without
    gradient tracking; loss and accuracy use only the masked nodes.

    Args:
        model: callable module taking ``(data.x, data.edge_index)``.
        data: object exposing ``x``, ``edge_index``, ``y`` and ``<mode>_mask``.
        loss_fc: loss function applied to ``(logits[mask], y[mask])``.
        mode: prefix of the mask attribute to use (default ``'val'``).

    Returns:
        ``(loss, acc)``: the loss value returned by ``loss_fc`` and the mean
        accuracy over the masked nodes.
    """
    model.eval()
    with torch.no_grad():
        scores = model(data.x, data.edge_index)
        selected = getattr(data, f'{mode}_mask')
        targets = data.y[selected]
        loss = loss_fc(scores[selected], targets)
        # Class prediction = index of the largest logit per node.
        predicted = torch.argmax(scores, dim=1).flatten()
        hits = (predicted[selected] == targets).cpu().numpy()
        acc = hits.mean()

    return loss, acc
torch.save(model.state_dict(), './checkpoint/best_weights.pt') 75 | else: 76 | wait += 1 77 | if wait > cfg.patience: 78 | print('>>> Early stopped.') 79 | break 80 | 81 | print(">>> Testing...") 82 | model.load_state_dict(torch.load("./checkpoint/best_weights.pt")) 83 | test_loss, test_acc = evaluate(model, data, criterion, mode='test') 84 | print(f'\tTest Loss: {test_loss:.4f} | Test Acc: {test_acc * 100:.2f}%') 85 | return test_acc 86 | 87 | 88 | if __name__ == '__main__': 89 | config = Config() 90 | # main(config) 91 | # exit() 92 | 93 | cfg_grid = { 94 | 'data_name': ['citeseer', 'cora'], 95 | 'add_self_loop': [True, False], 96 | 'n_layers': [1, 2, 3, 5, 10], 97 | 'drop_edge': [0, .1, .2, .3, .5], 98 | 'pair_norm': [True, False], 99 | 'activations': ['relu', 'tanh', 'sigmoid'] 100 | } 101 | results = [] 102 | keys = cfg_grid.keys() 103 | for values in product(*cfg_grid.values()): 104 | new_cfg = dict(zip(keys, values)) 105 | config.update(new_cfg) 106 | acc = main(config) 107 | results.append([*new_cfg.values, acc]) 108 | df = pd.DataFrame(results, columns=[*cfg_grid.keys(), 'test_acc']) 109 | df.to_csv('./result.csv', index=False) 110 | -------------------------------------------------------------------------------- /exp5/src/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch_geometric.nn import GCNConv, PairNorm 4 | from torch_geometric.utils import dropout_adj 5 | 6 | 7 | activations = { 8 | 'relu': torch.relu, 9 | 'sigmoid': torch.sigmoid, 10 | 'tanh': torch.tanh, 11 | } 12 | 13 | 14 | class GCN(torch.nn.Module): 15 | def __init__(self, in_channels: int, hidden_channels: int, num_classes: int, 16 | n_layers: int, act: str = 'relu', add_self_loops: bool = True, 17 | pair_norm: bool = True, dropout: float = .0, drop_edge: float = .0): 18 | super(GCN, self).__init__() 19 | self.dropout = dropout 20 | self.drop_edge = drop_edge 21 | self.pair_norm = 
def set_seed(seed=123):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also exports ``PYTHONHASHSEED`` and ``CUBLAS_WORKSPACE_CONFIG`` and turns
    off cuDNN benchmarking in favour of its deterministic mode.

    Args:
        seed: integer seed shared by all generators (default 123).
    """
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

    for seeder in (torch.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)

    # Left disabled, as in the original:
    # torch.use_deterministic_algorithms(True)
    # torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2"
![image-20210621110149625](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110149625.png) 20 | 21 | ![image-20210621110122757](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110122757.png) 22 | 23 | ![image-20210621110050539](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110050539-1624244452275.png) 24 | 25 | ![image-20210621110306141](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110306141.png) 26 | 27 | ![image-20210621110337194](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110337194.png) 28 | 29 | ![image-20210621110419877](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110419877.png) 30 | 31 | ![image-20210621110603182](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110603182.png) 32 | 33 | ![image-20210621110647970](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110647970.png) 34 | 35 | ![image-20210621110619585](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110619585-1624244780237.png) 36 | 37 | ![image-20210621110714808](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110714808.png) 38 | 39 | ![image-20210621110733499](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110733499.png) 40 | 41 | 
![image-20210621110823719](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110823719.png) 42 | 43 | ![image-20210621110938485](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110938485.png) 44 | 45 | ![image-20210621110951936](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110951936-1624244992867.png) 46 | 47 | ![image-20210621111247829](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111247829-1624245168536.png) 48 | 49 | ![image-20210621111339615](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111339615.png) 50 | 51 | ![image-20210621111544917](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111544917.png) 52 | 53 | ![image-20210621111633343](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111633343.png) 54 | 55 | ![image-20210621111948461](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111948461.png) 56 | 57 | ![image-20210621112110697](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621112110697.png) 58 | 59 | ![image-20210621112125882](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621112125882.png) 60 | 61 | 62 | ## 实验内容 63 | 64 | ![image-20210621112457787](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621112457787.png) 65 | 66 | 67 | ## 实验结果 68 | 69 | + 本实验使用`PyTorch`进行,并主要使用了`pytorch_geometric`库。 70 | + 
本实验在`Cora`和`Citeseer`两个数据集上进行了节点分类,并比较了自环,层数,DropEdge, PairNorm,激活函数对其分类性能的影响。 71 | 72 | 73 | ### 源码结构及说明 74 | 75 | #### 数据处理部分 76 | 77 | **数据集概览:** 78 | 79 | | Dataset | Nodes | Edges | Classes | Features | 80 | | :------: | :---: | :---: | :-----: | :------: | 81 | | Citeseer | 3327 | 4732 | 6 | 3703 | 82 | | Cora | 2708 | 5429 | 7 | 1433 | 83 | 84 | **数据处理方法:** 85 | 86 | 按照`pytorch_geometric`的数据输入格式,将顶点关联的`features`组织成一个二维矩阵`x: shape=(Nodes, Features)`, 将图结构,即顶点的连接关系用 `COO` 格式组织成一个二维矩阵`edge_index: shape=(2, Edges)`(邻接矩阵的稀疏表示). 将标签处理为一维向量`y: shape=(Nodes,)` 其取值范围为`range(Classes)`. 87 | 88 | 通过`mask`将顶点划分为`train, val, test`. 其中`train_mask`覆盖每个类别分别20个顶点,`val_mask`覆盖除`train_mask`外的随机500个顶点,`test_mask`覆盖除前两者外的随机1000个顶点。图结构难以拆解成三个部分,因此图是一整个输入到网络中的,也即所有的顶点都会参与计算。`train_mask`的作用是,在计算损失时,将其他顶点mask掉,只计算训练顶点的损失。同样地,通过`val_mask,test_mask`我们可以分别计算`val`和`test`顶点的分类准确率。 89 | 90 | #### 模型部分 91 | 92 | 网络用**`n_layers`层`GCN`**堆叠而成,在每层`GCN`后都紧跟一层**可选的`PairNorm`层**;除了最后一层外,每层的`PairNorm`后还有激活函数和`dropout`,**激活函数可以选择`relu, tanh, sigmoid`**, `dropout`可以调节drop的概率p. 93 | 94 | 其中`GCN`直接使用`pytorch_geometric`库中的`GCNConv`层,它可以**通过参数`add_self_loop`设置是否添加自环**。在输入`GCN`之前,还可以通过**设置`drop_edge`的drop比例**去掉部分`edge_index`. 
95 | 96 | 完整的模型定义如下: 97 | 98 | ```python 99 | import torch 100 | import torch.nn.functional as F 101 | from torch_geometric.nn import GCNConv, PairNorm 102 | from torch_geometric.utils import dropout_adj 103 | 104 | activations = { 105 | 'relu': torch.relu, 106 | 'sigmoid': torch.sigmoid, 107 | 'tanh': torch.tanh, 108 | } 109 | 110 | class GCN(torch.nn.Module): 111 | def __init__(self, in_channels: int, hidden_channels: int, num_classes: int, 112 | n_layers: int, act: str = 'relu', add_self_loops: bool = True, 113 | pair_norm: bool = True, dropout: float = .0, drop_edge: float = .0): 114 | super(GCN, self).__init__() 115 | self.dropout = dropout 116 | self.drop_edge = drop_edge 117 | self.pair_norm = pair_norm 118 | self.act = activations[act] if isinstance(act, str) else act 119 | 120 | self.conv_list = torch.nn.ModuleList() 121 | for i in range(n_layers): 122 | in_c, out_c = hidden_channels, hidden_channels 123 | if i == 0: 124 | in_c = in_channels 125 | elif i == n_layers - 1: 126 | out_c = num_classes 127 | self.conv_list.append(GCNConv(in_c, out_c, add_self_loops=add_self_loops)) 128 | 129 | def forward(self, x, edge_index): 130 | edge_index, _ = dropout_adj(edge_index, p=self.drop_edge) 131 | 132 | for i, conv in enumerate(self.conv_list): 133 | x = conv(x, edge_index) 134 | if self.pair_norm: 135 | x = PairNorm()(x) 136 | if i < len(self.conv_list) - 1: 137 | x = self.act(x) 138 | x = F.dropout(x, p=self.dropout, training=self.training) 139 | 140 | return x 141 | ``` 142 | 143 | ### 结果及分析 144 | 145 | #### 参数设置 146 | 147 | 本实验的可选参数及其默认值为 148 | 149 | ```python 150 | default_cfg = { 151 | 'data_root': './GNN/', # 数据根目录 152 | 'data_name': 'cora', # citeseer or cora 153 | 'num_train_per_class': 20, # 训练集包含的每个类别的顶点数目 154 | 'num_val': 500, # 验证集顶点数目 155 | 'num_test': 1000, # 测试集顶点数目 156 | 'seed': 114514, 157 | 'device': 'cuda:0', 158 | 'epochs': 1000, 159 | 'patience': 5, # 早停的等待轮数 160 | 'lr': 5e-3, 161 | 'weight_decay': 5e-4, 162 | 'hidden_dim': 32, 163 | 
'n_layers': 2, 164 | 'activations': 'relu', 165 | 'dropout': 0.5, 166 | 'drop_edge': 0., 167 | 'add_self_loop': True, 168 | 'pair_norm': False, 169 | 'test_only': False 170 | } 171 | ``` 172 | 173 | 其中本实验进行调节的参数及调节的范围为 174 | 175 | ```python 176 | cfg_grid = { 177 | 'data_name': ['citeseer', 'cora'], 178 | 'add_self_loop': [True, False], 179 | 'n_layers': [1, 2, 3, 5, 10], 180 | 'drop_edge': [0, .1, .2, .3, .5], 181 | 'pair_norm': [True, False], 182 | 'activations': ['relu', 'tanh', 'sigmoid'] 183 | } 184 | ``` 185 | 186 | **共有600种可能的参数组合。** **在每种参数组合下**,分别训练模型,并通过验证集`val_loss`进行早停,以`val_loss`最低时的模型权重对测试集进行测试,以其分类准确率`test_acc`作为最终评价指标。 187 | 188 | #### 结果对比分析 189 | 190 | 所有的组合下的`test_acc`结果可以在附件[`result.csv`](./result.csv)中查看,下面仅列举出部分结果。 191 | 192 | **两个数据集上的最好结果及对应参数** 193 | 194 | | `data_name` | `add_self_loop` | `n_layers` | `drop_edge` | `pair_norm` | `activations` | `test_acc` | 195 | | :---------: | :-------------: | :--------: | :---------: | :---------: | :-----------: | :--------: | 196 | | 'cora' | True | 2 | 0. | False | 'relu' | 0.797 | 197 | | 'citeseer' | True | 2 | 0. 
| False | 'relu' | 0.685 | 198 | 199 | > **Note**: 下面的对比均以`citeseer`数据集为例, 即`data_name='citeseer'` 200 | 201 | **是否添加自环的对比** 202 | 203 | Selected Comparison: 204 | 205 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc | 206 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- | 207 | | citeseer | True | 3 | 0.0 | False | relu | 0.645 | 208 | | citeseer | False | 3 | 0.0 | False | relu | 0.628 | 209 | | citeseer | True | 2 | 0.0 | False | relu | 0.685 | 210 | | citeseer | False | 2 | 0.0 | False | relu | 0.667 | 211 | 212 | 分析:添加自环效果好,在某些参数下提升非常显著。 213 | 214 | **不同层数的对比** 215 | 216 | Selected Comparison: 217 | 218 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc | 219 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- | 220 | | citeseer | True | 1 | 0.0 | False | relu | 0.68 | 221 | | citeseer | True | 2 | 0.0 | False | relu | 0.685 | 222 | | citeseer | True | 3 | 0.0 | False | relu | 0.645 | 223 | | citeseer | True | 5 | 0.0 | False | relu | 0.522 | 224 | | citeseer | True | 10 | 0.0 | False | relu | 0.176 | 225 | 226 | 分析:两层效果最好, 层数多难以优化。 227 | 228 | **drop edge的对比** 229 | 230 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc | 231 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- | 232 | | citeseer | True | 5 | 0.0 | False | relu | 0.522 | 233 | | citeseer | True | 5 | 0.1 | False | relu | 0.351 | 234 | | citeseer | True | 5 | 0.2 | False | relu | 0.182 | 235 | | citeseer | True | 5 | 0.3 | False | relu | 0.201 | 236 | | citeseer | True | 5 | 0.5 | False | relu | 0.188 | 237 | | citeseer | True | 3 | 0.0 | False | relu | 0.645 | 238 | | citeseer | True | 3 | 0.1 | False | relu | 0.671 | 239 | | citeseer | True | 3 | 0.2 | False | relu | 0.655 | 240 | | citeseer | True | 3 | 0.3 | False | relu | 0.663 | 241 | | citeseer | True | 3 
| 0.5 | False | relu | 0.609 | 242 | 243 | 分析:层数少时drop edge 有点效果,层数深时效果不好。 244 | 245 | **是否使用PairNorm的对比** 246 | 247 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc | 248 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- | 249 | | citeseer | True | 1 | 0.0 | False | relu | 0.68 | 250 | | citeseer | True | 1 | 0.0 | True | relu | 0.443 | 251 | | citeseer | True | 2 | 0.0 | False | relu | 0.685 | 252 | | citeseer | True | 2 | 0.0 | True | relu | 0.526 | 253 | | citeseer | True | 3 | 0.0 | False | relu | 0.645 | 254 | | citeseer | True | 3 | 0.0 | True | relu | 0.568 | 255 | | citeseer | True | 5 | 0.0 | False | relu | 0.522 | 256 | | citeseer | True | 5 | 0.0 | True | relu | 0.545 | 257 | | citeseer | True | 10 | 0.0 | False | relu | 0.176 | 258 | | citeseer | True | 10 | 0.0 | True | relu | 0.3 | 259 | 260 | 分析:层数少时加`PairNorm`效果变差,层数多时`PairNorm`有效果。 261 | 262 | **不同激活函数的对比** 263 | 264 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc | 265 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- | 266 | | citeseer | True | 2 | 0.0 | False | relu | 0.685 | 267 | | citeseer | True | 2 | 0.0 | False | tanh | 0.683 | 268 | | citeseer | True | 2 | 0.0 | False | sigmoid | 0.207 | 269 | | citeseer | True | 3 | 0.0 | False | relu | 0.645 | 270 | | citeseer | True | 3 | 0.0 | False | tanh | 0.667 | 271 | | citeseer | True | 3 | 0.0 | False | sigmoid | 0.207 | 272 | | citeseer | True | 5 | 0.0 | False | relu | 0.522 | 273 | | citeseer | True | 5 | 0.0 | False | tanh | 0.588 | 274 | | citeseer | True | 5 | 0.0 | False | sigmoid | 0.207 | 275 | | citeseer | True | 10 | 0.0 | False | relu | 0.176 | 276 | | citeseer | True | 10 | 0.0 | False | tanh | 0.472 | 277 | | citeseer | True | 10 | 0.0 | False | sigmoid | 0.195 | 278 | 279 | 分析:2层时`relu~=tanh>>sigmoid`, 3层和5层`tanh>relu>>sigmoid`, 10层`tanh>>sigmoid~=relu`。 280 | 281 | 
282 | ## 实验总结 283 | 284 | 本次实验的最大收获在于了解了图神经网络的原理,以及学会了使用`torch_geometric`库。 285 | 286 | [原GCN论文](https://arxiv.org/pdf/1609.02907.pdf)里面`citeseer`和`cora`数据集的最好结果(%)分别为 70.3 和 81.5, 我这里略差,分别是 68.5 和 79.7。其实网络结构是一样的,也是两层,用`relu`作为激活函数。我对比了一下才发现原因:**它那个数据集划分是某种特定的划分**。虽然划分比例相同,但在它那个划分下结果就是好不少,主要差别就在于这里。~~(你们这些做学术的人都在调些什么啊,dataset split is all you need ?)~~ 287 | 288 | ##### (完) 289 | 290 | -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络.pdf -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621105946421.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621105946421.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110023351.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110023351.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110050539-1624244452275.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110050539-1624244452275.png 
-------------------------------------------------------------------------------- 
/exp5/实验五:图卷积神经网络/image-20210621110050539.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110050539.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110122757.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110122757.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110149625.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110149625.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110240262.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110240262.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110306141.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110306141.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110337194.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110337194.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110419877.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110419877.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110436272.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110436272.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110603182.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110603182.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110619585-1624244780237.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110619585-1624244780237.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110619585.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110619585.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110647970.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110647970.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110714808.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110714808.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110733499.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110733499.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110823719.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110823719.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110938485.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110938485.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110951936-1624244992867.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110951936-1624244992867.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621110951936.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621110951936.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621111247829-1624245168536.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621111247829-1624245168536.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621111247829.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621111247829.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621111339615.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621111339615.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621111544917.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621111544917.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621111633343.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621111633343.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621111948461.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621111948461.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621112110697.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621112110697.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621112125882.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621112125882.png -------------------------------------------------------------------------------- /exp5/实验五:图卷积神经网络/image-20210621112457787.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五:图卷积神经网络/image-20210621112457787.png --------------------------------------------------------------------------------