├── .gitignore
├── LICENSE
├── README.md
├── exp1
    ├── fig
    │   ├── activation_pane.png
    │   ├── activations.png
    │   ├── depth_1.png
    │   ├── depth_10.png
    │   ├── depth_2.png
    │   ├── depth_3.png
    │   ├── depth_5.png
    │   ├── depth_pane.png
    │   ├── depths_and_widths.png
    │   ├── elu.png
    │   ├── leakyrelu.png
    │   ├── learning_rates.png
    │   ├── lr1e-1.5.png
    │   ├── lr1e-1.png
    │   ├── lr1e-2.5.png
    │   ├── lr1e-2.png
    │   ├── lr1e-3.5.png
    │   ├── lr1e-3.png
    │   ├── lr1e-4.png
    │   ├── lr_pane.png
    │   ├── prelu.png
    │   ├── relu.png
    │   ├── sigmoid.png
    │   ├── softplus.png
    │   └── tanh.png
    ├── requirements.txt
    ├── src
    │   ├── main.py
    │   ├── model.py
    │   └── results.py
    ├── 实验一：前馈神经网络.md
    └── 实验一：前馈神经网络.pdf
├── exp2
    ├── requirements.txt
    ├── src
    │   ├── data.py
    │   ├── logs1.txt
    │   ├── logs2.txt
    │   ├── logs3.txt
    │   ├── main.py
    │   ├── model.py
    │   └── test.py
    ├── 实验二：卷积神经网络.md
    └── 实验二：卷积神经网络.pdf
├── exp3
    ├── ReadMe.md
    ├── img.png
    ├── requirements.txt
    ├── src
    │   ├── config.py
    │   ├── data.py
    │   ├── main.py
    │   ├── model.py
    │   └── utils.py
    ├── 实验三：循环神经网络.md
    ├── 实验三：循环神经网络.pdf
    └── 实验三：循环神经网络
    │   ├── image-20210513172134556.png
    │   ├── image-20210513172216587.png
    │   ├── image-20210513172230085.png
    │   ├── image-20210513172244821.png
    │   ├── image-20210513172303845.png
    │   ├── image-20210513172317856.png
    │   ├── image-20210513172733303.png
    │   ├── image-20210513172747548.png
    │   ├── image-20210513172839199.png
    │   ├── image-20210513172852144.png
    │   ├── image-20210513172947008.png
    │   ├── image-20210513173012590.png
    │   ├── image-20210513173151864.png
    │   └── image-20210513173355442.png
├── exp4
    ├── ReadMe.md
    ├── img.png
    ├── requirements.txt
    ├── src
    │   ├── config.py
    │   ├── data.py
    │   ├── main.py
    │   ├── model.py
    │   └── utils.py
    ├── 实验四：BERT.md
    ├── 实验四：BERT.pdf
    └── 实验四：BERT
    │   ├── image-20210605154503374.png
    │   ├── image-20210605154926294.png
    │   ├── image-20210605154945286.png
    │   ├── image-20210605160746073.png
    │   ├── image-20210605163538200.png
    │   ├── image-20210605163557321.png
    │   ├── image-20210605163641264.png
    │   └── image-20210605163743489.png
└── exp5
    ├── requirements.txt
    ├── result.csv
    ├── src
        ├── config.py
        ├── data.py
        ├── main.py
        ├── model.py
        └── utils.py
    ├── 实验五：图卷积神经网络.md
    ├── 实验五：图卷积神经网络.pdf
    └── 实验五：图卷积神经网络
        ├── image-20210621105946421.png
        ├── image-20210621110023351.png
        ├── image-20210621110050539-1624244452275.png
        ├── image-20210621110050539.png
        ├── image-20210621110122757.png
        ├── image-20210621110149625.png
        ├── image-20210621110240262.png
        ├── image-20210621110306141.png
        ├── image-20210621110337194.png
        ├── image-20210621110419877.png
        ├── image-20210621110436272.png
        ├── image-20210621110603182.png
        ├── image-20210621110619585-1624244780237.png
        ├── image-20210621110619585.png
        ├── image-20210621110647970.png
        ├── image-20210621110714808.png
        ├── image-20210621110733499.png
        ├── image-20210621110823719.png
        ├── image-20210621110938485.png
        ├── image-20210621110951936-1624244992867.png
        ├── image-20210621110951936.png
        ├── image-20210621111247829-1624245168536.png
        ├── image-20210621111247829.png
        ├── image-20210621111339615.png
        ├── image-20210621111544917.png
        ├── image-20210621111633343.png
        ├── image-20210621111948461.png
        ├── image-20210621112110697.png
        ├── image-20210621112125882.png
        └── image-20210621112457787.png


/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea
 2 | __pycache__
 3 | *.zip
 4 | tiny-imagenet-200
 5 | aclImdb
 6 | checkpoint
 7 | data
 8 | glove.6B
 9 | expx
10 | GNN


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 liuwei
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # USTC_2021_Spring_Deep_Learning_labs
 2 | 
 3 | USTC-2021春季学期深度学习导论课程实验：
 4 | 
 5 | + 前馈神经网络（FNN）
 6 | + 卷积神经网络（CNN）
 7 | + 循环神经网络（RNN, LSTM）
 8 | + BERT
 9 | + 图卷积神经网络（GCN）
10 | 


--------------------------------------------------------------------------------
/exp1/fig/activation_pane.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/activation_pane.png


--------------------------------------------------------------------------------
/exp1/fig/activations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/activations.png


--------------------------------------------------------------------------------
/exp1/fig/depth_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_1.png


--------------------------------------------------------------------------------
/exp1/fig/depth_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_10.png


--------------------------------------------------------------------------------
/exp1/fig/depth_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_2.png


--------------------------------------------------------------------------------
/exp1/fig/depth_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_3.png


--------------------------------------------------------------------------------
/exp1/fig/depth_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_5.png


--------------------------------------------------------------------------------
/exp1/fig/depth_pane.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depth_pane.png


--------------------------------------------------------------------------------
/exp1/fig/depths_and_widths.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/depths_and_widths.png


--------------------------------------------------------------------------------
/exp1/fig/elu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/elu.png


--------------------------------------------------------------------------------
/exp1/fig/leakyrelu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/leakyrelu.png


--------------------------------------------------------------------------------
/exp1/fig/learning_rates.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/learning_rates.png


--------------------------------------------------------------------------------
/exp1/fig/lr1e-1.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-1.5.png


--------------------------------------------------------------------------------
/exp1/fig/lr1e-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-1.png


--------------------------------------------------------------------------------
/exp1/fig/lr1e-2.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-2.5.png


--------------------------------------------------------------------------------
/exp1/fig/lr1e-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-2.png


--------------------------------------------------------------------------------
/exp1/fig/lr1e-3.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-3.5.png


--------------------------------------------------------------------------------
/exp1/fig/lr1e-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-3.png


--------------------------------------------------------------------------------
/exp1/fig/lr1e-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr1e-4.png


--------------------------------------------------------------------------------
/exp1/fig/lr_pane.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/lr_pane.png


--------------------------------------------------------------------------------
/exp1/fig/prelu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/prelu.png


--------------------------------------------------------------------------------
/exp1/fig/relu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/relu.png


--------------------------------------------------------------------------------
/exp1/fig/sigmoid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/sigmoid.png


--------------------------------------------------------------------------------
/exp1/fig/softplus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/softplus.png


--------------------------------------------------------------------------------
/exp1/fig/tanh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/fig/tanh.png


--------------------------------------------------------------------------------
/exp1/requirements.txt:
--------------------------------------------------------------------------------
 1 | certifi==2020.12.5
 2 | cycler==0.10.0
 3 | freeze==3.0
 4 | kiwisolver==1.3.1
 5 | matplotlib==3.4.0
 6 | mkl-fft==1.3.0
 7 | mkl-random==1.1.1
 8 | mkl-service==2.3.0
 9 | numpy @ file:///C:/ci/numpy_and_numpy_base_1603466732592/work
10 | olefile==0.46
11 | Pillow @ file:///C:/ci/pillow_1615224342392/work
12 | pyparsing==2.4.7
13 | python-dateutil==2.8.1
14 | six @ file:///C:/ci/six_1605187374963/work
15 | torch==1.8.1
16 | torchaudio==0.8.1
17 | torchvision==0.9.1
18 | tqdm==4.59.0
19 | typing-extensions @ file:///home/ktietz/src/ci_mi/typing_extensions_1612808209620/work
20 | wincertstore==0.2
21 | 


--------------------------------------------------------------------------------
/exp1/src/main.py:
--------------------------------------------------------------------------------
 1 | import torch as t
 2 | import torch.nn as nn
 3 | from torch.optim import Adam
 4 | import numpy as np
 5 | from torch.utils.data import DataLoader, TensorDataset
 6 | from tqdm import tqdm
 7 | from matplotlib import pyplot as plt
 8 | from model import FNN
 9 | import copy
10 | 
11 | 
class FuncFitter(object):
    """Fit a scalar function ``y = f(x)`` on a fixed interval with a neural network.

    Typical use: construct the fitter, call :meth:`gen_data` once to create the
    train/test split, then call :meth:`train` and :meth:`test` with a model,
    an optimizer and a loss function.
    """

    def __init__(self, to_fit: str or callable, x_range: tuple):
        """
        Args:
            to_fit: Either a callable applied to a NumPy array, or a string that
                is resolved as ``np.<to_fit>`` (e.g. ``'sin'``). Note that the
                annotation ``str or callable`` evaluates to plain ``str`` at
                runtime; both kinds of argument are accepted.
            x_range: ``(low, high)`` interval the sample points are taken from.
        """
        if isinstance(to_fit, str):
            # NOTE(review): eval() executes arbitrary code. Only pass trusted,
            # hard-coded names here, never text that comes from a user.
            self.to_fit = eval(f'np.{to_fit}')
        else:
            self.to_fit = to_fit
        self.x_range = x_range

    def gen_data(self, data_size, train_ratio, random_state):
        """Create evenly spaced samples and split them randomly into train/test.

        Args:
            data_size: Total number of sample points (train + test).
            train_ratio: Fraction of the points used for training; the training
                set has ``round(train_ratio * data_size)`` points.
            random_state: Seed passed to ``np.random.seed`` so the split is
                reproducible.

        Returns:
            ``(train_data, test_data)``, each an ``(x, y)`` pair of tensors of
            shape ``(n, 1)``. Both pairs are also stored on the instance.
        """
        np.random.seed(random_state)
        # Column vector of shape (data_size, 1): one feature per sample.
        x = np.linspace(self.x_range[0], self.x_range[1], data_size)[:, np.newaxis]
        y = self.to_fit(x)
        indices = np.random.permutation(data_size)
        ids_train, ids_test = np.split(indices, [round(train_ratio * data_size)])
        x_train, y_train = x[ids_train], y[ids_train]
        x_test, y_test = x[ids_test], y[ids_test]
        self.train_data = (t.Tensor(x_train), t.Tensor(y_train))
        self.test_data = (t.Tensor(x_test), t.Tensor(y_test))
        return self.train_data, self.test_data

    def train(self, model, optim, criterion, epochs, batch_size, pbar='batch'):
        """Train ``model`` in place on the data created by :meth:`gen_data`.

        Args:
            model: Callable module mapping an ``(n, 1)`` tensor to predictions.
            optim: Optimizer already bound to ``model``'s parameters.
            criterion: Loss function called as ``criterion(y_pred, y)``.
            epochs: Number of passes over the training set.
            batch_size: Mini-batch size for the shuffled ``DataLoader``.
            pbar: ``'batch'`` shows one tqdm bar per epoch over the batches,
                ``'epoch'`` shows a single bar over the epochs, any other value
                disables progress bars.

        Returns:
            The same ``model`` object that was passed in.
        """
        dataset = TensorDataset(*self.train_data)
        dataloader = DataLoader(dataset, batch_size, shuffle=True)
        show_epoch_bar = pbar == 'epoch'
        show_batch_bar = pbar == 'batch'
        pbar_epoch = range(epochs)
        if show_epoch_bar:
            pbar_epoch = tqdm(pbar_epoch, desc='Epochs', unit='epoch',
                              bar_format='{desc:<7.7}{percentage:3.0f}%|{bar:30}{r_bar}')
        for i in pbar_epoch:
            loss = None
            pbar_batch = dataloader
            if show_batch_bar:
                pbar_batch = tqdm(pbar_batch, desc=f'[Epoch {i + 1}/{epochs}]', unit='batch',
                                  bar_format='{desc:<15.15}{percentage:3.0f}%|{bar:30}{r_bar}')
            for x, y in pbar_batch:
                y_pred = model(x)
                loss = criterion(y_pred, y)
                optim.zero_grad()
                loss.backward()
                optim.step()
                if show_batch_bar:
                    pbar_batch.set_postfix({'train_loss': loss.item()})
            # Only a tqdm bar has set_postfix; in every other mode pbar_epoch is
            # a plain range. loss stays None when the loader yielded no batch.
            if show_epoch_bar and loss is not None:
                pbar_epoch.set_postfix({'train_loss': loss.item()})
        return model

    def test(self, model, criterion, plot=True):
        """Evaluate ``model`` on the held-out test split.

        Args:
            model: Trained model to evaluate.
            criterion: Loss function called as ``criterion(y_pred, y_test)``.
            plot: When true, show a scatter plot of true vs. predicted values.

        Returns:
            The test loss as a Python float.
        """
        x_test, y_test = self.test_data
        # Evaluation needs no gradients; skip building the autograd graph.
        with t.no_grad():
            y_pred = model(x_test)
            loss = criterion(y_pred, y_test).item()

        if plot:
            fig, ax = plt.subplots()
            ax.set_title('Function Fitter: {}\ntest loss: {}'.format(str(self.to_fit), loss))
            ax.set_xlabel('x')
            ax.set_ylabel('y')
            ax.set_xlim(self.x_range)
            ax.scatter(x_test.detach().numpy(), y_test.detach().numpy(), s=5, label='true')
            ax.scatter(x_test.detach().numpy(), y_pred.detach().numpy(), s=5, label='pred')
            ax.legend()
            plt.show()

        return loss
74 | 
if __name__ == '__main__':
    # Experiment driver: train a fresh tanh network with two hidden layers of
    # 20 units on sin(x) over [0, 4*pi] ten times and collect the test losses.
    ROUNDS = 10
    hidden = [20] * 2
    fitter = FuncFitter(np.sin, (0, 4 * np.pi))
    fitter.gen_data(data_size=10000, train_ratio=0.8, random_state=7)
    losses = []
    for round_idx in range(ROUNDS):
        fnn = FNN(neurons=[1] + hidden + [1], activation='tanh')
        if round_idx == 0:
            # Report the trainable parameter count once; it is the same every round.
            n_trainable = sum(p.numel() for p in fnn.parameters() if p.requires_grad)
            print('number of parameters: {}'.format(n_trainable))
        mse = nn.MSELoss()
        adam = Adam(fnn.parameters(), lr=10 ** -3)
        fitter.train(fnn, adam, mse, epochs=10, batch_size=10, pbar='epoch')
        losses.append(round(fitter.test(fnn, mse, plot=True), 6))
    print(losses)
90 | 


--------------------------------------------------------------------------------
/exp1/src/model.py:
--------------------------------------------------------------------------------
 1 | import torch as t
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class FNN(nn.Module):
 6 |     def __init__(self, neurons: list, activation: str):
 7 |         super(FNN, self).__init__()
 8 |         activation = activation.lower()
 9 |         act_map = {
10 |             'relu': nn.ReLU,
11 |             'tanh': nn.Tanh,
12 |             'sigmoid': nn.Sigmoid,
13 |             'elu': nn.ELU,
14 |             'leakyrelu': nn.LeakyReLU,
15 |             'prelu': nn.PReLU,
16 |             'softplus': nn.Softplus,
17 |         }
18 |         self.fc_layers = nn.ModuleList()
19 |         self.activations = nn.ModuleList()
20 |         num_layers = len(neurons) - 1
21 |         for i in range(num_layers):
22 |             self.fc_layers.append(nn.Linear(neurons[i], neurons[i+1]))
23 |             if i < num_layers - 1:
24 |                 self.activations.append(act_map[activation]())
25 | 
26 |     def forward(self, x):
27 |         for i, layer in enumerate(self.fc_layers):
28 |             x = layer(x)
29 |             if i < len(self.activations):
30 |                 x = self.activations[i](x)
31 |         return x
32 | 
33 | 
class net(nn.Module):
    """Small fixed network: Linear(1, 10) -> ReLU -> Linear(10, 1)."""

    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([
            nn.Linear(1, 10),
            nn.ReLU(),
            nn.Linear(10, 1),
        ])

    def forward(self, x):
        """Pass ``x`` through every stored layer in order."""
        out = x
        for stage in self.layers:
            out = stage(out)
        return out
47 | 
48 | 
if __name__ == '__main__':
    # Sanity check: report how many parameters the small demo network has.
    n_params = sum(p.numel() for p in net().parameters())
    print('number of parameters: {}'.format(n_params))


--------------------------------------------------------------------------------
/exp1/src/results.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch.nn as nn
  3 | from torch.optim import Adam
  4 | from matplotlib import pyplot as plt
  5 | 
  6 | 
  7 | default_params = {
  8 |     'to_fit': np.sin,
  9 |     'x_range': (0, 4*np.pi),
 10 |     'data_size': 10000,
 11 |     'train_ratio': 0.8,
 12 |     'random_state': 7,
 13 |     'epochs': 10,
 14 |     'batch_size': 10,
 15 |     'criterion': nn.MSELoss()
 16 | }
 17 | 
 18 | 
# Comparison 1: depth and width
# Keep the total number of parameters at about 1000 and give every hidden layer the same width.
# Fixed: activation='tanh', lr=0.001.
# Five depth/width setups follow (depth counts hidden layers only):
# setup1: neurons=[1, 333, 1] (depths=1)       #(params)=1000
# setup2: neurons=[1, *[30]*2, 1] (depths=2)   #(params)=1021
# setup3: neurons=[1, *[21]*3, 1] (depths=3)   #(params)=988
# setup4: neurons=[1, *[15]*5, 1] (depths=5)   #(params)=1006
# setup5: neurons=[1, *[10]*10, 1] (depths=10) #(params)=1021
# Each setup is repeated 10 times and every test loss is recorded (key = depth).
result1_raw = {
    1: [0.09346, 0.095363, 0.12187, 0.111205, 0.096947, 0.107818, 0.093153, 0.09429, 0.095786, 0.103154],
    2: [0.029461, 0.05171, 0.1099, 0.083763, 0.040612, 0.003585, 0.065114, 0.030679, 0.042282, 0.078191],
    3: [0.000704, 0.000287, 0.027187, 0.007998, 0.000721, 0.001373, 0.011837, 0.002505, 0.001198, 0.001492],
    5: [0.000982, 0.000224, 0.000635, 0.000259, 0.002295, 0.000729, 0.0007, 0.001617, 0.000853, 0.000275],
    10: [0.013834, 0.001034, 0.017508, 0.015356, 0.000298, 0.032453, 0.001733, 0.000175, 0.000309, 0.001068]
}
# Trimmed mean: sort the 10 losses, drop the smallest and the largest, average the remaining 8.
result1_mean = {k: np.mean(np.sort(v)[1:-1]) for k, v in result1_raw.items()}
print('## Comparision 1: depth and width ##')
print(result1_mean)

# Plot log10 of the trimmed-mean test loss against the depth.
fig, ax = plt.subplots()
ax.set_title('Comparision of depths(widths)')
ax.set_xlabel('depth')
ax.set_ylabel('log10(test loss)')
ax.plot(result1_mean.keys(), np.log10(list(result1_mean.values())), marker='x')
plt.show()
 46 | 
 47 | 
# Comparison 2: activation function
# Fixed: model structure neurons=[1, 20, 20, 1], lr=0.001.
# Seven activation functions are compared:
# sigmoid, tanh, relu, leakyrelu, prelu, elu, softplus
# Each activation function is run 10 times and every test loss is recorded.
result2_raw = {
    'sigmoid': [0.320537, 0.316262, 0.323674, 0.349383, 0.315778, 0.350341, 0.328564, 0.33681, 0.323076, 0.34205],
    'tanh': [0.095256, 0.085254, 0.077649, 0.136754, 0.068252, 0.065716, 0.116979, 0.07541, 0.054631, 0.047086],
    'relu': [0.09296, 0.032559, 0.013368, 0.109147, 0.04267, 0.02434, 0.027428, 0.061975, 0.065116, 0.022743],
    'leakyrelu': [0.030271, 0.069982, 0.066474, 0.048033, 0.024873, 0.028954, 0.069328, 0.063356, 0.021585, 0.063143],
    'prelu': [0.035761, 0.051486, 0.005814, 0.014586, 0.012871, 0.048049, 0.066837, 0.004329, 0.00277, 0.076942],
    'elu': [0.041305, 0.004289, 0.01048, 0.030719, 0.036398, 0.033117, 0.030902, 0.017483, 0.079652, 0.058757],
    'softplus': [0.11326, 0.093088, 0.088635, 0.081033, 0.080066, 0.08392, 0.094294, 0.079997, 0.058466, 0.070052]
}
# Trimmed mean: sort the 10 losses, drop the smallest and the largest, average the remaining 8.
result2_mean = {k: np.mean(np.sort(v)[1:-1]) for k, v in result2_raw.items()}
# Standard deviation over all 10 runs (untrimmed); not printed or plotted in this section.
result2_std = {k: np.std(v) for k, v in result2_raw.items()}
print("## Comparision 2: activation function ##")
print(result2_mean)

# Scatter the trimmed-mean test loss per activation; x positions are 0..6, labelled by name.
fig, ax = plt.subplots()
ax.set_title('Comparision of activation functions')
ax.set_xlabel('activation function')
ax.set_ylabel('test loss')
ax.set_xticks(np.arange(len(result2_mean)))
ax.set_xticklabels(result2_mean.keys())
ax.scatter(np.arange(len(result2_mean)), list(result2_mean.values()), marker='x')
plt.show()
 75 | 
 76 | 
# Comparison 3: learning rate
# Fixed: model structure neurons=[1, 20, 20, 1], activation='tanh'.
# Seven learning-rate setups follow:
# {10^k: k=-1, -1.5, -2, -2.5, -3, -3.5, -4}
# Each setup is repeated 10 times and every test loss is recorded (key = learning rate).
result3_raw = {
    10**-1: [0.370254, 0.641283, 0.366993, 0.396108, 0.501332, 0.404945, 0.86187, 0.377093, 0.769633, 0.407323],
    10**-1.5: [0.022817, 0.021794, 0.027867, 0.034896, 0.067046, 0.131822, 0.027459, 0.042532, 0.01076, 0.019996],
    10**-2: [0.004666, 0.001577, 0.003348, 0.000552, 0.001009, 0.000547, 0.001086, 0.061021, 0.01173, 0.005232],
    10**-2.5: [0.002043, 0.003938, 0.002026, 9.1e-05, 0.001185, 0.000199, 0.000501, 0.000886, 0.000758, 0.004475],
    10**-3: [0.094007, 0.023405, 0.155973, 0.066218, 0.079838, 0.006589, 0.073311, 0.002712, 0.062652, 0.136194],
    10**-3.5: [0.3407, 0.352521, 0.328751, 0.330109, 0.347772, 0.318212, 0.10451, 0.135686, 0.130908, 0.120176],
    10**-4: [0.365907, 0.369544, 0.345024, 0.37749, 0.36385, 0.36186, 0.376415, 0.385564, 0.352906, 0.360354],
}
# Trimmed mean: sort the 10 losses, drop the smallest and the largest, average the remaining 8.
result3_mean = {k: np.mean(np.sort(v)[1:-1]) for k, v in result3_raw.items()}
# Standard deviation over all 10 runs (untrimmed).
result3_std = {k: np.std(v) for k, v in result3_raw.items()}
print("## Comparision 3: learning rate")
print(result3_mean)
print(result3_std)

# Plot log10 of the trimmed-mean test loss against -log10(lr), so smaller learning rates are further right.
fig, ax = plt.subplots()
ax.set_title('Comparision of learning rates')
ax.set_xlabel('-log10(lr)')
ax.set_ylabel('log10(test loss)')
ax.plot(-np.log10(list(result3_mean.keys())), np.log10(list(result3_mean.values())), marker='x')
plt.show()


--------------------------------------------------------------------------------
/exp1/实验一：前馈神经网络.md:
--------------------------------------------------------------------------------
  1 | # 实验一：前馈神经网络
  2 | 
  3 | 姓名：刘威
  4 | 
  5 | 学号：PB18010469
  6 | 
  7 | ## 实验目的
  8 | 
  9 | + 了解并熟悉前馈神经网络的原理及其学习算法
 10 | + 了解激活函数在神经网络中的作用
 11 | + 了解不同深度及宽度对前馈神经网络性能的影响
 12 | + 了解不同学习率对神经网络性能的影响
 13 | 
 14 | ## 实验原理
 15 | 
 16 | ### 神经网络
 17 | 
 18 | * 神经网络的定义
 19 | 
 20 |   “神经网络是由具有适应性的简单单元组成的广泛并行互联的网络, 它的组
 21 |   织能够模拟生物神经系统对真实世界物体所作出的反应”
 22 | 
 23 | * 神经元模型
 24 | 
 25 |   ![image-20210331002813255](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331002813255.png)
 26 | 
 27 | * 激活函数的性质
 28 | 
 29 |   * 连续并可导（允许少数点上不可导）的非线性函数，可导的激活函数可以直接利用数值优化的方法来学习网络参数
 30 |   * 激活函数及其导函数要尽可能的简单，有利于提高网络计算效率
 31 |   *  激活函数的导函数的值域要在一个合适的区间内，不能太大也不能太小，否则会影响训练的效率和稳定性
 32 | 
 33 | * 常用激活函数
 34 | 
 35 |   Sigmoid, Tanh, ReLU, LeakyReLU, PReLU, softplus, ELU
 36 | 
 37 | * 神经网络的主要三个特性
 38 | 
 39 |   * 信息表示是分布式的
 40 |   *  记忆和知识是存储在单元之间的连接上的
 41 |   *  通过逐渐改变单元之间的连接强度来学习新知识  
 42 | 
 43 | * 网络结构
 44 | 
 45 |   * 神经网络设计的另一个关键点是确定它的结构：具有多少单元，以及这些单元应该如何连接。
 46 |   *  大多数神经网络被组织成层的单元组
 47 |   * 大多数神经网络架构将这些层布置成链式结构，其中每一层都是
 48 |     前一层的函数
 49 |   * 神经网络设计在于选择网络的深度和每一层的宽度，更深层网络通常能在每一层使用更少的单元数和更少的参数，并且有更强的泛化能力。但是通常也更难以优化。
 50 | 
 51 | ### 前馈神经网络
 52 | 
 53 | ​	前馈神经网络又称为多层感知机， 主要特点为：
 54 | 
 55 | + 各神经元分别属于不同的层，层内无连接
 56 | 
 57 | + 相邻两层之间的神经元全部两两连接
 58 | 
 59 | + 整个网络中无反馈，信号从输入层向输出层单向传播
 60 | 
 61 |   ![image-20210331003916295](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331003916295.png)
 62 | 
 63 |   ![image-20210331004129942](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331004129942.png)
 64 | 
 65 | **万能近似定理（Universal Approximation Theorem）**
 66 | 
 67 | 一个前馈神经网络如果具有线性输出层和至少一层具有任何一种“挤压” 性质的激活函数（例如logistic sigmoid激活函数）的隐藏层，只要给予网络足够数量的隐藏单元，它可以以任意的精度来近似任何从一个有限维空间到另一个有限维空间的Borel 可测函数。
 68 | 
 69 | 万能近似定理只说明神经网络表达能力强大到可以近似任意一个连续函数，却并没有给出如何找到这样的神经网络，以及它是否是最优的。
 70 | 
 71 | ### 网络参数学习：梯度下降
 72 | 
 73 | + 梯度下降图示：
 74 | 
 75 | ![image-20210331005040953](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331005040953.png)
 76 | 
 77 | + 反向传播算法：针对前馈神经网络而设计的高效方法
 78 | 
 79 |   ![image-20210331005234604](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331005234604.png)
 80 | 
 81 |   ![image-20210331005247254](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331005247254.png)
 82 | 
 83 |   ![image-20210331005342486](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210331005342486.png)
 84 | 
 85 | + 前馈神经网络的训练过程可以分为以下三步：
 86 | 
 87 |   + 前向计算：每一层的状态和激活值，直到最后一层
 88 |   + 反向计算：每一层的参数的偏导数
 89 |   + 更新参数
 90 | 
 91 | ## 实验内容
 92 | 
 93 | 使用`pytorch`或者`tensorflow`手写一个前馈神经网络，用于近似正弦函数$y=\sin(x),x\in[0,4\pi)$。研究网络深度、学习率、网络宽度、激活函数对模型性能的影响。
 94 | 
 95 | 即回归任务，输入和输出均为一维。
 96 | 
 97 | 
 98 | ## 实验结果
 99 | 
100 | 实验使用`pytorch`进行。
101 | 
102 | 
103 | ### 源码结构及说明
104 | 
105 | **模型结构**
106 | 
107 | 实现了可自由调整深度及各隐藏层的宽度的前馈神经网络，激活函数可以通过参数设置。
108 | 
109 | 具体来说构建模型需要以下两个参数：
110 | 
111 | * `neurons`: 列表类型，表示各网络层的神经元个数，包含输入层和输出层。在本实验中输入层和输出层神经元个数固定为 1。例如`neurons=[1, 20, 10, 1]`表示模型包含两个神经元个数分别为 20 和 10 的隐藏层。
112 | * `activation`: 字符串类型，指定要使用的激活函数，可供选择的有 `'sigmoid'`,`'tanh'`,`'relu'`,`'leakyrelu'`,`'prelu'`,`'elu'`,`'softplus'`。例如`activation='relu'`表示输入层及各隐藏层都将使用`relu`作为激活函数。因为是回归任务，输出层没有激活函数。
113 | 
114 | **优化器及损失函数**
115 | 
116 | 优化器固定使用`torch.optim.Adam`，但学习率`lr` 是可调节的参数。
117 | 
118 | 训练的损失函数及测试的性能评估均固定使用`torch.nn.MSELoss`
119 | 
120 | **数据生成**
121 | 
122 | 使用`numpy`生成区间$[0,4\pi)$上的均匀样本点作为训练及测试数据，可以控制总样本点数目及训练样本的比例。具体来说有如下三个参数：
123 | 
124 | + `data_size`: 整型，表示生成的样本点个数。包含训练及测试的样本。
125 | + `train_ratio`: 浮点数，表示训练集的比例，训练集的大小将为`round(data_size*train_ratio)`。训练集从生成的样本中随机选择。
126 | + `random_state`: 整型，随机种子，决定训练集测试集的划分。
127 | 
128 | **训练及测试**
129 | 
130 | 固定批大小为10，训练10轮。即`batch_size=10,epochs=10`.
131 | 
132 | 
133 | ### 结果及分析
134 | 
135 | 本实验固定的参数如下:
136 | 
137 | ```python
138 | default_params = {
139 |     'data_size': 10000,
140 |     'train_ratio': 0.8,
141 |     'random_state': 7,
142 |     'epochs': 10,
143 |     'batch_size': 10
144 | }
145 | ```
146 | 
147 | 值得注意的是，这里的固定参数也就是说**以下实验的训练数据量及训练轮数都是一样的，而不是各自充分训练的结果。**
148 | 
149 | 调节的参数有:
150 | 
151 | + `neurons`: 研究不同网络深度及宽度对模型性能的影响。
152 | + `activation`: 研究不同激活函数对模型性能的影响。
153 | + `lr`: 研究不同学习率对模型性能的影响。
154 | 
155 | #### 比较一：网络深度与宽度
156 | 
157 | 固定参数 `activation=tanh`, `lr=0.001`。
158 | 
159 | 改变参数`neurons`设置五组对照，在固定网络参数保持在1000左右的情况下设置5个不同的深度，并保持各层宽度一致（相应地也是五个不同的宽度）。注意：这里深度为隐藏层的个数，不考虑输入输出层。具体设置及各设置下模型的参数个数如下：
160 | 
161 | ```python
162 | # setup1: neurons=[1, 333, 1] (depths=1)       #(params)=1000
163 | # setup2: neurons=[1, *[30]*2, 1] (depths=2)   #(params)=1021
164 | # setup3: neurons=[1, *[21]*3, 1] (depths=3)   #(params)=988
165 | # setup4: neurons=[1, *[15]*5, 1] (depths=5)   #(params)=1006
166 | # setup5: neurons=[1, *[10]*10, 1] (depths=10) #(params)=1021
167 | ```
168 | 
169 | 对上述五种设置，独立地训练并测试10次得到10次测试的loss，为避免极端值的影响，去掉最高值和最低值后取平均，作为评价不同设置下模型性能的标准。结果为：
170 | 
171 | ```python
172 | {1: 0.09975287499999999, 2: 0.052726499999999996, 3: 0.0034785, 5: 0.00075625, 10: 0.006392500000000001} # 键为深度，值为loss
173 | ```
174 | 
175 | 对loss取对数并作图如下：![depths_and_widths](fig/depths_and_widths.png)
176 | 
177 | 可以看到，在保持网络参数不变的情况下，随着网络深度的加深（对应地，网络宽度减小），同样的训练条件下，模型的性能可能会慢慢变好，但当网络过深时，模型的性能也会下降，这可能是由于梯度消失，优化过程比浅层的网络更难。
178 | 
179 | 细节的拟合效果可以观察拟合图：
180 | 
181 | ![depth_pane](fig/depth_pane.png)
182 | 
183 | 可以注意到当深度较大时（depth=10），x较小的部分的拟合效果还不错，但当数值较大靠近尾部时，突然出现很大偏差，可能是数值太大时，激活函数的梯度很小，而深层网络会进一步扩大这个影响（也就是所谓的梯度消失问题），导致参数几乎无法更新。
184 | 
185 | #### 比较二：激活函数
186 | 
187 | 固定参数`neurons=[1, 20, 20, 1]`,`lr=0.001`
188 | 
189 | 在 7 种激活函数的选择下，分别独立地训练并测试10次得到10次的测试loss，为避免极端值的影响，去掉最高值和最低值后取平均，作为评价不同设置下模型性能的标准。结果如下：
190 | 
191 | ```python
192 | {'sigmoid': 0.33004449999999996, 'tanh': 0.07989337499999999, 'relu': 0.046223875, 'leakyrelu': 0.049304, 'prelu': 0.029966624999999997, 'elu': 0.032395125, 'softplus': 0.08388562499999999}
193 | ```
194 | 
195 | ![activations](fig/activations.png)
196 | 
197 | 可见`sigmoid`激活函数效果非常差，`tanh`和`softplus`效果较好但不如`relu`家族。而`relu`家族中`prelu`和`elu`效果最好。这可能是因为`prelu`包含额外的可学习参数，而`elu`在负半轴光滑且输出非零（注意`elu`的`alpha`是固定的超参数，并不是可学习参数）。
198 | 
199 | 除了比较测试loss外，我们还可以观察拟合图，分析各激活函数的特点。![activation_pane](fig/activation_pane.png)
200 | 
201 | 分析拟合图可以看出：
202 | 
203 | + `sigmoid`出现了类似于深层网络所具有的梯度消失问题，即当x较大时，模型性能急剧变差。一方面该激活函数确实存在饱和的问题，另一方面该激活函数是非零中心化的，会使得后一层的神经元的输入发生偏置偏移，使得梯度下降的收敛速度变慢。
204 | + `relu,leakyrelu,prelu` 的拟合图均出现**不光滑的现象**，这是由于激活函数本身不光滑导致的。而`elu`本身是光滑的，没有出现类似的现象。它们都不会像`sigmoid,tanh`那样有梯度饱和的问题，因而学习效率更高。
205 | 
206 | 
207 | #### 比较三：学习率
208 | 
209 | 固定参数`neurons=[1, 20, 20, 1]`,`activation='tanh'`
210 | 
211 | 设置七个不同的学习率：$10^{-k},k=1,1.5,2,2.5,3,3.5,4$，分别独立地训练并测试10次得到10次的测试loss，为避免极端值的影响，去掉最高值和最低值后取平均，作为评价不同设置下模型性能的标准。结果如下：
212 | 
213 | ```python
214 | {0.1: 0.17341449586998198, 0.03162277660168379: 0.0337293358056455, 0.01: 0.01761509341899724, 0.0031622776601683794: 0.0014473155011952299, 0.001: 0.04821309684815943, 0.00031622776601683794: 0.1052767455255433, 0.0001: 0.011405165033439895}
215 | ```
216 | 
217 | ![learning_rates](fig/learning_rates.png)
218 | 
219 | ![lr_pane](fig/lr_pane.png)
220 | 
221 | 可见随着学习率慢慢变大，模型性能先变好后变差。
222 | 
223 | 具体来说，可以从拟合图上得到一些结论：
224 | 
225 | + 当学习率太大时，参数更新步长较大，导致目标函数波动较大，会使得收敛速度较慢，并且在结果上出现类似阶跃函数输出值突变的现象。
226 | + 当学习率太小时，参数更新步长较小，导致目标函数减小得很慢，收敛速度较慢，需要更长的训练时间才能达到较好的收敛。
227 | 
228 | 
229 | ## 实验总结
230 | 
231 | 本实验中，实现了前馈神经网络并用它来拟合初等函数。
232 | 
233 | 通过三个比较实验，初步了解了网络深度和宽度，激活函数，学习率对前馈神经网络性能的影响。
234 | 
235 | 对于网络深度和宽度而言，深层的网络收敛速度更快，但会引发梯度消失的问题；
236 | 
237 | 对于激活函数而言，`relu`及其变种是很好的选择，但因为不光滑的特点会导致输出不光滑（`elu`不存在这样的问题）。
238 | 
239 | 对于学习率而言，学习率太大和太小都不利于学习，针对特定的问题应该选择大小适中的学习率。


--------------------------------------------------------------------------------
/exp1/实验一：前馈神经网络.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp1/实验一：前馈神经网络.pdf


--------------------------------------------------------------------------------
/exp2/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv_python==4.5.1.48
2 | matplotlib==3.4.1
3 | tqdm==4.56.0
4 | numpy==1.19.2
5 | torch==1.2.0
6 | torchvision==0.4.0a0+6b959ee
7 | 


--------------------------------------------------------------------------------
/exp2/src/data.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import os
 3 | import cv2
 4 | from torch.utils.data import Dataset, DataLoader
 5 | from torchvision import transforms
 6 | from tqdm import tqdm
 7 | 
 8 | 
 9 | class TinyImageNet(Dataset):
10 |     def __init__(self, data_dir, data_type, transform):
11 |         self.type = data_type
12 |         self.transform = transform
13 | 
14 |         labels_t = open(f'{data_dir}wnids.txt').read().strip().split('\n')
15 |         labels_map = {label_t: label for label, label_t in enumerate(labels_t)}
16 |         if self.type == 'train':
17 |             self.train_labels = []
18 |             self.train_images = []
19 |             for i, label_t in tqdm(enumerate(labels_t), desc='[Load train images]'):
20 |                 txt_path = f'{data_dir}train/{label_t}/{label_t}_boxes.txt'
21 |                 image_names = [line.split('\t')[0] for line in open(
22 |                     txt_path).read().strip().split('\n')]
23 |                 for image_name in image_names:
24 |                     image_path = f'{data_dir}train/{label_t}/images/{image_name}'
25 |                     img = cv2.imread(image_path)
26 |                     self.train_images.append(img)
27 |                     self.train_labels.append(i)
28 |             self.train_images = np.array(self.train_images)
29 |             self.train_labels = np.array(self.train_labels)
30 |         elif self.type == 'val':
31 |             self.val_images = []
32 |             self.val_labels = []
33 |             with open(f'{data_dir}val/val_annotations.txt') as txt:
34 |                 for line in tqdm(txt, desc='[Load val images]'):
35 |                     image_name, label_t = line.strip('\n').split('\t')[:2]
36 |                     image_path = f'{data_dir}val/images/{image_name}'
37 |                     val_label = labels_map[label_t]
38 |                     img = cv2.imread(image_path)
39 |                     self.val_images.append(img)
40 |                     self.val_labels.append(val_label)
41 |             self.val_images = np.array(self.val_images)
42 |             self.val_labels = np.array(self.val_labels)
43 | 
44 |     def __getitem__(self, index):
45 |         image, label = None, None
46 |         if self.type == 'train':
47 |             label = self.train_labels[index]
48 |             image = self.train_images[index]
49 |         elif self.type == 'val':
50 |             label = self.val_labels[index]
51 |             image = self.val_images[index]
52 |         return self.transform(image), label
53 | 
54 |     def __len__(self):
55 |         size = None
56 |         if self.type == 'train':
57 |             size = self.train_labels.shape[0]
58 |         elif self.type == 'val':
59 |             size = self.val_labels.shape[0]
60 |         return size
61 | 
62 | 
if __name__ == "__main__":
    # Smoke test: load both splits and inspect the first training batch.
    batch_size = 64
    train_dataset = TinyImageNet(
        './imagenet/tiny-imagenet-200/', 'train', transforms.Compose([transforms.ToTensor()]))
    val_dataset = TinyImageNet(
        './imagenet/tiny-imagenet-200/', 'val', transforms.Compose([transforms.ToTensor()]))
    train_dataloader = DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True)
    val_dataloader = DataLoader(
        dataset=val_dataset, batch_size=batch_size, shuffle=False)
    for batch_image, batch_label in train_dataloader:
        print(batch_image.shape)
        print(batch_label.shape)
        # Distinct class ids present in this batch (numpy has no `np.uni`).
        print(np.unique(batch_label.numpy()))
        # One batch is enough for the check.
        break
78 | 


--------------------------------------------------------------------------------
/exp2/src/logs1.txt:
--------------------------------------------------------------------------------
 1 | default parameters are:
 2 | {'block_sizes': [(64, 64, 1),
 3 |                  (64, 128, 2),
 4 |                  (128, 256, 2),
 5 |                  (256, 512, 2),
 6 |                  (512, 1024, 2)],
 7 |  'dropout': (0.2, 0.5),
 8 |  'epochs': 40,
 9 |  'lr_decay': 0.1,
10 |  'lr_init': 0.001,
11 |  'lr_min': 1e-05,
12 |  'lr_min_delta': 0.0,
13 |  'lr_patience': 1,
14 |  'norm': True,
15 |  'res': True,
16 |  'restore_best_weights': True,
17 |  'top': [1, 5, 10],
18 |  'val_min_delta': 0.0,
19 |  'val_patience': 3}
20 | >>> set `block_sizes` to [(64, 64, 1), (64, 128, 2), (128, 256, 2), (256, 512, 2), (512, 1024, 2)]
21 | val_loss = 2.382565525686665, val_acc = [0.4432, 0.7116, 0.8041]
22 | >>> set `block_sizes` to [(64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1)]
23 | val_loss = 2.5370353900702898, val_acc = [0.3952, 0.6694, 0.7711]
24 | >>> set `block_sizes` to [(64, 64, 1), (64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1), (1024, 1024, 1)]
25 | val_loss = 3.336798567680796, val_acc = [0.2576, 0.5174, 0.6403]
26 | >>> set `norm` to False
27 | val_loss = 5.298343752599825, val_acc = [0.005, 0.025, 0.05]
28 | 


--------------------------------------------------------------------------------
/exp2/src/logs2.txt:
--------------------------------------------------------------------------------
 1 | default parameters are:
 2 | {'block_sizes': [(64, 64, 1),
 3 |                  (64, 128, 2),
 4 |                  (128, 256, 2),
 5 |                  (256, 512, 2),
 6 |                  (512, 1024, 2)],
 7 |  'dropout': (0.2, 0.5),
 8 |  'epochs': 40,
 9 |  'lr_decay': 0.1,
10 |  'lr_init': 0.001,
11 |  'lr_min': 1e-05,
12 |  'lr_min_delta': 0.0,
13 |  'lr_patience': 1,
14 |  'norm': True,
15 |  'res': True,
16 |  'restore_best_weights': True,
17 |  'top': [1, 5, 10],
18 |  'val_min_delta': 0.0,
19 |  'val_patience': 3}
20 | >>> set `block_sizes` to [(64, 64, 1), (64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1), (1024, 1024, 1)]
21 | >>> set `res` to True
22 | val_loss = 3.299391880156888, val_acc = [0.257, 0.5236, 0.647]
23 | >>> set `block_sizes` to [(64, 64, 1), (64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1), (1024, 1024, 1)]
24 | >>> set `res` to False
25 | val_loss = 4.193392032270978, val_acc = [0.0827, 0.2677, 0.4042]
26 | 


--------------------------------------------------------------------------------
/exp2/src/logs3.txt:
--------------------------------------------------------------------------------
 1 | default parameters are:
 2 | {'block_sizes': [(64, 64, 1),
 3 |                  (64, 128, 2),
 4 |                  (128, 256, 2),
 5 |                  (256, 512, 2),
 6 |                  (512, 1024, 2)],
 7 |  'dropout': (0.2, 0.5),
 8 |  'epochs': 40,
 9 |  'lr_decay': 0.1,
10 |  'lr_init': 0.001,
11 |  'lr_min': 1e-05,
12 |  'lr_min_delta': 0.0,
13 |  'lr_patience': 1,
14 |  'norm': True,
15 |  'res': True,
16 |  'restore_best_weights': True,
17 |  'top': [1, 5, 10],
18 |  'val_min_delta': 0.0,
19 |  'val_patience': 3}
20 | >>> set `dropout` to (0.0, 0.0)
21 | val_loss = 2.6277607641402323, val_acc = [0.4119, 0.675, 0.7767]
22 | >>> set `dropout` to (0.1, 0.3)
23 | val_loss = 2.534848990713715, val_acc = [0.4245, 0.691, 0.7916]
24 | >>> set `dropout` to (0.3, 0.7)
25 | val_loss = 2.4117816208274503, val_acc = [0.4233, 0.6981, 0.7954]
26 | >>> set `lr_decay` to 0.5
27 | val_loss = 2.4610023521314, val_acc = [0.4162, 0.6913, 0.7891]
28 | >>> set `lr_decay` to 0.99
29 | val_loss = 2.5230503021531803, val_acc = [0.4066, 0.6773, 0.7828]
30 | 


--------------------------------------------------------------------------------
/exp2/src/main.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch as t
  3 | import torch.nn as nn
  4 | from torch.optim import Adam
  5 | from torch.optim.lr_scheduler import ReduceLROnPlateau
  6 | from data import TinyImageNet
  7 | from torch.utils.data import DataLoader
  8 | from torchvision import transforms
  9 | from model import CNN
 10 | from tqdm import tqdm
 11 | import os
 12 | from pprint import pprint
 13 | import random
 14 | from copy import deepcopy
 15 | import sys
 16 | from matplotlib import pyplot as plt
 17 | 
 18 | 
 19 | def set_seed(seed=123):
 20 |     random.seed(seed)
 21 |     os.environ['PYTHONHASHSEED'] = str(seed)
 22 |     np.random.seed(seed)
 23 |     t.manual_seed(seed)
 24 |     t.cuda.manual_seed(seed)
 25 |     t.backends.cudnn.deterministic = True
 26 | 
 27 | 
 28 | def train(model, train_data, epochs,
 29 |           lr_init=1e-3, lr_min=1e-5, lr_decay=1., lr_min_delta=0., lr_patience=1,
 30 |           val_data=None, val_min_delta=0., val_patience=1,
 31 |           restore_best_weights=True, top=1, verbose=True, device='cpu'):
 32 |     min_val_loss = np.inf
 33 |     wait = 0
 34 |     best_weights = None
 35 |     history = {}
 36 |     model = nn.DataParallel(model)
 37 |     model = model.to(device)
 38 |     optimizer = Adam(model.parameters(), lr=lr_init)
 39 |     scheduler = ReduceLROnPlateau(optimizer, 'min', factor=lr_decay, patience=lr_patience,
 40 |                                   threshold=lr_min_delta, verbose=verbose, min_lr=lr_min)
 41 |     loss_fc = nn.CrossEntropyLoss()
 42 |     for epoch in range(epochs):
 43 |         pbar_batch = train_data
 44 |         if verbose:
 45 |             pbar_batch = tqdm(train_data, desc=f'[Epoch {epoch + 1}/{epochs}]', unit='batch',
 46 |                               ascii=True, bar_format='{desc:<13.13}{percentage:3.0f}%|{bar:10}{r_bar}')
 47 |         for i, data in enumerate(pbar_batch, 1):
 48 |             inputs, labels = data
 49 |             inputs = inputs.to(device)
 50 |             labels = labels.to(device)
 51 |             outputs = model(inputs)
 52 |             loss = loss_fc(outputs, labels.long())
 53 |             optimizer.zero_grad()
 54 |             loss.backward()
 55 |             optimizer.step()
 56 | 
 57 |             predicts = t.max(outputs, 1)[1]
 58 |             batch_total_num = labels.size(0)
 59 |             batch_correct_num = (predicts == labels.data).sum().item()
 60 |             if verbose:
 61 |                 pbar_batch.set_postfix({'train_loss': loss.item(),
 62 |                                         'train_acc': f'{batch_correct_num}/{batch_total_num}'})
 63 |         train_loss, train_acc = validate(model, train_data, 'train', top, verbose, device)
 64 | 
 65 |         history.setdefault('epoch', []).append(epoch)
 66 |         history.setdefault('train_loss', []).append(train_loss)
 67 |         history.setdefault('train_acc', []).append(train_acc[0])
 68 |         if val_data is not None:
 69 |             val_loss, val_acc = validate(model, val_data, 'val', top, verbose, device)
 70 |             history.setdefault('val_loss', []).append(val_loss)
 71 |             history.setdefault('val_acc', []).append(val_acc[0])
 72 | 
 73 |             # schedule lr
 74 |             scheduler.step(val_loss)
 75 |             # control early stopping
 76 |             if val_loss < min_val_loss - val_min_delta:
 77 |                 min_val_loss = val_loss
 78 |                 wait = 0
 79 |                 best_weights = deepcopy(model.state_dict())
 80 |                 # print(best_weights['fc.bias'])
 81 |             else:
 82 |                 wait += 1
 83 |             if wait > val_patience:
 84 |                 if verbose:
 85 |                     print('>>> Early Stopped.')
 86 |                 if restore_best_weights:
 87 |                     model.load_state_dict(best_weights)
 88 |                     # print(model.state_dict().copy()['fc.bias'])
 89 |                 break
 90 |     return model, history
 91 | 
 92 | 
 93 | def validate(model, dataloader, mode='val', top=1,  verbose=True, device='cpu'):
 94 |     model.eval()
 95 |     tops = top if isinstance(top, list) else [top]
 96 |     losses = []
 97 |     correct_nums = np.zeros_like(tops)
 98 |     total_nums = np.zeros_like(tops)
 99 |     loss_fc = nn.CrossEntropyLoss()
100 |     for inputs, labels in dataloader:
101 |         inputs = inputs.to(device)
102 |         labels = labels.to(device)
103 |         outputs = model(inputs)
104 |         loss = loss_fc(outputs, labels.long())
105 |         predicts = [t.argsort(outputs, dim=1, descending=True)[:, :top] for top in tops]
106 | 
107 |         losses.append(loss.item())
108 |         for i, predict in enumerate(predicts):
109 |             total_nums[i] += labels.size(0)
110 |             correct_nums[i] += (predict == labels.data.unsqueeze(1)).sum().item()
111 | 
112 |     avg_loss = np.mean(losses)
113 |     acc_list = [correct_num / total_num for correct_num, total_num in zip(correct_nums, total_nums)]
114 |     if verbose:
115 |         print(f"{f'{mode}_loss':>11} = {avg_loss:<6.4f}, ", end='')
116 |         print(', '.join([f"{f'{mode}_acc_top{top}':>15} = {acc:<6.4f}" for top, acc in zip(tops, acc_list)]))
117 |     model.train()
118 |     return avg_loss, acc_list
119 | 
120 | 
def plot_history(history):
    """Plot loss (left axis) and accuracy (right axis) per epoch to ``history.png``.

    Args:
        history: Dict with ``'epoch'``, ``'train_loss'`` and ``'train_acc'``
            lists; ``'val_loss'`` and ``'val_acc'`` are plotted when present
            (they are absent when training ran without validation data).
    """
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    x = history['epoch']
    ax1.set_title('Training history')
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss')
    ax2.set_ylabel('acc')
    lines = ax1.plot(x, history['train_loss'], label='train_loss')
    if 'val_loss' in history:
        lines += ax1.plot(x, history['val_loss'], label='val_loss')
    lines += ax2.plot(x, history['train_acc'], '-.', label='train_acc')
    if 'val_acc' in history:
        lines += ax2.plot(x, history['val_acc'], '-.', label='val_acc')
    # One combined legend for the curves of both axes.
    labels = [line.get_label() for line in lines]
    plt.legend(lines, labels)
    plt.savefig('history.png')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
137 | 
138 | 
def run(params, plot=False, verbose=False):
    """Build a ``CNN`` from ``params``, train it and score it on the validation set.

    Uses the module-level ``train_dataloader`` and ``val_dataloader``.

    Args:
        params: Dict with the keys ``block_sizes``, ``res``, ``norm``,
            ``dropout`` (pair: conv dropout, fc dropout), ``epochs``,
            ``lr_init``, ``lr_min``, ``lr_decay``, ``lr_min_delta``,
            ``lr_patience``, ``val_min_delta``, ``val_patience``,
            ``restore_best_weights`` and ``top``. ``device`` is optional and
            defaults to ``'cuda'`` when CUDA is available, else ``'cpu'``.
        plot: Save the training curves via ``plot_history`` when true.
        verbose: Print the parameter count and training progress.

    Returns:
        ``(loss, acc)`` as returned by ``validate`` on the validation set.
    """
    # Parameter dicts built without a 'device' entry must not fail with KeyError.
    device = params.get('device', 'cuda' if t.cuda.is_available() else 'cpu')
    cnn = CNN(params['block_sizes'], params['res'], params['norm'],
              params['dropout'][0], params['dropout'][1])
    if verbose:
        print('number of parameters: {}'.format(sum(p.numel() for p in cnn.parameters() if p.requires_grad)))
    cnn, history = train(
        model=cnn,
        train_data=train_dataloader,
        epochs=params['epochs'],
        lr_init=params['lr_init'],
        lr_min=params['lr_min'],
        lr_decay=params['lr_decay'],
        lr_min_delta=params['lr_min_delta'],
        lr_patience=params['lr_patience'],
        val_data=val_dataloader,
        val_min_delta=params['val_min_delta'],
        val_patience=params['val_patience'],
        restore_best_weights=params['restore_best_weights'],
        top=params['top'],
        verbose=verbose,
        device=device
    )
    if plot:
        plot_history(history)
    loss, acc = validate(cnn, val_dataloader, 'val', top=params['top'], verbose=False, device=device)
    return loss, acc
165 | 
166 | 
if __name__ == "__main__":
    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> #
    # run following command in terminal     #
    # $ python main.py      (job 0)         #
    # $ python main.py 1                    #
    # $ python main.py 2                    #
    # $ python main.py 3                    #
    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> #

    # Job 0 (default) is one run with `default_params`; jobs 1-3 sweep the
    # listed keys of `param_grids` one value at a time.
    job_keys = {
        1: ['block_sizes', 'norm'],
        2: ['res'],
        3: ['dropout', 'lr_decay']
    }
    try:
        job = int(sys.argv[1])
    except IndexError:
        job = 0
    if job != 0 and job not in job_keys:
        # Reject unknown jobs before the (slow) dataset load.
        sys.exit(f'unknown job {job}: expected 0, 1, 2 or 3')

    # Select the visible GPUs before anything initialises CUDA.
    os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3'
    device = 'cuda'
    # devices = [0, 1, 2, 3]
    # device = f'cuda:{devices[job]}'

    data_root = '/home/liuwei/projects/DL_exps/exp2/tiny-imagenet-200/'
    batch_size = 256*3

    train_dataset = TinyImageNet(data_root, 'train', transforms.Compose([transforms.ToTensor()]))
    val_dataset = TinyImageNet(data_root, 'val', transforms.Compose([transforms.ToTensor()]))
    train_dataloader = DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_dataloader = DataLoader(
        dataset=val_dataset, batch_size=batch_size, shuffle=False)

    # Each entry is (c_in, c_out, stride) for one BasicBlock.
    blocks_11 = [
        (64, 64, 1),
        (64, 128, 2),
        (128, 256, 2),
        (256, 512, 2),
        (512, 1024, 2),
    ]  # 11 conv layers
    blocks_21 = [
        (64, 64, 1),
        (64, 64, 1),
        (64, 128, 2),
        (128, 128, 1),
        (128, 256, 2),
        (256, 256, 1),
        (256, 512, 2),
        (512, 512, 1),
        (512, 1024, 2),
        (1024, 1024, 1)
    ]  # 21 conv layers
    blocks_31 = [
        (64, 64, 1),
        (64, 64, 1),
        (64, 64, 1),
        (64, 128, 2),
        (128, 128, 1),
        (128, 128, 1),
        (128, 256, 2),
        (256, 256, 1),
        (256, 256, 1),
        (256, 512, 2),
        (512, 512, 1),
        (512, 512, 1),
        (512, 1024, 2),
        (1024, 1024, 1),
        (1024, 1024, 1),
    ]  # 31 conv layers

    default_params = {
        'block_sizes': blocks_31,  # switch to blocks_11 for the shallow baseline
        'epochs': 80,
        'res': True,
        'norm': True,
        'dropout': (0.1, 0.5),
        'lr_init': 1e-3,
        'lr_min': 1e-5,
        'lr_decay': 0.5,
        'lr_min_delta': 0.,
        'lr_patience': 2,
        'val_min_delta': 0.,
        'val_patience': 30,
        'top': [1, 5, 10],
        'restore_best_weights': True,
        # run() forwards this to train()/validate(); without it run() has no
        # device to use.
        'device': device
    }
    param_grids = {
        'block_sizes': [blocks_11, blocks_21, blocks_31],
        'res': [True, False],
        'norm': [True, False],
        'dropout': [(0., 0.), (0.1, 0.3), (0.2, 0.5), (0.3, 0.7)],
        'lr_decay': [0.1, 0.5, 0.99],
    }

    set_seed(17717)

    logfile = f'logs{job}.txt'

    print("default parameters are:")
    pprint(default_params, width=40)
    with open(logfile, 'w') as file:
        file.write("default parameters are:\n")
        pprint(default_params, stream=file, width=40)

    if job == 0:
        loss, acc_list = run(default_params, plot=True, verbose=True)
        info = f'val_loss = {loss}, val_acc = {acc_list}'
        print(info)
        with open(logfile, 'a') as f:
            f.write(info + '\n')
    else:
        for key in job_keys[job]:
            for v in param_grids[key]:
                # Skip values equal to the default, except for the two keys
                # whose default setting is part of the comparison.
                if key not in ['block_sizes', 'res'] and v == default_params[key]:
                    continue
                new_params = default_params.copy()
                info = ''
                if key == 'res':
                    # The residual comparison always uses the deepest network.
                    new_params.update({'block_sizes': param_grids['block_sizes'][-1]})
                    info = f'>>> set `block_sizes` to {param_grids["block_sizes"][-1]}\n'
                new_params.update({key: v})
                info += f'>>> set `{key}` to {v}'
                print(info)
                # Open and close the log around each write so the entry is on
                # disk before the long training run and no handle is left open
                # if run() raises.
                with open(logfile, 'a') as f:
                    f.write(info + '\n')
                loss, acc_list = run(new_params, verbose=True)
                info = f'val_loss = {loss}, val_acc = {acc_list}'
                print(info)
                with open(logfile, 'a') as f:
                    f.write(info + '\n')
325 | 


--------------------------------------------------------------------------------
/exp2/src/model.py:
--------------------------------------------------------------------------------
 1 | import torch as t
 2 | import torch.nn as nn
 3 | from torch.nn.modules.activation import ReLU
 4 | 
 5 | 
 6 | class BasicBlock(nn.Module):
 7 |     def __init__(self, c_in: int, c_out: int, stride: int = 1, res: bool = False, norm: bool = True, dropout: float = 0.2):
 8 |         super().__init__()
 9 |         self.res = res
10 |         self.conv1 = nn.Conv2d(c_in, c_out, 3, stride, 1)
11 |         self.bn1 = nn.BatchNorm2d(c_out) if norm else nn.Identity()
12 |         self.relu1 = nn.ReLU()
13 |         self.dropout1 = nn.Dropout(dropout)
14 |         self.conv2 = nn.Conv2d(c_out, c_out, 3, 1, 1)
15 |         self.bn2 = nn.BatchNorm2d(c_out) if norm else nn.Identity()
16 |         if res and stride != 1:
17 |             self.downsample = nn.Sequential(
18 |                 nn.Conv2d(c_in, c_out, 1, stride, 0),
19 |                 nn.BatchNorm2d(c_out) if norm else nn.Identity()
20 |             )
21 |         else:
22 |             self.downsample = None
23 |         self.relu2 = nn.ReLU()
24 |         self.dropout2 = nn.Dropout(dropout)
25 | 
26 |     def forward(self, x):
27 |         identity = x
28 |         out = self.conv1(x)
29 |         out = self.bn1(out)
30 |         out = self.relu1(out)
31 |         out = self.dropout1(out)
32 |         out = self.conv2(out)
33 |         out = self.bn2(out)
34 | 
35 |         if self.downsample is not None:
36 |             identity = self.downsample(identity)
37 |         if self.res:
38 |             out += identity
39 |         out = self.relu2(out)
40 |         out = self.dropout2(out)
41 |         return out
42 | 
43 | 
class CNN(nn.Module):
    """Image classifier: 5x5 conv stem, a stack of ``BasicBlock``s, 200-way head.

    Args:
        block_sizes: Sequence of argument tuples, each unpacked into the
            leading positional arguments of ``BasicBlock``. The second entry
            of the last tuple sets the input width of the final linear layer.
        res: Forwarded to every ``BasicBlock``.
        norm: Use batch norm in the stem and in every block.
        conv_dropout: Dropout probability inside the blocks.
        fc_dropout: Dropout probability in front of the final linear layer.
    """

    def __init__(self, block_sizes, res: bool = False, norm: bool = True, conv_dropout=0.2, fc_dropout=0.5):
        super().__init__()
        # Stem keeps the spatial size (stride 1, padding 2); the pooling halves it.
        stem = [nn.Conv2d(3, 64, 5, 1, 2)]  # 64*64*64
        stem.append(nn.BatchNorm2d(64) if norm else nn.Identity())
        stem.append(nn.ReLU())
        stem.append(nn.MaxPool2d(2, 2))  # 64*32*32
        self.conv1 = nn.Sequential(*stem)
        self.block_list = nn.ModuleList(
            [BasicBlock(*spec, res=res, norm=norm, dropout=conv_dropout) for spec in block_sizes]
        )
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(fc_dropout)
        last_width = block_sizes[-1][1]
        self.fc = nn.Linear(last_width, 200)

    def forward(self, x):
        """Return the 200 class scores for a batch of images ``x``."""
        feats = self.conv1(x)
        for blk in self.block_list:
            feats = blk(feats)
        pooled = self.avgpool(feats)
        # Collapse everything except the batch dimension.
        flat = pooled.view(pooled.shape[0], -1)
        return self.fc(self.dropout(flat))
70 | 
71 | 
class ResNet(nn.Module):
    """Image classifier: strided 7x7 conv stem, ``BasicBlock`` stack, 200-way head.

    Same layout as ``CNN`` apart from the stem, which uses a stride-2 7x7
    convolution followed by a stride-2 3x3 max pooling.

    Args:
        block_sizes: Sequence of argument tuples, each unpacked into the
            leading positional arguments of ``BasicBlock``. The second entry
            of the last tuple sets the input width of the final linear layer.
        res: Forwarded to every ``BasicBlock``.
        norm: Use batch norm in the stem and in every block.
        conv_dropout: Dropout probability inside the blocks.
        fc_dropout: Dropout probability in front of the final linear layer.
    """

    def __init__(self, block_sizes, res: bool = False, norm: bool = True, conv_dropout=0.2, fc_dropout=0.5):
        super().__init__()
        stem = [nn.Conv2d(3, 64, 7, 2, 3)]  # 64*32*32
        stem.append(nn.BatchNorm2d(64) if norm else nn.Identity())
        stem.append(nn.ReLU())
        stem.append(nn.MaxPool2d(3, 2, 1))  # 64*16*16
        self.conv1 = nn.Sequential(*stem)
        self.block_list = nn.ModuleList(
            [BasicBlock(*spec, res=res, norm=norm, dropout=conv_dropout) for spec in block_sizes]
        )
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(fc_dropout)
        last_width = block_sizes[-1][1]
        self.fc = nn.Linear(last_width, 200)

    def forward(self, x):
        """Return the 200 class scores for a batch of images ``x``."""
        feats = self.conv1(x)
        for blk in self.block_list:
            feats = blk(feats)
        pooled = self.avgpool(feats)
        # Collapse everything except the batch dimension.
        flat = pooled.view(pooled.shape[0], -1)
        return self.fc(self.dropout(flat))
98 | 


--------------------------------------------------------------------------------
/exp2/src/test.py:
--------------------------------------------------------------------------------
  1 | from model import ResNet
  2 | from main import train, validate, plot_history, set_seed, os, pprint
  3 | from data import TinyImageNet
  4 | from torch.utils.data import DataLoader
  5 | from torchvision import transforms
  6 | 
  7 | 
  8 | def run(params, plot=False, verbose=False):
  9 |     cnn = ResNet(params['block_sizes'], params['res'], params['norm'],
 10 |                    params['dropout'][0], params['dropout'][1])
 11 |     if verbose:
 12 |         print('number of parameters: {}'.format(sum(p.numel() for p in cnn.parameters() if p.requires_grad)))
 13 |     cnn, history = train(
 14 |         model=cnn,
 15 |         train_data=train_dataloader,
 16 |         epochs=params['epochs'],
 17 |         lr_init=params['lr_init'],
 18 |         lr_min=params['lr_min'],
 19 |         lr_decay=params['lr_decay'],
 20 |         lr_min_delta=params['lr_min_delta'],
 21 |         lr_patience=params['lr_patience'],
 22 |         val_data=val_dataloader,
 23 |         val_min_delta=params['val_min_delta'],
 24 |         val_patience=params['val_patience'],
 25 |         restore_best_weights=params['restore_best_weights'],
 26 |         top=params['top'],
 27 |         verbose=verbose,
 28 |         device=params['device']
 29 |     )
 30 |     if plot:
 31 |         plot_history(history)
 32 |     loss, acc = validate(cnn, val_dataloader, 'val', top=params['top'], verbose=False, device=params['device'])
 33 |     return loss, acc
 34 | 
 35 | 
 36 | if __name__ == "__main__":
 37 |     data_root = '/home/liuwei/projects/DL_exps/exp2/tiny-imagenet-200/'
 38 |     batch_size = 256 * 3
 39 | 
 40 |     train_dataset = TinyImageNet(data_root, 'train', transforms.Compose([transforms.ToTensor()]))
 41 |     val_dataset = TinyImageNet(data_root, 'val', transforms.Compose([transforms.ToTensor()]))
 42 |     train_dataloader = DataLoader(
 43 |         dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
 44 |     val_dataloader = DataLoader(
 45 |         dataset=val_dataset, batch_size=batch_size, shuffle=False)
 46 | 
 47 |     default_params = {
 48 |         'block_sizes':
 49 |             [
 50 |                 (64, 64, 1),
 51 |                 (64, 64, 1),
 52 |                 (64, 64, 1),
 53 |                 (64, 128, 2),
 54 |                 (128, 128, 1),
 55 |                 (128, 128, 1),
 56 |                 (128, 128, 1),
 57 |                 (128, 256, 2),
 58 |                 (256, 256, 1),
 59 |                 (256, 256, 1),
 60 |                 (256, 256, 1),
 61 |                 (256, 256, 1),
 62 |                 (256, 256, 1),
 63 |                 (256, 512, 2),
 64 |                 (512, 512, 1),
 65 |                 (512, 512, 1),
 66 |             ],  # 33 conv layers
 67 |         'epochs': 40,
 68 |         'res': True,
 69 |         'norm': True,
 70 |         'dropout': (0., 0.),
 71 |         'lr_init': 1e-3,
 72 |         'lr_min': 1e-5,
 73 |         'lr_decay': 0.5,
 74 |         'lr_min_delta': 0.,
 75 |         'lr_patience': 2,
 76 |         'val_min_delta': 0.,
 77 |         'val_patience': 30,
 78 |         'top': [1, 5, 10],
 79 |         'restore_best_weights': True,
 80 |         'device': 'cuda'
 81 |     }
 82 | 
 83 |     set_seed(17717)
 84 |     os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3'
 85 | 
 86 |     logfile = f'logs_test.txt'
 87 | 
 88 |     print("default parameters are:")
 89 |     pprint(default_params, width=40)
 90 |     with open(logfile, 'w') as file:
 91 |         file.write("default parameters are:\n")
 92 |         pprint(default_params, stream=file, width=40)
 93 | 
 94 |     f = open(logfile, 'a')
 95 |     loss, acc_list = run(default_params, plot=True, verbose=True)
 96 |     info = f'val_loss = {loss}, val_acc = {acc_list}'
 97 |     print(info)
 98 |     f.write(info + '\n')
 99 |     f.close()
100 |     exit()
101 | 


--------------------------------------------------------------------------------
/exp2/实验二：卷积神经网络.md:
--------------------------------------------------------------------------------
  1 | # 实验二：卷积神经网络
  2 | 
  3 | 姓名：刘威
  4 | 
  5 | 学号：PB18010469
  6 | 
  7 | ## 实验目的
  8 | 
  9 | + 了解并熟悉卷积神经网络的原理及其学习算法
 10 | + 研究dropout对卷积神经网络泛化性能的影响
 11 | + 研究normalization对卷积神经网络的影响
 12 | + 研究residual connection对深层卷积神经网络性能的影响
 13 | + 研究学习率衰减对卷积神经网络性能的影响
 14 | + 研究网络深度对卷积神经网络性能的影响
 15 | 
 16 | ## 实验原理
 17 | 
 18 | **二维卷积:**
 19 | 
 20 | ![image-20210428210821879](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210428210821879.png)
 21 | 
 22 | ![image-20210428210908364](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210428210908364.png)
 23 | 
 24 | **多层卷积：**
 25 | 
 26 | ![image-20210428210947000](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210428210947000.png)
 27 | 
 28 | **Padding:**
 29 | 
 30 | ![image-20210428211010668](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210428211010668.png)
 31 | 
 32 | **池化：**
 33 | 
 34 | ![image-20210428211123780](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210428211123780.png)
 35 | 
 36 | **卷积神经网络的典型结构：**
 37 | 
 38 | ![image-20210428211258560](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210428211258560.png)
 39 | 
 40 | **残差网络：**
 41 | 
 42 | ![image-20210428212519451](C:\Users\Infiniv\AppData\Roaming\Typora\typora-user-images\image-20210428212519451.png)
 43 | 
 44 | ## 实验内容
 45 | 
 46 | 使用`pytorch`或者`tensorflow`实现卷积神经网络，在`ImageNet`数据集上进行图片分类。研究dropout、 normalization、 learning rate decay、 residual connection、网络深度等超参数对分类性能的影响。
 47 | 
 48 | 数据集：`tiny-imagenet-200`
 49 | 
 50 | 
 51 | ## 实验结果
 52 | 
 53 | 实验使用`pytorch`进行。
 54 | 
 55 | ### 源码结构及说明
 56 | 
 57 | **模型结构：**
 58 | 
 59 | 模型的主体结构由基础块堆叠而成。基础块由两层`3 x 3`的卷积网络构成，可以通过参数设定该基础块的输入通道数，输出通道数，是否使用批量标准化，以及是否使用残差连接。除此之外还可以选择是否在第一层卷积网络中使用长度为2的步长来缩减特征图的大小，使用多大舍弃概率的dropout层。具体实现如下：
 60 | 
 61 | ```python
 62 | class BasicBlock(nn.Module):
 63 |     def __init__(self, c_in: int, c_out: int, stride: int = 1, res: bool = False, norm: bool = True, dropout: float = 0.2):
 64 |         super().__init__()
 65 |         self.res = res
 66 |         self.conv1 = nn.Conv2d(c_in, c_out, 3, stride, 1)
 67 |         self.bn1 = nn.BatchNorm2d(c_out) if norm else nn.Identity()
 68 |         self.relu1 = nn.ReLU()
 69 |         self.dropout1 = nn.Dropout(dropout)
 70 |         self.conv2 = nn.Conv2d(c_out, c_out, 3, 1, 1)
 71 |         self.bn2 = nn.BatchNorm2d(c_out) if norm else nn.Identity()
 72 |         if res and stride != 1:
 73 |             self.downsample = nn.Sequential(
 74 |                 nn.Conv2d(c_in, c_out, 1, stride, 0),
 75 |                 nn.BatchNorm2d(c_out) if norm else nn.Identity()
 76 |             )
 77 |         else:
 78 |             self.downsample = None
 79 |         self.relu2 = nn.ReLU()
 80 |         self.dropout2 = nn.Dropout(dropout)
 81 | 
 82 |     def forward(self, x):
 83 |         identity = x
 84 |         out = self.conv1(x)
 85 |         out = self.bn1(out)
 86 |         out = self.relu1(out)
 87 |         out = self.dropout1(out)
 88 |         out = self.conv2(out)
 89 |         out = self.bn2(out)
 90 | 
 91 |         if self.downsample is not None:
 92 |             identity = self.downsample(identity)
 93 |         if self.res:
 94 |             out += identity
 95 |         out = self.relu2(out)
 96 |         out = self.dropout2(out)
 97 |         return out
 98 | ```
 99 | 
100 | 可见如果统一参数`res, norm, dropout`,  决定基础块结构的参数为`(c_in, c_out, stride)`，只需要确定这个三元组即可确定网络的结构。
101 | 
102 | 固定网络的输入层和输出层，中间层使用若干基础块堆叠，可以自由调整网络的深度。传入各个基础块的三元组参数，就可以确定整个网络的结构。总的网络实现如下：
103 | 
104 | ```python
105 | class CNN(nn.Module):
106 |     def __init__(self, block_sizes, res: bool = False, norm: bool = True, conv_dropout=0.2, fc_dropout=0.5):
107 |         super().__init__()
108 |         self.conv1 = nn.Sequential(
109 |             nn.Conv2d(3, 64, 5, 1, 2),  # 64*64*64
110 |             nn.BatchNorm2d(64) if norm else nn.Identity(),
111 |             nn.ReLU(),
112 |             nn.MaxPool2d(2, 2)  # 64*32*32
113 |         )
114 |         self.block_list = nn.ModuleList()
115 |         for block_size in block_sizes:
116 |             block = BasicBlock(*block_size, res=res, norm=norm, dropout=conv_dropout)
117 |             self.block_list.append(block)
118 |         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
119 |         self.dropout = nn.Dropout(fc_dropout)
120 |         self.fc = nn.Linear(block_sizes[-1][1], 200)
121 | 
122 |     def forward(self, x):
123 |         x = self.conv1(x)
124 |         for block in self.block_list:
125 |             x = block(x)
126 |         x = self.avgpool(x)
127 |         x = x.view(x.shape[0], -1)
128 |         x = self.dropout(x)
129 |         x = self.fc(x)
130 |         return x
131 | ```
132 | 
133 | 其中`block_sizes`就是各个基础块的三元组参数组成的列表。例如，本实验使用了如下三组`block_sizes`: 
134 | 
135 | ```python
136 | # 结构1: 共11层卷积（算上输入层），约20M参数
137 | [(64, 64, 1), (64, 128, 2), (128, 256, 2), (256, 512, 2), (512, 1024, 2)],
138 | # 结构2: 共21层卷积，约45M参数
139 | [(64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 256, 2),     (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1)]
140 | # 结构3: 共31层卷积，约70M参数
141 | [(64, 64, 1), (64, 64, 1), (64, 64, 1), (64, 128, 2), (128, 128, 1), (128, 128, 1), (128, 256, 2), (256, 256, 1), (256, 256, 1), (256, 512, 2), (512, 512, 1), (512, 512, 1), (512, 1024, 2), (1024, 1024, 1), (1024, 1024, 1)]
142 | ```
143 | 
144 | 
145 | ### 结果及分析
146 | 
147 | 本实验的默认参数如下：
148 | 
149 | ```python
150 | batch_size = 64
151 | default_params = {
152 |     'block_sizes':
153 |         [
154 |             (64, 64, 1),
155 |             (64, 128, 2),
156 |             (128, 256, 2),
157 |             (256, 512, 2),
158 |             (512, 1024, 2),
159 |         ],  # 堆叠的各个基础块的参数，即结构1
160 |     'epochs': 40,  # 最大训练轮数
161 |     'res': True,  # 是否在基础块中使用残差连接
162 |     'norm': True,  # 是否在卷积层后使用批量标准化
163 |     'dropout': (0.2, 0.5),  # 分别为卷积层，全连接层后的drop概率
164 |     'lr_init': 1e-3,  # 学习率的初始值
165 |     'lr_min': 1e-5,  # 学习率的最小值
166 |     'lr_decay': 0.1,  # 学习率的衰减倍率
167 |     'lr_min_delta': 0.,  # 认为验证集loss有明显降低的阈值，用于调整学习率
168 |     'lr_patience': 1,  # 验证集loss没有明显降低的连续轮数，用于调整学习率
169 |     'val_min_delta': 0.,  # 认为验证集loss有明显降低的阈值, 用于控制早停
170 |     'val_patience': 3,  # 验证集loss没有明显降低的连续轮数，用于控制早停
171 |     'top': [1, 5, 10],  # top n 准确率
172 |     'restore_best_weights': True  # 早停后，是否将模型权值恢复为验证集loss最低时的权值
173 | }
174 | ```
175 | 
176 | 将会进行调节并加以对比的参数包括：
177 | 
178 | ```python
179 | 'block_sizes', 'res', 'norm', 'dropout', 'lr_decay'
180 | ```
181 | 
182 | 此外，本实验固定随机种子, 保证结果可复现：
183 | 
184 | ```python
185 | import random
186 | import numpy as np
187 | import torch as t
188 | import os
189 | 
190 | def set_seed(seed):
191 |     random.seed(seed)
192 |     os.environ['PYTHONHASHSEED'] = str(seed)
193 |     np.random.seed(seed)
194 |     t.manual_seed(seed)
195 |     t.cuda.manual_seed(seed)
196 |     t.backends.cudnn.deterministic = True
197 | set_seed(17717)
198 | ```
199 | 
200 | #### 对比一：Dropout
201 | 
202 | 固定其他参数为默认参数，调整`dropout`为如下四组值：
203 | 
204 | ```python
205 | 'dropout': [(0., 0.), (0.1, 0.3), (0.2, 0.5), (0.3, 0.7)],
206 | ```
207 | 
208 | 训练结束后，对所得模型在验证集上进行验证，得到的结果如下表：
209 | 
210 | |  `dropout`   |    loss    | top 1 accuracy | top 5 accuracy | top 10 accuracy |
211 | | :----------: | :--------: | :------------: | :------------: | :-------------: |
212 | |  `(0., 0.)`  |   2.6278   |     0.4119     |     0.6750     |     0.7767      |
213 | | `(0.1, 0.3)` |   2.5348   |     0.4245     |     0.6910     |     0.7916      |
214 | | `(0.2, 0.5)` | **2.3826** |   **0.4432**   |   **0.7116**   |   **0.8041**    |
215 | | `(0.3, 0.7)` |   2.4118   |     0.4233     |     0.6981     |     0.7954      |
216 | 
217 | **分析：**dropout概率太小或者不使用dropout技术，过拟合风险更大；dropout概率太大，会导致欠拟合。应该选择合适的dropout概率来降低过拟合风险，提高模型性能。
218 | 
219 | #### 对比二：Normalization
220 | 
221 | 固定其他参数为默认参数，调整`norm`分别为`True` 或者 `False`.
222 | 
223 | 训练结束后，对所得模型在验证集上进行验证，得到的结果如下表：
224 | 
225 | | `norm`  |    loss    | top 1 accuracy | top 5 accuracy | top 10 accuracy |
226 | | :-----: | :--------: | :------------: | :------------: | :-------------: |
227 | | `True`  | **2.3826** |   **0.4432**   |   **0.7116**   |   **0.8041**    |
228 | | `False` |   5.2983   |     0.0050     |     0.0250     |     0.0500      |
229 | 
230 | **分析：**如果不加标准化层，学不到东西。因此在使用卷积神经网络时标准化层是必不可少的。
231 | 
232 | #### 对比三：Learning rate decay
233 | 
234 | 固定其他参数为默认参数，调整`lr_decay`分别为`[0.1, 0.5, 0.99]`.
235 | 
236 | 训练结束后，对所得模型在验证集上进行验证，得到的结果如下表：
237 | 
238 | | `lr_decay` |    loss    | top 1 accuracy | top 5 accuracy | top 10 accuracy |
239 | | :--------: | :--------: | :------------: | :------------: | :-------------: |
240 | |   `0.1`    | **2.3826** |   **0.4431**   |   **0.7116**   |   **0.8041**    |
241 | |   `0.5`    |   2.4610   |     0.4162     |     0.6913     |     0.7891      |
242 | |  `0.99`*   |   2.5231   |     0.4066     |     0.6773     |     0.7828      |
243 | 
244 | > *注：设置为`0.99`是因为`pytorch`的`ReduceLROnPlateau`只允许小于`1.0`的值，可以认为是不对学习率进行衰减。
245 | 
246 | **分析：**在训练网络时，应该适当调节学习率。在刚开始训练时可以使用较大的学习率以加快收敛速度；在接近极小值时，如果不对学习率进行调整，会由于步长过大越过极小值而无法收敛，这时应该减小学习率，以更好地收敛。
247 | 
248 | #### 对比四：Residual connection
249 | 
250 | 固定其他参数为默认参数，将`block_sizes`设置为**结构3**， 调整`res`分别为`True` 或者 `False`. 
251 | 
252 | 训练结束后，对所得模型在验证集上进行验证，得到的结果如下表：
253 | 
254 | |  `res`  |    loss    | top 1 accuracy | top 5 accuracy | top 10 accuracy |
255 | | :-----: | :--------: | :------------: | :------------: | :-------------: |
256 | | `True`  | **3.2994** |   **0.2570**   |   **0.5236**   |   **0.6470**    |
257 | | `False` |   4.1934   |     0.0827     |     0.2677     |     0.4042      |
258 | 
259 | **分析：**在**结构3**(31层卷积层)中，加残差连接相比不加有很大的性能提升，证实了残差连接能够极大改善深层网络难以优化的问题。
260 | 
261 | #### 对比五：网络深度
262 | 
263 | 固定其他参数为默认参数，将`block_sizes`分别设置为前述三种结构。
264 | 
265 | 训练结束后，对所得模型在验证集上进行验证，得到的结果如下表：
266 | 
267 | |   `block_sizes`   |    loss    | top 1 accuracy | top 5 accuracy | top 10 accuracy |
268 | | :---------------: | :--------: | :------------: | :------------: | :-------------: |
269 | | 结构1（11层卷积） | **2.3826** |   **0.4432**   |   **0.7116**   |   **0.8041**    |
270 | | 结构2（21层卷积） |   2.5370   |     0.3952     |     0.6694     |     0.7711      |
271 | | 结构3（31层卷积） |   3.3368   |     0.2576     |     0.5174     |     0.6403      |
272 | 
273 | **分析：**性能随着网络深度的增加变差了，但理论上深层网络应该至少能和浅层网络一样好。在实践中，更深层的网络更难优化，优化地形更为复杂，更容易陷入局部极小值；即使使用了残差连接技术，如果网络结构不适合当前任务或训练方法不当，单纯增加网络深度仍很可能导致更差的性能。
274 | 
275 | 在何恺明等人的论文中，引入残差连接后，深层网络相比浅层网络的训练loss降得更快了，最终效果也更好。而在我的实验中，深层网络的收敛速度却慢得多， 最终性能也要差一些，调了很久的参数也没有改善。其中的具体原因有待进一步做实验来弄明白，在这次实验中没有更多的时间、精力和算力来继续研究下去了(DDL要到了:-)。
276 | 
277 | ## 实验总结
278 | 
279 | 因为卷积神经网络的结构及原理比较简单，目标也很明确，本次实验从实现上来说并不难。但是由于算力有限，每跑出一次结果都要等很久，参数调节起来颇有困难，花了很多时间也没有明显提升。因此研究得并不够细致，有一些地方没有达到理论上那么好的结果。今后有机会还得多尝试尝试。
280 | 
281 | 


--------------------------------------------------------------------------------
/exp2/实验二：卷积神经网络.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp2/实验二：卷积神经网络.pdf


--------------------------------------------------------------------------------
/exp3/ReadMe.md:
--------------------------------------------------------------------------------
 1 | # 说明
 2 | 
 3 | ## glove.6B.zip
 4 | 
 5 | glove预训练词向量，也可以自己使用想用的词向量，使用方法参见https://blog.csdn.net/bqw18744018044/article/details/89575127
 6 | 
 7 | ## aclImdb_v1.tar.gz
 8 | 
 9 | 所使用的数据集
10 | 
11 | 具体可见：Maas, A. L., Daly, R. E., Pham, P. T., Huang, D., Ng, A. Y., & Potts, C. (2011, June). Learning word vectors for sentiment analysis. In Proceedings of the 49th annual meeting of the association for computational linguistics: Human language technologies-volume 1 (pp. 142-150). Association for Computational Linguistics.
12 | 
13 | ## IMDb.py
14 | 
15 | 数据集的基本处理代码
16 | 
17 | ## 模型评价指标
18 | 
19 | 预测准确率
20 | 
21 | ## SOTA方法准确率
22 | 
23 | ![img.png](img.png)
24 | 
25 | 可能有用的资料：
26 | 
27 | https://arxiv.org/pdf/1905.05583v3.pdf


--------------------------------------------------------------------------------
/exp3/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/img.png


--------------------------------------------------------------------------------
/exp3/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.19.2
2 | spacy==3.0.6
3 | torch==1.8.1
4 | torchtext==0.9.1
5 | tqdm==4.56.0
6 | 


--------------------------------------------------------------------------------
/exp3/src/config.py:
--------------------------------------------------------------------------------
 1 | raw_data_folder = './aclImdb/'  # root of the raw dataset; read as <split>/pos and <split>/neg
 2 | 
 3 | vectors_folder = './glove.6B/'  # passed as vectors_cache when the vocabulary is built
 4 | 
 5 | data_folder = './data/'  # where the pickled example caches (<split>.pkl) are stored
 6 | 
 7 | SEED = 2077  # passed to set_seed() at start-up
 8 | DEVICE = 'cuda:0'  # device the model, the loss and every batch are moved to
 9 | 
10 | VAL_RATIO = 0.2  # fraction of the training split held out for validation; falsy disables it
11 | 
12 | VECTORS = 'glove.6B.100d'  # pretrained vector name (a '.txt' suffix is stripped before use)
13 | 
14 | VOCAB_SIZE = 400000  # max_size of the vocabulary built from the training split
15 | EMBEDDING_DIM = 100  # must match VECTORS: the pretrained matrix is copied into the embedding
16 | HIDDEN_DIM = 64  # hidden size of the recurrent module
17 | N_LAYERS = 1  # number of stacked recurrent layers
18 | BIDIRECTIONAL = False  # whether the recurrent module is bidirectional
19 | DROPOUT = 0.  # dropout argument of the recurrent module
20 | BATCH_SIZE = 128  # batch size of every iterator
21 | N_EPOCHS = 10  # number of training epochs
22 | MODEL_BASE = 'RNN'  # 'LSTM' (case-insensitive) selects nn.LSTM; any other value uses nn.RNN
23 | 


--------------------------------------------------------------------------------
/exp3/src/data.py:
--------------------------------------------------------------------------------
 1 | from torchtext.legacy import data
 2 | import os
 3 | import torch
 4 | from tqdm import tqdm
 5 | from config import *
 6 | import spacy
 7 | import pickle
 8 | 
 9 | 
10 | _nlp = spacy.load('en_core_web_sm')  # spaCy pipeline, used below only to split text into tokens
11 | TEXT = data.Field(tokenize=lambda x: [t.text for t in _nlp(x)],  # tokens = spaCy token texts
12 |                   include_lengths=True, lower=True)  # batches carry a (tensor, lengths) pair
13 | LABEL = data.LabelField(use_vocab=False)  # labels are supplied directly as the integers 0/1
14 | 
15 | 
16 | def _get_examples(which, fields=None):
17 |     cache_file = os.path.join(data_folder, which + '.pkl')
18 |     if os.path.exists(cache_file):
19 |         with open(cache_file, 'rb') as f:
20 |             examples = pickle.load(f)
21 |         return examples
22 | 
23 |     examples = []
24 |     raw_folder = os.path.join(raw_data_folder, which)
25 |     for subfolder in ['pos', 'neg']:
26 |         folder_name = os.path.join(raw_folder, subfolder)
27 |         for file in tqdm(os.listdir(folder_name),
28 |                          bar_format='{percentage:3.0f}%|{bar:20}{r_bar}'):
29 |             with open(os.path.join(folder_name, file), 'rb') as f:
30 |                 text = f.read().decode('utf-8').replace('\n', '').lower()
31 |                 label = 1 if subfolder == 'pos' else 0
32 |                 examples.append(data.Example.fromlist([text, label], fields))
33 |     with open(cache_file, 'wb') as f:
34 |         pickle.dump(examples, f)
35 |     return examples
36 | 
37 | 
38 | def get_dataloader(text_field=TEXT, label_field=LABEL):
39 | 
40 |     fields = [('text', text_field), ('label', label_field)]
41 | 
42 |     train_data = _get_examples('train', fields)
43 |     test_data = _get_examples('test', fields)
44 |     train_data = data.Dataset(train_data, fields)
45 |     test_data = data.Dataset(test_data, fields)
46 |     val_data = None
47 |     if VAL_RATIO:
48 |         train_data, val_data = train_data.split(split_ratio=1 - VAL_RATIO)
49 |     vectors = VECTORS.replace('.txt', '')
50 |     text_field.build_vocab(train_data, max_size=VOCAB_SIZE,
51 |                            vectors=vectors, vectors_cache=vectors_folder)
52 |     label_field.build_vocab(train_data)
53 |     if VAL_RATIO:
54 |         train_iterator, val_iterator, test_iterator = data.BucketIterator.splits(
55 |             (train_data, val_data, test_data),
56 |             batch_size=BATCH_SIZE,
57 |             sort_key=lambda x: len(x.text),
58 |             sort_within_batch=True,
59 |         )
60 |         return train_iterator, val_iterator, test_iterator
61 |     else:
62 |         train_iterator, test_iterator = data.BucketIterator.splits(
63 |             (train_data, test_data),
64 |             batch_size=BATCH_SIZE,
65 |             sort=False
66 |         )
67 |         return train_iterator, test_iterator
68 | 


--------------------------------------------------------------------------------
/exp3/src/main.py:
--------------------------------------------------------------------------------
  1 | from utils import binary_acc, set_seed
  2 | from config import *
  3 | import spacy
  4 | import torch
  5 | import torch.nn as nn
  6 | from torch.optim import Adam
  7 | from data import TEXT, get_dataloader
  8 | import random
  9 | import os
 10 | from model import RNNClassifier
 11 | from utils import binary_acc
 12 | import numpy as np
 13 | from tqdm import tqdm
 14 | 
 15 | 
 16 | def train(model, iterator, optimizer, criterion):
 17 |     epoch_loss = 0
 18 |     epoch_acc = 0
 19 | 
 20 |     pbar = tqdm(iterator, unit='batch', ascii=True,
 21 |                 bar_format='{percentage:3.0f}%|{bar:20}{r_bar}')
 22 |     model.train()
 23 |     for batch in pbar:
 24 |         optimizer.zero_grad()
 25 | 
 26 |         (text, text_lengths), label = batch.text, batch.label
 27 |         text = text.to(DEVICE)
 28 |         label = label.to(DEVICE)
 29 | 
 30 |         preds = model(text, text_lengths).squeeze()
 31 |         loss = criterion(preds, label.float())
 32 |         loss.backward()
 33 |         optimizer.step()
 34 | 
 35 |         acc = binary_acc(preds, label)
 36 | 
 37 |         pbar.set_postfix({'train_loss': loss.item(),
 38 |                           'train_acc': acc.item()})
 39 | 
 40 |         epoch_loss += loss.item()
 41 |         epoch_acc += acc.item()
 42 | 
 43 |     return epoch_loss / len(iterator), epoch_acc / len(iterator)
 44 | 
 45 | 
 46 | def evaluate(model, iterator, criterion):
 47 |     epoch_loss = 0
 48 |     epoch_acc = 0
 49 | 
 50 |     model.eval()
 51 |     with torch.no_grad():
 52 |         for batch in iterator:
 53 |             (text, text_lengths), label = batch.text, batch.label
 54 |             text = text.to(DEVICE)
 55 |             label = label.to(DEVICE)
 56 | 
 57 |             preds = model(text, text_lengths).squeeze()
 58 |             loss = criterion(preds, label.float())
 59 |             acc = binary_acc(preds, label)
 60 | 
 61 |             epoch_loss += loss.item()
 62 |             epoch_acc += acc.item()
 63 | 
 64 |     return epoch_loss / len(iterator), epoch_acc / len(iterator)
 65 | 
 66 | 
 67 | if __name__ == '__main__':
 68 |     set_seed(SEED)
 69 |     train_iterator, val_iterator, test_iterator = get_dataloader()
 70 |     rnn = RNNClassifier(
 71 |         vocab_size=len(TEXT.vocab),
 72 |         embedding_dim=EMBEDDING_DIM,
 73 |         hidden_dim=HIDDEN_DIM,
 74 |         n_layers=N_LAYERS,
 75 |         bidirectional=BIDIRECTIONAL,
 76 |         dropout=DROPOUT,
 77 |         model_base=MODEL_BASE
 78 |     )
 79 |     pretrained_embeddings = TEXT.vocab.vectors
 80 |     rnn.embedding.weight.data.copy_(pretrained_embeddings)
 81 |     optimizer = Adam(rnn.parameters())
 82 |     criterion = nn.BCELoss()
 83 | 
 84 |     rnn = rnn.to(DEVICE)
 85 |     criterion = criterion.to(DEVICE)
 86 | 
 87 |     best_valid_loss = np.inf
 88 |     for epoch in range(N_EPOCHS):
 89 |         print(f">>> Epoch {epoch+1}/{N_EPOCHS}")
 90 | 
 91 |         train_loss, train_acc = train(rnn, train_iterator, optimizer, criterion)
 92 |         valid_loss, valid_acc = evaluate(rnn, val_iterator, criterion)
 93 | 
 94 |         if valid_loss < best_valid_loss:
 95 |             best_valid_loss = valid_loss
 96 |             torch.save(rnn.state_dict(), './checkpoint/best_weights.pt')
 97 | 
 98 |         print(f'\tTrain Loss: {train_loss:.4f} | Train Acc: {train_acc * 100:.2f}%')
 99 |         print(f'\tValid Loss: {valid_loss:.4f} | Valid Acc: {valid_acc * 100:.2f}%')
100 |     print(">>> Testing...")
101 |     rnn.load_state_dict(torch.load("./checkpoint/best_weights.pt"))
102 |     test_loss, test_acc = evaluate(rnn, test_iterator, criterion)
103 |     print(f'\tTest Loss: {test_loss:.4f} | Test Acc: {test_acc * 100:.2f}%')
104 | 


--------------------------------------------------------------------------------
/exp3/src/model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.nn.utils.rnn import pack_padded_sequence
 4 | 
 5 | 
 6 | class RNNClassifier(nn.Module):
 7 |     def __init__(self, vocab_size, embedding_dim, hidden_dim,
 8 |                  n_layers: int = 1, bidirectional: bool = False,
 9 |                  dropout: float = 0., model_base: str = 'RNN'):
10 |         super(RNNClassifier, self).__init__()
11 |         self.bidirectional = bidirectional
12 |         self.model_base = model_base.lower()
13 |         if self.model_base == 'lstm':
14 |             model = nn.LSTM
15 |         else:
16 |             model = nn.RNN
17 | 
18 |         self.embedding = nn.Embedding(vocab_size, embedding_dim)
19 |         self.rnn = model(embedding_dim,
20 |                          hidden_dim,
21 |                          num_layers=n_layers,
22 |                          bidirectional=bidirectional,
23 |                          dropout=dropout)
24 |         if self.bidirectional:
25 |             hidden_dim *= 2
26 |         self.fc = nn.Linear(hidden_dim, 1)
27 |         self.act = nn.Sigmoid()
28 | 
29 |     def forward(self, x, x_len):
30 |         x = self.embedding(x)
31 |         x = pack_padded_sequence(x, x_len)
32 |         if self.model_base == 'lstm':
33 |             _, (h_n, _) = self.rnn(x)
34 |         else:
35 |             _, h_n = self.rnn(x)  # h_n.shape = (num_layers * num_directions, batch, hidden_size)
36 |         if self.bidirectional:
37 |             hidden = torch.cat((h_n[-2], h_n[-1]), dim=1)  # get last layer
38 |         else:
39 |             hidden = h_n[-1]
40 |         logits = self.fc(hidden)
41 |         output = self.act(logits)
42 |         return output
43 | 


--------------------------------------------------------------------------------
/exp3/src/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import random
 3 | import os
 4 | import numpy as np
 5 | 
 6 | 
 7 | def binary_acc(preds, label):
 8 |     preds = torch.round(preds)
 9 |     correct = torch.eq(preds, label).float()
10 |     acc = correct.sum() / correct.shape[0]
11 |     return acc
12 | 
13 | 
14 | def set_seed(seed=123):
15 |     random.seed(seed)
16 |     np.random.seed(seed)
17 |     os.environ["PYTHONHASHSEED"] = str(seed)
18 |     torch.manual_seed(seed)
19 |     torch.cuda.manual_seed_all(seed)
20 |     # torch.use_deterministic_algorithms(True)
21 |     # torch.backends.cudnn.enabled = False
22 |     torch.backends.cudnn.benchmark = False
23 |     torch.backends.cudnn.deterministic = True
24 |     os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2"
25 | 


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络.md:
--------------------------------------------------------------------------------
  1 | # 实验三：循环神经网络
  2 | 
  3 | 姓名：刘威
  4 | 
  5 | 学号：PB18010469
  6 | 
  7 | 
  8 | ## 实验目的
  9 | 
 10 | * 了解并熟悉循环神经网络的原理
 11 | * 了解随时间反向传播算法（BPTT）
 12 | * 学会使用循环神经网络完成文本分类任务
 13 | 
 14 | 
 15 | ## 实验原理
 16 | 
 17 | ![image-20210513172244821](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172244821.png)
 18 | 
 19 | ![image-20210513172303845](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172303845.png)
 20 | 
 21 | ![image-20210513172317856](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172317856.png)
 22 | 
 23 | ![image-20210513172733303](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172733303.png)
 24 | 
 25 | 
 26 | 
 27 | ![image-20210513172747548](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172747548.png)
 28 | 
 29 | ![image-20210513172852144](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172852144.png)
 30 | 
 31 | ![image-20210513172947008](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513172947008.png)
 32 | 
 33 | 
 34 | 
 35 | ![image-20210513173012590](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513173012590.png)
 36 | 
 37 | ![image-20210513173151864](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513173151864.png)
 38 | 
 39 | 
 40 | 
 41 | ![image-20210513173355442](%E5%AE%9E%E9%AA%8C%E4%B8%89%EF%BC%9A%E5%BE%AA%E7%8E%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210513173355442.png)
 42 | 
 43 | 
 44 | ## 实验内容
 45 | 
 46 | 编写RNN的语言模型，并基于训练好的词向量，编写RNN模型用于文本分类
 47 | 
 48 | 数据集：aclIMDB
 49 | 
 50 | 预训练词向量：GloVe.6B
 51 | 
 52 | ## 实验结果
 53 | 
 54 | 实验使用`pytorch`进行
 55 | 
 56 | 
 57 | ### 源码结构及说明
 58 | 
 59 | #### 数据预处理部分
 60 | 
 61 | 使用`torchtext`库处理文本；使用`spaCy`库进行分词。
 62 | 
 63 | 将`train/`目录下的数据集划分为`train/validation`, 划分比例为`0.8/0.2`.
 64 | 
 65 | #### 模型部分
 66 | 
 67 | 由一个`Embedding`层和一个`RNN/LSTM`模块构成，后者可以调节层数和是否双向。
 68 | 
 69 | `Embedding`层使用`GloVe`预训练词向量进行初始化。
 70 | 
 71 | 模型定义如下：
 72 | 
 73 | ```python
 74 | class RNNClassifier(nn.Module):
 75 |     def __init__(self, vocab_size, embedding_dim, hidden_dim,
 76 |                  n_layers: int = 1, bidirectional: bool = False,
 77 |                  dropout: float = 0., model_base: str = 'RNN'):
 78 |         super(RNNClassifier, self).__init__()
 79 |         self.bidirectional = bidirectional
 80 |         self.model_base = model_base.lower()
 81 |         if self.model_base == 'lstm':
 82 |             model = nn.LSTM
 83 |         else:
 84 |             model = nn.RNN
 85 | 
 86 |         self.embedding = nn.Embedding(vocab_size, embedding_dim)
 87 |         self.rnn = model(embedding_dim,
 88 |                          hidden_dim,
 89 |                          num_layers=n_layers,
 90 |                          bidirectional=bidirectional,
 91 |                          dropout=dropout)
 92 |         if self.bidirectional:
 93 |             hidden_dim *= 2
 94 |         self.fc = nn.Linear(hidden_dim, 1)
 95 |         self.act = nn.Sigmoid()
 96 | 
 97 |     def forward(self, x, x_len):
 98 |         x = self.embedding(x)
 99 |         x = pack_padded_sequence(x, x_len)
100 |         if self.model_base == 'lstm':
101 |             _, (h_n, _) = self.rnn(x)
102 |         else:
103 |             _, h_n = self.rnn(x)  # h_n.shape = (num_layers * num_directions, batch, hidden_size)
104 |         if self.bidirectional:
105 |             hidden = torch.cat((h_n[-2], h_n[-1]), dim=1)  # get last layer
106 |         else:
107 |             hidden = h_n[-1]
108 |         logits = self.fc(hidden)
109 |         output = self.act(logits)
110 |         return output
111 | ```
112 | 
113 | ### 结果及分析
114 | 
115 | 本实验的可选参数为
116 | 
117 | ```python
118 | VOCAB_SIZE = 400000
119 | EMBEDDING_DIM = 100
120 | HIDDEN_DIM = 64
121 | N_LAYERS = 1  # RNN/LSTM 层数
122 | BIDIRECTIONAL = False  # 是否双向
123 | DROPOUT = 0.
124 | BATCH_SIZE = 128
125 | N_EPOCHS = 10
126 | MODEL_BASE = 'RNN'  # 使用`Elman RNN` 还是 `LSTM`
127 | ```
128 | 
129 | 此外，本实验固定随机种子：
130 | 
131 | ```python
132 | import torch
133 | import random
134 | import os
135 | import numpy as np
136 | 
137 | def set_seed(seed=123):
138 |     random.seed(seed)
139 |     np.random.seed(seed)
140 |     os.environ["PYTHONHASHSEED"] = str(seed)
141 |     torch.manual_seed(seed)
142 |     torch.cuda.manual_seed_all(seed)
143 |     # torch.use_deterministic_algorithms(True)
144 |     # torch.backends.cudnn.enabled = False
145 |     torch.backends.cudnn.benchmark = False
146 |     torch.backends.cudnn.deterministic = True
147 |     os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2"
148 |     
149 | set_seed(2077)
150 | ```
151 | 
152 | **词向量维度：100维**
153 | 
154 | 在验证集上验证保存`val_loss`最低的模型用于测试，得到的测试集准确率(%)如下表:
155 | 
156 | |N_L-N_D|RNN| LSTM|
157 | |:---:|:---:|:---:|
158 | | 1-1 |77.22|85.57|
159 | | 1-2 |77.87| 85.64 |
160 | | 2-1 | 77.30 | 86.56 |
161 | | 2-2 | 76.41 | 85.65 |
162 | | 5-1 | 75.28 | 84.47 |
163 | | 5-2 | 74.14 | 84.56 |
164 | 
165 | **注：**`N_L`代表`N_Layers`, 即循环神经网络的层数， `N_D`代表`N_Direction`, 当`bidirectional`设为`False`时为 `1`， 否则为 `2`.
166 | 
167 | 结果表明，`LSTM`明显优于`RNN`；设置双向对于网络浅时略有提升，对于网络深时有副作用；简单地加深网络会使模型性能变差。
168 | 
169 | **词向量维度：300维**
170 | 
171 | 将词向量维度增加到300维，比较模型表现。
172 | 
173 | | N_L-N_D |  RNN  | LSTM  |
174 | | :-----: | :---: | :---: |
175 | |   1-1   | 76.29 | 86.20 |
176 | |   1-2   | 76.80 | 86.62 |
177 | 
178 | RNN性能变差，LSTM性能变好。
179 | 
180 | 
181 | ## 实验总结
182 | 
183 | 本实验的主要难点在于：
184 | 
185 | + 认清并理解完成任务所需要的流程；
186 | + 文本处理的流程。
187 | + 词向量嵌入的原理和实践方法。
188 | 
189 | 因此主要时间花在如何处理数据上。模型结构上相比前几次实验反而要简单一些。


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络.pdf


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172134556.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172134556.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172216587.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172216587.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172230085.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172230085.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172244821.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172244821.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172303845.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172303845.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172317856.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172317856.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172733303.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172733303.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172747548.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172747548.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172839199.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172839199.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172852144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172852144.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513172947008.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513172947008.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513173012590.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513173012590.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513173151864.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513173151864.png


--------------------------------------------------------------------------------
/exp3/实验三：循环神经网络/image-20210513173355442.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp3/实验三：循环神经网络/image-20210513173355442.png


--------------------------------------------------------------------------------
/exp4/ReadMe.md:
--------------------------------------------------------------------------------
 1 | # 说明
 2 | 
 3 | ## glove.6B.zip
 4 | 
 5 | glove预训练词向量，也可以自己使用想用的词向量，使用方法参见https://blog.csdn.net/bqw18744018044/article/details/89575127
 6 | 
 7 | ## aclImdb_v1.tar.gz
 8 | 
 9 | 所使用的数据集
10 | 
11 | 具体可见：Maas, A. L., Daly, R. E., Pham, P. T., Huang, D., Ng, A. Y., & Potts, C. (2011, June). Learning word vectors for sentiment analysis. In Proceedings of the 49th annual meeting of the association for computational linguistics: Human language technologies-volume 1 (pp. 142-150). Association for Computational Linguistics.
12 | 
13 | ## IMDb.py
14 | 
15 | 数据集的基本处理代码
16 | 
17 | ## 模型评价指标
18 | 
19 | 预测准确率
20 | 
21 | ## SOTA方法准确率
22 | 
23 | ![img.png](img.png)
24 | 
25 | 可能有用的资料：
26 | 
27 | https://arxiv.org/pdf/1905.05583v3.pdf


--------------------------------------------------------------------------------
/exp4/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/img.png


--------------------------------------------------------------------------------
/exp4/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==4.6.1
2 | tqdm==4.56.0
3 | torch==1.8.1
4 | numpy==1.19.2
5 | 


--------------------------------------------------------------------------------
/exp4/src/config.py:
--------------------------------------------------------------------------------
# Filesystem locations, relative to the directory the scripts are started from.
raw_data_folder = './aclImdb/'  # dataset root; data.py reads <split>/pos and <split>/neg below it
vectors_folder = './glove.6B/'  # GloVe folder; not referenced by the exp4 sources visible alongside this file
data_folder = './data/'  # cache directory for the pickled raw texts and tensor datasets


SEED = 2077  # passed to utils.set_seed() by main.py
DEVICE = 'cuda:0'  # device main.py moves the model, the loss and every batch to

VAL_RATIO = 0.2  # fraction of the training texts split off as the validation set

HIDDEN_DIM = 32  # hidden width of the classifier head stacked on BERT
BATCH_SIZE = 8  # batch size of the train/val/test DataLoaders
N_EPOCHS = 10  # number of training epochs run by main.py
MAX_LEN = 256  # tokenizer max_length used for truncation/padding
LEARNING_RATE = 1e-5  # learning rate handed to Adam
FREEZE_BERT = True  # True: BERT parameters get requires_grad=False, only the head is trained
JUST_TEST = False  # True: main.py skips training and only evaluates the saved checkpoint
18 | 


--------------------------------------------------------------------------------
/exp4/src/data.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | from tqdm import tqdm
  4 | from config import *
  5 | import re
  6 | import pickle
  7 | from transformers import BertTokenizer
  8 | import numpy as np
  9 | from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
 10 | 
 11 | 
 12 | def get_raw_data(which):
 13 |     cache_file = os.path.join(data_folder, which + '_raw.pkl')
 14 |     if os.path.exists(cache_file):
 15 |         with open(cache_file, 'rb') as f:
 16 |             data = pickle.load(f)
 17 |         return data
 18 | 
 19 |     texts = []
 20 |     labels = []
 21 |     raw_folder = os.path.join(raw_data_folder, which)
 22 |     for subfolder in ['pos', 'neg']:
 23 |         folder_name = os.path.join(raw_folder, subfolder)
 24 |         for file in tqdm(os.listdir(folder_name),
 25 |                          bar_format='{percentage:3.0f}%|{bar:20}{r_bar}'):
 26 |             with open(os.path.join(folder_name, file), 'rb') as f:
 27 |                 text = f.read().decode('utf-8')
 28 |                 label = 1 if subfolder == 'pos' else 0
 29 |                 texts.append(text)
 30 |                 labels.append(label)
 31 |     data = (texts, labels)
 32 |     with open(cache_file, 'wb') as f:
 33 |         pickle.dump(data, f)
 34 |     return data
 35 | 
 36 | 
 37 | def text_preprocessing(text):
 38 |     # Remove '@name'
 39 |     text = re.sub(r'(@.*?)[\s]', ' ', text)
 40 | 
 41 |     # Replace '&amp;' with '&'
 42 |     text = re.sub(r'&amp;', '&', text)
 43 | 
 44 |     # Remove trailing whitespace
 45 |     text = re.sub(r'\s+', ' ', text).strip()
 46 | 
 47 |     return text
 48 | 
 49 | 
# Module-level tokenizer for 'bert-base-uncased'; it is created once when this
# module is imported and is used by preprocessing_for_bert() below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
 51 | 
 52 | 
 53 | def preprocessing_for_bert(sentences):
 54 |     # Create empty lists to store outputs
 55 |     input_ids = []
 56 |     attention_masks = []
 57 | 
 58 |     for sent in tqdm(sentences):
 59 |         encoded_sent = tokenizer.encode_plus(
 60 |             text=text_preprocessing(sent),  # Preprocess sentence
 61 |             add_special_tokens=True,  # Add `[CLS]` and `[SEP]`
 62 |             max_length=MAX_LEN,  # Max length to truncate/pad
 63 |             padding='max_length',  # Pad sentence to max length
 64 |             truncation=True,
 65 |             # return_tensors='pt',           # Return PyTorch tensor
 66 |             return_attention_mask=True  # Return attention mask
 67 |         )
 68 | 
 69 |         # Add the outputs to the lists
 70 |         input_ids.append(encoded_sent.get('input_ids'))
 71 |         attention_masks.append(encoded_sent.get('attention_mask'))
 72 | 
 73 |     # Convert lists to tensors
 74 |     input_ids = torch.tensor(input_ids)
 75 |     attention_masks = torch.tensor(attention_masks)
 76 | 
 77 |     return input_ids, attention_masks
 78 | 
 79 | 
 80 | def get_dataloader():
 81 |     dataset_cache_path = data_folder + 'train_data.pkl'
 82 |     dataset = dict()
 83 |     if not os.path.exists(dataset_cache_path):
 84 |         train_texts, train_labels = get_raw_data('train')
 85 |         test_texts, test_labels = get_raw_data('test')
 86 | 
 87 |         indices = np.random.permutation(len(train_texts))
 88 |         val_indices, train_indices = np.split(indices, [round(len(train_texts) * VAL_RATIO)])
 89 |         train_texts, train_labels = np.array(train_texts), np.array(train_labels)
 90 |         val_texts, val_labels = train_texts[val_indices], train_labels[val_indices]
 91 |         train_texts, train_labels = train_texts[train_indices], train_labels[train_indices]
 92 | 
 93 |         train_inputs, train_masks = preprocessing_for_bert(train_texts)
 94 |         val_inputs, val_masks = preprocessing_for_bert(val_texts)
 95 |         test_inputs, test_masks = preprocessing_for_bert(test_texts)
 96 | 
 97 |         # Convert other data types to torch.Tensor
 98 |         train_labels = torch.tensor(train_labels)
 99 |         val_labels = torch.tensor(val_labels)
100 |         test_labels = torch.tensor(test_labels)
101 | 
102 |         dataset['train'] = TensorDataset(train_inputs, train_masks, train_labels)
103 |         dataset['val'] = TensorDataset(val_inputs, val_masks, val_labels)
104 |         dataset['test'] = TensorDataset(test_inputs, test_masks, test_labels)
105 |         for which in ['train', 'val', 'test']:
106 |             pickle.dump(dataset[which], open(os.path.join(data_folder, f'{which}_data.pkl'), 'wb'))
107 |     else:
108 |         for which in ['train', 'val', 'test']:
109 |             dataset[which] = pickle.load(open(os.path.join(data_folder, f'{which}_data.pkl'), 'rb'))
110 | 
111 |     train_sampler = RandomSampler(dataset['train'])
112 |     train_dataloader = DataLoader(dataset['train'], sampler=train_sampler, batch_size=BATCH_SIZE)
113 | 
114 |     val_sampler = SequentialSampler(dataset['val'])
115 |     val_dataloader = DataLoader(dataset['val'], sampler=val_sampler, batch_size=BATCH_SIZE)
116 | 
117 |     test_sampler = SequentialSampler(dataset['test'])
118 |     test_dataloader = DataLoader(dataset['test'], sampler=test_sampler, batch_size=BATCH_SIZE)
119 | 
120 |     return train_dataloader, val_dataloader, test_dataloader
121 | 
122 | 
123 | if __name__ == "__main__":
124 |     get_dataloader()
125 | 


--------------------------------------------------------------------------------
/exp4/src/main.py:
--------------------------------------------------------------------------------
 1 | from utils import set_seed
 2 | from config import *
 3 | import torch
 4 | import torch.nn as nn
 5 | from torch.optim import Adam
 6 | from data import get_dataloader
 7 | import random
 8 | import os
 9 | import numpy as np
10 | from tqdm import tqdm
11 | from model import BertClassifier
12 | 
13 | 
def train(model, dataloader, optimizer, criterion):
    """Run one training epoch and report its average loss and accuracy.

    Args:
        model: Module called as ``model(input_ids, attn_mask)`` that returns
            logits.
        dataloader: Yields batches of three tensors
            ``(input_ids, attn_mask, labels)``; each is moved to ``DEVICE``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(logits, labels)``.

    Returns:
        ``(mean_loss, mean_acc)``: the per-batch loss and per-batch accuracy,
        each summed over the epoch and divided by ``len(dataloader)``.
    """
    model.train()
    progress = tqdm(dataloader, unit='batch', ascii=True,
                    bar_format='{percentage:3.0f}%|{bar:20}{r_bar}')

    total_loss = 0
    total_acc = 0
    for batch in progress:
        input_ids, attn_mask, labels = (tensor.to(DEVICE) for tensor in batch)

        optimizer.zero_grad()
        logits = model(input_ids, attn_mask)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Fraction of samples in this batch whose arg-max class equals the label
        predictions = logits.argmax(dim=1).flatten()
        batch_acc = (predictions == labels).cpu().numpy().mean().item()
        loss_value = batch_loss.item()

        progress.set_postfix({'train_loss': loss_value,
                              'train_acc': batch_acc})

        total_loss += loss_value
        total_acc += batch_acc

    n_batches = len(dataloader)
    return total_loss / n_batches, total_acc / n_batches
42 | 
43 | 
def evaluate(model, dataloader, criterion):
    """Evaluate the model on a dataloader without updating any weights.

    The model is switched to eval mode and the whole pass runs under
    ``torch.no_grad()``.

    Args:
        model: Module called as ``model(input_ids, attn_mask)`` that returns
            logits.
        dataloader: Yields batches of three tensors
            ``(input_ids, attn_mask, labels)``; each is moved to ``DEVICE``.
        criterion: Loss called as ``criterion(logits, labels)``.

    Returns:
        ``(mean_loss, mean_acc)``: the per-batch loss and per-batch accuracy,
        each summed over all batches and divided by ``len(dataloader)``.
    """
    model.eval()

    total_loss = 0
    total_acc = 0
    with torch.no_grad():
        for batch in dataloader:
            input_ids, attn_mask, labels = (tensor.to(DEVICE) for tensor in batch)

            logits = model(input_ids, attn_mask)
            batch_loss = criterion(logits, labels)

            # Fraction of samples in this batch whose arg-max class equals the label
            predictions = logits.argmax(dim=1).flatten()
            batch_acc = (predictions == labels).cpu().numpy().mean()

            total_loss += batch_loss.item()
            total_acc += batch_acc.item()

    n_batches = len(dataloader)
    return total_loss / n_batches, total_acc / n_batches
64 | 
65 | 
if __name__ == '__main__':
    # Entry point: optionally fine-tune the classifier, then report the test
    # metrics of the checkpoint with the lowest validation loss.
    checkpoint_path = './checkpoint/best_weights.pt'

    set_seed(SEED)
    train_dataloader, val_dataloader, test_dataloader = get_dataloader()
    bert = BertClassifier(freeze_bert=FREEZE_BERT)
    optimizer = Adam(bert.parameters(), lr=LEARNING_RATE)
    criterion = nn.CrossEntropyLoss()

    bert = bert.to(DEVICE)
    criterion = criterion.to(DEVICE)
    if not JUST_TEST:
        # Create the checkpoint directory up front; otherwise torch.save()
        # fails only after the first (long) epoch has already been trained.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

        best_valid_loss = np.inf
        for epoch in range(N_EPOCHS):
            print(f">>> Epoch {epoch+1}/{N_EPOCHS}")

            train_loss, train_acc = train(bert, train_dataloader, optimizer, criterion)
            valid_loss, valid_acc = evaluate(bert, val_dataloader, criterion)

            # Keep only the weights with the best validation loss so far
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(bert.state_dict(), checkpoint_path)

            print(f'\tTrain Loss: {train_loss:.4f} | Train Acc: {train_acc * 100:.2f}%')
            print(f'\tValid Loss: {valid_loss:.4f} | Valid Acc: {valid_acc * 100:.2f}%')
    print(">>> Testing...")
    # map_location lets a checkpoint saved on another device be loaded onto DEVICE
    bert.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE))
    test_loss, test_acc = evaluate(bert, test_dataloader, criterion)
    print(f'\tTest Loss: {test_loss:.4f} | Test Acc: {test_acc * 100:.2f}%')
95 | 


--------------------------------------------------------------------------------
/exp4/src/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from transformers import BertModel
  4 | from config import *
  5 | import numpy as np
  6 | 
  7 | 
  8 | class BertClassifier(nn.Module):
  9 |     def __init__(self, freeze_bert=False):
 10 |         super(BertClassifier, self).__init__()
 11 |         D_in, H, D_out = 768, HIDDEN_DIM, 2
 12 |         # Instantiate BERT model
 13 |         self.bert = BertModel.from_pretrained('bert-base-uncased')
 14 | 
 15 |         self.classifier = nn.Sequential(
 16 |             nn.Linear(D_in, H),
 17 |             nn.ReLU(),
 18 |             # nn.Dropout(0.5),
 19 |             nn.Linear(H, D_out)
 20 |         )
 21 | 
 22 |         if freeze_bert:
 23 |             for param in self.bert.parameters():
 24 |                 param.requires_grad = False
 25 | 
 26 |     def forward(self, input_ids, attention_mask):
 27 |         # Feed input to BERT
 28 |         outputs = self.bert(input_ids=input_ids,
 29 |                             attention_mask=attention_mask)
 30 | 
 31 |         # Extract the last hidden state of the token `[CLS]` for classification task
 32 |         last_hidden_state_cls = outputs[0][:, 0, :]
 33 | 
 34 |         # Feed input to classifier to compute logits
 35 |         logits = self.classifier(last_hidden_state_cls)
 36 |         return logits
 37 | 
 38 | 
 39 | # ============= Following are step-by-step implementation =============
 40 | 
 41 | def get_attn_pad_mask(seq_q, seq_k):
 42 |     batch_size, len_q = seq_q.size()
 43 |     batch_size, len_k = seq_k.size()
 44 |     # eq(zero) is PAD token
 45 |     pad_attn_mask = seq_k.data.eq(0).unsqueeze(1)  # batch_size x 1 x len_k(=len_q), one is masking
 46 |     return pad_attn_mask.expand(batch_size, len_q, len_k)  # batch_size x len_q x len_k
 47 | 
 48 | 
 49 | def gelu(x):
 50 |     return x * 0.5 * (1.0 + torch.erf(x / np.sqrt(2.0)))
 51 | 
 52 | 
 53 | class Embedding(nn.Module):
 54 |     def __init__(self, d_model, vocab_size, max_len, n_segments):
 55 |         super(Embedding, self).__init__()
 56 |         self.tok_embed = nn.Embedding(vocab_size, d_model)  # token embedding
 57 |         self.pos_embed = nn.Embedding(max_len, d_model)  # position embedding
 58 |         self.seg_embed = nn.Embedding(n_segments, d_model)  # segment(token type) embedding
 59 |         self.norm = nn.LayerNorm(d_model)
 60 | 
 61 |     def forward(self, x, seg):
 62 |         seq_len = x.size(1)
 63 |         pos = torch.arange(seq_len, dtype=torch.long)
 64 |         pos = pos.unsqueeze(0).expand_as(x)  # (seq_len,) -> (batch_size, seq_len)
 65 |         embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)
 66 |         return self.norm(embedding)
 67 | 
 68 | 
 69 | class ScaledDotProductAttention(nn.Module):
 70 |     def __init__(self):
 71 |         super(ScaledDotProductAttention, self).__init__()
 72 | 
 73 |     def forward(self, Q, K, V, attn_mask):
 74 |         scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(
 75 |             Q.shape[-1])  # scores : [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]
 76 |         scores.masked_fill_(attn_mask, -1e9)  # Fills elements of self tensor with value where mask is one.
 77 |         attn = nn.Softmax(dim=-1)(scores)
 78 |         context = torch.matmul(attn, V)
 79 |         return context, attn
 80 | 
 81 | 
 82 | class MultiHeadAttention(nn.Module):
 83 |     def __init__(self, d_model, d_k, d_v, n_heads):
 84 |         super(MultiHeadAttention, self).__init__()
 85 |         self.d_model = d_model
 86 |         self.d_k = d_k
 87 |         self.d_v = d_v
 88 |         self.n_heads = n_heads
 89 |         self.W_Q = nn.Linear(d_model, d_k * n_heads)
 90 |         self.W_K = nn.Linear(d_model, d_k * n_heads)
 91 |         self.W_V = nn.Linear(d_model, d_v * n_heads)
 92 | 
 93 |     def forward(self, Q, K, V, attn_mask):
 94 |         # q: [batch_size x len_q x d_model], k: [batch_size x len_k x d_model], v: [batch_size x len_k x d_model]
 95 |         residual, batch_size = Q, Q.size(0)
 96 |         # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)
 97 |         q_s = self.W_Q(Q).view(batch_size, -1,
 98 |                                self.n_heads, self.d_k).transpose(1, 2)  # q_s: [batch_size x n_heads x len_q x d_k]
 99 |         k_s = self.W_K(K).view(batch_size, -1,
100 |                                self.n_heads, self.d_k).transpose(1, 2)  # k_s: [batch_size x n_heads x len_k x d_k]
101 |         v_s = self.W_V(V).view(batch_size, -1,
102 |                                self.n_heads, self.d_v).transpose(1, 2)  # v_s: [batch_size x n_heads x len_k x d_v]
103 | 
104 |         attn_mask = attn_mask.unsqueeze(1).repeat(1, self.n_heads, 1,
105 |                                                   1)  # attn_mask : [batch_size x n_heads x len_q x len_k]
106 | 
107 |         # context: [batch_size x n_heads x len_q x d_v], attn: [batch_size x n_heads x len_q(=len_k) x len_k(=len_q)]
108 |         context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)
109 |         context = context.transpose(1, 2).contiguous().view(batch_size, -1,
110 |                                                             self.n_heads * self.d_v)  # context: [batch_size x len_q x n_heads * d_v]
111 |         output = nn.Linear(self.n_heads * self.d_v, self.d_model)(context)
112 | 
113 |         return nn.LayerNorm(self.d_model)(output + residual), attn  # output: [batch_size x len_q x d_model]
114 | 
115 | 
class PoswiseFeedForwardNet(nn.Module):
    """Two linear layers with a GELU in between.

    Args:
        d_model: Input and output width.
        d_ff: Width of the hidden layer.
    """

    def __init__(self, d_model, d_ff):
        super().__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        """Map ``(batch_size, len_seq, d_model)`` to the same shape via ``d_ff``."""
        hidden = gelu(self.fc1(x))  # (batch_size, len_seq, d_ff)
        return self.fc2(hidden)
125 | 
126 | 
class EncoderLayer(nn.Module):
    """One encoder block: self-attention followed by the feed-forward net.

    Args:
        d_model: Width of the token representations.
        d_k: Per-head width of queries and keys.
        d_v: Per-head width of values.
        d_ff: Hidden width of the feed-forward net.
        n_heads: Number of attention heads.
    """

    def __init__(self, d_model, d_k, d_v, d_ff, n_heads):
        super().__init__()
        self.enc_self_attn = MultiHeadAttention(d_model, d_k, d_v, n_heads)
        self.pos_ffn = PoswiseFeedForwardNet(d_model, d_ff)

    def forward(self, enc_inputs, enc_self_attn_mask):
        """Return ``(enc_outputs, attn)`` for one block.

        ``enc_inputs`` serves as query, key and value of the self-attention;
        ``enc_outputs`` is ``[batch_size x len_q x d_model]``.
        """
        attended, attn = self.enc_self_attn(
            enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask)
        return self.pos_ffn(attended), attn
138 | 
139 | 
class BERT(nn.Module):
    """Step-by-step BERT: embeddings, encoder stack and two output heads.

    The sequence-classification head works on the first token; the
    masked-token head shares its decoder weight with the token embedding.

    Args:
        d_model: Width of the token representations.
        d_k: Per-head width of queries and keys.
        d_v: Per-head width of values.
        d_ff: Hidden width of the feed-forward nets.
        vocab_size: Size of the vocabulary.
        max_len: Longest supported sequence length.
        n_segments: Number of segment (token type) ids.
        n_heads: Number of attention heads per layer.
        n_layers: Number of encoder layers.
    """

    def __init__(self, d_model, d_k, d_v, d_ff, vocab_size, max_len, n_segments, n_heads, n_layers):
        super().__init__()
        self.embedding = Embedding(d_model, vocab_size, max_len, n_segments)
        encoder_stack = []
        for _ in range(n_layers):
            encoder_stack.append(EncoderLayer(d_model, d_k, d_v, d_ff, n_heads))
        self.layers = nn.ModuleList(encoder_stack)
        self.fc = nn.Linear(d_model, d_model)
        self.activ1 = nn.Tanh()
        self.linear = nn.Linear(d_model, d_model)
        self.activ2 = gelu
        self.norm = nn.LayerNorm(d_model)
        self.classifier = nn.Linear(d_model, 2)
        # The decoder reuses the token-embedding matrix as its weight and gets
        # a separate, zero-initialised bias.
        tied_weight = self.embedding.tok_embed.weight
        vocab_rows, embed_cols = tied_weight.size()
        self.decoder = nn.Linear(embed_cols, vocab_rows, bias=False)
        self.decoder.weight = tied_weight
        self.decoder_bias = nn.Parameter(torch.zeros(vocab_rows))

    def forward(self, input_ids, segment_ids, masked_pos):
        """Compute the masked-token and classification logits.

        Args:
            input_ids: ``[batch_size, len]`` token ids (``0`` is padding).
            segment_ids: Segment ids matching ``input_ids``.
            masked_pos: ``[batch_size, max_pred]`` positions whose tokens are
                to be predicted.

        Returns:
            ``(logits_lm, logits_clf)`` with shapes
            ``[batch_size, max_pred, n_vocab]`` and ``[batch_size, 2]``.
        """
        hidden = self.embedding(input_ids, segment_ids)
        pad_mask = get_attn_pad_mask(input_ids, input_ids)
        for encoder in self.layers:
            # hidden: [batch_size, len, d_model]; the second result is the
            # attention map [batch_size, n_heads, len, len], unused here.
            hidden, _ = encoder(hidden, pad_mask)

        # Classification is decided by the first token (CLS)
        cls_state = hidden[:, 0]
        pooled = self.activ1(self.fc(cls_state))  # [batch_size, d_model]
        logits_clf = self.classifier(pooled)  # [batch_size, 2]

        # Gather the hidden states at the masked positions
        gather_index = masked_pos[:, :, None].expand(-1, -1, hidden.size(-1))  # [batch_size, max_pred, d_model]
        masked_states = torch.gather(hidden, 1, gather_index)  # [batch_size, max_pred, d_model]
        masked_states = self.norm(self.activ2(self.linear(masked_states)))
        logits_lm = self.decoder(masked_states) + self.decoder_bias  # [batch_size, max_pred, n_vocab]

        return logits_lm, logits_clf
176 | 
177 | 
if __name__ == '__main__':
    # Smoke test: push one hand-written sample through a small BERT and print
    # the predicted token ids / class index.
    hparams = dict(
        d_model=768,  # Embedding Size
        d_k=64,  # dimension of K(=Q)
        d_v=64,  # dimension of V
        d_ff=768 * 4,  # 4*d_model, FeedForward dimension
        vocab_size=1000,
        max_len=30,  # maximum of length
        n_segments=2,
        n_heads=12,  # number of heads in Multi-Head Attention
        n_layers=6,  # number of Encoder Layers
    )
    model = BERT(**hparams)

    input_ids = torch.tensor([[1, 2, 3, 4, 5, 1, 2, 1, 2, 0, 0, 0]])
    segment_ids = torch.tensor([[0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0]])
    masked_pos = torch.tensor([[6, 0, 0, 0, 0]])

    logits_lm, logits_clf = model(input_ids, segment_ids, masked_pos)
    # Indices of the highest logit: per masked position / per sample
    logits_lm = logits_lm.detach().max(dim=2).indices[0].numpy()
    logits_clf = logits_clf.detach().max(dim=1).indices.numpy()[0]

    print(logits_lm)
    print(logits_clf)
201 | 


--------------------------------------------------------------------------------
/exp4/src/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import random
 3 | import os
 4 | import numpy as np
 5 | 
 6 | 
 7 | def set_seed(seed=123):
 8 |     random.seed(seed)
 9 |     np.random.seed(seed)
10 |     os.environ["PYTHONHASHSEED"] = str(seed)
11 |     torch.manual_seed(seed)
12 |     torch.cuda.manual_seed_all(seed)
13 |     # torch.use_deterministic_algorithms(True)
14 |     # torch.backends.cudnn.enabled = False
15 |     torch.backends.cudnn.benchmark = False
16 |     torch.backends.cudnn.deterministic = True
17 |     os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2"
18 | 


--------------------------------------------------------------------------------
/exp4/实验四：BERT.md:
--------------------------------------------------------------------------------
  1 | # 实验四：BERT
  2 | 
  3 | 姓名：刘威
  4 | 
  5 | 学号：PB18010469
  6 | 
  7 | 
  8 | ## 实验目的
  9 | 
 10 | * 了解并熟悉Attention机制的原理
 11 | * 了解self-attention的原理以及transformer的结构
 12 | * 学会使用 bert 预训练模型完成文本分类任务
 13 | 
 14 | 
 15 | ## 实验原理
 16 | 
 17 | ![image-20210605154503374](实验四：BERT/image-20210605154503374.png)
 18 | 
 19 | ![image-20210605154926294](实验四：BERT/image-20210605154926294.png)
 20 | 
 21 | ![image-20210605154945286](实验四：BERT/image-20210605154945286.png)
 22 | 
 23 | ![image-20210605160746073](实验四：BERT/image-20210605160746073.png)
 24 | 
 25 | ![image-20210605163538200](实验四：BERT/image-20210605163538200.png)
 26 | 
 27 | ![image-20210605163557321](实验四：BERT/image-20210605163557321.png)
 28 | 
 29 | ![image-20210605163641264](实验四：BERT/image-20210605163641264.png)
 30 | 
 31 | ![image-20210605163743489](实验四：BERT/image-20210605163743489.png)
 32 | 
 33 | 
 34 | ## 实验内容
 35 | 
 36 | 编写BERT的语言模型，并基于训练好的词向量，利用少量的训练数据，微调BERT模型用于文本分类，并和之前的RNN模型进行对比分析
 37 | 
 38 | 数据集：aclIMDB
 39 | 
 40 | ## 实验结果
 41 | 
 42 | + 实验使用`pytorch`进行
 43 | + 使用了`Huggingface`维护的`transformers`库中的预训练bert模型`bert-base-uncased`.
 44 | 
 45 | 
 46 | ### 源码结构及说明
 47 | 
 48 | #### 数据预处理部分
 49 | 
 50 | 使用`transformers`库处理文本, 使得输入符合bert模型的要求
 51 | 
 52 | 将`train/`目录下的数据集划分为`train/validation`, 划分比例为`0.8/0.2`.
 53 | 
 54 | #### 模型部分
 55 | 
 56 | 取bert模型的输出中`[CLS]`对应的隐藏层，作为二分类全连接的输入。
 57 | 
 58 | 模型定义如下：
 59 | 
 60 | ```python
 61 | import torch
 62 | import torch.nn as nn
 63 | from transformers import BertModel
 64 | 
 65 | class BertClassifier(nn.Module):
 66 |     def __init__(self, freeze_bert=False):
 67 |         super(BertClassifier, self).__init__()
 68 |         D_in, H, D_out = 768, HIDDEN_DIM, 2
 69 |         # Instantiate BERT model
 70 |         self.bert = BertModel.from_pretrained('bert-base-uncased')
 71 | 
 72 |         self.classifier = nn.Sequential(
 73 |             nn.Linear(D_in, H),
 74 |             nn.ReLU(),
 75 |             # nn.Dropout(0.5),
 76 |             nn.Linear(H, D_out)
 77 |         )
 78 | 
 79 |         if freeze_bert:
 80 |             for param in self.bert.parameters():
 81 |                 param.requires_grad = False
 82 | 
 83 |     def forward(self, input_ids, attention_mask):
 84 |         # Feed input to BERT
 85 |         outputs = self.bert(input_ids=input_ids,
 86 |                             attention_mask=attention_mask)
 87 | 
 88 |         # Extract the last hidden state of the token `[CLS]` for classification task
 89 |         last_hidden_state_cls = outputs[0][:, 0, :]
 90 | 
 91 |         # Feed input to classifier to compute logits
 92 |         logits = self.classifier(last_hidden_state_cls)
 93 |         return logits
 94 | ```
 95 | 
 96 | ### 结果及分析
 97 | 
 98 | 本实验的可选参数为
 99 | 
100 | ```python
101 | HIDDEN_DIM = 32	# 最后的全连接层的隐藏层维度
102 | BATCH_SIZE = 8
103 | N_EPOCHS = 10
104 | MAX_LEN = 256  # 序列的最大长度
105 | LEARNING_RATE = 1e-5
106 | FREEZE_BERT = False  # 是否固定bert的参数
107 | ```
108 | 
109 | 此外，本实验固定随机种子：
110 | 
111 | ```python
112 | import torch
113 | import random
114 | import os
115 | import numpy as np
116 | 
117 | def set_seed(seed=123):
118 |     random.seed(seed)
119 |     np.random.seed(seed)
120 |     os.environ["PYTHONHASHSEED"] = str(seed)
121 |     torch.manual_seed(seed)
122 |     torch.cuda.manual_seed_all(seed)
123 |     # torch.use_deterministic_algorithms(True)
124 |     # torch.backends.cudnn.enabled = False
125 |     torch.backends.cudnn.benchmark = False
126 |     torch.backends.cudnn.deterministic = True
127 |     os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2"
128 |     
129 | set_seed(2077)
130 | ```
131 | 
132 | 设置`FREEZE_BERT`为 `True` 和 `False`, 分别训练模型。每轮训练结束时在验证集上验证，并保存`val_loss`最低的模型用于测试，得到的测试集准确率(%)如下表, RNN和LSTM的结果来自于上一次实验的最好结果:
133 | 
134 | |模型|RNN| LSTM| BERT(Freeze) | BERT(no Freeze) |
135 | |:---:|:---:|:---:|:---:|:---:|
136 | | acc |77.87|86.62| 84.58 | 93.53 |
137 | 
138 | 可以看到，如果固定BERT的参数，只训练最后的全连接层，结果与LSTM差异不大，但如果BERT的参数也进行微调，能够得到远好于 LSTM 的结果。
139 | 
140 | 结果分析：当固定BERT的参数时，BERT 相当于是一个固定的 Embedding 层， 而且这个Embedding 是在大量的语料上训练得到的，可能无法准确地表达我们的任务情景下的语义。而当我们对 BERT的参数也进行微调时，可以使得这种语义的表达更加确切。
141 | 
142 | ## 实验总结
143 | 
144 | Transformer采用了相较于循环神经网络完全不同的机制：Attention, 不仅能有效地解决循环神经网络无法并行计算的问题，而且能够引入任意距离的依赖关系，在应用到文本这种序列化且有上下文依赖关系的数据时有很好的效果。BERT堆叠了若干transformer encoder结构，并使用完形填空，句子预测的预训练方式，在许多任务上都取得了 SOTA 结果。
145 | 
146 | 本实验采用预训练的 BERT 模型进行情感分析的二分类任务，在提供的数据集上进行微调，取得了远好于LSTM的结果，充分展现了attention机制以及BERT预训练模型的强大。


--------------------------------------------------------------------------------
/exp4/实验四：BERT.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四：BERT.pdf


--------------------------------------------------------------------------------
/exp4/实验四：BERT/image-20210605154503374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四：BERT/image-20210605154503374.png


--------------------------------------------------------------------------------
/exp4/实验四：BERT/image-20210605154926294.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四：BERT/image-20210605154926294.png


--------------------------------------------------------------------------------
/exp4/实验四：BERT/image-20210605154945286.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四：BERT/image-20210605154945286.png


--------------------------------------------------------------------------------
/exp4/实验四：BERT/image-20210605160746073.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四：BERT/image-20210605160746073.png


--------------------------------------------------------------------------------
/exp4/实验四：BERT/image-20210605163538200.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四：BERT/image-20210605163538200.png


--------------------------------------------------------------------------------
/exp4/实验四：BERT/image-20210605163557321.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四：BERT/image-20210605163557321.png


--------------------------------------------------------------------------------
/exp4/实验四：BERT/image-20210605163641264.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四：BERT/image-20210605163641264.png


--------------------------------------------------------------------------------
/exp4/实验四：BERT/image-20210605163743489.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp4/实验四：BERT/image-20210605163743489.png


--------------------------------------------------------------------------------
/exp5/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.8.1
2 | numpy==1.19.2
3 | torch_geometric==1.7.1
4 | pandas==1.2.0
5 | 


--------------------------------------------------------------------------------
/exp5/result.csv:
--------------------------------------------------------------------------------
  1 | data_name,add_self_loop,n_layers,drop_edge,pari_norm,activations,test_acc
  2 | citeseer,True,1,0.0,True,relu,0.443
  3 | citeseer,True,1,0.0,True,tanh,0.443
  4 | citeseer,True,1,0.0,True,sigmoid,0.443
  5 | citeseer,True,1,0.0,False,relu,0.68
  6 | citeseer,True,1,0.0,False,tanh,0.68
  7 | citeseer,True,1,0.0,False,sigmoid,0.68
  8 | citeseer,True,1,0.1,True,relu,0.449
  9 | citeseer,True,1,0.1,True,tanh,0.449
 10 | citeseer,True,1,0.1,True,sigmoid,0.449
 11 | citeseer,True,1,0.1,False,relu,0.68
 12 | citeseer,True,1,0.1,False,tanh,0.68
 13 | citeseer,True,1,0.1,False,sigmoid,0.68
 14 | citeseer,True,1,0.2,True,relu,0.451
 15 | citeseer,True,1,0.2,True,tanh,0.451
 16 | citeseer,True,1,0.2,True,sigmoid,0.451
 17 | citeseer,True,1,0.2,False,relu,0.653
 18 | citeseer,True,1,0.2,False,tanh,0.653
 19 | citeseer,True,1,0.2,False,sigmoid,0.653
 20 | citeseer,True,1,0.3,True,relu,0.431
 21 | citeseer,True,1,0.3,True,tanh,0.431
 22 | citeseer,True,1,0.3,True,sigmoid,0.431
 23 | citeseer,True,1,0.3,False,relu,0.661
 24 | citeseer,True,1,0.3,False,tanh,0.661
 25 | citeseer,True,1,0.3,False,sigmoid,0.661
 26 | citeseer,True,1,0.5,True,relu,0.426
 27 | citeseer,True,1,0.5,True,tanh,0.426
 28 | citeseer,True,1,0.5,True,sigmoid,0.426
 29 | citeseer,True,1,0.5,False,relu,0.62
 30 | citeseer,True,1,0.5,False,tanh,0.62
 31 | citeseer,True,1,0.5,False,sigmoid,0.62
 32 | citeseer,True,2,0.0,True,relu,0.526
 33 | citeseer,True,2,0.0,True,tanh,0.547
 34 | citeseer,True,2,0.0,True,sigmoid,0.454
 35 | citeseer,True,2,0.0,False,relu,0.685
 36 | citeseer,True,2,0.0,False,tanh,0.683
 37 | citeseer,True,2,0.0,False,sigmoid,0.207
 38 | citeseer,True,2,0.1,True,relu,0.535
 39 | citeseer,True,2,0.1,True,tanh,0.556
 40 | citeseer,True,2,0.1,True,sigmoid,0.456
 41 | citeseer,True,2,0.1,False,relu,0.683
 42 | citeseer,True,2,0.1,False,tanh,0.681
 43 | citeseer,True,2,0.1,False,sigmoid,0.207
 44 | citeseer,True,2,0.2,True,relu,0.527
 45 | citeseer,True,2,0.2,True,tanh,0.545
 46 | citeseer,True,2,0.2,True,sigmoid,0.451
 47 | citeseer,True,2,0.2,False,relu,0.671
 48 | citeseer,True,2,0.2,False,tanh,0.676
 49 | citeseer,True,2,0.2,False,sigmoid,0.207
 50 | citeseer,True,2,0.3,True,relu,0.524
 51 | citeseer,True,2,0.3,True,tanh,0.544
 52 | citeseer,True,2,0.3,True,sigmoid,0.439
 53 | citeseer,True,2,0.3,False,relu,0.658
 54 | citeseer,True,2,0.3,False,tanh,0.666
 55 | citeseer,True,2,0.3,False,sigmoid,0.207
 56 | citeseer,True,2,0.5,True,relu,0.49
 57 | citeseer,True,2,0.5,True,tanh,0.53
 58 | citeseer,True,2,0.5,True,sigmoid,0.435
 59 | citeseer,True,2,0.5,False,relu,0.626
 60 | citeseer,True,2,0.5,False,tanh,0.63
 61 | citeseer,True,2,0.5,False,sigmoid,0.207
 62 | citeseer,True,3,0.0,True,relu,0.568
 63 | citeseer,True,3,0.0,True,tanh,0.591
 64 | citeseer,True,3,0.0,True,sigmoid,0.439
 65 | citeseer,True,3,0.0,False,relu,0.645
 66 | citeseer,True,3,0.0,False,tanh,0.667
 67 | citeseer,True,3,0.0,False,sigmoid,0.207
 68 | citeseer,True,3,0.1,True,relu,0.56
 69 | citeseer,True,3,0.1,True,tanh,0.576
 70 | citeseer,True,3,0.1,True,sigmoid,0.427
 71 | citeseer,True,3,0.1,False,relu,0.671
 72 | citeseer,True,3,0.1,False,tanh,0.66
 73 | citeseer,True,3,0.1,False,sigmoid,0.202
 74 | citeseer,True,3,0.2,True,relu,0.571
 75 | citeseer,True,3,0.2,True,tanh,0.588
 76 | citeseer,True,3,0.2,True,sigmoid,0.432
 77 | citeseer,True,3,0.2,False,relu,0.655
 78 | citeseer,True,3,0.2,False,tanh,0.647
 79 | citeseer,True,3,0.2,False,sigmoid,0.208
 80 | citeseer,True,3,0.3,True,relu,0.564
 81 | citeseer,True,3,0.3,True,tanh,0.582
 82 | citeseer,True,3,0.3,True,sigmoid,0.423
 83 | citeseer,True,3,0.3,False,relu,0.663
 84 | citeseer,True,3,0.3,False,tanh,0.623
 85 | citeseer,True,3,0.3,False,sigmoid,0.198
 86 | citeseer,True,3,0.5,True,relu,0.543
 87 | citeseer,True,3,0.5,True,tanh,0.578
 88 | citeseer,True,3,0.5,True,sigmoid,0.362
 89 | citeseer,True,3,0.5,False,relu,0.609
 90 | citeseer,True,3,0.5,False,tanh,0.647
 91 | citeseer,True,3,0.5,False,sigmoid,0.193
 92 | citeseer,True,5,0.0,True,relu,0.545
 93 | citeseer,True,5,0.0,True,tanh,0.58
 94 | citeseer,True,5,0.0,True,sigmoid,0.246
 95 | citeseer,True,5,0.0,False,relu,0.522
 96 | citeseer,True,5,0.0,False,tanh,0.588
 97 | citeseer,True,5,0.0,False,sigmoid,0.207
 98 | citeseer,True,5,0.1,True,relu,0.47
 99 | citeseer,True,5,0.1,True,tanh,0.576
100 | citeseer,True,5,0.1,True,sigmoid,0.231
101 | citeseer,True,5,0.1,False,relu,0.351
102 | citeseer,True,5,0.1,False,tanh,0.613
103 | citeseer,True,5,0.1,False,sigmoid,0.207
104 | citeseer,True,5,0.2,True,relu,0.471
105 | citeseer,True,5,0.2,True,tanh,0.567
106 | citeseer,True,5,0.2,True,sigmoid,0.258
107 | citeseer,True,5,0.2,False,relu,0.182
108 | citeseer,True,5,0.2,False,tanh,0.6
109 | citeseer,True,5,0.2,False,sigmoid,0.207
110 | citeseer,True,5,0.3,True,relu,0.457
111 | citeseer,True,5,0.3,True,tanh,0.57
112 | citeseer,True,5,0.3,True,sigmoid,0.255
113 | citeseer,True,5,0.3,False,relu,0.201
114 | citeseer,True,5,0.3,False,tanh,0.567
115 | citeseer,True,5,0.3,False,sigmoid,0.207
116 | citeseer,True,5,0.5,True,relu,0.391
117 | citeseer,True,5,0.5,True,tanh,0.536
118 | citeseer,True,5,0.5,True,sigmoid,0.22
119 | citeseer,True,5,0.5,False,relu,0.188
120 | citeseer,True,5,0.5,False,tanh,0.542
121 | citeseer,True,5,0.5,False,sigmoid,0.207
122 | citeseer,True,10,0.0,True,relu,0.3
123 | citeseer,True,10,0.0,True,tanh,0.61
124 | citeseer,True,10,0.0,True,sigmoid,0.197
125 | citeseer,True,10,0.0,False,relu,0.176
126 | citeseer,True,10,0.0,False,tanh,0.472
127 | citeseer,True,10,0.0,False,sigmoid,0.195
128 | citeseer,True,10,0.1,True,relu,0.245
129 | citeseer,True,10,0.1,True,tanh,0.562
130 | citeseer,True,10,0.1,True,sigmoid,0.198
131 | citeseer,True,10,0.1,False,relu,0.18
132 | citeseer,True,10,0.1,False,tanh,0.207
133 | citeseer,True,10,0.1,False,sigmoid,0.146
134 | citeseer,True,10,0.2,True,relu,0.218
135 | citeseer,True,10,0.2,True,tanh,0.49
136 | citeseer,True,10,0.2,True,sigmoid,0.209
137 | citeseer,True,10,0.2,False,relu,0.207
138 | citeseer,True,10,0.2,False,tanh,0.228
139 | citeseer,True,10,0.2,False,sigmoid,0.143
140 | citeseer,True,10,0.3,True,relu,0.24
141 | citeseer,True,10,0.3,True,tanh,0.488
142 | citeseer,True,10,0.3,True,sigmoid,0.18
143 | citeseer,True,10,0.3,False,relu,0.182
144 | citeseer,True,10,0.3,False,tanh,0.2
145 | citeseer,True,10,0.3,False,sigmoid,0.195
146 | citeseer,True,10,0.5,True,relu,0.213
147 | citeseer,True,10,0.5,True,tanh,0.306
148 | citeseer,True,10,0.5,True,sigmoid,0.196
149 | citeseer,True,10,0.5,False,relu,0.182
150 | citeseer,True,10,0.5,False,tanh,0.2
151 | citeseer,True,10,0.5,False,sigmoid,0.207
152 | citeseer,False,1,0.0,True,relu,0.372
153 | citeseer,False,1,0.0,True,tanh,0.372
154 | citeseer,False,1,0.0,True,sigmoid,0.372
155 | citeseer,False,1,0.0,False,relu,0.619
156 | citeseer,False,1,0.0,False,tanh,0.619
157 | citeseer,False,1,0.0,False,sigmoid,0.619
158 | citeseer,False,1,0.1,True,relu,0.359
159 | citeseer,False,1,0.1,True,tanh,0.359
160 | citeseer,False,1,0.1,True,sigmoid,0.359
161 | citeseer,False,1,0.1,False,relu,0.583
162 | citeseer,False,1,0.1,False,tanh,0.583
163 | citeseer,False,1,0.1,False,sigmoid,0.583
164 | citeseer,False,1,0.2,True,relu,0.345
165 | citeseer,False,1,0.2,True,tanh,0.345
166 | citeseer,False,1,0.2,True,sigmoid,0.345
167 | citeseer,False,1,0.2,False,relu,0.552
168 | citeseer,False,1,0.2,False,tanh,0.552
169 | citeseer,False,1,0.2,False,sigmoid,0.552
170 | citeseer,False,1,0.3,True,relu,0.31
171 | citeseer,False,1,0.3,True,tanh,0.31
172 | citeseer,False,1,0.3,True,sigmoid,0.31
173 | citeseer,False,1,0.3,False,relu,0.53
174 | citeseer,False,1,0.3,False,tanh,0.53
175 | citeseer,False,1,0.3,False,sigmoid,0.53
176 | citeseer,False,1,0.5,True,relu,0.283
177 | citeseer,False,1,0.5,True,tanh,0.283
178 | citeseer,False,1,0.5,True,sigmoid,0.283
179 | citeseer,False,1,0.5,False,relu,0.437
180 | citeseer,False,1,0.5,False,tanh,0.437
181 | citeseer,False,1,0.5,False,sigmoid,0.437
182 | citeseer,False,2,0.0,True,relu,0.522
183 | citeseer,False,2,0.0,True,tanh,0.553
184 | citeseer,False,2,0.0,True,sigmoid,0.364
185 | citeseer,False,2,0.0,False,relu,0.667
186 | citeseer,False,2,0.0,False,tanh,0.665
187 | citeseer,False,2,0.0,False,sigmoid,0.207
188 | citeseer,False,2,0.1,True,relu,0.505
189 | citeseer,False,2,0.1,True,tanh,0.523
190 | citeseer,False,2,0.1,True,sigmoid,0.321
191 | citeseer,False,2,0.1,False,relu,0.645
192 | citeseer,False,2,0.1,False,tanh,0.634
193 | citeseer,False,2,0.1,False,sigmoid,0.205
194 | citeseer,False,2,0.2,True,relu,0.476
195 | citeseer,False,2,0.2,True,tanh,0.5
196 | citeseer,False,2,0.2,True,sigmoid,0.327
197 | citeseer,False,2,0.2,False,relu,0.594
198 | citeseer,False,2,0.2,False,tanh,0.605
199 | citeseer,False,2,0.2,False,sigmoid,0.209
200 | citeseer,False,2,0.3,True,relu,0.469
201 | citeseer,False,2,0.3,True,tanh,0.478
202 | citeseer,False,2,0.3,True,sigmoid,0.355
203 | citeseer,False,2,0.3,False,relu,0.583
204 | citeseer,False,2,0.3,False,tanh,0.557
205 | citeseer,False,2,0.3,False,sigmoid,0.211
206 | citeseer,False,2,0.5,True,relu,0.349
207 | citeseer,False,2,0.5,True,tanh,0.425
208 | citeseer,False,2,0.5,True,sigmoid,0.311
209 | citeseer,False,2,0.5,False,relu,0.324
210 | citeseer,False,2,0.5,False,tanh,0.466
211 | citeseer,False,2,0.5,False,sigmoid,0.137
212 | citeseer,False,3,0.0,True,relu,0.551
213 | citeseer,False,3,0.0,True,tanh,0.547
214 | citeseer,False,3,0.0,True,sigmoid,0.301
215 | citeseer,False,3,0.0,False,relu,0.628
216 | citeseer,False,3,0.0,False,tanh,0.63
217 | citeseer,False,3,0.0,False,sigmoid,0.187
218 | citeseer,False,3,0.1,True,relu,0.501
219 | citeseer,False,3,0.1,True,tanh,0.54
220 | citeseer,False,3,0.1,True,sigmoid,0.262
221 | citeseer,False,3,0.1,False,relu,0.571
222 | citeseer,False,3,0.1,False,tanh,0.602
223 | citeseer,False,3,0.1,False,sigmoid,0.188
224 | citeseer,False,3,0.2,True,relu,0.497
225 | citeseer,False,3,0.2,True,tanh,0.526
226 | citeseer,False,3,0.2,True,sigmoid,0.25
227 | citeseer,False,3,0.2,False,relu,0.574
228 | citeseer,False,3,0.2,False,tanh,0.577
229 | citeseer,False,3,0.2,False,sigmoid,0.188
230 | citeseer,False,3,0.3,True,relu,0.474
231 | citeseer,False,3,0.3,True,tanh,0.484
232 | citeseer,False,3,0.3,True,sigmoid,0.224
233 | citeseer,False,3,0.3,False,relu,0.505
234 | citeseer,False,3,0.3,False,tanh,0.53
235 | citeseer,False,3,0.3,False,sigmoid,0.208
236 | citeseer,False,3,0.5,True,relu,0.397
237 | citeseer,False,3,0.5,True,tanh,0.373
238 | citeseer,False,3,0.5,True,sigmoid,0.237
239 | citeseer,False,3,0.5,False,relu,0.313
240 | citeseer,False,3,0.5,False,tanh,0.419
241 | citeseer,False,3,0.5,False,sigmoid,0.198
242 | citeseer,False,5,0.0,True,relu,0.473
243 | citeseer,False,5,0.0,True,tanh,0.573
244 | citeseer,False,5,0.0,True,sigmoid,0.21
245 | citeseer,False,5,0.0,False,relu,0.513
246 | citeseer,False,5,0.0,False,tanh,0.576
247 | citeseer,False,5,0.0,False,sigmoid,0.207
248 | citeseer,False,5,0.1,True,relu,0.437
249 | citeseer,False,5,0.1,True,tanh,0.53
250 | citeseer,False,5,0.1,True,sigmoid,0.178
251 | citeseer,False,5,0.1,False,relu,0.326
252 | citeseer,False,5,0.1,False,tanh,0.533
253 | citeseer,False,5,0.1,False,sigmoid,0.206
254 | citeseer,False,5,0.2,True,relu,0.425
255 | citeseer,False,5,0.2,True,tanh,0.521
256 | citeseer,False,5,0.2,True,sigmoid,0.175
257 | citeseer,False,5,0.2,False,relu,0.169
258 | citeseer,False,5,0.2,False,tanh,0.477
259 | citeseer,False,5,0.2,False,sigmoid,0.204
260 | citeseer,False,5,0.3,True,relu,0.428
261 | citeseer,False,5,0.3,True,tanh,0.489
262 | citeseer,False,5,0.3,True,sigmoid,0.18
263 | citeseer,False,5,0.3,False,relu,0.161
264 | citeseer,False,5,0.3,False,tanh,0.506
265 | citeseer,False,5,0.3,False,sigmoid,0.194
266 | citeseer,False,5,0.5,True,relu,0.308
267 | citeseer,False,5,0.5,True,tanh,0.395
268 | citeseer,False,5,0.5,True,sigmoid,0.173
269 | citeseer,False,5,0.5,False,relu,0.188
270 | citeseer,False,5,0.5,False,tanh,0.399
271 | citeseer,False,5,0.5,False,sigmoid,0.183
272 | citeseer,False,10,0.0,True,relu,0.231
273 | citeseer,False,10,0.0,True,tanh,0.517
274 | citeseer,False,10,0.0,True,sigmoid,0.208
275 | citeseer,False,10,0.0,False,relu,0.183
276 | citeseer,False,10,0.0,False,tanh,0.416
277 | citeseer,False,10,0.0,False,sigmoid,0.172
278 | citeseer,False,10,0.1,True,relu,0.227
279 | citeseer,False,10,0.1,True,tanh,0.458
280 | citeseer,False,10,0.1,True,sigmoid,0.206
281 | citeseer,False,10,0.1,False,relu,0.201
282 | citeseer,False,10,0.1,False,tanh,0.207
283 | citeseer,False,10,0.1,False,sigmoid,0.151
284 | citeseer,False,10,0.2,True,relu,0.238
285 | citeseer,False,10,0.2,True,tanh,0.397
286 | citeseer,False,10,0.2,True,sigmoid,0.199
287 | citeseer,False,10,0.2,False,relu,0.192
288 | citeseer,False,10,0.2,False,tanh,0.207
289 | citeseer,False,10,0.2,False,sigmoid,0.155
290 | citeseer,False,10,0.3,True,relu,0.182
291 | citeseer,False,10,0.3,True,tanh,0.406
292 | citeseer,False,10,0.3,True,sigmoid,0.196
293 | citeseer,False,10,0.3,False,relu,0.158
294 | citeseer,False,10,0.3,False,tanh,0.193
295 | citeseer,False,10,0.3,False,sigmoid,0.169
296 | citeseer,False,10,0.5,True,relu,0.161
297 | citeseer,False,10,0.5,True,tanh,0.327
298 | citeseer,False,10,0.5,True,sigmoid,0.176
299 | citeseer,False,10,0.5,False,relu,0.189
300 | citeseer,False,10,0.5,False,tanh,0.231
301 | citeseer,False,10,0.5,False,sigmoid,0.198
302 | cora,True,1,0.0,True,relu,0.562
303 | cora,True,1,0.0,True,tanh,0.562
304 | cora,True,1,0.0,True,sigmoid,0.562
305 | cora,True,1,0.0,False,relu,0.72
306 | cora,True,1,0.0,False,tanh,0.72
307 | cora,True,1,0.0,False,sigmoid,0.72
308 | cora,True,1,0.1,True,relu,0.547
309 | cora,True,1,0.1,True,tanh,0.547
310 | cora,True,1,0.1,True,sigmoid,0.547
311 | cora,True,1,0.1,False,relu,0.711
312 | cora,True,1,0.1,False,tanh,0.711
313 | cora,True,1,0.1,False,sigmoid,0.711
314 | cora,True,1,0.2,True,relu,0.568
315 | cora,True,1,0.2,True,tanh,0.568
316 | cora,True,1,0.2,True,sigmoid,0.568
317 | cora,True,1,0.2,False,relu,0.696
318 | cora,True,1,0.2,False,tanh,0.696
319 | cora,True,1,0.2,False,sigmoid,0.696
320 | cora,True,1,0.3,True,relu,0.553
321 | cora,True,1,0.3,True,tanh,0.553
322 | cora,True,1,0.3,True,sigmoid,0.553
323 | cora,True,1,0.3,False,relu,0.693
324 | cora,True,1,0.3,False,tanh,0.693
325 | cora,True,1,0.3,False,sigmoid,0.693
326 | cora,True,1,0.5,True,relu,0.533
327 | cora,True,1,0.5,True,tanh,0.533
328 | cora,True,1,0.5,True,sigmoid,0.533
329 | cora,True,1,0.5,False,relu,0.672
330 | cora,True,1,0.5,False,tanh,0.672
331 | cora,True,1,0.5,False,sigmoid,0.672
332 | cora,True,2,0.0,True,relu,0.666
333 | cora,True,2,0.0,True,tanh,0.686
334 | cora,True,2,0.0,True,sigmoid,0.587
335 | cora,True,2,0.0,False,relu,0.797
336 | cora,True,2,0.0,False,tanh,0.792
337 | cora,True,2,0.0,False,sigmoid,0.1
338 | cora,True,2,0.1,True,relu,0.664
339 | cora,True,2,0.1,True,tanh,0.684
340 | cora,True,2,0.1,True,sigmoid,0.601
341 | cora,True,2,0.1,False,relu,0.796
342 | cora,True,2,0.1,False,tanh,0.789
343 | cora,True,2,0.1,False,sigmoid,0.1
344 | cora,True,2,0.2,True,relu,0.663
345 | cora,True,2,0.2,True,tanh,0.666
346 | cora,True,2,0.2,True,sigmoid,0.602
347 | cora,True,2,0.2,False,relu,0.773
348 | cora,True,2,0.2,False,tanh,0.776
349 | cora,True,2,0.2,False,sigmoid,0.1
350 | cora,True,2,0.3,True,relu,0.644
351 | cora,True,2,0.3,True,tanh,0.654
352 | cora,True,2,0.3,True,sigmoid,0.58
353 | cora,True,2,0.3,False,relu,0.764
354 | cora,True,2,0.3,False,tanh,0.756
355 | cora,True,2,0.3,False,sigmoid,0.1
356 | cora,True,2,0.5,True,relu,0.617
357 | cora,True,2,0.5,True,tanh,0.626
358 | cora,True,2,0.5,True,sigmoid,0.511
359 | cora,True,2,0.5,False,relu,0.734
360 | cora,True,2,0.5,False,tanh,0.742
361 | cora,True,2,0.5,False,sigmoid,0.101
362 | cora,True,3,0.0,True,relu,0.698
363 | cora,True,3,0.0,True,tanh,0.691
364 | cora,True,3,0.0,True,sigmoid,0.472
365 | cora,True,3,0.0,False,relu,0.776
366 | cora,True,3,0.0,False,tanh,0.779
367 | cora,True,3,0.0,False,sigmoid,0.297
368 | cora,True,3,0.1,True,relu,0.687
369 | cora,True,3,0.1,True,tanh,0.683
370 | cora,True,3,0.1,True,sigmoid,0.428
371 | cora,True,3,0.1,False,relu,0.783
372 | cora,True,3,0.1,False,tanh,0.761
373 | cora,True,3,0.1,False,sigmoid,0.271
374 | cora,True,3,0.2,True,relu,0.685
375 | cora,True,3,0.2,True,tanh,0.687
376 | cora,True,3,0.2,True,sigmoid,0.36
377 | cora,True,3,0.2,False,relu,0.755
378 | cora,True,3,0.2,False,tanh,0.757
379 | cora,True,3,0.2,False,sigmoid,0.3
380 | cora,True,3,0.3,True,relu,0.663
381 | cora,True,3,0.3,True,tanh,0.671
382 | cora,True,3,0.3,True,sigmoid,0.352
383 | cora,True,3,0.3,False,relu,0.736
384 | cora,True,3,0.3,False,tanh,0.743
385 | cora,True,3,0.3,False,sigmoid,0.311
386 | cora,True,3,0.5,True,relu,0.639
387 | cora,True,3,0.5,True,tanh,0.622
388 | cora,True,3,0.5,True,sigmoid,0.365
389 | cora,True,3,0.5,False,relu,0.699
390 | cora,True,3,0.5,False,tanh,0.714
391 | cora,True,3,0.5,False,sigmoid,0.31
392 | cora,True,5,0.0,True,relu,0.711
393 | cora,True,5,0.0,True,tanh,0.711
394 | cora,True,5,0.0,True,sigmoid,0.24
395 | cora,True,5,0.0,False,relu,0.161
396 | cora,True,5,0.0,False,tanh,0.746
397 | cora,True,5,0.0,False,sigmoid,0.182
398 | cora,True,5,0.1,True,relu,0.688
399 | cora,True,5,0.1,True,tanh,0.682
400 | cora,True,5,0.1,True,sigmoid,0.225
401 | cora,True,5,0.1,False,relu,0.09
402 | cora,True,5,0.1,False,tanh,0.735
403 | cora,True,5,0.1,False,sigmoid,0.303
404 | cora,True,5,0.2,True,relu,0.694
405 | cora,True,5,0.2,True,tanh,0.683
406 | cora,True,5,0.2,True,sigmoid,0.213
407 | cora,True,5,0.2,False,relu,0.123
408 | cora,True,5,0.2,False,tanh,0.734
409 | cora,True,5,0.2,False,sigmoid,0.303
410 | cora,True,5,0.3,True,relu,0.677
411 | cora,True,5,0.3,True,tanh,0.637
412 | cora,True,5,0.3,True,sigmoid,0.209
413 | cora,True,5,0.3,False,relu,0.162
414 | cora,True,5,0.3,False,tanh,0.723
415 | cora,True,5,0.3,False,sigmoid,0.303
416 | cora,True,5,0.5,True,relu,0.621
417 | cora,True,5,0.5,True,tanh,0.622
418 | cora,True,5,0.5,True,sigmoid,0.197
419 | cora,True,5,0.5,False,relu,0.156
420 | cora,True,5,0.5,False,tanh,0.653
421 | cora,True,5,0.5,False,sigmoid,0.302
422 | cora,True,10,0.0,True,relu,0.603
423 | cora,True,10,0.0,True,tanh,0.607
424 | cora,True,10,0.0,True,sigmoid,0.069
425 | cora,True,10,0.0,False,relu,0.1
426 | cora,True,10,0.0,False,tanh,0.667
427 | cora,True,10,0.0,False,sigmoid,0.302
428 | cora,True,10,0.1,True,relu,0.396
429 | cora,True,10,0.1,True,tanh,0.622
430 | cora,True,10,0.1,True,sigmoid,0.075
431 | cora,True,10,0.1,False,relu,0.129
432 | cora,True,10,0.1,False,tanh,0.665
433 | cora,True,10,0.1,False,sigmoid,0.303
434 | cora,True,10,0.2,True,relu,0.373
435 | cora,True,10,0.2,True,tanh,0.633
436 | cora,True,10,0.2,True,sigmoid,0.072
437 | cora,True,10,0.2,False,relu,0.089
438 | cora,True,10,0.2,False,tanh,0.639
439 | cora,True,10,0.2,False,sigmoid,0.303
440 | cora,True,10,0.3,True,relu,0.218
441 | cora,True,10,0.3,True,tanh,0.453
442 | cora,True,10,0.3,True,sigmoid,0.077
443 | cora,True,10,0.3,False,relu,0.182
444 | cora,True,10,0.3,False,tanh,0.574
445 | cora,True,10,0.3,False,sigmoid,0.303
446 | cora,True,10,0.5,True,relu,0.114
447 | cora,True,10,0.5,True,tanh,0.299
448 | cora,True,10,0.5,True,sigmoid,0.067
449 | cora,True,10,0.5,False,relu,0.293
450 | cora,True,10,0.5,False,tanh,0.16
451 | cora,True,10,0.5,False,sigmoid,0.303
452 | cora,False,1,0.0,True,relu,0.523
453 | cora,False,1,0.0,True,tanh,0.523
454 | cora,False,1,0.0,True,sigmoid,0.523
455 | cora,False,1,0.0,False,relu,0.664
456 | cora,False,1,0.0,False,tanh,0.664
457 | cora,False,1,0.0,False,sigmoid,0.664
458 | cora,False,1,0.1,True,relu,0.506
459 | cora,False,1,0.1,True,tanh,0.506
460 | cora,False,1,0.1,True,sigmoid,0.506
461 | cora,False,1,0.1,False,relu,0.659
462 | cora,False,1,0.1,False,tanh,0.659
463 | cora,False,1,0.1,False,sigmoid,0.659
464 | cora,False,1,0.2,True,relu,0.491
465 | cora,False,1,0.2,True,tanh,0.491
466 | cora,False,1,0.2,True,sigmoid,0.491
467 | cora,False,1,0.2,False,relu,0.633
468 | cora,False,1,0.2,False,tanh,0.633
469 | cora,False,1,0.2,False,sigmoid,0.633
470 | cora,False,1,0.3,True,relu,0.499
471 | cora,False,1,0.3,True,tanh,0.499
472 | cora,False,1,0.3,True,sigmoid,0.499
473 | cora,False,1,0.3,False,relu,0.628
474 | cora,False,1,0.3,False,tanh,0.628
475 | cora,False,1,0.3,False,sigmoid,0.628
476 | cora,False,1,0.5,True,relu,0.424
477 | cora,False,1,0.5,True,tanh,0.424
478 | cora,False,1,0.5,True,sigmoid,0.424
479 | cora,False,1,0.5,False,relu,0.524
480 | cora,False,1,0.5,False,tanh,0.524
481 | cora,False,1,0.5,False,sigmoid,0.524
482 | cora,False,2,0.0,True,relu,0.673
483 | cora,False,2,0.0,True,tanh,0.677
484 | cora,False,2,0.0,True,sigmoid,0.505
485 | cora,False,2,0.0,False,relu,0.781
486 | cora,False,2,0.0,False,tanh,0.78
487 | cora,False,2,0.0,False,sigmoid,0.103
488 | cora,False,2,0.1,True,relu,0.646
489 | cora,False,2,0.1,True,tanh,0.654
490 | cora,False,2,0.1,True,sigmoid,0.511
491 | cora,False,2,0.1,False,relu,0.761
492 | cora,False,2,0.1,False,tanh,0.756
493 | cora,False,2,0.1,False,sigmoid,0.1
494 | cora,False,2,0.2,True,relu,0.64
495 | cora,False,2,0.2,True,tanh,0.634
496 | cora,False,2,0.2,True,sigmoid,0.475
497 | cora,False,2,0.2,False,relu,0.749
498 | cora,False,2,0.2,False,tanh,0.751
499 | cora,False,2,0.2,False,sigmoid,0.11
500 | cora,False,2,0.3,True,relu,0.626
501 | cora,False,2,0.3,True,tanh,0.601
502 | cora,False,2,0.3,True,sigmoid,0.503
503 | cora,False,2,0.3,False,relu,0.696
504 | cora,False,2,0.3,False,tanh,0.697
505 | cora,False,2,0.3,False,sigmoid,0.104
506 | cora,False,2,0.5,True,relu,0.537
507 | cora,False,2,0.5,True,tanh,0.526
508 | cora,False,2,0.5,True,sigmoid,0.492
509 | cora,False,2,0.5,False,relu,0.633
510 | cora,False,2,0.5,False,tanh,0.631
511 | cora,False,2,0.5,False,sigmoid,0.103
512 | cora,False,3,0.0,True,relu,0.685
513 | cora,False,3,0.0,True,tanh,0.681
514 | cora,False,3,0.0,True,sigmoid,0.357
515 | cora,False,3,0.0,False,relu,0.761
516 | cora,False,3,0.0,False,tanh,0.757
517 | cora,False,3,0.0,False,sigmoid,0.129
518 | cora,False,3,0.1,True,relu,0.673
519 | cora,False,3,0.1,True,tanh,0.677
520 | cora,False,3,0.1,True,sigmoid,0.307
521 | cora,False,3,0.1,False,relu,0.733
522 | cora,False,3,0.1,False,tanh,0.743
523 | cora,False,3,0.1,False,sigmoid,0.142
524 | cora,False,3,0.2,True,relu,0.661
525 | cora,False,3,0.2,True,tanh,0.643
526 | cora,False,3,0.2,True,sigmoid,0.315
527 | cora,False,3,0.2,False,relu,0.699
528 | cora,False,3,0.2,False,tanh,0.715
529 | cora,False,3,0.2,False,sigmoid,0.145
530 | cora,False,3,0.3,True,relu,0.654
531 | cora,False,3,0.3,True,tanh,0.625
532 | cora,False,3,0.3,True,sigmoid,0.243
533 | cora,False,3,0.3,False,relu,0.68
534 | cora,False,3,0.3,False,tanh,0.695
535 | cora,False,3,0.3,False,sigmoid,0.159
536 | cora,False,3,0.5,True,relu,0.598
537 | cora,False,3,0.5,True,tanh,0.567
538 | cora,False,3,0.5,True,sigmoid,0.253
539 | cora,False,3,0.5,False,relu,0.587
540 | cora,False,3,0.5,False,tanh,0.619
541 | cora,False,3,0.5,False,sigmoid,0.172
542 | cora,False,5,0.0,True,relu,0.699
543 | cora,False,5,0.0,True,tanh,0.716
544 | cora,False,5,0.0,True,sigmoid,0.206
545 | cora,False,5,0.0,False,relu,0.095
546 | cora,False,5,0.0,False,tanh,0.735
547 | cora,False,5,0.0,False,sigmoid,0.208
548 | cora,False,5,0.1,True,relu,0.69
549 | cora,False,5,0.1,True,tanh,0.687
550 | cora,False,5,0.1,True,sigmoid,0.195
551 | cora,False,5,0.1,False,relu,0.092
552 | cora,False,5,0.1,False,tanh,0.718
553 | cora,False,5,0.1,False,sigmoid,0.123
554 | cora,False,5,0.2,True,relu,0.653
555 | cora,False,5,0.2,True,tanh,0.687
556 | cora,False,5,0.2,True,sigmoid,0.191
557 | cora,False,5,0.2,False,relu,0.167
558 | cora,False,5,0.2,False,tanh,0.694
559 | cora,False,5,0.2,False,sigmoid,0.131
560 | cora,False,5,0.3,True,relu,0.605
561 | cora,False,5,0.3,True,tanh,0.644
562 | cora,False,5,0.3,True,sigmoid,0.205
563 | cora,False,5,0.3,False,relu,0.157
564 | cora,False,5,0.3,False,tanh,0.082
565 | cora,False,5,0.3,False,sigmoid,0.142
566 | cora,False,5,0.5,True,relu,0.499
567 | cora,False,5,0.5,True,tanh,0.509
568 | cora,False,5,0.5,True,sigmoid,0.211
569 | cora,False,5,0.5,False,relu,0.149
570 | cora,False,5,0.5,False,tanh,0.06
571 | cora,False,5,0.5,False,sigmoid,0.175
572 | cora,False,10,0.0,True,relu,0.108
573 | cora,False,10,0.0,True,tanh,0.51
574 | cora,False,10,0.0,True,sigmoid,0.074
575 | cora,False,10,0.0,False,relu,0.129
576 | cora,False,10,0.0,False,tanh,0.162
577 | cora,False,10,0.0,False,sigmoid,0.275
578 | cora,False,10,0.1,True,relu,0.139
579 | cora,False,10,0.1,True,tanh,0.603
580 | cora,False,10,0.1,True,sigmoid,0.197
581 | cora,False,10,0.1,False,relu,0.108
582 | cora,False,10,0.1,False,tanh,0.723
583 | cora,False,10,0.1,False,sigmoid,0.251
584 | cora,False,10,0.2,True,relu,0.136
585 | cora,False,10,0.2,True,tanh,0.577
586 | cora,False,10,0.2,True,sigmoid,0.199
587 | cora,False,10,0.2,False,relu,0.125
588 | cora,False,10,0.2,False,tanh,0.082
589 | cora,False,10,0.2,False,sigmoid,0.221
590 | cora,False,10,0.3,True,relu,0.131
591 | cora,False,10,0.3,True,tanh,0.539
592 | cora,False,10,0.3,True,sigmoid,0.07
593 | cora,False,10,0.3,False,relu,0.17
594 | cora,False,10,0.3,False,tanh,0.567
595 | cora,False,10,0.3,False,sigmoid,0.219
596 | cora,False,10,0.5,True,relu,0.15
597 | cora,False,10,0.5,True,tanh,0.299
598 | cora,False,10,0.5,True,sigmoid,0.184
599 | cora,False,10,0.5,False,relu,0.24
600 | cora,False,10,0.5,False,tanh,0.153
601 | cora,False,10,0.5,False,sigmoid,0.184
602 | 


--------------------------------------------------------------------------------
/exp5/src/config.py:
--------------------------------------------------------------------------------
# Default settings for a run. Config.reset() copies every key/value pair of
# this dict onto the Config instance as an attribute, so each key has to be a
# valid attribute name; Config.__init__ declares a None placeholder for each.
default_cfg = {
    # Dataset location and split sizes. The three split values equal the
    # defaults of NodeClsDataset.__init__ in data.py.
    # NOTE(review): presumably forwarded to NodeClsDataset by main.py -- confirm.
    'data_root': './../GNN/',
    'data_name': 'cora',
    'num_train_per_class': 20,
    'num_val': 500,
    'num_test': 1000,
    # Run / optimisation settings.
    # NOTE(review): consumed outside this file; meanings inferred from names only.
    'seed': 114514,
    'device': 'cuda:0',
    'epochs': 1000,
    'patience': 5,
    'lr': 5e-3,
    'weight_decay': 5e-4,
    # Model / graph-regularisation switches.
    # NOTE(review): consumed outside this file; meanings inferred from names only.
    'hidden_dim': 32,
    'n_layers': 2,
    'activations': 'relu',
    'dropout': 0.5,
    'drop_edge': 0.,
    'add_self_loop': True,
    'pair_norm': False,
    'test_only': False
}
22 | 
23 | 
class Config(object):
    """Attribute-style container for run settings.

    A new instance starts with every known field declared as ``None`` and is
    then immediately filled from the module-level ``default_cfg`` dict.
    """

    def __init__(self):
        # Declare all fields explicitly so they are discoverable on the class;
        # reset() below overwrites them with the defaults.
        # -- data --
        self.data_root = None
        self.data_name = None
        self.num_train_per_class = None
        self.num_val = None
        self.num_test = None
        # -- run / optimisation --
        self.seed = None
        self.device = None
        self.epochs = None
        self.patience = None
        self.lr = None
        self.weight_decay = None
        # -- model --
        self.hidden_dim = None
        self.n_layers = None
        self.activations = None
        self.dropout = None
        self.drop_edge = None
        self.add_self_loop = None
        self.pair_norm = None
        self.test_only = None
        self.reset()

    def reset(self):
        """Set every key of ``default_cfg`` on this instance to its default value.

        Attributes that are not keys of ``default_cfg`` are left untouched.
        """
        for name in default_cfg:
            setattr(self, name, default_cfg[name])

    def update(self, new_cfg):
        """Set one attribute per entry of the mapping ``new_cfg``.

        Keys are not validated: an unknown key simply creates a new attribute.
        """
        for name in new_cfg:
            setattr(self, name, new_cfg[name])


--------------------------------------------------------------------------------
/exp5/src/data.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_geometric.data import InMemoryDataset, Data
 3 | from torch_geometric.utils import to_undirected
 4 | import os
 5 | 
 6 | 
# Label names of each supported dataset, keyed by the lower-cased dataset name
# (NodeClsDataset lower-cases `name` before looking it up here).
# NodeClsDataset.process() enumerates the list, so a label's position in the
# list is the integer class id stored in `y`; reordering changes the ids.
classes = {
    'citeseer': ['Agents', 'AI', 'DB', 'IR', 'ML', 'HCI'],
    'cora': ['Case_Based', 'Genetic_Algorithms', 'Neural_Networks', 'Probabilistic_Methods',
             'Reinforcement_Learning', 'Rule_Learning', 'Theory']
}
12 | 
13 | 
class NodeClsDataset(InMemoryDataset):
    """Node-classification dataset built from ``<name>.content`` / ``<name>.cites``.

    Raw files are looked up in ``<root>/<name>/`` and the processed result is
    named ``<name>.pt`` inside ``<root>/processed/``.

    Args:
        root: Directory that holds the per-dataset raw folders.
        name: Dataset name; lower-cased and used as a key into ``classes``.
        num_train_per_class: Number of training nodes sampled per class.
        num_val: Number of validation nodes drawn from the non-training nodes.
        num_test: Number of test nodes drawn after the validation nodes.
        transform: Passed through to the base-class constructor.
    """

    def __init__(self, root, name, num_train_per_class: int = 20,
                 num_val: int = 500, num_test: int = 1000, transform=None):
        # Assigned before the base constructor runs, as in the original
        # ordering: the path properties below read self.name.
        self.name = name.lower()
        self.num_train_per_class = num_train_per_class
        self.num_val = num_val
        self.num_test = num_test
        super().__init__(root, transform=transform)
        loaded = torch.load(self.processed_paths[0])
        self.data, self.slices = loaded

    @property
    def raw_dir(self) -> str:
        """Folder containing the raw files: ``<root>/<name>``."""
        return os.path.join(self.root, self.name)

    @property
    def processed_dir(self) -> str:
        """Folder for processed files: ``<root>/processed`` (same for every name)."""
        return os.path.join(self.root, 'processed')

    @property
    def raw_file_names(self):
        """The content file followed by the cites file."""
        return [f'{self.name}.{suffix}' for suffix in ('content', 'cites')]

    @property
    def processed_file_names(self):
        """Single processed file named after the dataset."""
        return [f'{self.name}.pt']

    def download(self):
        """Intentionally does nothing."""

    def process(self):
        """Read the raw files, draw random train/val/test masks and save the result."""
        label2index = {}
        for position, label in enumerate(classes[self.name]):
            label2index[label] = position

        content_path = self.raw_paths[0]
        cites_path = self.raw_paths[1]
        id2index, x, y = read_content(content_path, label2index)
        edge_index = read_cites(cites_path, id2index)
        data = Data(x=x, y=y, edge_index=edge_index)

        # All three masks start out all-False.
        num_nodes = data.y.size(0)
        data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
        data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
        data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)

        # Training set: a random sample of at most num_train_per_class nodes
        # from every class.
        for class_id in range(len(label2index)):
            members = (data.y == class_id).nonzero(as_tuple=False).view(-1)
            order = torch.randperm(members.size(0))
            chosen = members[order[:self.num_train_per_class]]
            data.train_mask[chosen] = True

        # Validation and test sets: consecutive slices of one shuffled list of
        # the nodes that are not in the training set.
        rest = (~data.train_mask).nonzero(as_tuple=False).view(-1)
        rest = rest[torch.randperm(rest.size(0))]
        val_end = self.num_val
        test_end = self.num_val + self.num_test
        data.val_mask[rest[:val_end]] = True
        data.test_mask[rest[val_end:test_end]] = True

        data, slices = self.collate([data])
        torch.save((data, slices), self.processed_paths[0])
66 | 
def read_content(content_file, label2index):
    """Parse a tab-separated ``.content`` file into ids, features and labels.

    Every non-blank line is expected to hold, separated by tabs, a paper id,
    one or more numeric attributes and finally a label string.

    Args:
        content_file: Path of the file to read.
        label2index: Mapping from label string to integer class index.

    Returns:
        A tuple ``(id2index, x, y)``: ``id2index`` maps each paper id to its
        row number, ``x`` is a tensor with one row of float attributes per
        paper, and ``y`` is a ``torch.long`` tensor of class indices.

    Raises:
        KeyError: If a line carries a label that is not in ``label2index``.
    """
    with open(content_file, 'r') as f:
        # Keep every non-blank record. The earlier
        # ``f.read().strip().split('\n')[:-1]`` silently discarded the last
        # paper: ``strip()`` had already removed the trailing newline, so the
        # final list element was real data, not an empty string.
        lines = [line for line in f if line.strip()]
    id2index = {}
    x = []
    y = []
    for i, line in enumerate(lines):
        fields = line.strip().split('\t')
        paper_id, attr, label = fields[0], fields[1:-1], fields[-1]
        id2index[paper_id] = i
        x.append([float(e) for e in attr])
        y.append(label2index[label])
    return id2index, torch.tensor(x), torch.tensor(y, dtype=torch.long)
80 | 
81 | 
def read_cites(cites_file, id2index):
    """Parse a tab-separated ``.cites`` file into an undirected ``edge_index``.

    Every non-blank line must hold exactly two tab-separated paper ids, the
    cited paper first and the citing paper second. Lines that mention an id
    missing from ``id2index`` are skipped.

    Args:
        cites_file: Path of the file to read.
        id2index: Mapping from paper id to node index.

    Returns:
        A ``torch.long`` tensor of shape ``(2, num_edges)`` as returned by
        ``to_undirected``.
    """
    with open(cites_file, 'r') as f:
        # Keep every non-blank record; the earlier ``strip().split('\n')[:-1]``
        # dropped the last citation because ``strip()`` had already removed
        # the trailing newline.
        lines = [line for line in f if line.strip()]
    edges = []
    for line in lines:
        cited, citing = line.strip().split('\t')
        if citing not in id2index or cited not in id2index:
            continue
        edges.append([id2index[citing], id2index[cited]])

    # ``to_undirected`` expects the (2, num_edges) layout, so transpose
    # *before* calling it (previously it was handed the (num_edges, 2) list
    # layout). ``view(-1, 2)`` keeps an empty edge list well-formed as (2, 0).
    edge_index = torch.tensor(edges, dtype=torch.long).view(-1, 2).t().contiguous()
    return to_undirected(edge_index)
96 | 


--------------------------------------------------------------------------------
/exp5/src/main.py:
--------------------------------------------------------------------------------
  1 | from utils import set_seed
  2 | from config import Config
  3 | import torch
  4 | import torch.nn as nn
  5 | from torch.optim import Adam
  6 | from data import NodeClsDataset
  7 | from torch_geometric.transforms import NormalizeFeatures
  8 | import numpy as np
  9 | from model import GCN
 10 | from itertools import product
 11 | import pandas as pd
 12 | 
 13 | 
 14 | def train(model, data, optimizer, loss_fc):
 15 |     model.train()
 16 |     optimizer.zero_grad()
 17 | 
 18 |     logits = model(data.x, data.edge_index)
 19 |     loss = loss_fc(logits[data.train_mask], data.y[data.train_mask])
 20 |     loss.backward()
 21 |     optimizer.step()
 22 | 
 23 |     # Get the predictions
 24 |     preds = torch.argmax(logits, dim=1).flatten()
 25 |     acc = (preds[data.train_mask] == data.y[data.train_mask]).cpu().numpy().mean()
 26 | 
 27 |     return loss, acc
 28 | 
 29 | 
 30 | def evaluate(model, data, loss_fc, mode='val'):
 31 |     model.eval()
 32 |     with torch.no_grad():
 33 |         logits = model(data.x, data.edge_index)
 34 |         mask = getattr(data, f'{mode}_mask')
 35 |         loss = loss_fc(logits[mask], data.y[mask])
 36 |         # Get the predictions
 37 |         preds = torch.argmax(logits, dim=1).flatten()
 38 |         acc = (preds[mask] == data.y[mask]).cpu().numpy().mean()
 39 | 
 40 |     return loss, acc
 41 | 
 42 | 
 43 | def main(cfg: Config):
 44 |     set_seed(cfg.seed)
 45 |     dataset = NodeClsDataset(cfg.data_root, cfg.data_name, cfg.num_train_per_class,
 46 |                              cfg.num_val, cfg.num_test, transform=NormalizeFeatures())
 47 |     # from torch_geometric.datasets import Planetoid
 48 |     # dataset = Planetoid(root='./tmp/Cora', name='Cora', split='random', transform=NormalizeFeatures())
 49 | 
 50 |     model = GCN(dataset.num_node_features, cfg.hidden_dim, dataset.num_classes,
 51 |                 n_layers=cfg.n_layers, act=cfg.activations, add_self_loops=cfg.add_self_loop,
 52 |                 pair_norm=cfg.pair_norm, dropout=cfg.dropout, drop_edge=cfg.drop_edge)
 53 |     optimizer = Adam(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)
 54 |     criterion = nn.CrossEntropyLoss()
 55 | 
 56 |     data = dataset[0].to(cfg.device)
 57 |     model = model.to(device=cfg.device)
 58 |     criterion = criterion.to(cfg.device)
 59 |     if not cfg.test_only:
 60 |         best_valid_loss = np.inf
 61 |         wait = 0
 62 |         for epoch in range(cfg.epochs):
 63 |             print(f">>> Epoch {epoch + 1}/{cfg.epochs}")
 64 | 
 65 |             train_loss, train_acc = train(model, data, optimizer, criterion)
 66 |             valid_loss, valid_acc = evaluate(model, data, criterion, mode='val')
 67 | 
 68 |             print(f'\tTrain Loss: {train_loss:.4f} | Train Acc: {train_acc * 100:.2f}%')
 69 |             print(f'\tValid Loss: {valid_loss:.4f} | Valid Acc: {valid_acc * 100:.2f}%')
 70 | 
 71 |             if valid_loss < best_valid_loss:
 72 |                 best_valid_loss = valid_loss
 73 |                 wait = 0
 74 |                 torch.save(model.state_dict(), './checkpoint/best_weights.pt')
 75 |             else:
 76 |                 wait += 1
 77 |                 if wait > cfg.patience:
 78 |                     print('>>> Early stopped.')
 79 |                     break
 80 | 
 81 |     print(">>> Testing...")
 82 |     model.load_state_dict(torch.load("./checkpoint/best_weights.pt"))
 83 |     test_loss, test_acc = evaluate(model, data, criterion, mode='test')
 84 |     print(f'\tTest Loss: {test_loss:.4f} | Test Acc: {test_acc * 100:.2f}%')
 85 |     return test_acc
 86 | 
 87 | 
 88 | if __name__ == '__main__':
 89 |     config = Config()
 90 |     # main(config)
 91 |     # exit()
 92 | 
 93 |     cfg_grid = {
 94 |         'data_name': ['citeseer', 'cora'],
 95 |         'add_self_loop': [True, False],
 96 |         'n_layers': [1, 2, 3, 5, 10],
 97 |         'drop_edge': [0, .1, .2, .3, .5],
 98 |         'pair_norm': [True, False],
 99 |         'activations': ['relu', 'tanh', 'sigmoid']
100 |     }
101 |     results = []
102 |     keys = cfg_grid.keys()
103 |     for values in product(*cfg_grid.values()):
104 |         new_cfg = dict(zip(keys, values))
105 |         config.update(new_cfg)
106 |         acc = main(config)
107 |         results.append([*new_cfg.values, acc])
108 |     df = pd.DataFrame(results, columns=[*cfg_grid.keys(), 'test_acc'])
109 |     df.to_csv('./result.csv', index=False)
110 | 


--------------------------------------------------------------------------------
/exp5/src/model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn.functional as F
 3 | from torch_geometric.nn import GCNConv, PairNorm
 4 | from torch_geometric.utils import dropout_adj
 5 | 
 6 | 
 7 | activations = {
 8 |     'relu': torch.relu,
 9 |     'sigmoid': torch.sigmoid,
10 |     'tanh': torch.tanh,
11 | }
12 | 
13 | 
class GCN(torch.nn.Module):
    """Stack of ``GCNConv`` layers with optional PairNorm, dropout and DropEdge.

    Args:
        in_channels: Size of the input node features.
        hidden_channels: Output size of every layer except the last.
        num_classes: Output size of the last layer.
        n_layers: Number of ``GCNConv`` layers.
        act: Key into ``activations`` or a callable applied between layers.
        add_self_loops: Forwarded to every ``GCNConv``.
        pair_norm: If true, apply ``PairNorm`` after every convolution
            (including the last one).
        dropout: Feature dropout probability applied between layers.
        drop_edge: Probability passed to ``dropout_adj`` for dropping edges.
    """

    def __init__(self, in_channels: int, hidden_channels: int, num_classes: int,
                 n_layers: int, act: str = 'relu', add_self_loops: bool = True,
                 pair_norm: bool = True, dropout: float = .0, drop_edge: float = .0):
        super(GCN, self).__init__()
        self.dropout = dropout
        self.drop_edge = drop_edge
        self.pair_norm = pair_norm
        self.act = activations[act] if isinstance(act, str) else act
        # Built once here instead of being re-instantiated on every layer of
        # every forward pass.
        self.norm = PairNorm()

        self.conv_list = torch.nn.ModuleList()
        for i in range(n_layers):
            # The first/last checks are independent on purpose: with
            # ``n_layers == 1`` the single layer is both first and last and
            # must map ``in_channels`` straight to ``num_classes`` (an
            # ``elif`` here left its output at ``hidden_channels``).
            in_c = in_channels if i == 0 else hidden_channels
            out_c = num_classes if i == n_layers - 1 else hidden_channels
            self.conv_list.append(GCNConv(in_c, out_c, add_self_loops=add_self_loops))

    def forward(self, x, edge_index):
        """Return the output of the last layer for every node.

        Args:
            x: Node feature matrix passed to the first convolution.
            edge_index: Graph connectivity passed to every convolution.
        """
        # Drop edges only while training; without ``training=self.training``
        # edges were also removed at evaluation time, which made validation
        # and test results random.
        edge_index, _ = dropout_adj(edge_index, p=self.drop_edge,
                                    training=self.training)

        last = len(self.conv_list) - 1
        for i, conv in enumerate(self.conv_list):
            x = conv(x, edge_index)
            if self.pair_norm:
                x = self.norm(x)
            if i < last:
                # Activation and dropout are skipped after the final layer.
                x = self.act(x)
                x = F.dropout(x, p=self.dropout, training=self.training)

        return x
45 | 


--------------------------------------------------------------------------------
/exp5/src/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import random
 3 | import os
 4 | import numpy as np
 5 | 
 6 | 
 7 | def set_seed(seed=123):
 8 |     random.seed(seed)
 9 |     np.random.seed(seed)
10 |     os.environ["PYTHONHASHSEED"] = str(seed)
11 |     torch.manual_seed(seed)
12 |     torch.cuda.manual_seed_all(seed)
13 |     # torch.use_deterministic_algorithms(True)
14 |     # torch.backends.cudnn.enabled = False
15 |     torch.backends.cudnn.benchmark = False
16 |     torch.backends.cudnn.deterministic = True
17 |     os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2"
18 | 


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络.md:
--------------------------------------------------------------------------------
  1 | # 实验五：图卷积神经网络
  2 | 
  3 | 姓名：刘威
  4 | 
  5 | 学号：PB18010469
  6 | 
  7 | Click [here](#完) to finish reading:-)
  8 | 
  9 | 
 10 | ## 实验目的
 11 | 
 12 | + 熟悉图卷积神经网络的基本原理
 13 | + 了解网络层数对图卷积神经网络性能的影响
 14 | + 了解不同激活函数，Add self loop, DropEdge, PairNorm等技术对图卷积神经网络性能的影响。
 15 | 
 16 | 
 17 | ## 实验原理
 18 | 
 19 | ![image-20210621110149625](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110149625.png)
 20 | 
 21 | ![image-20210621110122757](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110122757.png)
 22 | 
 23 | ![image-20210621110050539](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110050539-1624244452275.png)
 24 | 
 25 | ![image-20210621110306141](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110306141.png)
 26 | 
 27 | ![image-20210621110337194](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110337194.png)
 28 | 
 29 | ![image-20210621110419877](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110419877.png)
 30 | 
 31 | ![image-20210621110603182](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110603182.png)
 32 | 
 33 | ![image-20210621110647970](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110647970.png)
 34 | 
 35 | ![image-20210621110619585](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110619585-1624244780237.png)
 36 | 
 37 | ![image-20210621110714808](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110714808.png)
 38 | 
 39 | ![image-20210621110733499](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110733499.png)
 40 | 
 41 | ![image-20210621110823719](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110823719.png)
 42 | 
 43 | ![image-20210621110938485](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110938485.png)
 44 | 
 45 | ![image-20210621110951936](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621110951936-1624244992867.png)
 46 | 
 47 | ![image-20210621111247829](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111247829-1624245168536.png)
 48 | 
 49 | ![image-20210621111339615](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111339615.png)
 50 | 
 51 | ![image-20210621111544917](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111544917.png)
 52 | 
 53 | ![image-20210621111633343](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111633343.png)
 54 | 
 55 | ![image-20210621111948461](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621111948461.png)
 56 | 
 57 | ![image-20210621112110697](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621112110697.png)
 58 | 
 59 | ![image-20210621112125882](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621112125882.png)
 60 | 
 61 | 
 62 | ## 实验内容
 63 | 
 64 | ![image-20210621112457787](%E5%AE%9E%E9%AA%8C%E4%BA%94%EF%BC%9A%E5%9B%BE%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/image-20210621112457787.png)
 65 | 
 66 | 
 67 | ## 实验结果
 68 | 
 69 | + 本实验使用`PyTorch`进行，并主要使用了`pytorch_geometric`库。
 70 | + 本实验在`Cora`和`Citeseer`两个数据集上进行了节点分类，并比较了自环，层数，DropEdge, PairNorm，激活函数对其分类性能的影响。
 71 | 
 72 | 
 73 | ### 源码结构及说明
 74 | 
 75 | #### 数据处理部分
 76 | 
 77 | **数据集概览：**
 78 | 
 79 | | Dataset  | Nodes | Edges | Classes | Features |
 80 | | :------: | :---: | :---: | :-----: | :------: |
 81 | | Citeseer | 3327  | 4732  |    6    |   3703   |
 82 | |   Cora   | 2708  | 5429  |    7    |   1433   |
 83 | 
 84 | **数据处理方法：**
 85 | 
 86 | 按照`pytorch_geometric`的数据输入格式，将顶点关联的`features`组织成一个二维矩阵`x: shape=(Nodes, Features)`, 将图结构，即顶点的连接关系用 `COO` 格式组织成一个二维矩阵`edge_index: shape=(2, Edges)`(邻接矩阵的稀疏表示). 将标签处理为一维向量`y: shape=(Nodes,)` 其取值范围为`range(Classes)`.
 87 | 
 88 | 通过`mask`将顶点划分为`train, val, test`. 其中`train_mask`覆盖每个类别分别20个顶点，`val_mask`覆盖除`train_mask`外的随机500个顶点，`test_mask`覆盖除前两者外的随机1000个顶点。图结构难以拆解成三个部分，因此图是一整个输入到网络中的，也即所有的顶点都会参与计算。`train_mask`的作用是，在计算损失时，将其他顶点mask掉，只计算训练顶点的损失。同样地，通过`val_mask,test_mask`我们可以分别计算`val`和`test`顶点的分类准确率。
 89 | 
 90 | #### 模型部分
 91 | 
 92 | 网络用**`n_layers`层`GCN`**堆叠而成，在每层`GCN`后都紧跟一层**可选的`PairNorm`层**；除了最后一层外，每层的`PairNorm`后还有激活函数和`dropout`，**激活函数可以选择`relu, tanh, sigmoid`**, `dropout`可以调节drop的概率p.
 93 | 
 94 | 其中`GCN`直接使用`pytorch_geometric`库中的`GCNConv`层，它可以**通过参数`add_self_loop`设置是否添加自环**。在输入`GCN`之前，还可以通过**设置`drop_edge`的drop比例**去掉部分`edge_index`.
 95 | 
 96 | 完整的模型定义如下：
 97 | 
 98 | ```python
 99 | import torch
100 | import torch.nn.functional as F
101 | from torch_geometric.nn import GCNConv, PairNorm
102 | from torch_geometric.utils import dropout_adj
103 | 
104 | activations = {
105 |     'relu': torch.relu,
106 |     'sigmoid': torch.sigmoid,
107 |     'tanh': torch.tanh,
108 | }
109 | 
110 | class GCN(torch.nn.Module):
111 |     def __init__(self, in_channels: int, hidden_channels: int, num_classes: int,
112 |                  n_layers: int, act: str = 'relu', add_self_loops: bool = True,
113 |                  pair_norm: bool = True, dropout: float = .0, drop_edge: float = .0):
114 |         super(GCN, self).__init__()
115 |         self.dropout = dropout
116 |         self.drop_edge = drop_edge
117 |         self.pair_norm = pair_norm
118 |         self.act = activations[act] if isinstance(act, str) else act
119 | 
120 |         self.conv_list = torch.nn.ModuleList()
121 |         for i in range(n_layers):
122 |             in_c, out_c = hidden_channels, hidden_channels
123 |             if i == 0:
124 |                 in_c = in_channels
125 |             elif i == n_layers - 1:
126 |                 out_c = num_classes
127 |             self.conv_list.append(GCNConv(in_c, out_c, add_self_loops=add_self_loops))
128 | 
129 |     def forward(self, x, edge_index):
130 |         edge_index, _ = dropout_adj(edge_index, p=self.drop_edge)
131 | 
132 |         for i, conv in enumerate(self.conv_list):
133 |             x = conv(x, edge_index)
134 |             if self.pair_norm:
135 |                 x = PairNorm()(x)
136 |             if i < len(self.conv_list) - 1:
137 |                 x = self.act(x)
138 |                 x = F.dropout(x, p=self.dropout, training=self.training)
139 | 
140 |         return x
141 | ```
142 | 
143 | ### 结果及分析
144 | 
145 | #### 参数设置
146 | 
147 | 本实验的可选参数及其默认值为
148 | 
149 | ```python
150 | default_cfg = {
151 |     'data_root': './GNN/',	# 数据根目录
152 |     'data_name': 'cora',  # citeseer or cora
153 |     'num_train_per_class': 20,  # 训练集包含的每个类别的顶点数目
154 |     'num_val': 500,  # 验证集顶点数目
155 |     'num_test': 1000,  # 测试集顶点数目
156 |     'seed': 114514,
157 |     'device': 'cuda:0',
158 |     'epochs': 1000,
159 |     'patience': 5,  # 早停的等待轮数
160 |     'lr': 5e-3,
161 |     'weight_decay': 5e-4,
162 |     'hidden_dim': 32,	
163 |     'n_layers': 2,
164 |     'activations': 'relu',
165 |     'dropout': 0.5,
166 |     'drop_edge': 0.,
167 |     'add_self_loop': True,
168 |     'pair_norm': False,
169 |     'test_only': False
170 | }
171 | ```
172 | 
173 | 其中本实验进行调节的参数及调节的范围为
174 | 
175 | ```python
176 | cfg_grid = {
177 |     'data_name': ['citeseer', 'cora'],
178 |     'add_self_loop': [True, False],
179 |     'n_layers': [1, 2, 3, 5, 10],
180 |     'drop_edge': [0, .1, .2, .3, .5],
181 |     'pair_norm': [True, False],
182 |     'activations': ['relu', 'tanh', 'sigmoid']
183 | }
184 | ```
185 | 
186 | **共有600种可能的参数组合。** **在每种参数组合下**，分别训练模型，并通过验证集`val_loss`进行早停，以`val_loss`最低时的模型权重对测试集进行测试，以其分类准确率`test_acc`作为最终评价指标。
187 | 
188 | #### 结果对比分析
189 | 
190 | 所有的组合下的`test_acc`结果可以在附件[`result.csv`](./result.csv)中查看，下面仅列举出部分结果。
191 | 
192 | **两个数据集上的最好结果及对应参数**
193 | 
194 | | `data_name` | `add_self_loop` | `n_layers` | `drop_edge` | `pair_norm` | `activations` | `test_acc` |
195 | | :---------: | :-------------: | :--------: | :---------: | :---------: | :-----------: | :--------: |
196 | |   'cora'    |      True       |     2      |     0.      |    False    |    'relu'     |   0.797    |
197 | | 'citeseer'  |      True       |     2      |     0.      |    False    |    'relu'     |   0.685    |
198 | 
199 | > **Note**: 下面的对比均以`citeseer`数据集为例, 即`data_name='citeseer'`
200 | 
201 | **是否添加自环的对比**
202 | 
203 | Selected Comparison:
204 | 
205 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc |
206 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- |
207 | | citeseer  | True          | 3        | 0.0       | False     | relu        | 0.645    |
208 | | citeseer  | False         | 3        | 0.0       | False     | relu        | 0.628    |
209 | | citeseer  | True          | 2        | 0.0       | False     | relu        | 0.685    |
210 | | citeseer  | False         | 2        | 0.0       | False     | relu        | 0.667    |
211 | 
212 | 分析：添加自环效果好，在某些参数下提升非常显著。
213 | 
214 | **不同层数的对比**
215 | 
216 | Selected Comparison:
217 | 
218 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc |
219 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- |
220 | | citeseer  | True          | 1        | 0.0       | False     | relu        | 0.68     |
221 | | citeseer  | True          | 2        | 0.0       | False     | relu        | 0.685    |
222 | | citeseer  | True          | 3        | 0.0       | False     | relu        | 0.645    |
223 | | citeseer  | True          | 5        | 0.0       | False     | relu        | 0.522    |
224 | | citeseer  | True          | 10       | 0.0       | False     | relu        | 0.176    |
225 | 
226 | 分析：两层效果最好， 层数多难以优化。
227 | 
228 | **drop edge的对比**
229 | 
230 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc |
231 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- |
232 | | citeseer  | True          | 5        | 0.0       | False     | relu        | 0.522    |
233 | | citeseer  | True          | 5        | 0.1       | False     | relu        | 0.351    |
234 | | citeseer  | True          | 5        | 0.2       | False     | relu        | 0.182    |
235 | | citeseer  | True          | 5        | 0.3       | False     | relu        | 0.201    |
236 | | citeseer  | True          | 5        | 0.5       | False     | relu        | 0.188    |
237 | | citeseer  | True          | 3        | 0.0       | False     | relu        | 0.645    |
238 | | citeseer  | True          | 3        | 0.1       | False     | relu        | 0.671    |
239 | | citeseer  | True          | 3        | 0.2       | False     | relu        | 0.655    |
240 | | citeseer  | True          | 3        | 0.3       | False     | relu        | 0.663    |
241 | | citeseer  | True          | 3        | 0.5       | False     | relu        | 0.609    |
242 | 
243 | 分析：层数少时drop edge 有点效果，层数深时效果不好。
244 | 
245 | **是否使用PairNorm的对比**
246 | 
247 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc |
248 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- |
249 | | citeseer  | True          | 1        | 0.0       | False     | relu        | 0.68     |
250 | | citeseer  | True          | 1        | 0.0       | True      | relu        | 0.443    |
251 | | citeseer  | True          | 2        | 0.0       | False     | relu        | 0.685    |
252 | | citeseer  | True          | 2        | 0.0       | True      | relu        | 0.526    |
253 | | citeseer  | True          | 3        | 0.0       | False     | relu        | 0.645    |
254 | | citeseer  | True          | 3        | 0.0       | True      | relu        | 0.568    |
255 | | citeseer  | True          | 5        | 0.0       | False     | relu        | 0.522    |
256 | | citeseer  | True          | 5        | 0.0       | True      | relu        | 0.545    |
257 | | citeseer  | True          | 10       | 0.0       | False     | relu        | 0.176    |
258 | | citeseer  | True          | 10       | 0.0       | True      | relu        | 0.3      |
259 | 
260 | 分析：层数少时加`PairNorm`效果变差，层数多时`PairNorm`有效果。
261 | 
262 | **不同激活函数的对比**
263 | 
264 | | data_name | add_self_loop | n_layers | drop_edge | pair_norm | activations | test_acc |
265 | | --------- | ------------- | -------- | --------- | --------- | ----------- | -------- |
266 | | citeseer  | True          | 2        | 0.0       | False     | relu        | 0.685    |
267 | | citeseer  | True          | 2        | 0.0       | False     | tanh        | 0.683    |
268 | | citeseer  | True          | 2        | 0.0       | False     | sigmoid     | 0.207    |
269 | | citeseer  | True          | 3        | 0.0       | False     | relu        | 0.645    |
270 | | citeseer  | True          | 3        | 0.0       | False     | tanh        | 0.667    |
271 | | citeseer  | True          | 3        | 0.0       | False     | sigmoid     | 0.207    |
272 | | citeseer  | True          | 5        | 0.0       | False     | relu        | 0.522    |
273 | | citeseer  | True          | 5        | 0.0       | False     | tanh        | 0.588    |
274 | | citeseer  | True          | 5        | 0.0       | False     | sigmoid     | 0.207    |
275 | | citeseer  | True          | 10       | 0.0       | False     | relu        | 0.176    |
276 | | citeseer  | True          | 10       | 0.0       | False     | tanh        | 0.472    |
277 | | citeseer  | True          | 10       | 0.0       | False     | sigmoid     | 0.195    |
278 | 
279 | 分析：2层时`relu~=tanh>>sigmoid`, 3层和5层`tanh>relu>>sigmoid`, 10层`tanh>>sigmoid~=relu`。
280 | 
281 | 
282 | ## 实验总结
283 | 
284 | 本次实验的最大收获在于了解了图神经网络的原理，以及学会了使用`torch_geometric`库。
285 | 
286 | [原GCN论文](https://arxiv.org/pdf/1609.02907.pdf)里面`citeseer`和`cora`数据集的最好结果（%）分别为 70.3 和 81.5， 我这里略差，分别是 68.5 和 79.7。其实网络结构是一样的，也是两层，用`relu`作为激活函数。我对比了一下才发现原因：**它那个数据集划分是某种特定的划分**。虽然划分比例相同，但在它那个划分下结果就是好不少，主要差别就在于这里。~~(你们这些做学术的人都在调些什么啊，dataset split is all you need ?)~~ 
287 | 
288 | ##### (完)
289 | 
290 | 


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络.pdf


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621105946421.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621105946421.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110023351.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110023351.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110050539-1624244452275.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110050539-1624244452275.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110050539.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110050539.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110122757.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110122757.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110149625.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110149625.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110240262.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110240262.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110306141.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110306141.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110337194.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110337194.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110419877.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110419877.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110436272.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110436272.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110603182.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110603182.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110619585-1624244780237.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110619585-1624244780237.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110619585.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110619585.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110647970.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110647970.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110714808.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110714808.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110733499.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110733499.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110823719.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110823719.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110938485.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110938485.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110951936-1624244992867.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110951936-1624244992867.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621110951936.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621110951936.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621111247829-1624245168536.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621111247829-1624245168536.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621111247829.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621111247829.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621111339615.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621111339615.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621111544917.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621111544917.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621111633343.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621111633343.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621111948461.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621111948461.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621112110697.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621112110697.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621112125882.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621112125882.png


--------------------------------------------------------------------------------
/exp5/实验五：图卷积神经网络/image-20210621112457787.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v-liuwei/USTC-2021Spring-Introduction_to_Deep_Learning/92815244520001f01deed334d066a4e087f7a959/exp5/实验五：图卷积神经网络/image-20210621112457787.png


--------------------------------------------------------------------------------