├── .gitignore ├── CSL_Continuous_Seq2Seq.py ├── CSL_Isolated_Conv3D.py ├── CSL_Isolated_ConvLSTM.py ├── CSL_Skeleton_GCN.py ├── CSL_Skeleton_RNN.py ├── README.md ├── dataset.py ├── log ├── 3dresnet18_100_acc.svg ├── 3dresnet18_100_loss.svg ├── 3dresnet34_100_acc.svg ├── 3dresnet34_100_loss.svg ├── skeleton_lstm_100_acc.svg └── skeleton_lstm_100_loss.svg ├── models ├── Attention.py ├── Conv3D.py ├── ConvLSTM.py ├── GCN.py ├── RNN.py └── Seq2Seq.py ├── test.py ├── tools.py ├── train.py └── validation.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # tensorboard 107 | runs/ 108 | 109 | # models 110 | *.pth -------------------------------------------------------------------------------- /CSL_Continuous_Seq2Seq.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from datetime import datetime 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.utils.data import DataLoader 9 | from torch.utils.tensorboard import SummaryWriter 10 | import torchvision.transforms as transforms 11 | from dataset import CSL_Continuous, CSL_Continuous_Char 12 | from models.Seq2Seq import Encoder, Decoder, Seq2Seq 13 | from train import train_seq2seq 14 | from validation import val_seq2seq 15 | 16 | # Path setting 17 | data_path = "/home/haodong/Data/CSL_Continuous/color" 18 | dict_path = "/home/haodong/Data/CSL_Continuous/dictionary.txt" 19 | corpus_path = "/home/haodong/Data/CSL_Continuous/corpus.txt" 20 | model_path = "/home/haodong/Data/seq2seq_models" 21 | log_path = "log/seq2seq_{:%Y-%m-%d_%H-%M-%S}.log".format(datetime.now()) 22 | sum_path = "runs/slr_seq2seq_{:%Y-%m-%d_%H-%M-%S}".format(datetime.now()) 23 | 24 | # Log to file & tensorboard writer 
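# A minimal guard sketch, assuming the script is run from the repository root:
# logging.FileHandler below and the torch.save calls at the end of each epoch do not
# create missing directories, so the "log" folder and model_path must already exist.
os.makedirs(os.path.dirname(log_path), exist_ok=True)
os.makedirs(model_path, exist_ok=True)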
25 | logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[logging.FileHandler(log_path), logging.StreamHandler()]) 26 | logger = logging.getLogger('SLR') 27 | logger.info('Logging to file...') 28 | writer = SummaryWriter(sum_path) 29 | 30 | # Use specific gpus 31 | os.environ["CUDA_VISIBLE_DEVICES"]="3" 32 | # Device setting 33 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | 35 | # Hyperparams 36 | epochs = 100 37 | batch_size = 8 38 | learning_rate = 1e-4 39 | weight_decay = 1e-5 40 | sample_size = 128 41 | sample_duration = 48 42 | enc_hid_dim = 512 43 | emb_dim = 256 44 | dec_hid_dim = 512 45 | dropout = 0.5 46 | clip = 1 47 | log_interval = 100 48 | 49 | if __name__ == '__main__': 50 | # Load data 51 | transform = transforms.Compose([transforms.Resize([sample_size, sample_size]), 52 | transforms.ToTensor(), 53 | transforms.Normalize(mean=[0.5], std=[0.5])]) 54 | # train_set = CSL_Continuous(data_path=data_path, dict_path=dict_path, 55 | # corpus_path=corpus_path, frames=sample_duration, train=True, transform=transform) 56 | # val_set = CSL_Continuous(data_path=data_path, dict_path=dict_path, 57 | # corpus_path=corpus_path, frames=sample_duration, train=False, transform=transform) 58 | train_set = CSL_Continuous_Char(data_path=data_path, corpus_path=corpus_path, 59 | frames=sample_duration, train=True, transform=transform) 60 | val_set = CSL_Continuous_Char(data_path=data_path, corpus_path=corpus_path, 61 | frames=sample_duration, train=False, transform=transform) 62 | logger.info("Dataset samples: {}".format(len(train_set)+len(val_set))) 63 | train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=16, pin_memory=True) 64 | val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=16, pin_memory=True) 65 | # Create Model 66 | encoder = Encoder(lstm_hidden_size=enc_hid_dim, arch="resnet18").to(device) 67 | decoder = Decoder(output_dim=train_set.output_dim, emb_dim=emb_dim, enc_hid_dim=enc_hid_dim, dec_hid_dim=dec_hid_dim, dropout=dropout).to(device) 68 | model = Seq2Seq(encoder=encoder, decoder=decoder, device=device).to(device) 69 | # Run the model parallelly 70 | if torch.cuda.device_count() > 1: 71 | logger.info("Using {} GPUs".format(torch.cuda.device_count())) 72 | model = nn.DataParallel(model) 73 | # Create loss criterion & optimizer 74 | criterion = nn.CrossEntropyLoss() 75 | optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) 76 | 77 | # Start training 78 | logger.info("Training Started".center(60, '#')) 79 | for epoch in range(epochs): 80 | # Train the model 81 | train_seq2seq(model, criterion, optimizer, clip, train_loader, device, epoch, logger, log_interval, writer) 82 | 83 | # Validate the model 84 | val_seq2seq(model, criterion, val_loader, device, epoch, logger, writer) 85 | 86 | # Save model 87 | torch.save(model.state_dict(), os.path.join(model_path, "slr_seq2seq_epoch{:03d}.pth".format(epoch+1))) 88 | logger.info("Epoch {} Model Saved".format(epoch+1).center(60, '#')) 89 | 90 | logger.info("Training Finished".center(60, '#')) 91 | -------------------------------------------------------------------------------- /CSL_Isolated_Conv3D.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from datetime import datetime 4 | import logging 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.optim as optim 9 | from torch.utils.data 
import DataLoader, random_split 10 | from torch.utils.tensorboard import SummaryWriter 11 | import torchvision.transforms as transforms 12 | from models.Conv3D import CNN3D, resnet18, resnet34, resnet50, resnet101, r2plus1d_18 13 | from dataset import CSL_Isolated 14 | from train import train_epoch 15 | from validation import val_epoch 16 | 17 | # Path setting 18 | data_path = "/home/haodong/Data/CSL_Isolated/color_video_125000" 19 | label_path = "/home/haodong/Data/CSL_Isolated/dictionary.txt" 20 | model_path = "/home/haodong/Data/cnn3d_models" 21 | log_path = "log/cnn3d_{:%Y-%m-%d_%H-%M-%S}.log".format(datetime.now()) 22 | sum_path = "runs/slr_cnn3d_{:%Y-%m-%d_%H-%M-%S}".format(datetime.now()) 23 | 24 | # Log to file & tensorboard writer 25 | logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[logging.FileHandler(log_path), logging.StreamHandler()]) 26 | logger = logging.getLogger('SLR') 27 | logger.info('Logging to file...') 28 | writer = SummaryWriter(sum_path) 29 | 30 | # Use specific gpus 31 | os.environ["CUDA_VISIBLE_DEVICES"]="2" 32 | # Device setting 33 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | 35 | # Hyperparams 36 | num_classes = 100 37 | epochs = 100 38 | batch_size = 16 39 | learning_rate = 1e-5 40 | log_interval = 20 41 | sample_size = 128 42 | sample_duration = 16 43 | attention = False 44 | drop_p = 0.0 45 | hidden1, hidden2 = 512, 256 46 | 47 | # Train with 3DCNN 48 | if __name__ == '__main__': 49 | # Load data 50 | transform = transforms.Compose([transforms.Resize([sample_size, sample_size]), 51 | transforms.ToTensor(), 52 | transforms.Normalize(mean=[0.5], std=[0.5])]) 53 | train_set = CSL_Isolated(data_path=data_path, label_path=label_path, frames=sample_duration, 54 | num_classes=num_classes, train=True, transform=transform) 55 | val_set = CSL_Isolated(data_path=data_path, label_path=label_path, frames=sample_duration, 56 | num_classes=num_classes, train=False, transform=transform) 57 | logger.info("Dataset samples: {}".format(len(train_set)+len(val_set))) 58 | train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=16, pin_memory=True) 59 | val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=16, pin_memory=True) 60 | # Create model 61 | # model = CNN3D(sample_size=sample_size, sample_duration=sample_duration, drop_p=drop_p, 62 | # hidden1=hidden1, hidden2=hidden2, num_classes=num_classes).to(device) 63 | model = resnet18(pretrained=True, progress=True, sample_size=sample_size, sample_duration=sample_duration, 64 | attention=attention, num_classes=num_classes).to(device) 65 | # model = r2plus1d_18(pretrained=True, num_classes=num_classes).to(device) 66 | # Run the model parallelly 67 | if torch.cuda.device_count() > 1: 68 | logger.info("Using {} GPUs".format(torch.cuda.device_count())) 69 | model = nn.DataParallel(model) 70 | # Create loss criterion & optimizer 71 | criterion = nn.CrossEntropyLoss() 72 | optimizer = optim.Adam(model.parameters(), lr=learning_rate) 73 | 74 | # Start training 75 | logger.info("Training Started".center(60, '#')) 76 | for epoch in range(epochs): 77 | # Train the model 78 | train_epoch(model, criterion, optimizer, train_loader, device, epoch, logger, log_interval, writer) 79 | 80 | # Validate the model 81 | val_epoch(model, criterion, val_loader, device, epoch, logger, writer) 82 | 83 | # Save model 84 | torch.save(model.state_dict(), os.path.join(model_path, "slr_cnn3d_epoch{:03d}.pth".format(epoch+1))) 85 | logger.info("Epoch 
{} Model Saved".format(epoch+1).center(60, '#')) 86 | 87 | logger.info("Training Finished".center(60, '#')) 88 | -------------------------------------------------------------------------------- /CSL_Isolated_ConvLSTM.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from datetime import datetime 4 | import logging 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.optim as optim 9 | from torch.utils.data import DataLoader, random_split 10 | from torch.utils.tensorboard import SummaryWriter 11 | import torchvision.transforms as transforms 12 | from models.ConvLSTM import CRNN, ResCRNN 13 | from dataset import CSL_Isolated 14 | from train import train_epoch 15 | from validation import val_epoch 16 | 17 | # Path setting 18 | data_path = "/home/haodong/Data/CSL_Isolated/color_video_125000" 19 | label_path = "/home/haodong/Data/CSL_Isolated/dictionary.txt" 20 | model_path = "/home/haodong/Data/cnnlstm_models" 21 | log_path = "log/cnnlstm_{:%Y-%m-%d_%H-%M-%S}.log".format(datetime.now()) 22 | sum_path = "runs/slr_cnnlstm_{:%Y-%m-%d_%H-%M-%S}".format(datetime.now()) 23 | 24 | # Log to file & tensorboard writer 25 | logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[logging.FileHandler(log_path), logging.StreamHandler()]) 26 | logger = logging.getLogger('SLR') 27 | logger.info('Logging to file...') 28 | writer = SummaryWriter(sum_path) 29 | 30 | # Use specific gpus 31 | os.environ["CUDA_VISIBLE_DEVICES"]="2" 32 | # Device setting 33 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | 35 | # Hyperparams 36 | epochs = 200 37 | batch_size = 16 38 | learning_rate = 1e-4 39 | weight_decay = 1e-5 40 | log_interval = 20 41 | sample_size = 128 42 | sample_duration = 16 43 | num_classes = 100 44 | lstm_hidden_size = 512 45 | lstm_num_layers = 1 46 | attention = False 47 | 48 | # Train with Conv+LSTM 49 | if __name__ == '__main__': 50 | # Load data 51 | transform = transforms.Compose([transforms.Resize([sample_size, sample_size]), 52 | transforms.ToTensor(), 53 | transforms.Normalize(mean=[0.5], std=[0.5])]) 54 | train_set = CSL_Isolated(data_path=data_path, label_path=label_path, frames=sample_duration, 55 | num_classes=num_classes, train=True, transform=transform) 56 | val_set = CSL_Isolated(data_path=data_path, label_path=label_path, frames=sample_duration, 57 | num_classes=num_classes, train=False, transform=transform) 58 | logger.info("Dataset samples: {}".format(len(train_set)+len(val_set))) 59 | train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=16, pin_memory=True) 60 | val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=16, pin_memory=True) 61 | # Create model 62 | # model = CRNN(sample_size=sample_size, sample_duration=sample_duration, num_classes=num_classes, 63 | # lstm_hidden_size=lstm_hidden_size, lstm_num_layers=lstm_num_layers).to(device) 64 | model = ResCRNN(sample_size=sample_size, sample_duration=sample_duration, num_classes=num_classes, 65 | lstm_hidden_size=lstm_hidden_size, lstm_num_layers=lstm_num_layers, attention=attention).to(device) 66 | # Run the model parallelly 67 | if torch.cuda.device_count() > 1: 68 | logger.info("Using {} GPUs".format(torch.cuda.device_count())) 69 | model = nn.DataParallel(model) 70 | # Create loss criterion & optimizer 71 | criterion = nn.CrossEntropyLoss() 72 | optimizer = optim.Adam(model.parameters(), lr=learning_rate, 
weight_decay=weight_decay) 73 | 74 | # Start training 75 | logger.info("Training Started".center(60, '#')) 76 | for epoch in range(epochs): 77 | # Train the model 78 | train_epoch(model, criterion, optimizer, train_loader, device, epoch, logger, log_interval, writer) 79 | 80 | # Validate the model 81 | val_epoch(model, criterion, val_loader, device, epoch, logger, writer) 82 | 83 | # Save model 84 | torch.save(model.state_dict(), os.path.join(model_path, "slr_convlstm_epoch{:03d}.pth".format(epoch+1))) 85 | logger.info("Epoch {} Model Saved".format(epoch+1).center(60, '#')) 86 | 87 | logger.info("Training Finished".center(60, '#')) 88 | -------------------------------------------------------------------------------- /CSL_Skeleton_GCN.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from datetime import datetime 4 | import logging 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.optim as optim 9 | from torch.utils.data import DataLoader, random_split 10 | from torch.utils.tensorboard import SummaryWriter 11 | from models.GCN import GCN 12 | from dataset import CSL_Skeleton 13 | from train import train_epoch 14 | from validation import val_epoch 15 | 16 | # Path setting 17 | data_path = "/home/haodong/Data/CSL_Isolated_1/xf500_body_depth_txt" 18 | label_path = "/home/haodong/Data/CSL_Isolated_1/dictionary.txt" 19 | model_path = "/home/haodong/Data/gcn_models" 20 | log_path = "log/gcn_{:%Y-%m-%d_%H-%M-%S}.log".format(datetime.now()) 21 | sum_path = "runs/slr_gcn_{:%Y-%m-%d_%H-%M-%S}".format(datetime.now()) 22 | 23 | # Log to file & tensorboard writer 24 | logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[logging.FileHandler(log_path), logging.StreamHandler()]) 25 | logger = logging.getLogger('SLR') 26 | logger.info('Logging to file...') 27 | writer = SummaryWriter(sum_path) 28 | 29 | # Use specific gpus 30 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 31 | # Device setting 32 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 33 | 34 | # Hyperparams 35 | epochs = 200 36 | batch_size = 32 37 | learning_rate = 1e-5 38 | log_interval = 100 39 | num_classes = 500 40 | in_channels = 2 41 | sample_duration = 16 42 | selected_joints = None 43 | split_to_channels = True 44 | 45 | # Train with GCN 46 | if __name__ == '__main__': 47 | # Load data 48 | transform = None # TODO 49 | train_set = CSL_Skeleton(data_path=data_path, label_path=label_path, frames=sample_duration, num_classes=num_classes, 50 | selected_joints=selected_joints, split_to_channels=split_to_channels, train=True, transform=transform) 51 | val_set = CSL_Skeleton(data_path=data_path, label_path=label_path, frames=sample_duration, num_classes=num_classes, 52 | selected_joints=selected_joints, split_to_channels=split_to_channels, train=False, transform=transform) 53 | logger.info("Dataset samples: {}".format(len(train_set)+len(val_set))) 54 | train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True) 55 | val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True) 56 | # Create model 57 | model = GCN(in_channels=in_channels, num_class=num_classes, graph_args={'layout': 'ntu-rgb+d'}, 58 | edge_importance_weighting=True).to(device) 59 | # Run the model parallelly 60 | if torch.cuda.device_count() > 1: 61 | logger.info("Using {} GPUs".format(torch.cuda.device_count())) 62 | model = 
nn.DataParallel(model) 63 | # Create loss criterion & optimizer 64 | criterion = nn.CrossEntropyLoss() 65 | optimizer = optim.Adam(model.parameters(), lr=learning_rate) 66 | 67 | # Start training 68 | logger.info("Training Started".center(60, '#')) 69 | for epoch in range(epochs): 70 | # Train the model 71 | train_epoch(model, criterion, optimizer, train_loader, device, epoch, logger, log_interval, writer) 72 | 73 | # Validate the model 74 | val_epoch(model, criterion, val_loader, device, epoch, logger, writer) 75 | 76 | # Save model 77 | torch.save(model.state_dict(), os.path.join(model_path, "slr_gcn_epoch{:03d}.pth".format(epoch+1))) 78 | logger.info("Epoch {} Model Saved".format(epoch+1).center(60, '#')) 79 | 80 | logger.info("Training Finished".center(60, '#')) 81 | -------------------------------------------------------------------------------- /CSL_Skeleton_RNN.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from datetime import datetime 4 | import logging 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.optim as optim 9 | from torch.utils.data import DataLoader, random_split 10 | from torch.utils.tensorboard import SummaryWriter 11 | from models.RNN import LSTM, GRU 12 | from dataset import CSL_Skeleton 13 | from train import train_epoch 14 | from validation import val_epoch 15 | 16 | # Path setting 17 | data_path = "/home/haodong/Data/CSL_Isolated/xf500_body_depth_txt" 18 | label_path = "/home/haodong/Data/CSL_Isolated/dictionary.txt" 19 | model_path = "/home/haodong/Data/skeleton_models" 20 | log_path = "log/skeleton_{:%Y-%m-%d_%H-%M-%S}.log".format(datetime.now()) 21 | sum_path = "runs/slr_skeleton_{:%Y-%m-%d_%H-%M-%S}".format(datetime.now()) 22 | 23 | # Log to file & tensorboard writer 24 | logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[logging.FileHandler(log_path), logging.StreamHandler()]) 25 | logger = logging.getLogger('SLR') 26 | logger.info('Logging to file...') 27 | writer = SummaryWriter(sum_path) 28 | 29 | # Use specific gpus 30 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 31 | # Device setting 32 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 33 | 34 | # Hyperparams 35 | epochs = 500 36 | batch_size = 32 37 | learning_rate = 1e-5 38 | log_interval = 20 39 | num_classes = 100 40 | sample_duration = 16 41 | selected_joints = ['HANDLEFT', 'HANDRIGHT', 'ELBOWLEFT', 'ELBOWRIGHT'] 42 | input_size = len(selected_joints)*2 43 | hidden_size = 512 44 | num_layers = 1 45 | hidden1 = 512 46 | drop_p = 0.0 47 | 48 | # Train with Skeleton+RNN 49 | if __name__ == '__main__': 50 | # Load data 51 | transform = None # TODO 52 | train_set = CSL_Skeleton(data_path=data_path, label_path=label_path, frames=sample_duration, 53 | num_classes=num_classes, selected_joints=selected_joints, train=True, transform=transform) 54 | val_set = CSL_Skeleton(data_path=data_path, label_path=label_path, frames=sample_duration, 55 | num_classes=num_classes, selected_joints=selected_joints, train=False, transform=transform) 56 | logger.info("Dataset samples: {}".format(len(train_set)+len(val_set))) 57 | train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True) 58 | val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True) 59 | # Create model 60 | # model = LSTM(lstm_input_size=input_size, lstm_hidden_size=hidden_size, lstm_num_layers=num_layers, 61 | # 
num_classes=num_classes, hidden1=hidden1, drop_p=drop_p).to(device) 62 | model = GRU(gru_input_size=input_size, gru_hidden_size=hidden_size, gru_num_layers=num_layers, 63 | num_classes=num_classes, hidden1=hidden1, drop_p=drop_p).to(device) 64 | # Run the model parallelly 65 | if torch.cuda.device_count() > 1: 66 | logger.info("Using {} GPUs".format(torch.cuda.device_count())) 67 | model = nn.DataParallel(model) 68 | # Create loss criterion & optimizer 69 | criterion = nn.CrossEntropyLoss() 70 | optimizer = optim.Adam(model.parameters(), lr=learning_rate) 71 | 72 | # Start training 73 | logger.info("Training Started".center(60, '#')) 74 | for epoch in range(epochs): 75 | # Train the model 76 | train_epoch(model, criterion, optimizer, train_loader, device, epoch, logger, log_interval, writer) 77 | 78 | # Validate the model 79 | val_epoch(model, criterion, val_loader, device, epoch, logger, writer) 80 | 81 | # Save model 82 | torch.save(model.state_dict(), os.path.join(model_path, "slr_skeleton_epoch{:03d}.pth".format(epoch+1))) 83 | logger.info("Epoch {} Model Saved".format(epoch+1).center(60, '#')) 84 | 85 | logger.info("Training Finished".center(60, '#')) 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SLR 2 | isolated & continuous sign language recognition using CNN+LSTM/3D CNN/GCN/Encoder-Decoder 3 | 4 | ## Requirements 5 | 6 | - Download and extract **[CSL Dataset](http://home.ustc.edu.cn/~pjh/openresources/cslr-dataset-2015/index.html)** 7 | - Download and install **[PyTorch](https://pytorch.org/)** 8 | 9 | ## Isolated Sign Language Recognition 10 | 11 | ### CNN+LSTM 12 | 13 | 1. **four layers of Conv2d + one layer of LSTM** 14 | 15 | | Dataset | Classes | Samples | Best Test Acc | Best Test Loss | 16 | | ------------ | ------- | ------- | ------------- | -------------- | 17 | | CSL_Isolated | 100 | 25,000 | 82.08% | 0.734426 | 18 | | CSL_Isolated | 500 | 125,000 | 71.71% | 1.332122 | 19 | 20 | 2. **ResNet + one layer of LSTM** 21 | 22 | | Dataset | Classes | Samples | Best Test Acc | Best Test Loss | 23 | | ------------ | ------- | ------- | ------------- | -------------- | 24 | | CSL_Isolated | 100 | 25,000 | 93.54% | 0.245582 | 25 | | CSL_Isolated | 500 | 125,000 | 83.17% | 0.748759 | 26 | 27 | ### 3D CNN 28 | 29 | 1. **three layers of Conv3d** 30 | 31 | | Dataset | Classes | Samples | Best Test Acc | Best Test Loss | 32 | | ------------ | ------- | ------- | ------------- | -------------- | 33 | | CSL_Isolated | 100 | 25,000 | 58.86% | 1.560049 | 34 | | CSL_Isolated | 500 | 125,000 | 45.07% | 2.255563 | 35 | 36 | 2. **3D ResNet** 37 | 38 | | Method | Dataset | Classes | Samples | Best Test Acc | Best Test Loss | 39 | | --------- | ------------ | ------- | ------- | ------------- | -------------- | 40 | | ResNet18 | CSL_Isolated | 100 | 25,000 | 93.30% | 0.246169 | 41 | | ResNet18 | CSL_Isolated | 500 | 125,000 | 79.42% | 0.800490 | 42 | | ResNet34 | CSL_Isolated | 100 | 25,000 | 94.78% | 0.207592 | 43 | | ResNet34 | CSL_Isolated | 500 | 125,000 | 81.61% | 0.750424 | 44 | | ResNet50 | CSL_Isolated | 100 | 25,000 | 94.36% | 0.232631 | 45 | | ResNet50 | CSL_Isolated | 500 | 125,000 | 83.15% | 0.803212 | 46 | | ResNet101 | CSL_Isolated | 100 | 25,000 | 95.26% | 0.205430 | 47 | | ResNet101 | CSL_Isolated | 500 | 125,000 | 83.18% | 0.751727 | 48 | 49 | 3. 
**ResNet (2+1)D** 50 | 51 | | Dataset | Classes | Samples | Best Test Acc | Best Test Loss | 52 | | ------------ | ------- | ------- | ------------- | -------------- | 53 | | CSL_Isolated | 100 | 25,000 | 98.68% | 0.043099 | 54 | | CSL_Isolated | 500 | 125,000 | 94.85% | 0.234880 | 55 | 56 | ### GCN 57 | 58 | | Dataset | Classes | Samples | Best Test Acc | Best Test Loss | 59 | | ------------ | ------- | ------- | ------------- | -------------- | 60 | | CSL_Skeleton | 100 | 25,000 | 79.20% | 0.737053 | 61 | | CSL_Skeleton | 500 | 125,000 | 66.64% | 1.165872 | 62 | 63 | ### Skeleton+LSTM 64 | 65 | | Dataset | Classes | Samples | Best Test Acc | Best Test Loss | 66 | | ------------ | ------- | ------- | ------------- | -------------- | 67 | | CSL_Skeleton | 100 | 25,000 | 84.30% | 0.488253 | 68 | | CSL_Skeleton | 500 | 125,000 | 70.62% | 1.078730 | 69 | 70 | ## Continuous Sign Language Recognition 71 | 72 | ### Encoder-Decoder 73 | 74 | *Encoder is ResNet18+LSTM, and Decoder is LSTM* 75 | 76 | | Dataset | Sentences | Samples | Best Test Wer | Best Test Loss | 77 | | ------------------- | --------- | ------- | ------------- | -------------- | 78 | | CSL_Continuous | 100 | 25,000 | 1.01% | 0.034636 | 79 | | CSL_Continuous_Char | 100 | 25,000 | 1.19% | 0.049449 | 80 | 81 | ## References 82 | 83 | - [Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs and ImageNet?](https://arxiv.org/pdf/1711.09577.pdf) 84 | 85 | - [Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition](https://arxiv.org/pdf/1801.07455.pdf) 86 | - [A Closer Look at Spatiotemporal Convolutions for Action Recognition](https://arxiv.org/abs/1711.11248) 87 | - [SIGN LANGUAGE RECOGNITION WITH LONG SHORT-TERM MEMORY](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7532884) 88 | - https://github.com/HHTseng/video-classification 89 | - https://github.com/kenshohara/3D-ResNets-PyTorch 90 | 91 | - https://github.com/bentrevett/pytorch-seq2seq 92 | 93 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | import torch 4 | from torch.utils.data import Dataset 5 | import torchvision.transforms as transforms 6 | 7 | """ 8 | Implementation of Chinese Sign Language Dataset(50 signers with 5 times) 9 | """ 10 | class CSL_Isolated(Dataset): 11 | def __init__(self, data_path, label_path, frames=16, num_classes=500, train=True, transform=None): 12 | super(CSL_Isolated, self).__init__() 13 | self.data_path = data_path 14 | self.label_path = label_path 15 | self.train = train 16 | self.transform = transform 17 | self.frames = frames 18 | self.num_classes = num_classes 19 | self.signers = 50 20 | self.repetition = 5 21 | if self.train: 22 | self.videos_per_folder = int(0.8 * self.signers * self.repetition) 23 | else: 24 | self.videos_per_folder = int(0.2 * self.signers * self.repetition) 25 | self.data_folder = [] 26 | try: 27 | obs_path = [os.path.join(self.data_path, item) for item in os.listdir(self.data_path)] 28 | self.data_folder = sorted([item for item in obs_path if os.path.isdir(item)]) 29 | except Exception as e: 30 | print("Something wrong with your data path!!!") 31 | raise 32 | self.labels = {} 33 | try: 34 | label_file = open(self.label_path, 'r') 35 | for line in label_file.readlines(): 36 | line = line.strip() 37 | line = line.split('\t') 38 | self.labels[line[0]] = line[1] 39 | except Exception as e: 40 | raise 41 | 42 | def 
read_images(self, folder_path): 43 | assert len(os.listdir(folder_path)) >= self.frames, "Too few images in your data folder: " + str(folder_path) 44 | images = [] 45 | start = 1 46 | step = int(len(os.listdir(folder_path))/self.frames) 47 | for i in range(self.frames): 48 | image = Image.open(os.path.join(folder_path, '{:06d}.jpg').format(start+i*step)) #.convert('L') 49 | if self.transform is not None: 50 | image = self.transform(image) 51 | images.append(image) 52 | 53 | images = torch.stack(images, dim=0) 54 | # switch dimension for 3d cnn 55 | images = images.permute(1, 0, 2, 3) 56 | # print(images.shape) 57 | return images 58 | 59 | def __len__(self): 60 | return self.num_classes * self.videos_per_folder 61 | 62 | def __getitem__(self, idx): 63 | top_folder = self.data_folder[int(idx/self.videos_per_folder)] 64 | selected_folders = [os.path.join(top_folder, item) for item in os.listdir(top_folder)] 65 | selected_folders = sorted([item for item in selected_folders if os.path.isdir(item)]) 66 | if self.train: 67 | selected_folder = selected_folders[idx%self.videos_per_folder] 68 | else: 69 | selected_folder = selected_folders[idx%self.videos_per_folder + int(0.8*self.signers*self.repetition)] 70 | images = self.read_images(selected_folder) 71 | # print(selected_folder, int(idx/self.videos_per_folder)) 72 | # print(self.labels['{:06d}'.format(int(idx/self.videos_per_folder))]) 73 | # label = self.labels['{:06d}'.format(int(idx/self.videos_per_folder))] 74 | label = torch.LongTensor([int(idx/self.videos_per_folder)]) 75 | 76 | return {'data': images, 'label': label} 77 | 78 | def label_to_word(self, label): 79 | if isinstance(label, torch.Tensor): 80 | return self.labels['{:06d}'.format(label.item())] 81 | elif isinstance(label, int): 82 | return self.labels['{:06d}'.format(label)] 83 | 84 | 85 | """ 86 | Implementation of CSL Skeleton Dataset 87 | """ 88 | class CSL_Skeleton(Dataset): 89 | joints_index = {'SPINEBASE': 0, 'SPINEMID': 1, 'NECK': 2, 'HEAD': 3, 'SHOULDERLEFT':4, 90 | 'ELBOWLEFT': 5, 'WRISTLEFT': 6, 'HANDLEFT': 7, 'SHOULDERRIGHT': 8, 91 | 'ELBOWRIGHT': 9, 'WRISTRIGHT': 10, 'HANDRIGHT': 11, 'HIPLEFT': 12, 92 | 'KNEELEFT': 13, 'ANKLELEFT': 14, 'FOOTLEFT': 15, 'HIPRIGHT': 16, 93 | 'KNEERIGHT': 17, 'ANKLERIGHT': 18, 'FOOTRIGHT': 19, 'SPINESHOULDER': 20, 94 | 'HANDTIPLEFT': 21, 'THUMBLEFT': 22, 'HANDTIPRIGHT': 23, 'THUMBRIGHT': 24} 95 | def __init__(self, data_path, label_path, frames=16, num_classes=500, selected_joints=None, split_to_channels=False, train=True, transform=None): 96 | super(CSL_Skeleton, self).__init__() 97 | self.data_path = data_path 98 | self.label_path = label_path 99 | self.frames = frames 100 | self.num_classes = num_classes 101 | self.selected_joints = selected_joints 102 | self.split_to_channels = split_to_channels 103 | self.train = train 104 | self.transform = transform 105 | self.signers = 50 106 | self.repetition = 5 107 | if self.train: 108 | self.txt_per_folder = int(0.8 * self.signers * self.repetition) 109 | else: 110 | self.txt_per_folder = int(0.2 * self.signers * self.repetition) 111 | self.data_folder = [] 112 | try: 113 | obs_path = [os.path.join(self.data_path, item) for item in os.listdir(self.data_path)] 114 | self.data_folder = sorted([item for item in obs_path if os.path.isdir(item)]) 115 | except Exception as e: 116 | print("Something wrong with your data path!!!") 117 | raise 118 | self.labels = {} 119 | try: 120 | label_file = open(self.label_path, 'r') 121 | for line in label_file.readlines(): 122 | line = line.strip() 123 | line = 
line.split('\t') 124 | self.labels[line[0]] = line[1] 125 | except Exception as e: 126 | raise 127 | 128 | def read_file(self, txt_path): 129 | txt_file = open(txt_path, 'r') 130 | all_skeletons = [] 131 | for line in txt_file.readlines(): 132 | line = line.split(' ') 133 | skeleton = [int(item) for item in line if item != '\n'] 134 | selected_x = [] 135 | selected_y = [] 136 | # select specific joints 137 | if self.selected_joints is not None: 138 | for joint in self.selected_joints: 139 | assert joint in self.joints_index, 'JOINT ' + joint + ' DOES NOT EXIST!!!' 140 | selected_x.append(skeleton[2*self.joints_index[joint]]) 141 | selected_y.append(skeleton[2*self.joints_index[joint]+1]) 142 | else: 143 | for i in range(len(skeleton)): 144 | if i % 2 == 0: 145 | selected_x.append(skeleton[i]) 146 | else: 147 | selected_y.append(skeleton[i]) 148 | # print(selected_x, selected_y) 149 | if self.split_to_channels: 150 | selected_skeleton = torch.FloatTensor([selected_x, selected_y]) 151 | else: 152 | selected_skeleton = torch.FloatTensor(selected_x + selected_y) 153 | # print(selected_skeleton.shape) 154 | if self.transform is not None: 155 | selected_skeleton = self.transform(selected_skeleton) 156 | all_skeletons.append(selected_skeleton) 157 | # print(all_skeletons) 158 | skeletons = [] 159 | start = 0 160 | step = int(len(all_skeletons)/self.frames) 161 | for i in range(self.frames): 162 | skeletons.append(all_skeletons[start+i*step]) 163 | skeletons = torch.stack(skeletons, dim=0) 164 | # print(skeletons.shape) 165 | 166 | return skeletons 167 | 168 | def __len__(self): 169 | return self.num_classes * self.txt_per_folder 170 | 171 | def __getitem__(self, idx): 172 | top_folder = self.data_folder[int(idx/self.txt_per_folder)] 173 | selected_txts = [os.path.join(top_folder, item) for item in os.listdir(top_folder)] 174 | selected_txts = sorted([item for item in selected_txts if item.endswith('.txt')]) 175 | if self.train: 176 | selected_txt = selected_txts[idx%self.txt_per_folder] 177 | else: 178 | selected_txt = selected_txts[idx%self.txt_per_folder + int(0.8*self.signers*self.repetition)] 179 | # print(selected_txt) 180 | data = self.read_file(selected_txt) 181 | label = torch.LongTensor([int(idx/self.txt_per_folder)]) 182 | 183 | return {'data': data, 'label': label} 184 | 185 | def label_to_word(self, label): 186 | if isinstance(label, torch.Tensor): 187 | return self.labels['{:06d}'.format(label.item())] 188 | elif isinstance(label, int): 189 | return self.labels['{:06d}'.format(label)] 190 | 191 | 192 | """ 193 | Implementation of CSL Continuous Dataset (Word Level) 194 | """ 195 | class CSL_Continuous(Dataset): 196 | def __init__(self, data_path, dict_path, corpus_path, frames=128, train=True, transform=None): 197 | super(CSL_Continuous, self).__init__() 198 | self.data_path = data_path 199 | self.dict_path = dict_path 200 | self.corpus_path = corpus_path 201 | self.frames = frames 202 | self.train = train 203 | self.transform = transform 204 | self.num_sentences = 100 205 | self.signers = 50 206 | self.repetition = 5 207 | if self.train: 208 | self.videos_per_folder = int(0.8 * self.signers * self.repetition) 209 | else: 210 | self.videos_per_folder = int(0.2 * self.signers * self.repetition) 211 | # dictionary 212 | self.dict = {'<pad>': 0, '<sos>': 1, '<eos>': 2} 213 | self.output_dim = 3 214 | try: 215 | dict_file = open(self.dict_path, 'r') 216 | for line in dict_file.readlines(): 217 | line = line.strip().split('\t') 218 | # word with multiple expressions 219 | if '(' in line[1] and ')' in
line[1]: 220 | for delimiter in ['(', ')', '、']: 221 | line[1] = line[1].replace(delimiter, " ") 222 | words = line[1].split() 223 | else: 224 | words = [line[1]] 225 | # print(words) 226 | for word in words: 227 | self.dict[word] = self.output_dim 228 | self.output_dim += 1 229 | except Exception as e: 230 | raise 231 | # img data 232 | self.data_folder = [] 233 | try: 234 | obs_path = [os.path.join(self.data_path, item) for item in os.listdir(self.data_path)] 235 | self.data_folder = sorted([item for item in obs_path if os.path.isdir(item)]) 236 | except Exception as e: 237 | raise 238 | # corpus 239 | self.corpus = {} 240 | self.unknown = set() 241 | try: 242 | corpus_file = open(self.corpus_path, 'r') 243 | for line in corpus_file.readlines(): 244 | line = line.strip().split() 245 | sentence = line[1] 246 | raw_sentence = (line[1]+'.')[:-1] 247 | paired = [False for i in range(len(line[1]))] 248 | # print(id(raw_sentence), id(line[1]), id(sentence)) 249 | # pair long words with higher priority 250 | for token in sorted(self.dict, key=len, reverse=True): 251 | index = raw_sentence.find(token) 252 | # print(index, line[1]) 253 | if index != -1 and not paired[index]: 254 | line[1] = line[1].replace(token, " "+token+" ") 255 | # mark as paired 256 | for i in range(len(token)): 257 | paired[index+i] = True 258 | # add sos 259 | tokens = [self.dict['<sos>']] 260 | for token in line[1].split(): 261 | if token in self.dict: 262 | tokens.append(self.dict[token]) 263 | else: 264 | self.unknown.add(token) 265 | # add eos 266 | tokens.append(self.dict['<eos>']) 267 | self.corpus[line[0]] = tokens 268 | except Exception as e: 269 | raise 270 | # add padding 271 | length = [len(tokens) for key, tokens in self.corpus.items()] 272 | self.max_length = max(length) 273 | # print(max(length)) 274 | for key, tokens in self.corpus.items(): 275 | if len(tokens) < self.max_length: 276 | tokens.extend([self.dict['<pad>']]*(self.max_length-len(tokens))) 277 | # print(self.corpus) 278 | # print(self.unknown) 279 | 280 | def read_images(self, folder_path): 281 | assert len(os.listdir(folder_path)) >= self.frames, "Too few images in your data folder: " + str(folder_path) 282 | images = [] 283 | start = 1 284 | step = int(len(os.listdir(folder_path))/self.frames) 285 | for i in range(self.frames): 286 | image = Image.open(os.path.join(folder_path, '{:06d}.jpg').format(start+i*step)) #.convert('L') 287 | if self.transform is not None: 288 | image = self.transform(image) 289 | images.append(image) 290 | 291 | images = torch.stack(images, dim=0) 292 | # switch dimension 293 | images = images.permute(1, 0, 2, 3) 294 | # print(images.shape) 295 | return images 296 | 297 | def __len__(self): 298 | return self.num_sentences * self.videos_per_folder 299 | 300 | def __getitem__(self, idx): 301 | top_folder = self.data_folder[int(idx/self.videos_per_folder)] 302 | selected_folders = [os.path.join(top_folder, item) for item in os.listdir(top_folder)] 303 | selected_folders = sorted([item for item in selected_folders if os.path.isdir(item)]) 304 | if self.train: 305 | selected_folder = selected_folders[idx%self.videos_per_folder] 306 | else: 307 | selected_folder = selected_folders[idx%self.videos_per_folder + int(0.8*self.signers*self.repetition)] 308 | images = self.read_images(selected_folder) 309 | # print(selected_folder, int(idx/self.videos_per_folder)) 310 | # print(self.corpus['{:06d}'.format(int(idx/self.videos_per_folder))]) 311 | tokens = torch.LongTensor(self.corpus['{:06d}'.format(int(idx/self.videos_per_folder))]) 312 | 313 |
return images, tokens 314 | 315 | 316 | """ 317 | Implementation of CSL Continuous Dataset (Character Level) 318 | """ 319 | class CSL_Continuous_Char(Dataset): 320 | def __init__(self, data_path, corpus_path, frames=128, train=True, transform=None): 321 | super(CSL_Continuous_Char, self).__init__() 322 | self.data_path = data_path 323 | self.corpus_path = corpus_path 324 | self.frames = frames 325 | self.train = train 326 | self.transform = transform 327 | self.num_sentences = 100 328 | self.signers = 50 329 | self.repetition = 5 330 | if self.train: 331 | self.videos_per_folder = int(0.8 * self.signers * self.repetition) 332 | else: 333 | self.videos_per_folder = int(0.2 * self.signers * self.repetition) 334 | # dictionary 335 | self.dict = {'<pad>': 0, '<sos>': 1, '<eos>': 2} 336 | self.output_dim = 3 337 | try: 338 | dict_file = open(self.corpus_path, 'r') 339 | for line in dict_file.readlines(): 340 | line = line.strip().split() 341 | sentence = line[1] 342 | for char in sentence: 343 | if char not in self.dict: 344 | self.dict[char] = self.output_dim 345 | self.output_dim += 1 346 | except Exception as e: 347 | raise 348 | # img data 349 | self.data_folder = [] 350 | try: 351 | obs_path = [os.path.join(self.data_path, item) for item in os.listdir(self.data_path)] 352 | self.data_folder = sorted([item for item in obs_path if os.path.isdir(item)]) 353 | except Exception as e: 354 | raise 355 | # corpus 356 | self.corpus = {} 357 | self.unknown = set() 358 | try: 359 | corpus_file = open(self.corpus_path, 'r') 360 | for line in corpus_file.readlines(): 361 | line = line.strip().split() 362 | sentence = line[1] 363 | raw_sentence = (line[1]+'.')[:-1] 364 | paired = [False for i in range(len(line[1]))] 365 | # print(id(raw_sentence), id(line[1]), id(sentence)) 366 | # pair long words with higher priority 367 | for token in sorted(self.dict, key=len, reverse=True): 368 | index = raw_sentence.find(token) 369 | # print(index, line[1]) 370 | if index != -1 and not paired[index]: 371 | line[1] = line[1].replace(token, " "+token+" ") 372 | # mark as paired 373 | for i in range(len(token)): 374 | paired[index+i] = True 375 | # add sos 376 | tokens = [self.dict['<sos>']] 377 | for token in line[1].split(): 378 | if token in self.dict: 379 | tokens.append(self.dict[token]) 380 | else: 381 | self.unknown.add(token) 382 | # add eos 383 | tokens.append(self.dict['<eos>']) 384 | self.corpus[line[0]] = tokens 385 | except Exception as e: 386 | raise 387 | # add padding 388 | length = [len(tokens) for key, tokens in self.corpus.items()] 389 | self.max_length = max(length) 390 | # print(max(length)) 391 | for key, tokens in self.corpus.items(): 392 | if len(tokens) < self.max_length: 393 | tokens.extend([self.dict['<pad>']]*(self.max_length-len(tokens))) 394 | # print(self.corpus) 395 | # print(self.unknown) 396 | 397 | def read_images(self, folder_path): 398 | assert len(os.listdir(folder_path)) >= self.frames, "Too few images in your data folder: " + str(folder_path) 399 | images = [] 400 | start = 1 401 | step = int(len(os.listdir(folder_path))/self.frames) 402 | for i in range(self.frames): 403 | image = Image.open(os.path.join(folder_path, '{:06d}.jpg').format(start+i*step)) #.convert('L') 404 | if self.transform is not None: 405 | image = self.transform(image) 406 | images.append(image) 407 | 408 | images = torch.stack(images, dim=0) 409 | # switch dimension 410 | images = images.permute(1, 0, 2, 3) 411 | # print(images.shape) 412 | return images 413 | 414 | def __len__(self): 415 | return self.num_sentences *
self.videos_per_folder 416 | 417 | def __getitem__(self, idx): 418 | top_folder = self.data_folder[int(idx/self.videos_per_folder)] 419 | selected_folders = [os.path.join(top_folder, item) for item in os.listdir(top_folder)] 420 | selected_folders = sorted([item for item in selected_folders if os.path.isdir(item)]) 421 | if self.train: 422 | selected_folder = selected_folders[idx%self.videos_per_folder] 423 | else: 424 | selected_folder = selected_folders[idx%self.videos_per_folder + int(0.8*self.signers*self.repetition)] 425 | images = self.read_images(selected_folder) 426 | # print(selected_folder, int(idx/self.videos_per_folder)) 427 | # print(self.corpus['{:06d}'.format(int(idx/self.videos_per_folder))]) 428 | tokens = torch.LongTensor(self.corpus['{:06d}'.format(int(idx/self.videos_per_folder))]) 429 | 430 | return images, tokens 431 | 432 | 433 | # Test 434 | if __name__ == '__main__': 435 | transform = transforms.Compose([transforms.Resize([128, 128]), transforms.ToTensor()]) 436 | # dataset = CSL_Isolated(data_path="/home/haodong/Data/CSL_Isolated/color_video_125000", 437 | # label_path='/home/haodong/Data/CSL_Isolated/dictionary.txt', transform=transform) # print(len(dataset)) 438 | # print(dataset[1000]['images'].shape) 439 | # dataset = CSL_Skeleton(data_path="/home/haodong/Data/CSL_Isolated/xf500_body_depth_txt", 440 | # label_path="/home/haodong/Data/CSL_Isolated/dictionary.txt", selected_joints=['SPINEBASE', 'SPINEMID', 'HANDTIPRIGHT'], split_to_channels=True) 441 | # print(dataset[1000]) 442 | # label = dataset[1000]['label'] 443 | # print(dataset.label_to_word(label)) 444 | # dataset[1000] 445 | dataset = CSL_Continuous( 446 | data_path="/home/haodong/Data/CSL_Continuous/color", 447 | dict_path="/home/haodong/Data/CSL_Continuous/dictionary.txt", 448 | corpus_path="/home/haodong/Data/CSL_Continuous/corpus.txt", 449 | train=True, transform=transform 450 | ) 451 | # dataset = CSL_Continuous_Char( 452 | # data_path="/home/haodong/Data/CSL_Continuous/color", 453 | # corpus_path="/home/haodong/Data/CSL_Continuous/corpus.txt", 454 | # train=True, transform=transform 455 | # ) 456 | print(len(dataset)) 457 | images, tokens = dataset[1000] 458 | print(images.shape, tokens) 459 | print(dataset.output_dim) 460 | -------------------------------------------------------------------------------- /log/3dresnet18_100_acc.svg: -------------------------------------------------------------------------------- 1 | [SVG plot: accuracy curve for 3D ResNet-18, 100 classes; only axis tick labels survive text extraction] -------------------------------------------------------------------------------- /log/3dresnet18_100_loss.svg: -------------------------------------------------------------------------------- 1 | [SVG plot: loss curve for 3D ResNet-18, 100 classes; only axis tick labels survive text extraction] -------------------------------------------------------------------------------- /log/3dresnet34_100_acc.svg: -------------------------------------------------------------------------------- 1 | [SVG plot: accuracy curve for 3D ResNet-34, 100 classes; only axis tick labels survive text extraction] -------------------------------------------------------------------------------- /log/3dresnet34_100_loss.svg: -------------------------------------------------------------------------------- 1 | [SVG plot: loss curve for 3D ResNet-34, 100 classes; only axis tick labels survive text extraction] -------------------------------------------------------------------------------- /models/Attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | """ 6 |
Attention blocks 7 | Reference: Learn To Pay Attention 8 | """ 9 | class ProjectorBlock(nn.Module): 10 | def __init__(self, in_channels, out_channels): 11 | super(ProjectorBlock, self).__init__() 12 | self.op = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 13 | kernel_size=1, padding=0, bias=False) 14 | 15 | def forward(self, x): 16 | return self.op(x) 17 | 18 | 19 | class ProjectorBlock3D(nn.Module): 20 | def __init__(self, in_channels, out_channels): 21 | super(ProjectorBlock3D, self).__init__() 22 | self.op = nn.Conv3d(in_channels=in_channels, out_channels=out_channels, 23 | kernel_size=1, padding=0, bias=False) 24 | 25 | def forward(self, x): 26 | return self.op(x) 27 | 28 | 29 | class LinearAttentionBlock(nn.Module): 30 | def __init__(self, in_channels, normalize_attn=True): 31 | super(LinearAttentionBlock, self).__init__() 32 | self.normalize_attn = normalize_attn 33 | self.op = nn.Conv2d(in_channels=in_channels, out_channels=1, 34 | kernel_size=1, padding=0, bias=False) 35 | 36 | def forward(self, l, g): 37 | N, C, H, W = l.size() 38 | c = self.op(l+g) # (batch_size,1,H,W) 39 | if self.normalize_attn: 40 | a = F.softmax(c.view(N,1,-1), dim=2).view(N,1,H,W) 41 | else: 42 | a = torch.sigmoid(c) 43 | g = torch.mul(a.expand_as(l), l) 44 | if self.normalize_attn: 45 | g = g.view(N,C,-1).sum(dim=2) # (batch_size,C) 46 | else: 47 | g = F.adaptive_avg_pool2d(g, (1,1)).view(N,C) 48 | return c.view(N,1,H,W), g 49 | 50 | 51 | class LinearAttentionBlock3D(nn.Module): 52 | def __init__(self, in_channels, normalize_attn=True): 53 | super(LinearAttentionBlock3D, self).__init__() 54 | self.normalize_attn = normalize_attn 55 | self.op = nn.Conv3d(in_channels=in_channels, out_channels=1, 56 | kernel_size=1, padding=0, bias=False) 57 | 58 | def forward(self, l, g): 59 | N, C, T, H, W = l.size() 60 | c = self.op(l+g) # (batch_size,1,T,H,W) 61 | if self.normalize_attn: 62 | a = F.softmax(c.view(N,1,-1), dim=2).view(N,1,T,H,W) 63 | else: 64 | a = torch.sigmoid(c) 65 | g = torch.mul(a.expand_as(l), l) 66 | if self.normalize_attn: 67 | g = g.view(N,C,-1).sum(dim=2) # (batch_size,C) 68 | else: 69 | g = F.adaptive_avg_pool3d(g, (1,1,1)).view(N,C) 70 | return c.view(N,1,T,H,W), g 71 | 72 | """ 73 | Dense attention block 74 | Reference: https://github.com/philipperemy/keras-attention-mechanism 75 | """ 76 | class LSTMAttentionBlock(nn.Module): 77 | def __init__(self, hidden_size): 78 | super(LSTMAttentionBlock, self).__init__() 79 | self.hidden_size = hidden_size 80 | self.fc1 = nn.Linear(self.hidden_size, self.hidden_size, bias=False) 81 | self.fc2 = nn.Linear(self.hidden_size*2, self.hidden_size, bias=False) 82 | 83 | def forward(self, hidden_states): 84 | # (batch_size, time_steps, hidden_size) 85 | score_first_part = self.fc1(hidden_states) 86 | # (batch_size, hidden_size) 87 | h_t = hidden_states[:,-1,:] 88 | # (batch_size, time_steps) 89 | score = torch.bmm(score_first_part, h_t.unsqueeze(2)).squeeze(2) 90 | attention_weights = F.softmax(score, dim=1) 91 | # (batch_size, hidden_size) 92 | context_vector = torch.bmm(hidden_states.permute(0,2,1), attention_weights.unsqueeze(2)).squeeze(2) 93 | # (batch_size, hidden_size*2) 94 | pre_activation = torch.cat((context_vector, h_t), dim=1) 95 | # (batch_size, hidden_size) 96 | attention_vector = self.fc2(pre_activation) 97 | attention_vector = torch.tanh(attention_vector) 98 | 99 | return attention_vector 100 | 101 | # Test 102 | if __name__ == '__main__': 103 | # 2d block 104 | attention_block = LinearAttentionBlock(in_channels=3) 105 | l = 
torch.randn(16, 3, 128, 128) 106 | g = torch.randn(16, 3, 128, 128) 107 | print(attention_block(l, g)) 108 | # 3d block 109 | attention_block_3d = LinearAttentionBlock3D(in_channels=3) 110 | l = torch.randn(16, 3, 16, 128, 128) 111 | g = torch.randn(16, 3, 16, 128, 128) 112 | print(attention_block_3d(l, g)) 113 | # LSTM block 114 | attention_block_lstm = LSTMAttentionBlock(hidden_size=256) 115 | hidden_states = torch.randn(32, 16, 256) 116 | print(attention_block_lstm(hidden_states).shape) 117 | -------------------------------------------------------------------------------- /models/Conv3D.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from torch.hub import load_state_dict_from_url 7 | import torchvision 8 | from functools import partial 9 | from collections import OrderedDict 10 | import math 11 | 12 | import os,inspect,sys 13 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 14 | sys.path.insert(0,currentdir) 15 | from Attention import ProjectorBlock3D, LinearAttentionBlock3D 16 | 17 | """ 18 | Implementation of 3D CNN. 19 | """ 20 | class CNN3D(nn.Module): 21 | def __init__(self, sample_size=128, sample_duration=16, drop_p=0.0, hidden1=512, hidden2=256, num_classes=100): 22 | super(CNN3D, self).__init__() 23 | self.sample_size = sample_size 24 | self.sample_duration = sample_duration 25 | self.num_classes = num_classes 26 | 27 | # network params 28 | self.ch1, self.ch2, self.ch3 = 32, 48, 48 29 | self.k1, self.k2, self.k3 = (3,7,7), (3,7,7), (3,5,5) 30 | self.s1, self.s2, self.s3 = (2,2,2), (2,2,2), (2,2,2) 31 | self.p1, self.p2, self.p3 = (0,0,0), (0,0,0), (0,0,0) 32 | self.d1, self.d2, self.d3 = (1,1,1), (1,1,1), (1,1,1) 33 | self.hidden1, self.hidden2 = hidden1, hidden2 34 | self.drop_p = drop_p 35 | self.pool_k, self.pool_s, self.pool_p, self.pool_d = (1,2,2), (1,2,2), (0,0,0), (1,1,1) 36 | # Conv1 37 | self.conv1_output_shape = self.compute_output_shape(self.sample_duration, self.sample_size, 38 | self.sample_size, self.k1, self.s1, self.p1, self.d1) 39 | # self.conv1_output_shape = self.compute_output_shape(self.conv1_output_shape[0], self.conv1_output_shape[1], 40 | # self.conv1_output_shape[2], self.pool_k, self.pool_s, self.pool_p, self.pool_d) 41 | # Conv2 42 | self.conv2_output_shape = self.compute_output_shape(self.conv1_output_shape[0], self.conv1_output_shape[1], 43 | self.conv1_output_shape[2], self.k2, self.s2, self.p2, self.d2) 44 | # self.conv2_output_shape = self.compute_output_shape(self.conv2_output_shape[0], self.conv2_output_shape[1], 45 | # self.conv2_output_shape[2], self.pool_k, self.pool_s, self.pool_p, self.pool_d) 46 | # Conv3 47 | self.conv3_output_shape = self.compute_output_shape(self.conv2_output_shape[0], self.conv2_output_shape[1], 48 | self.conv2_output_shape[2], self.k3, self.s3, self.p3, self.d3) 49 | # print(self.conv1_output_shape, self.conv2_output_shape, self.conv3_output_shape) 50 | 51 | # network architecture 52 | # in_channels=1 for grayscale, 3 for rgb 53 | self.conv1 = nn.Conv3d(in_channels=3, out_channels=self.ch1, kernel_size=self.k1, 54 | stride=self.s1, padding=self.p1, dilation=self.d1) 55 | self.bn1 = nn.BatchNorm3d(self.ch1) 56 | self.conv2 = nn.Conv3d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, 57 | stride=self.s2, padding=self.p2, dilation=self.d2) 58 | self.bn2 = nn.BatchNorm3d(self.ch2) 59 
| self.conv3 = nn.Conv3d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, 60 | stride=self.s3, padding=self.p3, dilation=self.d3) 61 | self.bn3 = nn.BatchNorm3d(self.ch3) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.drop = nn.Dropout3d(p=self.drop_p) 64 | self.pool = nn.MaxPool3d(kernel_size=self.pool_k) 65 | self.fc1 = nn.Linear(self.ch3 * self.conv3_output_shape[0] * self.conv3_output_shape[1] * self.conv3_output_shape[2], self.hidden1) 66 | self.fc2 = nn.Linear(self.hidden1, self.hidden2) 67 | self.fc3 = nn.Linear(self.hidden2, self.num_classes) 68 | 69 | def forward(self, x): 70 | # Conv1 71 | x = self.conv1(x) 72 | x = self.bn1(x) 73 | x = self.relu(x) 74 | # x = self.pool(x) 75 | # x = self.drop(x) 76 | # Conv2 77 | x = self.conv2(x) 78 | x = self.bn2(x) 79 | x = self.relu(x) 80 | # x = self.pool(x) 81 | # x = self.drop(x) 82 | # Conv3 83 | x = self.conv3(x) 84 | x = self.bn3(x) 85 | x = self.relu(x) 86 | # x = self.drop(x) 87 | # MLP 88 | # print(x.shape) 89 | # x.size(0) ------ batch_size 90 | x = x.view(x.size(0), -1) 91 | x = F.relu(self.fc1(x)) 92 | x = F.relu(self.fc2(x)) 93 | x = F.dropout(x, p=self.drop_p, training=self.training) 94 | x = self.fc3(x) 95 | 96 | return x 97 | 98 | def compute_output_shape(self, D_in, H_in, W_in, k, s, p, d): 99 | # Conv 100 | D_out = np.floor((D_in + 2*p[0] - d[0]*(k[0] - 1) - 1)/s[0] + 1).astype(int) 101 | H_out = np.floor((H_in + 2*p[1] - d[1]*(k[1] - 1) - 1)/s[1] + 1).astype(int) 102 | W_out = np.floor((W_in + 2*p[2] - d[2]*(k[2] - 1) - 1)/s[2] + 1).astype(int) 103 | 104 | return D_out, H_out, W_out 105 | 106 | 107 | """ 108 | Implementation of 3D Resnet 109 | Reference: Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs and ImageNet? 110 | """ 111 | class BasicBlock(nn.Module): 112 | expansion = 1 113 | # planes refer to the number of feature maps 114 | def __init__(self, inplanes, planes, stride=1, downsample=None): 115 | super(BasicBlock, self).__init__() 116 | self.stride = stride 117 | self.downsample = downsample 118 | self.conv1 = nn.Conv3d( 119 | inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 120 | self.bn1 = nn.BatchNorm3d(planes) 121 | self.relu = nn.ReLU(inplace=True) 122 | self.conv2 = nn.Conv3d( 123 | planes, planes, kernel_size=3, padding=1, bias=False) 124 | self.bn2 = nn.BatchNorm3d(planes) 125 | 126 | def forward(self, x): 127 | residual = x 128 | # conv1 129 | out = self.conv1(x) 130 | out = self.bn1(out) 131 | out = self.relu(out) 132 | # conv2 133 | out = self.conv2(out) 134 | out = self.bn2(out) 135 | # downsample 136 | if self.downsample is not None: 137 | residual = self.downsample(x) 138 | 139 | # print(out.shape, residual.shape) 140 | out += residual 141 | out = self.relu(out) 142 | 143 | return out 144 | 145 | 146 | class Bottleneck(nn.Module): 147 | expansion = 4 148 | # planes refer to the number of feature maps 149 | def __init__(self, inplanes, planes, stride=1, downsample=None): 150 | super(Bottleneck, self).__init__() 151 | self.stride = stride 152 | self.downsample = downsample 153 | self.conv1 = nn.Conv3d( 154 | inplanes, planes, kernel_size=1, bias=False) # kernal_size=1 don't need padding 155 | self.bn1 = nn.BatchNorm3d(planes) 156 | self.conv2 = nn.Conv3d( 157 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 158 | self.bn2 = nn.BatchNorm3d(planes) 159 | self.conv3 = nn.Conv3d(planes, planes * 4, kernel_size=1, bias=False) 160 | self.bn3 = nn.BatchNorm3d(planes * 4) 161 | self.relu = nn.ReLU(inplace=True) 162 | 163 | def 
forward(self, x): 164 | residual = x 165 | # conv1 166 | out = self.conv1(x) 167 | out = self.bn1(out) 168 | out = self.relu(out) 169 | # conv2 170 | out = self.conv2(out) 171 | out = self.bn2(out) 172 | out = self.relu(out) 173 | # conv3 174 | out = self.conv3(out) 175 | out = self.bn3(out) 176 | # downsample 177 | if self.downsample is not None: 178 | residual = self.downsample(x) 179 | 180 | # print(out.shape, residual.shape) 181 | out += residual 182 | out = self.relu(out) 183 | 184 | return out 185 | 186 | 187 | def downsample_basic_block(x, planes, stride): 188 | # decrease data resolution if stride not equals to 1 189 | out = F.avg_pool3d(x, kernel_size=1, stride=stride) 190 | # shape: (batch_size, channel, t, h, w) 191 | # try to match the channel size 192 | zero_pads = torch.Tensor( 193 | out.size(0), planes - out.size(1), out.size(2), out.size(3), 194 | out.size(4)).zero_() 195 | if isinstance(out.data, torch.cuda.FloatTensor): 196 | zero_pads = zero_pads.cuda() 197 | 198 | out = Variable(torch.cat([out.data, zero_pads], dim=1)) 199 | 200 | return out 201 | 202 | 203 | class ResNet(nn.Module): 204 | def __init__(self, block, layers, shortcut_type, sample_size, sample_duration, attention=False, num_classes=500): 205 | super(ResNet, self).__init__() 206 | # initialize inplanes to 64, it'll be changed later 207 | self.inplanes = 64 208 | self.conv1 = nn.Conv3d( 209 | 3, 64, kernel_size=7, stride=(1, 2, 2), padding=(3, 3, 3), bias=False) 210 | self.bn1 = nn.BatchNorm3d(64) 211 | self.relu = nn.ReLU(inplace=True) 212 | self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1) 213 | # layers refers to the number of blocks in each layer 214 | self.layer1 = self._make_layer( 215 | block, 64, layers[0], shortcut_type, stride=1) 216 | self.layer2 = self._make_layer( 217 | block, 128, layers[1], shortcut_type, stride=2) 218 | self.layer3 = self._make_layer( 219 | block, 256, layers[2], shortcut_type, stride=2) 220 | self.layer4 = self._make_layer( 221 | block, 512, layers[3], shortcut_type, stride=2) 222 | # calclatue kernal size for average pooling 223 | last_duration = int(math.ceil(sample_duration / 16)) 224 | last_size = int(math.ceil(sample_size / 32)) 225 | self.avgpool = nn.AvgPool3d( 226 | (last_duration, last_size, last_size), stride=1) 227 | # attention blocks 228 | self.attention = attention 229 | if self.attention: 230 | self.attn1 = LinearAttentionBlock3D(in_channels=512*block.expansion, normalize_attn=True) 231 | self.attn2 = LinearAttentionBlock3D(in_channels=512*block.expansion, normalize_attn=True) 232 | self.attn3 = LinearAttentionBlock3D(in_channels=512*block.expansion, normalize_attn=True) 233 | self.attn4 = LinearAttentionBlock3D(in_channels=512*block.expansion, normalize_attn=True) 234 | self.projector1 = ProjectorBlock3D(in_channels=64*block.expansion, out_channels=512*block.expansion) 235 | self.projector2 = ProjectorBlock3D(in_channels=128*block.expansion, out_channels=512*block.expansion) 236 | self.projector3 = ProjectorBlock3D(in_channels=256*block.expansion, out_channels=512*block.expansion) 237 | self.fc = nn.Linear(512 * block.expansion * 4, num_classes) 238 | else: 239 | self.fc = nn.Linear(512 * block.expansion, num_classes) 240 | # init the weights 241 | for m in self.modules(): 242 | if isinstance(m, nn.Conv3d): 243 | m.weight = nn.init.kaiming_normal_(m.weight, mode='fan_out') 244 | elif isinstance(m, nn.BatchNorm3d): 245 | m.weight.data.fill_(1) 246 | m.bias.data.zero_() 247 | 248 | def _make_layer(self, block, planes, blocks, 
shortcut_type, stride): 249 | downsample = None 250 | # when the in-channel and the out-channel dismatch, downsample!!! 251 | if stride != 1 or self.inplanes != planes * block.expansion: 252 | # stride once for downsample and block. 253 | if shortcut_type == 'A': 254 | downsample = partial( 255 | downsample_basic_block, 256 | planes=planes * block.expansion, 257 | stride=stride) 258 | else: 259 | downsample = nn.Sequential( 260 | nn.Conv3d( 261 | self.inplanes, 262 | planes * block.expansion, 263 | kernel_size=1, 264 | stride=stride, 265 | bias=False), nn.BatchNorm3d(planes * block.expansion)) 266 | 267 | layers = [] 268 | # only the first block needs downsample. 269 | layers.append(block(self.inplanes, planes, stride, downsample)) 270 | # change inplanes for the next layer 271 | self.inplanes = planes * block.expansion 272 | for i in range(1, blocks): 273 | layers.append(block(self.inplanes, planes)) 274 | 275 | return nn.Sequential(*layers) 276 | 277 | def forward(self, x): 278 | x = self.conv1(x) 279 | x = self.bn1(x) 280 | x = self.relu(x) 281 | x = self.maxpool(x) 282 | 283 | l1 = self.layer1(x) 284 | l2 = self.layer2(l1) 285 | l3 = self.layer3(l2) 286 | l4 = self.layer4(l3) 287 | 288 | g = self.avgpool(l4) 289 | # attention 290 | if self.attention: 291 | # print(l1.shape, l2.shape, l3.shape, l4.shape, g.shape) 292 | c1, g1 = self.attn1(self.projector1(l1), g) 293 | c2, g2 = self.attn2(self.projector2(l2), g) 294 | c3, g3 = self.attn3(self.projector3(l3), g) 295 | c4, g4 = self.attn4(l4, g) 296 | g = torch.cat((g1,g2,g3,g4), dim=1) 297 | x = self.fc(g) 298 | else: 299 | c1, c2, c3, c4 = None, None, None, None 300 | # x.size(0) ------ batch_size 301 | g = g.view(g.size(0), -1) 302 | x = self.fc(g) 303 | 304 | return [x, c1, c2, c3, c4] 305 | 306 | def load_my_state_dict(self, state_dict): 307 | my_state_dict = self.state_dict() 308 | for name, param in state_dict.items(): 309 | if name == 'fc.weight' or name == 'fc.bias': 310 | continue 311 | my_state_dict[name].copy_(param.data) 312 | 313 | 314 | model_urls = { 315 | 'resnet18': 'https://www.jianguoyun.com/c/dl-file/resnet-18-kinetics.pth?dt=q67aev&kv=YXF6QHpqdS5lZHUuY24&sd=a54cr&ud=B8Sbfz0nRv1pG8YNAbo0KiCnzvJHDsLYQsWjtT4b1j8&vr=1', 316 | 'resnet34': 'https://www.jianguoyun.com/c/dl-file/resnet-34-kinetics.pth?dt=q67acv&kv=YXF6QHpqdS5lZHUuY24&sd=a54cr&ud=BftTcvolMjyywptfxelwwjXJksCaU0ektvfMwCbMD1I&vr=1', 317 | 'resnet50': 'https://www.jianguoyun.com/c/dl-file/resnet-50-kinetics.pth?dt=q67atr&kv=YXF6QHpqdS5lZHUuY24&sd=a54cr&ud=uKpTbIK63qX3bHs2weOGqYYc2gtssQi-o7UqpoTaG6Q&vr=1', 318 | 'resnet101': '', 319 | 'resnet152': '', 320 | 'resnet200': '', 321 | } 322 | 323 | 324 | def resnet18(pretrained=False, progress=True, **kwargs): 325 | """Constructs a ResNet-18 model. 326 | """ 327 | model = ResNet(BasicBlock, [2, 2, 2, 2], shortcut_type='A', **kwargs) 328 | if pretrained: 329 | checkpoint = load_state_dict_from_url(model_urls['resnet18'], 330 | progress=progress) 331 | state_dict = checkpoint['state_dict'] 332 | 333 | new_state_dict = OrderedDict() 334 | for k, v in state_dict.items(): 335 | name = k[7:] # remove 'module.' 336 | new_state_dict[name]=v 337 | model.load_my_state_dict(new_state_dict) 338 | 339 | return model 340 | 341 | 342 | def resnet34(pretrained=False, progress=True, **kwargs): 343 | """Constructs a ResNet-34 model. 
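    Keyword arguments such as ``sample_size``, ``sample_duration``, ``attention`` and
    ``num_classes`` are forwarded unchanged to :class:`ResNet` (BasicBlock, layers
    [3, 4, 6, 3], shortcut type 'A'). With ``pretrained=True`` the Kinetics checkpoint
    referenced in ``model_urls['resnet34']`` is fetched, its ``module.`` key prefix is
    stripped, and the weights are loaded through ``load_my_state_dict``, which skips the
    final ``fc`` layer so that ``num_classes`` may differ from the checkpoint's.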
344 | """ 345 | model = ResNet(BasicBlock, [3, 4, 6, 3], shortcut_type='A', **kwargs) 346 | if pretrained: 347 | checkpoint = load_state_dict_from_url(model_urls['resnet34'], 348 | progress=progress) 349 | state_dict = checkpoint['state_dict'] 350 | 351 | new_state_dict = OrderedDict() 352 | for k, v in state_dict.items(): 353 | name = k[7:] # remove 'module.' 354 | new_state_dict[name]=v 355 | model.load_my_state_dict(new_state_dict) 356 | 357 | return model 358 | 359 | 360 | def resnet50(pretrained=False, progress=True, **kwargs): 361 | """Constructs a ResNet-50 model. 362 | """ 363 | model = ResNet(Bottleneck, [3, 4, 6, 3], shortcut_type='B', **kwargs) 364 | if pretrained: 365 | checkpoint = load_state_dict_from_url(model_urls['resnet50'], 366 | progress=progress) 367 | state_dict = checkpoint['state_dict'] 368 | 369 | new_state_dict = OrderedDict() 370 | for k, v in state_dict.items(): 371 | name = k[7:] # remove 'module.' 372 | new_state_dict[name]=v 373 | model.load_my_state_dict(new_state_dict) 374 | 375 | return model 376 | 377 | 378 | def resnet101(pretrained=False, progress=True, **kwargs): 379 | """Constructs a ResNet-101 model. 380 | """ 381 | model = ResNet(Bottleneck, [3, 4, 23, 3], shortcut_type='B', **kwargs) 382 | if pretrained: 383 | checkpoint = load_state_dict_from_url(model_urls['resnet101'], 384 | progress=progress) 385 | state_dict = checkpoint['state_dict'] 386 | 387 | new_state_dict = OrderedDict() 388 | for k, v in state_dict.items(): 389 | name = k[7:] # remove 'module.' 390 | new_state_dict[name]=v 391 | model.load_my_state_dict(new_state_dict) 392 | 393 | return model 394 | 395 | 396 | def resnet152(pretrained=False, progress=True, **kwargs): 397 | """Constructs a ResNet-101 model. 398 | """ 399 | model = ResNet(Bottleneck, [3, 8, 36, 3], shortcut_type='B', **kwargs) 400 | if pretrained: 401 | checkpoint = load_state_dict_from_url(model_urls['resnet152'], 402 | progress=progress) 403 | state_dict = checkpoint['state_dict'] 404 | 405 | new_state_dict = OrderedDict() 406 | for k, v in state_dict.items(): 407 | name = k[7:] # remove 'module.' 408 | new_state_dict[name]=v 409 | model.load_my_state_dict(new_state_dict) 410 | 411 | return model 412 | 413 | 414 | def resnet200(pretrained=False, progress=True, **kwargs): 415 | """Constructs a ResNet-101 model. 416 | """ 417 | model = ResNet(Bottleneck, [3, 24, 36, 3], shortcut_type='B', **kwargs) 418 | if pretrained: 419 | checkpoint = load_state_dict_from_url(model_urls['resnet200'], 420 | progress=progress) 421 | state_dict = checkpoint['state_dict'] 422 | 423 | new_state_dict = OrderedDict() 424 | for k, v in state_dict.items(): 425 | name = k[7:] # remove 'module.' 
426 | new_state_dict[name]=v 427 | model.load_my_state_dict(new_state_dict) 428 | 429 | return model 430 | 431 | 432 | """ 433 | 3D CNN Models from torchvision.models 434 | Reference: https://pytorch.org/docs/stable/torchvision/models.html#video-classification 435 | """ 436 | class r3d_18(nn.Module): 437 | def __init__(self, pretrained=True, num_classes=500): 438 | super(r3d_18, self).__init__() 439 | self.pretrained = pretrained 440 | self.num_classes = num_classes 441 | model = torchvision.models.video.r3d_18(pretrained=self.pretrained) 442 | # delete the last fc layer 443 | modules = list(model.children())[:-1] 444 | # print(modules) 445 | self.r3d_18 = nn.Sequential(*modules) 446 | self.fc1 = nn.Linear(model.fc.in_features, self.num_classes) 447 | 448 | def forward(self, x): 449 | out = self.r3d_18(x) 450 | # print(out.shape) 451 | # Flatten the layer to fc 452 | out = out.flatten(1) 453 | out = self.fc1(out) 454 | 455 | return out 456 | 457 | 458 | class mc3_18(nn.Module): 459 | def __init__(self, pretrained=True, num_classes=500): 460 | super(mc3_18, self).__init__() 461 | self.pretrained = pretrained 462 | self.num_classes = num_classes 463 | model = torchvision.models.video.mc3_18(pretrained=self.pretrained) 464 | # delete the last fc layer 465 | modules = list(model.children())[:-1] 466 | # print(modules) 467 | self.mc3_18 = nn.Sequential(*modules) 468 | self.fc1 = nn.Linear(model.fc.in_features, self.num_classes) 469 | 470 | def forward(self, x): 471 | out = self.mc3_18(x) 472 | # print(out.shape) 473 | # Flatten the layer to fc 474 | out = out.flatten(1) 475 | out = self.fc1(out) 476 | 477 | return out 478 | 479 | 480 | class r2plus1d_18(nn.Module): 481 | def __init__(self, pretrained=True, num_classes=500): 482 | super(r2plus1d_18, self).__init__() 483 | self.pretrained = pretrained 484 | self.num_classes = num_classes 485 | model = torchvision.models.video.r2plus1d_18(pretrained=self.pretrained) 486 | # delete the last fc layer 487 | modules = list(model.children())[:-1] 488 | # print(modules) 489 | self.r2plus1d_18 = nn.Sequential(*modules) 490 | self.fc1 = nn.Linear(model.fc.in_features, self.num_classes) 491 | 492 | def forward(self, x): 493 | out = self.r2plus1d_18(x) 494 | # print(out.shape) 495 | # Flatten the layer to fc 496 | out = out.flatten(1) 497 | out = self.fc1(out) 498 | 499 | return out 500 | 501 | 502 | # Test 503 | if __name__ == '__main__': 504 | import sys 505 | sys.path.append("..") 506 | import torchvision.transforms as transforms 507 | from dataset import CSL_Isolated 508 | sample_size = 128 509 | sample_duration = 16 510 | num_classes = 500 511 | transform = transforms.Compose([transforms.Resize([sample_size, sample_size]), transforms.ToTensor()]) 512 | dataset = CSL_Isolated(data_path="/home/haodong/Data/CSL_Isolated/color_video_125000", 513 | label_path="/home/haodong/Data/CSL_Isolated/dictionary.txt", frames=sample_duration, 514 | num_classes=num_classes, transform=transform) 515 | # cnn3d = CNN3D(sample_size=sample_size, sample_duration=sample_duration, num_classes=num_classes) 516 | cnn3d = resnet50(pretrained=True, progress=True, sample_size=sample_size, sample_duration=sample_duration, attention=True, num_classes=num_classes) 517 | # cnn3d = r3d_18(pretrained=True, num_classes=num_classes) 518 | # cnn3d = mc3_18(pretrained=True, num_classes=num_classes) 519 | # cnn3d = r2plus1d_18(pretrained=True, num_classes=num_classes) 520 | # print(dataset[0]['images'].shape) 521 | print(cnn3d(dataset[0]['data'].unsqueeze(0))) 522 | 523 | # Test for loading 
pretrained models 524 | # state_dict = torch.load('resnet-18-kinetics.pth') 525 | # for name, param in state_dict.items(): 526 | # print(name) 527 | # # print(state_dict['arch']) 528 | # # print(state_dict['optimizer']) 529 | # # print(state_dict['epoch']) 530 | -------------------------------------------------------------------------------- /models/ConvLSTM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torchvision.models as models 6 | import math 7 | 8 | import os,inspect,sys 9 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 10 | sys.path.insert(0,currentdir) 11 | from Attention import LSTMAttentionBlock 12 | 13 | """ 14 | Implementation of CNN+LSTM. 15 | """ 16 | class CRNN(nn.Module): 17 | def __init__(self, sample_size=256, sample_duration=16, num_classes=100, 18 | lstm_hidden_size=512, lstm_num_layers=1): 19 | super(CRNN, self).__init__() 20 | self.sample_size = sample_size 21 | self.sample_duration = sample_duration 22 | self.num_classes = num_classes 23 | 24 | # network params 25 | self.ch1, self.ch2, self.ch3, self.ch4 = 64, 128, 256, 512 26 | self.k1, self.k2, self.k3, self.k4 = (7, 7), (3, 3), (3, 3), (3, 3) 27 | self.s1, self.s2, self.s3, self.s4 = (2, 2), (1, 1), (1, 1), (1, 1) 28 | self.p1, self.p2, self.p3, self.p4 = (0, 0), (0, 0), (0, 0), (0, 0) 29 | self.d1, self.d2, self.d3, self.d4 = (1, 1), (1, 1), (1, 1), (1, 1) 30 | self.lstm_input_size = self.ch4 31 | self.lstm_hidden_size = lstm_hidden_size 32 | self.lstm_num_layers = lstm_num_layers 33 | 34 | # network architecture 35 | # in_channels=3 for rgb 36 | self.conv1 = nn.Sequential( 37 | nn.Conv2d(in_channels=3, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1, padding=self.p1, dilation=self.d1), 38 | nn.BatchNorm2d(self.ch1, momentum=0.01), 39 | nn.ReLU(inplace=True), 40 | nn.Conv2d(in_channels=self.ch1, out_channels=self.ch1, kernel_size=1, stride=1), 41 | nn.MaxPool2d(kernel_size=2), 42 | ) 43 | self.conv2 = nn.Sequential( 44 | nn.Conv2d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2, padding=self.p2, dilation=self.d2), 45 | nn.BatchNorm2d(self.ch2, momentum=0.01), 46 | nn.ReLU(inplace=True), 47 | nn.Conv2d(in_channels=self.ch2, out_channels=self.ch2, kernel_size=1, stride=1), 48 | nn.MaxPool2d(kernel_size=2), 49 | ) 50 | self.conv3 = nn.Sequential( 51 | nn.Conv2d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, stride=self.s3, padding=self.p3, dilation=self.d3), 52 | nn.BatchNorm2d(self.ch3, momentum=0.01), 53 | nn.ReLU(inplace=True), 54 | nn.Conv2d(in_channels=self.ch3, out_channels=self.ch3, kernel_size=1, stride=1), 55 | nn.MaxPool2d(kernel_size=2), 56 | ) 57 | self.conv4 = nn.Sequential( 58 | nn.Conv2d(in_channels=self.ch3, out_channels=self.ch4, kernel_size=self.k4, stride=self.s4, padding=self.p4, dilation=self.d4), 59 | nn.BatchNorm2d(self.ch4, momentum=0.01), 60 | nn.ReLU(inplace=True), 61 | nn.Conv2d(in_channels=self.ch4, out_channels=self.ch4, kernel_size=1, stride=1), 62 | nn.AdaptiveAvgPool2d((1,1)), 63 | ) 64 | self.lstm = nn.LSTM( 65 | input_size=self.lstm_input_size, 66 | hidden_size=self.lstm_hidden_size, 67 | num_layers=self.lstm_num_layers, 68 | batch_first=True, 69 | ) 70 | self.fc1 = nn.Linear(self.lstm_hidden_size, self.num_classes) 71 | 72 | def forward(self, x): 73 | # CNN 74 | cnn_embed_seq = [] 75 | # print(x.shape) 76 | # x: (batch_size, 
channel, t, h, w) 77 | for t in range(x.size(2)): 78 | # Conv 79 | out = self.conv1(x[:, :, t, :, :]) 80 | out = self.conv2(out) 81 | out = self.conv3(out) 82 | out = self.conv4(out) 83 | # print(out.shape) 84 | out = out.view(out.size(0), -1) 85 | cnn_embed_seq.append(out) 86 | 87 | cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0) 88 | # print(cnn_embed_seq.shape) 89 | # batch first 90 | cnn_embed_seq = cnn_embed_seq.transpose_(0, 1) 91 | 92 | # LSTM 93 | # use faster code paths 94 | self.lstm.flatten_parameters() 95 | out, (h_n, c_n) = self.lstm(cnn_embed_seq, None) 96 | # MLP 97 | # out: (batch, seq, feature), choose the last time step 98 | out = self.fc1(out[:, -1, :]) 99 | 100 | return out 101 | 102 | 103 | """ 104 | Implementation of Resnet+LSTM 105 | """ 106 | class ResCRNN(nn.Module): 107 | def __init__(self, sample_size=256, sample_duration=16, num_classes=100, 108 | lstm_hidden_size=512, lstm_num_layers=1, arch="resnet18", 109 | attention=False): 110 | super(ResCRNN, self).__init__() 111 | self.sample_size = sample_size 112 | self.sample_duration = sample_duration 113 | self.num_classes = num_classes 114 | 115 | # network params 116 | self.lstm_hidden_size = lstm_hidden_size 117 | self.lstm_num_layers = lstm_num_layers 118 | self.attention = attention 119 | 120 | # network architecture 121 | if arch == "resnet18": 122 | resnet = models.resnet18(pretrained=True) 123 | elif arch == "resnet34": 124 | resnet = models.resnet34(pretrained=True) 125 | elif arch == "resnet50": 126 | resnet = models.resnet50(pretrained=True) 127 | elif arch == "resnet101": 128 | resnet = models.resnet101(pretrained=True) 129 | elif arch == "resnet152": 130 | resnet = models.resnet152(pretrained=True) 131 | # delete the last fc layer 132 | modules = list(resnet.children())[:-1] 133 | self.resnet = nn.Sequential(*modules) 134 | self.lstm = nn.LSTM( 135 | input_size=resnet.fc.in_features, 136 | hidden_size=self.lstm_hidden_size, 137 | num_layers=self.lstm_num_layers, 138 | batch_first=True, 139 | ) 140 | if self.attention: 141 | self.attn_block = LSTMAttentionBlock(hidden_size=self.lstm_hidden_size) 142 | self.fc1 = nn.Linear(self.lstm_hidden_size, self.num_classes) 143 | 144 | def forward(self, x): 145 | # CNN 146 | cnn_embed_seq = [] 147 | # x: (batch_size, channel, t, h, w) 148 | for t in range(x.size(2)): 149 | # with torch.no_grad(): 150 | out = self.resnet(x[:, :, t, :, :]) 151 | # print(out.shape) 152 | out = out.view(out.size(0), -1) 153 | cnn_embed_seq.append(out) 154 | 155 | cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0) 156 | # print(cnn_embed_seq.shape) 157 | # batch first 158 | cnn_embed_seq = cnn_embed_seq.transpose_(0, 1) 159 | 160 | # LSTM 161 | # use faster code paths 162 | self.lstm.flatten_parameters() 163 | out, (h_n, c_n) = self.lstm(cnn_embed_seq, None) 164 | # MLP 165 | if self.attention: 166 | out = self.fc1(self.attn_block(out)) 167 | else: 168 | # out: (batch, seq, feature), choose the last time step 169 | out = self.fc1(out[:, -1, :]) 170 | 171 | return out 172 | 173 | 174 | # Test 175 | if __name__ == '__main__': 176 | import sys 177 | sys.path.append("..") 178 | import torchvision.transforms as transforms 179 | from dataset import CSL_Isolated 180 | sample_size = 128 181 | sample_duration = 16 182 | num_classes = 500 183 | transform = transforms.Compose([transforms.Resize([sample_size, sample_size]), transforms.ToTensor()]) 184 | dataset = CSL_Isolated(data_path="/home/haodong/Data/CSL_Isolated/color_video_125000", 185 | 
label_path="/home/haodong/Data/CSL_Isolated/dictionary.txt", frames=sample_duration, 186 | num_classes=num_classes, transform=transform) 187 | # crnn = CRNN() 188 | crnn = ResCRNN(sample_size=sample_size, sample_duration=sample_duration, num_classes=num_classes, arch="resnet152") 189 | print(crnn(dataset[0]['data'].unsqueeze(0))) 190 | -------------------------------------------------------------------------------- /models/GCN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | 7 | """ 8 | Implementation of Spatial Temporal Graph Convolutional Network 9 | Reference: Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition 10 | """ 11 | class Graph(): 12 | """ The Graph to model the skeletons extracted by the openpose 13 | Args: 14 | strategy (string): must be one of the follow candidates 15 | - uniform: Uniform Labeling 16 | - distance: Distance Partitioning 17 | - spatial: Spatial Configuration 18 | 19 | layout (string): must be one of the follow candidates 20 | - openpose: Is consists of 18 joints. 21 | - ntu-rgb+d: Is consists of 25 joints. 22 | 23 | max_hop (int): the maximal distance between two connected nodes 24 | dilation (int): controls the spacing between the kernel points 25 | """ 26 | def __init__(self, 27 | layout='openpose', 28 | strategy='uniform', 29 | max_hop=1, 30 | dilation=1): 31 | self.max_hop = max_hop 32 | self.dilation = dilation 33 | 34 | self.get_edge(layout) 35 | self.hop_dis = get_hop_distance( 36 | self.num_node, self.edge, max_hop=max_hop) 37 | self.get_adjacency(strategy) 38 | 39 | def __str__(self): 40 | return self.A 41 | 42 | def get_edge(self, layout): 43 | if layout == 'openpose': 44 | self.num_node = 18 45 | self_link = [(i, i) for i in range(self.num_node)] 46 | neighbor_link = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 47 | 11), 48 | (10, 9), (9, 8), (11, 5), (8, 2), (5, 1), (2, 1), 49 | (0, 1), (15, 0), (14, 0), (17, 15), (16, 14)] 50 | self.edge = self_link + neighbor_link 51 | self.center = 1 52 | elif layout == 'ntu-rgb+d': 53 | self.num_node = 25 54 | # link to itself 55 | self_link = [(i, i) for i in range(self.num_node)] 56 | # link to neighbors 57 | neighbor_1base = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), 58 | (6, 5), (7, 6), (8, 7), (9, 21), (10, 9), 59 | (11, 10), (12, 11), (13, 1), (14, 13), (15, 14), 60 | (16, 15), (17, 1), (18, 17), (19, 18), (20, 19), 61 | (22, 23), (23, 8), (24, 25), (25, 12)] 62 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 63 | self.edge = self_link + neighbor_link 64 | self.center = 21 - 1 65 | elif layout == 'ntu_edge': 66 | self.num_node = 24 67 | self_link = [(i, i) for i in range(self.num_node)] 68 | neighbor_1base = [(1, 2), (3, 2), (4, 3), (5, 2), (6, 5), (7, 6), 69 | (8, 7), (9, 2), (10, 9), (11, 10), (12, 11), 70 | (13, 1), (14, 13), (15, 14), (16, 15), (17, 1), 71 | (18, 17), (19, 18), (20, 19), (21, 22), (22, 8), 72 | (23, 24), (24, 12)] 73 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 74 | self.edge = self_link + neighbor_link 75 | self.center = 2 76 | # elif layout=='customer settings' 77 | # pass 78 | else: 79 | raise ValueError("Do Not Exist This Layout.") 80 | 81 | def get_adjacency(self, strategy): 82 | valid_hop = range(0, self.max_hop + 1, self.dilation) 83 | adjacency = np.zeros((self.num_node, self.num_node)) 84 | for hop in valid_hop: 85 | 
adjacency[self.hop_dis == hop] = 1 86 | normalize_adjacency = normalize_digraph(adjacency) 87 | 88 | if strategy == 'uniform': 89 | A = np.zeros((1, self.num_node, self.num_node)) 90 | A[0] = normalize_adjacency 91 | self.A = A 92 | elif strategy == 'distance': 93 | A = np.zeros((len(valid_hop), self.num_node, self.num_node)) 94 | for i, hop in enumerate(valid_hop): 95 | A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis == 96 | hop] 97 | self.A = A 98 | elif strategy == 'spatial': 99 | A = [] 100 | for hop in valid_hop: 101 | a_root = np.zeros((self.num_node, self.num_node)) 102 | a_close = np.zeros((self.num_node, self.num_node)) 103 | a_further = np.zeros((self.num_node, self.num_node)) 104 | for i in range(self.num_node): 105 | for j in range(self.num_node): 106 | if self.hop_dis[j, i] == hop: 107 | if self.hop_dis[j, self.center] == self.hop_dis[ 108 | i, self.center]: 109 | a_root[j, i] = normalize_adjacency[j, i] 110 | elif self.hop_dis[j, self. 111 | center] > self.hop_dis[i, self. 112 | center]: 113 | a_close[j, i] = normalize_adjacency[j, i] 114 | else: 115 | a_further[j, i] = normalize_adjacency[j, i] 116 | if hop == 0: 117 | A.append(a_root) 118 | else: 119 | A.append(a_root + a_close) 120 | A.append(a_further) 121 | A = np.stack(A) 122 | self.A = A 123 | else: 124 | raise ValueError("Do Not Exist This Strategy") 125 | 126 | 127 | def get_hop_distance(num_node, edge, max_hop=1): 128 | # link matrix 129 | A = np.zeros((num_node, num_node)) 130 | for i, j in edge: 131 | A[j, i] = 1 132 | A[i, j] = 1 133 | 134 | # compute hop steps 135 | hop_dis = np.zeros((num_node, num_node)) + np.inf 136 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)] 137 | arrive_mat = (np.stack(transfer_mat) > 0) 138 | for d in range(max_hop, -1, -1): 139 | hop_dis[arrive_mat[d]] = d 140 | return hop_dis 141 | 142 | 143 | def normalize_digraph(A): 144 | Dl = np.sum(A, 0) 145 | num_node = A.shape[0] 146 | Dn = np.zeros((num_node, num_node)) 147 | for i in range(num_node): 148 | if Dl[i] > 0: 149 | Dn[i, i] = Dl[i]**(-1) 150 | AD = np.dot(A, Dn) 151 | return AD 152 | 153 | 154 | def normalize_undigraph(A): 155 | Dl = np.sum(A, 0) 156 | num_node = A.shape[0] 157 | Dn = np.zeros((num_node, num_node)) 158 | for i in range(num_node): 159 | if Dl[i] > 0: 160 | Dn[i, i] = Dl[i]**(-0.5) 161 | DAD = np.dot(np.dot(Dn, A), Dn) 162 | return DAD 163 | 164 | 165 | class ConvTemporalGraphical(nn.Module): 166 | 167 | r"""The basic module for applying a graph convolution. 168 | 169 | Args: 170 | in_channels (int): Number of channels in the input sequence data 171 | out_channels (int): Number of channels produced by the convolution 172 | kernel_size (int): Size of the graph convolving kernel 173 | t_kernel_size (int): Size of the temporal convolving kernel 174 | t_stride (int, optional): Stride of the temporal convolution. Default: 1 175 | t_padding (int, optional): Temporal zero-padding added to both sides of 176 | the input. Default: 0 177 | t_dilation (int, optional): Spacing between temporal kernel elements. 178 | Default: 1 179 | bias (bool, optional): If ``True``, adds a learnable bias to the output. 
180 | Default: ``True`` 181 | 182 | Shape: 183 | - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format 184 | - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format 185 | - Output[0]: Outpu graph sequence in :math:`(N, out_channels, T_{out}, V)` format 186 | - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format 187 | 188 | where 189 | :math:`N` is a batch size, 190 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, 191 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 192 | :math:`V` is the number of graph nodes. 193 | """ 194 | 195 | def __init__(self, 196 | in_channels, 197 | out_channels, 198 | kernel_size, 199 | t_kernel_size=1, 200 | t_stride=1, 201 | t_padding=0, 202 | t_dilation=1, 203 | bias=True): 204 | super().__init__() 205 | 206 | self.kernel_size = kernel_size 207 | self.conv = nn.Conv2d( 208 | in_channels, 209 | out_channels * kernel_size, 210 | kernel_size=(t_kernel_size, 1), 211 | padding=(t_padding, 0), 212 | stride=(t_stride, 1), 213 | dilation=(t_dilation, 1), 214 | bias=bias) 215 | 216 | def forward(self, x, A): 217 | assert A.size(0) == self.kernel_size 218 | 219 | x = self.conv(x) 220 | 221 | n, kc, t, v = x.size() 222 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v) 223 | x = torch.einsum('nkctv,kvw->nctw', (x, A)) 224 | 225 | return x.contiguous(), A 226 | 227 | 228 | class st_gcn(nn.Module): 229 | r"""Applies a spatial temporal graph convolution over an input graph sequence. 230 | 231 | Args: 232 | in_channels (int): Number of channels in the input sequence data 233 | out_channels (int): Number of channels produced by the convolution 234 | kernel_size (tuple): Size of the temporal convolving kernel and graph convolving kernel 235 | stride (int, optional): Stride of the temporal convolution. Default: 1 236 | dropout (int, optional): Dropout rate of the final output. Default: 0 237 | residual (bool, optional): If ``True``, applies a residual mechanism. Default: ``True`` 238 | 239 | Shape: 240 | - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format 241 | - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format 242 | - Output[0]: Outpu graph sequence in :math:`(N, out_channels, T_{out}, V)` format 243 | - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format 244 | 245 | where 246 | :math:`N` is a batch size, 247 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, 248 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 249 | :math:`V` is the number of graph nodes. 
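        Example (illustrative only; the concrete sizes below are assumptions,
        not values used elsewhere in this repository)::

            >>> layer = st_gcn(64, 128, kernel_size=(9, 3), stride=2)
            >>> x = torch.randn(8, 64, 32, 25)   # (N, C, T_in, V)
            >>> A = torch.rand(3, 25, 25)        # (K, V, V), K == kernel_size[1]
            >>> out, A = layer(x, A)             # out: (8, 128, 16, 25)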
250 | 251 | """ 252 | 253 | def __init__(self, 254 | in_channels, 255 | out_channels, 256 | kernel_size, 257 | stride=1, 258 | dropout=0, 259 | residual=True): 260 | super().__init__() 261 | 262 | assert len(kernel_size) == 2 263 | assert kernel_size[0] % 2 == 1 264 | padding = ((kernel_size[0] - 1) // 2, 0) 265 | 266 | self.gcn = ConvTemporalGraphical(in_channels, out_channels, 267 | kernel_size[1]) 268 | 269 | self.tcn = nn.Sequential( 270 | nn.BatchNorm2d(out_channels), 271 | nn.ReLU(inplace=True), 272 | nn.Conv2d( 273 | out_channels, 274 | out_channels, 275 | (kernel_size[0], 1), 276 | (stride, 1), 277 | padding, 278 | ), 279 | nn.BatchNorm2d(out_channels), 280 | nn.Dropout(dropout, inplace=True), 281 | ) 282 | 283 | if not residual: 284 | self.residual = lambda x: 0 285 | 286 | elif (in_channels == out_channels) and (stride == 1): 287 | self.residual = lambda x: x 288 | 289 | else: 290 | self.residual = nn.Sequential( 291 | nn.Conv2d( 292 | in_channels, 293 | out_channels, 294 | kernel_size=1, 295 | stride=(stride, 1)), 296 | nn.BatchNorm2d(out_channels), 297 | ) 298 | 299 | self.relu = nn.ReLU(inplace=True) 300 | 301 | def forward(self, x, A): 302 | 303 | res = self.residual(x) 304 | x, A = self.gcn(x, A) 305 | x = self.tcn(x) + res 306 | 307 | return self.relu(x), A 308 | 309 | 310 | class GCN(nn.Module): 311 | r"""Spatial temporal graph convolutional networks. 312 | 313 | Args: 314 | in_channels (int): Number of channels in the input data 315 | num_class (int): Number of classes for the classification task 316 | graph_args (dict): The arguments for building the graph 317 | edge_importance_weighting (bool): If ``True``, adds a learnable 318 | importance weighting to the edges of the graph 319 | **kwargs (optional): Other parameters for graph convolution units 320 | 321 | Shape: 322 | - Input: :math:`(N, in_channels, T_{in}, V_{in}, M_{in})` 323 | - Output: :math:`(N, num_class)` where 324 | :math:`N` is a batch size, 325 | :math:`T_{in}` is a length of input sequence, 326 | :math:`V_{in}` is the number of graph nodes, 327 | :math:`M_{in}` is the number of instance in a frame. 
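        Note: ``forward`` first appends a singleton instance dimension and permutes the
        axes (the "Trick" comment at its top), so the tensor actually handed to the
        network in this repository appears to be the skeleton batch of shape
        :math:`(N, T_{in}, in_channels, V_{in})` produced by ``CSL_Skeleton`` with
        ``split_to_channels=True`` (see the test block at the bottom of this file); the
        shape documented above describes the layout after that rearrangement.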
328 | """ 329 | 330 | def __init__(self, in_channels, num_class, graph_args, 331 | edge_importance_weighting, **kwargs): 332 | super().__init__() 333 | 334 | # load graph 335 | self.graph = Graph(**graph_args) 336 | A = torch.tensor(self.graph.A, dtype=torch.float32, requires_grad=False) 337 | self.register_buffer('A', A) 338 | 339 | # build networks 340 | spatial_kernel_size = A.size(0) 341 | temporal_kernel_size = 9 342 | kernel_size = (temporal_kernel_size, spatial_kernel_size) 343 | self.data_bn = nn.BatchNorm1d(in_channels * A.size(1)) 344 | kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'} 345 | self.st_gcn_networks = nn.ModuleList(( 346 | st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0), 347 | st_gcn(64, 64, kernel_size, 1, **kwargs), 348 | st_gcn(64, 64, kernel_size, 1, **kwargs), 349 | st_gcn(64, 64, kernel_size, 1, **kwargs), 350 | st_gcn(64, 128, kernel_size, 2, **kwargs), 351 | st_gcn(128, 128, kernel_size, 1, **kwargs), 352 | st_gcn(128, 128, kernel_size, 1, **kwargs), 353 | st_gcn(128, 256, kernel_size, 2, **kwargs), 354 | st_gcn(256, 256, kernel_size, 1, **kwargs), 355 | st_gcn(256, 256, kernel_size, 1, **kwargs), 356 | )) 357 | 358 | # initialize parameters for edge importance weighting 359 | if edge_importance_weighting: 360 | self.edge_importance = nn.ParameterList([ 361 | nn.Parameter(torch.ones(self.A.size())) 362 | for i in self.st_gcn_networks 363 | ]) 364 | else: 365 | self.edge_importance = [1] * len(self.st_gcn_networks) 366 | 367 | # fcn for prediction 368 | self.fcn = nn.Conv2d(256, num_class, kernel_size=1) 369 | 370 | def forward(self, x): 371 | # Trick: add new dimension & switch data dimension 372 | x = x.unsqueeze(-1) 373 | x = x.permute(0, 2, 1, 3, 4) 374 | # print(x.shape) 375 | 376 | # data normalization 377 | N, C, T, V, M = x.size() 378 | x = x.permute(0, 4, 3, 1, 2).contiguous() 379 | x = x.view(N * M, V * C, T) 380 | x = self.data_bn(x) 381 | x = x.view(N, M, V, C, T) 382 | x = x.permute(0, 1, 3, 4, 2).contiguous() 383 | x = x.view(N * M, C, T, V) 384 | 385 | # forwad 386 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): 387 | x, _ = gcn(x, self.A * importance) 388 | 389 | # global pooling 390 | x = F.avg_pool2d(x, x.size()[2:]) 391 | x = x.view(N, M, -1, 1, 1).mean(dim=1) 392 | 393 | # prediction 394 | x = self.fcn(x) 395 | x = x.view(x.size(0), -1) 396 | 397 | return x 398 | 399 | def extract_feature(self, x): 400 | 401 | # data normalization 402 | N, C, T, V, M = x.size() 403 | x = x.permute(0, 4, 3, 1, 2).contiguous() 404 | x = x.view(N * M, V * C, T) 405 | x = self.data_bn(x) 406 | x = x.view(N, M, V, C, T) 407 | x = x.permute(0, 1, 3, 4, 2).contiguous() 408 | x = x.view(N * M, C, T, V) 409 | 410 | # forwad 411 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): 412 | x, _ = gcn(x, self.A * importance) 413 | 414 | _, c, t, v = x.size() 415 | feature = x.view(N, M, c, t, v).permute(0, 2, 3, 4, 1) 416 | 417 | # prediction 418 | x = self.fcn(x) 419 | output = x.view(N, M, -1, t, v).permute(0, 2, 3, 4, 1) 420 | 421 | return output, feature 422 | 423 | 424 | # Test 425 | if __name__ == '__main__': 426 | import sys 427 | sys.path.append("..") 428 | from dataset import CSL_Skeleton 429 | dataset = CSL_Skeleton(data_path="/home/haodong/Data/CSL_Isolated_1/xf500_body_depth_txt", 430 | label_path="/home/haodong/Data/CSL_Isolated_1/dictionary.txt", split_to_channels=True) 431 | gcn = GCN(in_channels=2, num_class=500, graph_args={'layout': 'ntu-rgb+d'}, 432 | 
edge_importance_weighting=True) 433 | print(dataset[0]['images'].unsqueeze(0).shape) 434 | gcn(dataset[0]['images'].unsqueeze(0)) 435 | -------------------------------------------------------------------------------- /models/RNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | """ 6 | Implementation of LSTM 7 | Reference: SIGN LANGUAGE RECOGNITION WITH LONG SHORT-TERM MEMORY 8 | """ 9 | class LSTM(nn.Module): 10 | def __init__(self, lstm_input_size=512, lstm_hidden_size=512, lstm_num_layers=3, 11 | num_classes=100, hidden1=256, drop_p=0.0): 12 | super(LSTM, self).__init__() 13 | # network params 14 | self.lstm_input_size = lstm_input_size 15 | self.lstm_hidden_size = lstm_hidden_size 16 | self.lstm_num_layers = lstm_num_layers 17 | self.num_classes = num_classes 18 | self.hidden1 = hidden1 19 | self.drop_p = drop_p 20 | 21 | # network architecture 22 | self.lstm = nn.LSTM( 23 | input_size=self.lstm_input_size, 24 | hidden_size=self.lstm_hidden_size, 25 | num_layers=self.lstm_num_layers, 26 | batch_first=True, 27 | ) 28 | self.drop = nn.Dropout2d(p=self.drop_p) 29 | self.fc1 = nn.Linear(self.lstm_hidden_size, self.hidden1) 30 | self.fc2 = nn.Linear(self.hidden1, self.num_classes) 31 | 32 | def forward(self, x): 33 | # LSTM 34 | # use faster code paths 35 | self.lstm.flatten_parameters() 36 | # print(x.shape) 37 | # batch first: (batch, seq, feature) 38 | out, (h_n, c_n) = self.lstm(x, None) 39 | # MLP 40 | # out: (batch, seq, feature), choose the last time step 41 | out = F.relu(self.fc1(out[:, -1, :])) 42 | out = F.dropout(out, p=self.drop_p, training=self.training) 43 | out = self.fc2(out) 44 | 45 | return out 46 | 47 | 48 | """ 49 | Implementation of GRU 50 | """ 51 | class GRU(nn.Module): 52 | def __init__(self, gru_input_size=512, gru_hidden_size=512, gru_num_layers=3, 53 | num_classes=100, hidden1=256, drop_p=0.0): 54 | super(GRU, self).__init__() 55 | # network params 56 | self.gru_input_size = gru_input_size 57 | self.gru_hidden_size = gru_hidden_size 58 | self.gru_num_layers = gru_num_layers 59 | self.num_classes = num_classes 60 | self.hidden1 = hidden1 61 | self.drop_p = drop_p 62 | 63 | # network architecture 64 | self.gru = nn.GRU( 65 | input_size=self.gru_input_size, 66 | hidden_size=self.gru_hidden_size, 67 | num_layers=self.gru_num_layers, 68 | batch_first=True, 69 | ) 70 | self.drop = nn.Dropout2d(p=self.drop_p) 71 | self.fc1 = nn.Linear(self.gru_hidden_size, self.hidden1) 72 | self.fc2 = nn.Linear(self.hidden1, self.num_classes) 73 | 74 | def forward(self, x): 75 | # GRU 76 | # use faster code paths 77 | self.gru.flatten_parameters() 78 | # print(x.shape) 79 | # batch first: (batch, seq, feature) 80 | out, hidden = self.gru(x, None) 81 | # MLP 82 | # out: (batch, seq, feature), choose the last time step 83 | out = F.relu(self.fc1(out[:, -1, :])) 84 | out = F.dropout(out, p=self.drop_p, training=self.training) 85 | out = self.fc2(out) 86 | 87 | return out 88 | 89 | # Test 90 | if __name__ == '__main__': 91 | import sys 92 | sys.path.append("..") 93 | from dataset import CSL_Skeleton 94 | selected_joints = ['HANDLEFT', 'HANDRIGHT', 'ELBOWLEFT', 'ELBOWRIGHT'] 95 | dataset = CSL_Skeleton(data_path="/home/haodong/Data/CSL_Isolated/xf500_body_depth_txt", 96 | label_path="/home/haodong/Data/CSL_Isolated/dictionary.txt", selected_joints=selected_joints) 97 | input_size = len(selected_joints)*2 98 | # test LSTM 99 | lstm = LSTM(lstm_input_size=input_size) 100 | 
print(lstm(dataset[0]['data'].unsqueeze(0))) 101 | 102 | # test GRU 103 | gru = GRU(gru_input_size=input_size) 104 | print(gru(dataset[0]['data'].unsqueeze(0))) 105 | -------------------------------------------------------------------------------- /models/Seq2Seq.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.models as models 4 | import random 5 | 6 | import os,inspect,sys 7 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 8 | sys.path.insert(0,currentdir) 9 | from ConvLSTM import ResCRNN 10 | 11 | """ 12 | Implementation of Sequence to Sequence Model 13 | Encoder: encode video spatial and temporal dynamics e.g. CNN+LSTM 14 | Decoder: decode the compressed info from encoder 15 | """ 16 | class Encoder(nn.Module): 17 | def __init__(self, lstm_hidden_size=512, arch="resnet18"): 18 | super(Encoder, self).__init__() 19 | self.lstm_hidden_size = lstm_hidden_size 20 | 21 | # network architecture 22 | if arch == "resnet18": 23 | resnet = models.resnet18(pretrained=True) 24 | elif arch == "resnet34": 25 | resnet = models.resnet34(pretrained=True) 26 | elif arch == "resnet50": 27 | resnet = models.resnet50(pretrained=True) 28 | elif arch == "resnet101": 29 | resnet = models.resnet101(pretrained=True) 30 | elif arch == "resnet152": 31 | resnet = models.resnet152(pretrained=True) 32 | # delete the last fc layer 33 | modules = list(resnet.children())[:-1] 34 | self.resnet = nn.Sequential(*modules) 35 | self.lstm = nn.LSTM( 36 | input_size=resnet.fc.in_features, 37 | hidden_size=self.lstm_hidden_size, 38 | batch_first=True, 39 | ) 40 | 41 | def forward(self, x): 42 | # CNN 43 | cnn_embed_seq = [] 44 | # x: (batch_size, channel, t, h, w) 45 | for t in range(x.size(2)): 46 | # with torch.no_grad(): 47 | out = self.resnet(x[:, :, t, :, :]) 48 | # print(out.shape) 49 | out = out.view(out.size(0), -1) 50 | cnn_embed_seq.append(out) 51 | 52 | cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0) 53 | # batch first 54 | cnn_embed_seq = cnn_embed_seq.transpose_(0, 1) 55 | 56 | # LSTM 57 | # use faster code paths 58 | self.lstm.flatten_parameters() 59 | out, (h_n, c_n) = self.lstm(cnn_embed_seq, None) 60 | 61 | # num_layers * num_directions = 1 62 | return out, (h_n.squeeze(0), c_n.squeeze(0)) 63 | 64 | 65 | class Decoder(nn.Module): 66 | def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout): 67 | super(Decoder, self).__init__() 68 | self.output_dim = output_dim 69 | self.embedding = nn.Embedding(output_dim, emb_dim) 70 | self.rnn = nn.LSTM(emb_dim+enc_hid_dim, dec_hid_dim) 71 | self.fc = nn.Linear(emb_dim+enc_hid_dim+dec_hid_dim, output_dim) 72 | self.dropout = nn.Dropout(dropout) 73 | 74 | def forward(self, input, hidden, cell, context): 75 | # input(batch_size): last prediction 76 | # hidden(batch_size, dec_hid_dim): decoder last hidden state 77 | # cell(batch_size, dec_hid_dim): decoder last cell state 78 | # context(batch_size, enc_hid_dim): context vector 79 | # print(input.shape, hidden.shape, cell.shape, context.shape) 80 | # expand dim to (1, batch_size) 81 | input = input.unsqueeze(0) 82 | 83 | # embedded(1, batch_size, emb_dim): embed last prediction word 84 | embedded = self.dropout(self.embedding(input)) 85 | 86 | # rnn_input(1, batch_size, emb_dim+enc_hide_dim): concat embedded and context 87 | rnn_input = torch.cat((embedded, context.unsqueeze(0)), dim=2) 88 | 89 | # output(seq_len, batch, num_directions * hidden_size) 90 | # 
hidden(num_layers * num_directions, batch, hidden_size) 91 | output, (hidden, cell) = self.rnn(rnn_input, (hidden.unsqueeze(0), cell.unsqueeze(0))) 92 | 93 | # hidden(batch_size, dec_hid_dim) 94 | # cell(batch_size, dec_hid_dim) 95 | # embedded(1, batch_size, emb_dim) 96 | hidden = hidden.squeeze(0) 97 | cell = cell.squeeze(0) 98 | embedded = embedded.squeeze(0) 99 | 100 | # prediction 101 | prediction = self.fc(torch.cat((embedded, context, hidden), dim=1)) 102 | 103 | return prediction, (hidden, cell) 104 | 105 | 106 | class Seq2Seq(nn.Module): 107 | def __init__(self, encoder, decoder, device): 108 | super(Seq2Seq, self).__init__() 109 | self.encoder = encoder 110 | self.decoder = decoder 111 | self.device = device 112 | 113 | def forward(self, imgs, target, teacher_forcing_ratio=0.5): 114 | # imgs: (batch_size, channels, T, H, W) 115 | # target: (batch_size, trg len) 116 | batch_size = imgs.shape[0] 117 | trg_len = target.shape[1] 118 | trg_vocab_size = self.decoder.output_dim 119 | 120 | # tensor to store decoder outputs 121 | outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device) 122 | 123 | # encoder_outputs(batch, seq_len, hidden_size): all hidden states of input sequence 124 | encoder_outputs, (hidden, cell) = self.encoder(imgs) 125 | 126 | # compute context vector 127 | context = encoder_outputs.mean(dim=1) 128 | 129 | # first input to the decoder is the tokens 130 | input = target[:,0] 131 | 132 | for t in range(1, trg_len): 133 | # decode 134 | output, (hidden, cell) = self.decoder(input, hidden, cell, context) 135 | 136 | # store prediction 137 | outputs[t] = output 138 | 139 | # decide whether to do teacher foring 140 | teacher_force = random.random() < teacher_forcing_ratio 141 | 142 | # get the highest predicted token 143 | top1 = output.argmax(1) 144 | 145 | # apply teacher forcing 146 | input = target[:,t] if teacher_force else top1 147 | 148 | return outputs 149 | 150 | 151 | # Test 152 | if __name__ == '__main__': 153 | # test encoder 154 | encoder = Encoder(lstm_hidden_size=512) 155 | # imgs = torch.randn(16, 3, 8, 128, 128) 156 | # print(encoder(imgs)) 157 | 158 | # test decoder 159 | decoder = Decoder(output_dim=500, emb_dim=256, enc_hid_dim=512, dec_hid_dim=512, dropout=0.5) 160 | # input = torch.LongTensor(16).random_(0, 500) 161 | # hidden = torch.randn(16, 512) 162 | # cell = torch.randn(16, 512) 163 | # context = torch.randn(16, 512) 164 | # print(decoder(input, hidden, cell, context)) 165 | 166 | # test seq2seq 167 | device = torch.device("cpu") 168 | seq2seq = Seq2Seq(encoder=encoder, decoder=decoder, device=device) 169 | imgs = torch.randn(16, 3, 8, 128, 128) 170 | target = torch.LongTensor(16, 8).random_(0, 500) 171 | print(seq2seq(imgs, target).argmax(dim=2).permute(1,0)) # batch first 172 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.metrics import accuracy_score 3 | 4 | def test(model, criterion, dataloader, device, epoch, logger, writer): 5 | model.eval() 6 | losses = [] 7 | all_label = [] 8 | all_pred = [] 9 | 10 | with torch.no_grad(): 11 | for batch_idx, data in enumerate(dataloader): 12 | # get the inputs and labels 13 | inputs, labels = data['data'].to(device), data['label'].to(device) 14 | # forward 15 | outputs = model(inputs) 16 | if isinstance(outputs, list): 17 | outputs = outputs[0] 18 | # compute the loss 19 | loss = criterion(outputs, labels.squeeze()) 20 | 
losses.append(loss.item()) 21 | # collect labels & prediction 22 | prediction = torch.max(outputs, 1)[1] 23 | all_label.extend(labels.squeeze()) 24 | all_pred.extend(prediction) 25 | # Compute the average loss & accuracy 26 | test_loss = sum(losses)/len(losses) 27 | all_label = torch.stack(all_label, dim=0) 28 | all_pred = torch.stack(all_pred, dim=0) 29 | test_acc = accuracy_score(all_label.squeeze().cpu().data.squeeze().numpy(), all_pred.cpu().data.squeeze().numpy()) 30 | # Log 31 | writer.add_scalars('Loss', {'test': test_loss}, epoch+1) 32 | writer.add_scalars('Accuracy', {'test': test_acc}, epoch+1) 33 | logger.info("Average Test Loss: {:.6f} | Acc: {:.2f}%".format(test_loss, test_acc*100)) 34 | 35 | if __name__ == '__main__': 36 | import os 37 | import argparse 38 | from torch.utils.data import DataLoader 39 | import torchvision.transforms as transforms 40 | from dataset import CSL_Isolated 41 | from models.Conv3D import resnet18, resnet34, resnet50, r2plus1d_18 42 | # Arguments 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument('--data_path', default='/home/haodong/Data/CSL_Isolated/color_video_125000', 45 | type=str, help='Data path for testing') 46 | parser.add_argument('--label_path', default='/home/haodong/Data/CSL_Isolated/dictionary.txt', 47 | type=str, help='Label path for testing') 48 | parser.add_argument('--model', default='3dresnet18', 49 | type=str, help='Choose a model for testing') 50 | parser.add_argument('--model_path', default='3dresnet18.pth', 51 | type=str, help='Model state dict path') 52 | parser.add_argument('--num_classes', default=500, 53 | type=int, help='Number of classes for testing') 54 | parser.add_argument('--batch_size', default=32, 55 | type=int, help='Batch size for testing') 56 | parser.add_argument('--sample_size', default=128, 57 | type=int, help='Sample size for testing') 58 | parser.add_argument('--sample_duration', default=16, 59 | type=int, help='Sample duration for testing') 60 | parser.add_argument('--no_cuda', action='store_true', 61 | help='If true, dont use cuda') 62 | parser.add_argument('--cuda_devices', default='2', 63 | type=str, help='Cuda visible devices') 64 | args = parser.parse_args() 65 | 66 | # Path setting 67 | data_path = args.data_path 68 | label_path = args.label_path 69 | model_path = args.model_path 70 | # Use specific gpus 71 | os.environ["CUDA_VISIBLE_DEVICES"]=args.cuda_devices 72 | # Device setting 73 | if torch.cuda.is_available() and not args.no_cuda: 74 | device = torch.device("cuda") 75 | else: 76 | device = torch.device("cpu") 77 | 78 | # Hyperparams 79 | num_classes = args.num_classes 80 | batch_size = args.batch_size 81 | sample_size = args.sample_size 82 | sample_duration = args.sample_duration 83 | 84 | # Start testing 85 | # Load data 86 | transform = transforms.Compose([transforms.Resize([sample_size, sample_size]), 87 | transforms.ToTensor(), 88 | transforms.Normalize(mean=[0.5], std=[0.5])]) 89 | test_set = CSL_Isolated(data_path=data_path, label_path=label_path, frames=sample_duration, 90 | num_classes=num_classes, train=False, transform=transform) 91 | test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True) 92 | 93 | # Create model 94 | if args.model == '3dresnet18': 95 | model = resnet18(pretrained=True, progress=True, sample_size=sample_size, 96 | sample_duration=sample_duration, num_classes=num_classes).to(device) 97 | elif args.model == '3dresnet34': 98 | model = resnet34(pretrained=True, progress=True, sample_size=sample_size, 99 | 
sample_duration=sample_duration, num_classes=num_classes).to(device) 100 | elif args.model == '3dresnet50': 101 | model = resnet50(pretrained=True, progress=True, sample_size=sample_size, 102 | sample_duration=sample_duration, num_classes=num_classes).to(device) 103 | elif args.model == 'r2plus1d': 104 | model = r2plus1d_18(pretrained=True, num_classes=num_classes).to(device) 105 | # Run the model in parallel on multiple GPUs 106 | if torch.cuda.device_count() > 1: 107 | model = torch.nn.DataParallel(model) 108 | # Load model 109 | model.load_state_dict(torch.load(model_path)) 110 | 111 | # Test the model 112 | model.eval() 113 | all_label = [] 114 | all_pred = [] 115 | 116 | with torch.no_grad(): 117 | for batch_idx, data in enumerate(test_loader): 118 | # get the inputs and labels 119 | inputs, labels = data['data'].to(device), data['label'].to(device) 120 | # forward 121 | outputs = model(inputs) 122 | # collect labels & prediction 123 | prediction = torch.max(outputs, 1)[1] 124 | all_label.extend(labels.squeeze()) 125 | all_pred.extend(prediction) 126 | # Compute the average loss & accuracy 127 | all_label = torch.stack(all_label, dim=0) 128 | all_pred = torch.stack(all_pred, dim=0) 129 | test_acc = accuracy_score(all_label.squeeze().cpu().data.squeeze().numpy(), all_pred.cpu().data.squeeze().numpy()) 130 | print("Test Acc: {:.2f}%".format(test_acc*100)) 131 | -------------------------------------------------------------------------------- /tools.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.utils.tensorboard import SummaryWriter 4 | import torchvision.utils as utils 5 | import cv2 6 | from datetime import datetime 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | from sklearn.metrics import confusion_matrix 10 | 11 | 12 | def get_label_and_pred(model, dataloader, device): 13 | all_label = [] 14 | all_pred = [] 15 | with torch.no_grad(): 16 | for batch_idx, data in enumerate(dataloader): 17 | # get the inputs and labels 18 | inputs, labels = data['data'].to(device), data['label'].to(device) 19 | # forward 20 | outputs = model(inputs) 21 | if isinstance(outputs, list): 22 | outputs = outputs[0] 23 | # collect labels & prediction 24 | prediction = torch.max(outputs, 1)[1] 25 | all_label.extend(labels.squeeze()) 26 | all_pred.extend(prediction) 27 | # Compute accuracy 28 | all_label = torch.stack(all_label, dim=0) 29 | all_pred = torch.stack(all_pred, dim=0) 30 | all_label = all_label.squeeze().cpu().data.squeeze().numpy() 31 | all_pred = all_pred.cpu().data.squeeze().numpy() 32 | return all_label, all_pred 33 | 34 | 35 | def plot_confusion_matrix(model, dataloader, device, save_path='confmat.png', normalize=True): 36 | # Get prediction 37 | all_label, all_pred = get_label_and_pred(model, dataloader, device) 38 | confmat = confusion_matrix(all_label, all_pred) 39 | 40 | # Normalize the matrix 41 | if normalize: 42 | confmat = confmat.astype('float') / confmat.sum(axis=1)[:, np.newaxis] 43 | # Draw matrix 44 | plt.figure(figsize=(20,20)) 45 | # confmat = np.random.rand(100,100) 46 | plt.imshow(confmat, interpolation='nearest', cmap=plt.cm.Blues) 47 | plt.colorbar() 48 | # Add ticks 49 | ticks = np.arange(100) 50 | plt.xticks(ticks, fontsize=8) 51 | plt.yticks(ticks, fontsize=8) 52 | plt.grid(True) 53 | # Add title & labels 54 | plt.title('Confusion matrix', fontsize=20) 55 | plt.xlabel('Predicted label', fontsize=20) 56 | plt.ylabel('True label', fontsize=20) 57 | # Save figure 58 |
plt.savefig(save_path) 59 | 60 | # Ranking 61 | sorted_index = np.diag(confmat).argsort() 62 | for i in range(10): 63 | # print(type(sorted_index[i])) 64 | print(test_set.label_to_word(int(sorted_index[i])), confmat[sorted_index[i]][sorted_index[i]]) 65 | # Save to csv 66 | np.savetxt('matrix.csv', confmat, delimiter=',') 67 | 68 | 69 | def visualize_attn(I, c): 70 | # Image 71 | img = I.permute((1,2,0)).cpu().numpy() 72 | # Heatmap 73 | N, C, H, W = c.size() 74 | a = F.softmax(c.view(N,C,-1), dim=2).view(N,C,H,W) 75 | up_factor = 128/H 76 | # print(up_factor, I.size(), c.size()) 77 | if up_factor > 1: 78 | a = F.interpolate(a, scale_factor=up_factor, mode='bilinear', align_corners=False) 79 | attn = utils.make_grid(a, nrow=4, normalize=True, scale_each=True) 80 | attn = attn.permute((1,2,0)).mul(255).byte().cpu().numpy() 81 | attn = cv2.applyColorMap(attn, cv2.COLORMAP_JET) 82 | attn = cv2.cvtColor(attn, cv2.COLOR_BGR2RGB) 83 | # Add the heatmap to the image 84 | vis = 0.6 * img + 0.4 * attn 85 | return torch.from_numpy(vis).permute(2,0,1) 86 | 87 | 88 | def plot_attention_map(model, dataloader, device): 89 | # Summary writer 90 | writer = SummaryWriter("runs/attention_{:%Y-%m-%d_%H-%M-%S}".format(datetime.now())) 91 | 92 | model.eval() 93 | with torch.no_grad(): 94 | for batch_idx, data in enumerate(dataloader): 95 | # get images 96 | inputs = data['data'].to(device) 97 | if batch_idx == 0: 98 | images = inputs[0:16,:,:,:,:] 99 | I = utils.make_grid(images[:,:,0,:,:], nrow=4, normalize=True, scale_each=True) 100 | writer.add_image('origin', I) 101 | _, c1, c2, c3, c4 = model(images) 102 | # print(I.shape, c1.shape, c2.shape, c3.shape, c4.shape) 103 | attn1 = visualize_attn(I, c1[:,:,0,:,:]) 104 | writer.add_image('attn1', attn1) 105 | attn2 = visualize_attn(I, c2[:,:,0,:,:]) 106 | writer.add_image('attn2', attn2) 107 | attn3 = visualize_attn(I, c3[:,:,0,:,:]) 108 | writer.add_image('attn3', attn3) 109 | attn4 = visualize_attn(I, c4[:,:,0,:,:]) 110 | writer.add_image('attn4', attn4) 111 | break 112 | 113 | 114 | """ 115 | Calculate Word Error Rate 116 | Word Error Rate = (Substitutions + Insertions + Deletions) / Number of Words Spoken 117 | Reference: 118 | https://holianh.github.io/portfolio/Cach-tinh-WER/ 119 | https://github.com/imalic3/python-word-error-rate 120 | """ 121 | def wer(r, h): 122 | # initialisation 123 | d = np.zeros((len(r)+1)*(len(h)+1), dtype=np.uint8) 124 | d = d.reshape((len(r)+1, len(h)+1)) 125 | for i in range(len(r)+1): 126 | for j in range(len(h)+1): 127 | if i == 0: 128 | d[0][j] = j 129 | elif j == 0: 130 | d[i][0] = i 131 | 132 | # computation 133 | for i in range(1, len(r)+1): 134 | for j in range(1, len(h)+1): 135 | if r[i-1] == h[j-1]: 136 | d[i][j] = d[i-1][j-1] 137 | else: 138 | substitution = d[i-1][j-1] + 1 139 | insertion = d[i][j-1] + 1 140 | deletion = d[i-1][j] + 1 141 | d[i][j] = min(substitution, insertion, deletion) 142 | 143 | return float(d[len(r)][len(h)]) / len(r) * 100 144 | 145 | 146 | if __name__ == '__main__': 147 | # Calculate WER 148 | r = [1,2,3,4] 149 | h = [1,1,3,5,6] 150 | print(wer(r, h)) 151 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.metrics import accuracy_score 3 | from tools import wer 4 | 5 | def train_epoch(model, criterion, optimizer, dataloader, device, epoch, logger, log_interval, writer): 6 | model.train() 7 | losses = [] 8 | all_label = [] 9 | all_pred = [] 
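    # losses collects the per-batch loss values, while all_label / all_pred accumulate
    # the per-sample label and prediction tensors; they are stacked after the loop so
    # that an epoch-level accuracy can be reported alongside the per-iteration score.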
10 | 11 | for batch_idx, data in enumerate(dataloader): 12 | # get the inputs and labels 13 | inputs, labels = data['data'].to(device), data['label'].to(device) 14 | 15 | optimizer.zero_grad() 16 | # forward 17 | outputs = model(inputs) 18 | if isinstance(outputs, list): 19 | outputs = outputs[0] 20 | 21 | # compute the loss 22 | loss = criterion(outputs, labels.squeeze()) 23 | losses.append(loss.item()) 24 | 25 | # compute the accuracy 26 | prediction = torch.max(outputs, 1)[1] 27 | all_label.extend(labels.squeeze()) 28 | all_pred.extend(prediction) 29 | score = accuracy_score(labels.squeeze().cpu().data.squeeze().numpy(), prediction.cpu().data.squeeze().numpy()) 30 | 31 | # backward & optimize 32 | loss.backward() 33 | optimizer.step() 34 | 35 | if (batch_idx + 1) % log_interval == 0: 36 | logger.info("epoch {:3d} | iteration {:5d} | Loss {:.6f} | Acc {:.2f}%".format(epoch+1, batch_idx+1, loss.item(), score*100)) 37 | 38 | # Compute the average loss & accuracy 39 | training_loss = sum(losses)/len(losses) 40 | all_label = torch.stack(all_label, dim=0) 41 | all_pred = torch.stack(all_pred, dim=0) 42 | training_acc = accuracy_score(all_label.squeeze().cpu().data.squeeze().numpy(), all_pred.cpu().data.squeeze().numpy()) 43 | # Log 44 | writer.add_scalars('Loss', {'train': training_loss}, epoch+1) 45 | writer.add_scalars('Accuracy', {'train': training_acc}, epoch+1) 46 | logger.info("Average Training Loss of Epoch {}: {:.6f} | Acc: {:.2f}%".format(epoch+1, training_loss, training_acc*100)) 47 | 48 | 49 | def train_seq2seq(model, criterion, optimizer, clip, dataloader, device, epoch, logger, log_interval, writer): 50 | model.train() 51 | losses = [] 52 | all_trg = [] 53 | all_pred = [] 54 | all_wer = [] 55 | 56 | for batch_idx, (imgs, target) in enumerate(dataloader): 57 | imgs = imgs.to(device) 58 | target = target.to(device) 59 | 60 | optimizer.zero_grad() 61 | # forward 62 | outputs = model(imgs, target) 63 | 64 | # target: (batch_size, trg len) 65 | # outputs: (trg_len, batch_size, output_dim) 66 | # skip sos 67 | output_dim = outputs.shape[-1] 68 | outputs = outputs[1:].view(-1, output_dim) 69 | target = target.permute(1,0)[1:].reshape(-1) 70 | 71 | # compute the loss 72 | loss = criterion(outputs, target) 73 | losses.append(loss.item()) 74 | 75 | # compute the accuracy 76 | prediction = torch.max(outputs, 1)[1] 77 | score = accuracy_score(target.cpu().data.squeeze().numpy(), prediction.cpu().data.squeeze().numpy()) 78 | all_trg.extend(target) 79 | all_pred.extend(prediction) 80 | 81 | # compute wer 82 | # prediction: ((trg_len-1)*batch_size) 83 | # target: ((trg_len-1)*batch_size) 84 | batch_size = imgs.shape[0] 85 | prediction = prediction.view(-1, batch_size).permute(1,0).tolist() 86 | target = target.view(-1, batch_size).permute(1,0).tolist() 87 | wers = [] 88 | for i in range(batch_size): 89 | # add mask(remove padding, sos, eos) 90 | prediction[i] = [item for item in prediction[i] if item not in [0,1,2]] 91 | target[i] = [item for item in target[i] if item not in [0,1,2]] 92 | wers.append(wer(target[i], prediction[i])) 93 | all_wer.extend(wers) 94 | 95 | # backward & optimize 96 | loss.backward() 97 | torch.nn.utils.clip_grad_norm_(model.parameters(), clip) 98 | optimizer.step() 99 | 100 | if (batch_idx + 1) % log_interval == 0: 101 | logger.info("epoch {:3d} | iteration {:5d} | Loss {:.6f} | Acc {:.2f}% | WER {:.2f}%".format(epoch+1, batch_idx+1, loss.item(), score*100, sum(wers)/len(wers))) 102 | 103 | # Compute the average loss & accuracy 104 | training_loss = 
sum(losses)/len(losses) 105 | all_trg = torch.stack(all_trg, dim=0) 106 | all_pred = torch.stack(all_pred, dim=0) 107 | training_acc = accuracy_score(all_trg.cpu().data.squeeze().numpy(), all_pred.cpu().data.squeeze().numpy()) 108 | training_wer = sum(all_wer)/len(all_wer) 109 | # Log 110 | writer.add_scalars('Loss', {'train': training_loss}, epoch+1) 111 | writer.add_scalars('Accuracy', {'train': training_acc}, epoch+1) 112 | writer.add_scalars('WER', {'train': training_wer}, epoch+1) 113 | logger.info("Average Training Loss of Epoch {}: {:.6f} | Acc: {:.2f}% | WER {:.2f}%".format(epoch+1, training_loss, training_acc*100, training_wer)) 114 | -------------------------------------------------------------------------------- /validation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.metrics import accuracy_score 3 | from tools import wer 4 | 5 | def val_epoch(model, criterion, dataloader, device, epoch, logger, writer): 6 | model.eval() 7 | losses = [] 8 | all_label = [] 9 | all_pred = [] 10 | 11 | with torch.no_grad(): 12 | for batch_idx, data in enumerate(dataloader): 13 | # get the inputs and labels 14 | inputs, labels = data['data'].to(device), data['label'].to(device) 15 | # forward 16 | outputs = model(inputs) 17 | if isinstance(outputs, list): 18 | outputs = outputs[0] 19 | # compute the loss 20 | loss = criterion(outputs, labels.squeeze()) 21 | losses.append(loss.item()) 22 | # collect labels & prediction 23 | prediction = torch.max(outputs, 1)[1] 24 | all_label.extend(labels.squeeze()) 25 | all_pred.extend(prediction) 26 | # Compute the average loss & accuracy 27 | validation_loss = sum(losses)/len(losses) 28 | all_label = torch.stack(all_label, dim=0) 29 | all_pred = torch.stack(all_pred, dim=0) 30 | validation_acc = accuracy_score(all_label.squeeze().cpu().data.squeeze().numpy(), all_pred.cpu().data.squeeze().numpy()) 31 | # Log 32 | writer.add_scalars('Loss', {'validation': validation_loss}, epoch+1) 33 | writer.add_scalars('Accuracy', {'validation': validation_acc}, epoch+1) 34 | logger.info("Average Validation Loss of Epoch {}: {:.6f} | Acc: {:.2f}%".format(epoch+1, validation_loss, validation_acc*100)) 35 | 36 | 37 | def val_seq2seq(model, criterion, dataloader, device, epoch, logger, writer): 38 | model.eval() 39 | losses = [] 40 | all_trg = [] 41 | all_pred = [] 42 | all_wer = [] 43 | 44 | with torch.no_grad(): 45 | for batch_idx, (imgs, target) in enumerate(dataloader): 46 | imgs = imgs.to(device) 47 | target = target.to(device) 48 | 49 | # forward(no teacher forcing) 50 | outputs = model(imgs, target, 0) 51 | 52 | # target: (batch_size, trg len) 53 | # outputs: (trg_len, batch_size, output_dim) 54 | # skip sos 55 | output_dim = outputs.shape[-1] 56 | outputs = outputs[1:].view(-1, output_dim) 57 | target = target.permute(1,0)[1:].reshape(-1) 58 | 59 | # compute the loss 60 | loss = criterion(outputs, target) 61 | losses.append(loss.item()) 62 | 63 | # compute the accuracy 64 | prediction = torch.max(outputs, 1)[1] 65 | score = accuracy_score(target.cpu().data.squeeze().numpy(), prediction.cpu().data.squeeze().numpy()) 66 | all_trg.extend(target) 67 | all_pred.extend(prediction) 68 | 69 | # compute wer 70 | # prediction: ((trg_len-1)*batch_size) 71 | # target: ((trg_len-1)*batch_size) 72 | batch_size = imgs.shape[0] 73 | prediction = prediction.view(-1, batch_size).permute(1,0).tolist() 74 | target = target.view(-1, batch_size).permute(1,0).tolist() 75 | wers = [] 76 | for i in range(batch_size): 77 | # 
add mask(remove padding, eos, sos) 78 | prediction[i] = [item for item in prediction[i] if item not in [0,1,2]] 79 | target[i] = [item for item in target[i] if item not in [0,1,2]] 80 | wers.append(wer(target[i], prediction[i])) 81 | all_wer.extend(wers) 82 | 83 | # Compute the average loss & accuracy 84 | validation_loss = sum(losses)/len(losses) 85 | all_trg = torch.stack(all_trg, dim=0) 86 | all_pred = torch.stack(all_pred, dim=0) 87 | validation_acc = accuracy_score(all_trg.cpu().data.squeeze().numpy(), all_pred.cpu().data.squeeze().numpy()) 88 | validation_wer = sum(all_wer)/len(all_wer) 89 | # Log 90 | writer.add_scalars('Loss', {'validation': validation_loss}, epoch+1) 91 | writer.add_scalars('Accuracy', {'validation': validation_acc}, epoch+1) 92 | writer.add_scalars('WER', {'validation': validation_wer}, epoch+1) 93 | logger.info("Average Validation Loss of Epoch {}: {:.6f} | Acc: {:.2f}% | WER: {:.2f}%".format(epoch+1, validation_loss, validation_acc*100, validation_wer)) 94 | --------------------------------------------------------------------------------
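The driver scripts listed in the tree above (e.g. CSL_Isolated_Conv3D.py) are not included in this dump, so the following is only a minimal, illustrative sketch of how the helpers above could be wired together for the isolated-word setting. Every value that does not appear in the files above -- the placeholder paths, batch size, learning rate, epoch count, optimizer and logger setup -- is an assumption for illustration, not the authors' configuration.

# Illustrative driver sketch (assumptions: paths, hyper-parameters, optimizer choice).
import logging
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms

from dataset import CSL_Isolated
from models.Conv3D import resnet18
from train import train_epoch
from validation import val_epoch

if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("slr_example")
    writer = SummaryWriter("runs/slr_example")

    sample_size, sample_duration, num_classes = 128, 16, 500
    transform = transforms.Compose([transforms.Resize([sample_size, sample_size]),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.5], std=[0.5])])
    # The data_path / label_path values are placeholders, not the original locations.
    train_set = CSL_Isolated(data_path="<path/to/color_videos>", label_path="<path/to/dictionary.txt>",
                             frames=sample_duration, num_classes=num_classes, train=True, transform=transform)
    val_set = CSL_Isolated(data_path="<path/to/color_videos>", label_path="<path/to/dictionary.txt>",
                           frames=sample_duration, num_classes=num_classes, train=False, transform=transform)
    train_loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_set, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)

    # resnet18 here is the 3D ResNet defined in models/Conv3D.py above.
    model = resnet18(pretrained=False, sample_size=sample_size,
                     sample_duration=sample_duration, num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    for epoch in range(30):
        train_epoch(model, criterion, optimizer, train_loader, device, epoch,
                    logger, log_interval=20, writer=writer)
        val_epoch(model, criterion, val_loader, device, epoch, logger, writer)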