├── LICENSE
├── README.md
├── code
│ ├── BERT-gloss-tagger.py
│ ├── README.md
│ ├── generation-model
│ │ ├── README.md
│ │ ├── dynamic_selection
│ │ │ ├── Configs
│ │ │ │ ├── Base.yaml
│ │ │ │ ├── Base_train_all.yaml
│ │ │ │ ├── Base_train_all_test.yaml
│ │ │ │ ├── Base_train_gloss_after_no.yaml
│ │ │ │ ├── Base_train_gloss_basic.yaml
│ │ │ │ ├── Base_train_gloss_joint_yes.yaml
│ │ │ │ ├── dynamic.yaml
│ │ │ │ ├── src_train.gloss.after.no.repeat_vocab.txt
│ │ │ │ ├── src_train.gloss.joint.no.repeat_vocab.txt
│ │ │ │ ├── src_train.gloss.joint.no.repeat_vocab_extend.txt
│ │ │ │ ├── src_train.gloss.joint.yes.repeat_vocab copy.txt
│ │ │ │ ├── src_train.gloss.joint.yes.repeat_vocab.txt
│ │ │ │ ├── src_vocab.txt
│ │ │ │ ├── text_vocab.txt
│ │ │ │ └── validations.txt
│ │ │ ├── Data
│ │ │ │ ├── Gloss_Dynamic
│ │ │ │ │ ├── dev.after_gloss
│ │ │ │ │ ├── dev.files
│ │ │ │ │ ├── dev.gloss
│ │ │ │ │ ├── dev.joint_gloss
│ │ │ │ │ ├── dev.text
│ │ │ │ │ ├── test.after_gloss
│ │ │ │ │ ├── test.files
│ │ │ │ │ ├── test.gloss
│ │ │ │ │ ├── test.joint_gloss
│ │ │ │ │ ├── test.text
│ │ │ │ │ ├── train.after_gloss
│ │ │ │ │ ├── train.files
│ │ │ │ │ ├── train.gloss
│ │ │ │ │ ├── train.joint_gloss
│ │ │ │ │ └── train.text
│ │ │ │ └── tmp
│ │ │ │   ├── dev.files
│ │ │ │   ├── dev.gloss
│ │ │ │   ├── dev.skels
│ │ │ │   ├── dev.text
│ │ │ │   ├── test.files
│ │ │ │   ├── test.gloss
│ │ │ │   ├── test.skels
│ │ │ │   ├── test.text
│ │ │ │   ├── train.files
│ │ │ │   ├── train.gloss
│ │ │ │   ├── train.skels
│ │ │ │   └── train.text
│ │ │ ├── LICENSE.md
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── __main__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── batch.cpython-37.pyc
│ │ │ │ ├── builders.cpython-37.pyc
│ │ │ │ ├── constants.cpython-37.pyc
│ │ │ │ ├── data.cpython-37.pyc
│ │ │ │ ├── decoders.cpython-37.pyc
│ │ │ │ ├── dtw.cpython-37.pyc
│ │ │ │ ├── embeddings.cpython-37.pyc
│ │ │ │ ├── encoders.cpython-37.pyc
│ │ │ │ ├── helpers.cpython-37.pyc
│ │ │ │ ├── initialization.cpython-37.pyc
│ │ │ │ ├── loss.cpython-37.pyc
│ │ │ │ ├── model.cpython-37.pyc
│ │ │ │ ├── plot_videos.cpython-37.pyc
│ │ │ │ ├── prediction.cpython-37.pyc
│ │ │ │ ├── search.cpython-37.pyc
│ │ │ │ ├── training.cpython-37.pyc
│ │ │ │ ├── transformer_layers.cpython-37.pyc
│ │ │ │ └── vocabulary.cpython-37.pyc
│ │ │ ├── batch.py
│ │ │ ├── builders.py
│ │ │ ├── constants.py
│ │ │ ├── data.py
│ │ │ ├── decoders.py
│ │ │ ├── dtw.py
│ │ │ ├── embeddings.py
│ │ │ ├── encoders.py
│ │ │ ├── external_metrics
│ │ │ │ ├── mscoco_rouge.py
│ │ │ │ └── sacrebleu.py
│ │ │ ├── helpers.py
│ │ │ ├── initialization.py
│ │ │ ├── loss.py
│ │ │ ├── metrics.py
│ │ │ ├── model.py
│ │ │ ├── optim
│ │ │ │ ├── lamb.py
│ │ │ │ ├── lookahead.py
│ │ │ │ ├── novograd.py
│ │ │ │ ├── over9000.py
│ │ │ │ ├── radam.py
│ │ │ │ ├── ralamb.py
│ │ │ │ └── ranger.py
│ │ │ ├── plot_videos.py
│ │ │ ├── prediction.py
│ │ │ ├── requirements.txt
│ │ │ ├── run-adj.sh
│ │ │ ├── run-norm.sh
│ │ │ ├── run.sh
│ │ │ ├── run2.sh
│ │ │ ├── search.py
│ │ │ ├── training.py
│ │ │ ├── transformer_layers.py
│ │ │ ├── vocabulary.py
│ │ │ └── write_file_slt.py
│ │ ├── progressive_transformer
│ │ │ ├── Configs
│ │ │ │ ├── Base.yaml
│ │ │ │ ├── Base_real.yaml
│ │ │ │ ├── Base_text2Skeleton.yaml
│ │ │ │ ├── src_text_vocab.txt
│ │ │ │ ├── src_vocab.txt
│ │ │ │ ├── train.log
│ │ │ │ └── validations.txt
│ │ │ ├── Data
│ │ │ │ └── tmp
│ │ │ │   ├── dev.files
│ │ │ │   ├── dev.gloss
│ │ │ │   ├── dev.skels
│ │ │ │   ├── dev.text
│ │ │ │   ├── test.files
│ │ │ │   ├── test.gloss
│ │ │ │   ├── test.skels
│ │ │ │   ├── test.text
│ │ │ │   ├── train.files
│ │ │ │   ├── train.gloss
│ │ │ │   ├── train.skels
│ │ │ │   └── train.text
│ │ │ ├── LICENSE.md
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── __main__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── batch.cpython-37.pyc
│ │ │ │ ├── builders.cpython-37.pyc
│ │ │ │ ├── constants.cpython-37.pyc
│ │ │ │ ├── data.cpython-37.pyc
│ │ │ │ ├── decoders.cpython-37.pyc
│ │ │ │ ├── dtw.cpython-37.pyc
│ │ │ │ ├── embeddings.cpython-37.pyc
│ │ │ │ ├── encoders.cpython-37.pyc
│ │ │ │ ├── helpers.cpython-37.pyc
│ │ │ │ ├── initialization.cpython-37.pyc
│ │ │ │ ├── loss.cpython-37.pyc
│ │ │ │ ├── model.cpython-37.pyc
│ │ │ │ ├── plot_videos.cpython-37.pyc
│ │ │ │ ├── prediction.cpython-37.pyc
│ │ │ │ ├── search.cpython-37.pyc
│ │ │ │ ├── training.cpython-37.pyc
│ │ │ │ ├── training.cpython-39.pyc
│ │ │ │ ├── transformer_layers.cpython-37.pyc
│ │ │ │ └── vocabulary.cpython-37.pyc
│ │ │ ├── batch.py
│ │ │ ├── builders.py
│ │ │ ├── constants.py
│ │ │ ├── data.py
│ │ │ ├── decoders.py
│ │ │ ├── dtw.py
│ │ │ ├── embeddings.py
│ │ │ ├── encoders.py
│ │ │ ├── external_metrics
│ │ │ │ ├── mscoco_rouge.py
│ │ │ │ └── sacrebleu.py
│ │ │ ├── helpers.py
│ │ │ ├── initialization.py
│ │ │ ├── loss.py
│ │ │ ├── make_new_vocab.py
│ │ │ ├── metrics.py
│ │ │ ├── model.py
│ │ │ ├── optim
│ │ │ │ ├── lamb.py
│ │ │ │ ├── lookahead.py
│ │ │ │ ├── novograd.py
│ │ │ │ ├── over9000.py
│ │ │ │ ├── radam.py
│ │ │ │ ├── ralamb.py
│ │ │ │ └── ranger.py
│ │ │ ├── plot_videos.py
│ │ │ ├── prediction.py
│ │ │ ├── requirements (1).txt
│ │ │ ├── requirements.txt
│ │ │ ├── search.py
│ │ │ ├── training.py
│ │ │ ├── transformer_layers.py
│ │ │ ├── vocabulary.py
│ │ │ └── write_file_slt.py
│ │ └── slt
│ │   ├── .gitignore
│ │   ├── LICENSE
│ │   ├── README.md
│ │   ├── base_annotations
│ │   │ ├── check_base_file.py
│ │   │ ├── file.txt
│ │   │ ├── out_dev
│ │   │ ├── phoenix14t.dev
│ │   │ ├── phoenix14t.test
│ │   │ └── phoenix14t.train
│ │   ├── configs
│ │   │ ├── sign copy.yaml
│ │   │ ├── sign.yaml
│ │   │ ├── sign_train_gloss_before_no_repeat.yaml
│ │   │ ├── sign_train_gloss_between_no_repeat.yaml
│ │   │ ├── sign_train_gloss_between_yes_repeat.yaml
│ │   │ └── sign_train_gloss_joint_no_repeat.yaml
│ │   ├── experiment_results
│ │   │ ├── dev.ph14t
│ │   │ ├── results.md
│ │   │ └── test.ph14t
│ │   ├── output
│ │   │ ├── output_gold.BW_009.dev.gls
│ │   │ ├── output_gold.BW_009.test.gls
│ │   │ ├── output_gold.BW_09.A_3.dev.txt
│ │   │ ├── output_gold.BW_09.A_3.test.txt
│ │   │ ├── output_gold.dev_results.pkl
│ │   │ └── output_gold.test_results.pkl
│ │   ├── requirements.txt
│ │   └── signjoey
│ │     ├── __init__.py
│ │     ├── __main__ eval.py
│ │     ├── __main__.py
│ │     ├── __main__evaluation.py
│ │     ├── attention.py
│ │     ├── batch.py
│ │     ├── builders.py
│ │     ├── data.py
│ │     ├── dataset.py
│ │     ├── decoders.py
│ │     ├── embeddings.py
│ │     ├── encoders.py
│ │     ├── external_metrics
│ │     │ ├── mscoco_rouge.py
│ │     │ └── sacrebleu.py
│ │     ├── helpers.py
│ │     ├── initialization.py
│ │     ├── loss.py
│ │     ├── metrics.py
│ │     ├── metrics_old.py
│ │     ├── model.py
│ │     ├── phoenix_utils
│ │     │ └── phoenix_cleanup.py
│ │     ├── prediction.py
│ │     ├── search.py
│ │     ├── training.py
│ │     ├── transformer_layers.py
│ │     └── vocabulary.py
│ └── gloss-tagger-model
│   └── README.md
└── data
  ├── README.MD
  ├── dev.full
  ├── gloss-annotation.tsv
  ├── test.full
  └── train.full
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Mert Inan
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Modeling Intensification for Sign Language Generation
2 | Public repo for the paper: "Modeling Intensification for Sign Language Generation: A Computational Approach" by Mert Inan*, Yang Zhong*, Sabit Hassan*, Lorna Quandt, Malihe Alikhani
3 |
4 | ## Abstract
5 | End-to-end sign language generation models do not accurately represent the prosody that exists in sign languages. The lack of temporal and spatial variation in the models’ scope leads to poor quality and lower human understanding of generated signs. In this paper, we seek to improve prosody in generated sign languages by modeling intensification in a data-driven manner. We present different strategies grounded in linguistics of sign language that differ in how intensity modifiers can be represented in gloss annotations. To employ our strategies, we first annotate a subset of the benchmark PHOENIX14T, a German Sign Language dataset, with different levels of intensification. We then use a supervised intensity tagger to extend the annotated dataset and obtain labels for the remaining portion of the
6 | dataset. This enhanced dataset is then used to train state-of-the-art transformer models for sign language generation. We find that our efforts in intensification modeling yield better results when evaluated with automated metrics. Human evaluation also indicates a higher preference for the videos generated using our model.
7 |
8 | ## Organization of the repo
9 | This repo provides the code and the data required to replicate the results from the paper. The dataset that we provide, with the intensifier augmentations to the gloss, can be used for further research.
10 |
11 | Under the /code folder, you can find the code for the main proposed sign language generation model, along with our gloss augmentation code.
12 |
13 |
--------------------------------------------------------------------------------
/code/BERT-gloss-tagger.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
3 | from sklearn.svm import LinearSVC
4 | from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
5 | from collections import Counter
6 | import os
7 | import ktrain
8 | from ktrain import text
9 |
10 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
11 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
12 | # MODEL_NAME = "bert-base-german-cased"
13 | # MODEL_NAME = "bert-base-multilingual-cased"
14 | MODEL_NAME = "bert-base-multilingual-uncased"
15 | # MODEL_NAME = "dbmdz/bert-base-german-uncased"
16 |
17 | def print_evaluation(gold_labels, predicted_labels):
18 |
19 |     '''Returns accuracy, precision, recall, and macro F1 as percentages rounded to two decimals'''
20 |
21 | accuracy = accuracy_score(gold_labels, predicted_labels) * 100
22 | f1 = f1_score(gold_labels, predicted_labels, average = "macro") * 100
23 | recall = recall_score(gold_labels, predicted_labels, average = "macro") * 100
24 | precision = precision_score(gold_labels, predicted_labels, average = "macro") * 100
25 |
26 |
27 | a = [accuracy, precision, recall, f1]
28 | for i in range (4):
29 | a[i] = round(a[i],2)
30 |
31 | return a
32 |
33 | train_text = []
34 | train_gloss = []
35 | train_labels = []
36 | train_serials = []
37 | train_inp = []
38 |
39 | dev_text = []
40 | dev_gloss = []
41 | dev_labels = []
42 | dev_serials = []
43 | dev_inp = []
44 |
45 | test_text = []
46 | test_gloss = []
47 | test_labels = []
48 | test_serials = []
49 | test_inp = []
50 |
51 |
52 | prev = ""
53 | check = 0
54 | max_len = 0
55 | with open ("../data/gloss-annotation.tsv", "r") as f:
56 | for line in f:
57 | check += 1
58 | if check == 1:
59 | continue
60 |
61 | row = line.lower().strip().split("\t")
62 |
63 |
64 | try:
65 | split = row[0]
66 | serial = row[1]
67 | g_text = row[2]
68 | g_gloss_token = row[6]
69 | label = row[8]
70 |
71 | if g_text == "":
72 | g_text = prev
73 | else:
74 | prev = g_text
75 |         except IndexError:
76 |             print("skipping malformed row at line", check)
77 |
78 | if (len(g_text) > max_len):
79 | max_len = len(g_text)
80 |
81 | # print (g_text, g_gloss_token)
82 | if split == "train":
83 | train_text.append(g_text)
84 | train_gloss.append(g_gloss_token)
85 | train_labels.append(label)
86 | train_serials.append(serial)
87 | train_inp.append((g_text, g_gloss_token))
88 |
89 | if split == "dev":
90 | dev_text.append(g_text)
91 | dev_gloss.append(g_gloss_token)
92 | dev_labels.append(label)
93 | dev_serials.append(serial)
94 | dev_inp.append((g_text, g_gloss_token))
95 |
96 | if split == "test":
97 | test_text.append(g_text)
98 | test_gloss.append(g_gloss_token)
99 | test_labels.append(label)
100 | test_serials.append(serial)
101 | test_inp.append((g_text, g_gloss_token))
102 |
103 |
104 | print (MODEL_NAME)
105 | t = text.Transformer(MODEL_NAME, maxlen=230, class_names=list(set(train_labels)))
106 | trn = t.preprocess_train(train_inp, train_labels)
107 | val = t.preprocess_test(dev_inp, dev_labels)
108 | tst = t.preprocess_test(test_inp, test_labels)
109 | model = t.get_classifier()
110 |
111 | learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=8) # lower bs if OOM occurs
112 | learner.fit_onecycle(5e-5, 5)
113 | clf = ktrain.get_predictor(learner.model, t)
114 |
115 | dev_preds = clf.predict(dev_inp)
116 | test_preds = clf.predict(test_inp)
117 |
118 | print ("dev results", print_evaluation(dev_labels, dev_preds))
119 | print ("test results", print_evaluation(test_labels, test_preds))
120 |
121 | print (Counter(test_preds))
122 |
123 | clf.save('gloss-tagger-model/mert-uncased')
124 |
125 |
126 |
127 |
128 |
--------------------------------------------------------------------------------
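For reference, a predictor saved with `clf.save(...)` as above can be reloaded later via `ktrain.load_predictor` and applied to new (sentence, gloss-token) pairs, the same input shape used during training. A minimal sketch (the example pair below is invented for illustration):

```python
import ktrain

# Reload the tagger saved by BERT-gloss-tagger.py.
clf = ktrain.load_predictor('gloss-tagger-model/mert-uncased')

# The model was trained on (sentence, gloss-token) tuples, so prediction
# takes the same shape; this German weather sentence is a made-up example.
pairs = [("im süden wird es sehr kalt", "kalt")]
print(clf.predict(pairs))
```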
/code/README.md:
--------------------------------------------------------------------------------
1 | To run BERT-gloss-tagger.py, make sure to install scikit-learn and ktrain:
2 | pip install ktrain==0.28.3
3 | pip install scikit-learn==0.23.2
4 |
--------------------------------------------------------------------------------
/code/generation-model/README.md:
--------------------------------------------------------------------------------
1 | # Progressive Transformers
2 |
3 | `cd progressive_transformer`
4 |
5 | ## Install
6 | Install required packages using the requirements.txt file.
7 |
8 | `pip install -r requirements.txt`
9 |
10 | ## Usage
11 |
12 | To run, start __main__.py with the arguments "train" and "./Configs/Base.yaml":
13 |
14 | `python __main__.py train ./Configs/Base.yaml`
15 |
16 | You need to modify the YAML file.
17 |
18 | (1) You can use different versions of the PT dataset by modifying the data paths (train, dev, test, and src_vocab), located under the Data folder.
19 |
20 |
21 | ### Dump the skeletons
22 |
23 | First, call the function below in the main function of `training.py`:
24 |
25 | `test(cfg_file=PATH_OF_MODEL_CONFIG_FILE, ckpt=get_latest_checkpoint(PATH_OF_TRAINED_MODEL, post_fix="_best"))`
26 |
27 | Then `python training.py`
28 |
29 | ### Convert Data to SLT Format
30 |
31 | `python write_file_slt.py --cur_path Data/MODEL_SKELETON_NAME`
32 |
33 | ### Model checkpoints
34 | You can find the model checkpoints in [this link.](https://drive.google.com/drive/folders/10fsw-xSt2Rmupx31FYhxPcfSfiFL5Djf?usp=sharing)
35 |
36 | # SLT
37 |
38 | Make sure you have the corresponding data.
39 |
40 | `cd slt`
41 |
42 | `pip install -r requirements.txt`
43 |
44 | ## Training
45 | `python -m signjoey train configs/YOUR_MODIFIED_YAML`
46 | You only need to modify data:data_path in the YAML file.
47 |
48 | ## Test
49 | `python -m signjoey test configs/YOUR_MODIFIED_YAML --ckpt your.ckpt --output_path output/MODEL_NAME`
50 |
51 |
52 |
--------------------------------------------------------------------------------
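The skeleton-dump step above, written out as a minimal sketch to append at the bottom of `training.py` (where `test` is defined). Both paths are placeholders, and `get_latest_checkpoint` is assumed to be importable from `helpers.py`, as in the Progressive Transformer codebase:

```python
from helpers import get_latest_checkpoint  # assumed location of this helper

if __name__ == "__main__":
    # Dump skeletons from the best checkpoint of a trained model;
    # replace both paths with your own config and model directory.
    test(cfg_file="./Configs/Base.yaml",
         ckpt=get_latest_checkpoint("./Models/Base", post_fix="_best"))
```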
/code/generation-model/dynamic_selection/Configs/Base.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | src1: "text" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
3 | src2: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
4 | trg: "skels" # Target - 3D body co-ordinates (skels)
5 | files: "files" # Filenames for each sequence
6 |
7 | train: "./Data/tmp/train"
8 | dev: "./Data/tmp/dev"
9 | test: "./Data/tmp/test"
10 |
11 | max_sent_length: 300 # Max Sentence Length
12 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
13 | src_voc_min_freq: 0
14 | src_voc_limit: 2000
15 | src1_vocab: "./Configs/text_vocab.txt"
16 | src2_vocab: "./Configs/gloss_vocab.txt"
17 |
18 | training:
19 | random_seed: 27 # Random seed for initialisation
20 | optimizer: "adam" # Chosen optimiser (adam, ..)
21 | learning_rate: 0.001 # Initial model learning rate
22 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop
23 | weight_decay: 0.0 # Weight Decay
24 | clip_grad_norm: 5.0 # Gradient clipping value
25 | batch_size: 8 # Batch Size for training
26 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
27 | patience: 7 # How many epochs of no improvement causes a LR reduction
28 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
29 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
30 | epochs: 20000 # How many epochs to run for
31 | validation_freq: 10 # After how many steps to run a validation on the model
32 | logging_freq: 250 # After how many steps to log training progress
33 |   eval_metric: "dtw"  # Evaluation metric during training ('dtw', 'bt')
34 | model_dir: "./Models/Base" # Where the model shall be stored
35 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
36 | continue: True # Flag to continue from a previous saved model in the model_dir
37 | shuffle: True # Flag to shuffle the data during training
38 | use_cuda: True # Flag to use GPU cuda capabilities
39 | max_output_length: 300 # Max Output Length
40 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
41 | loss: "MSE" # Loss function (MSE, L1)
42 |
43 | model:
44 | initializer: "xavier" # Model initialisation (Xavier, ...)
45 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
46 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
47 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
48 | just_count_in: False # Flag for Just Counter Data Augmentation
49 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation
50 | noise_rate: 5 # Gaussian Noise rate
51 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
52 | encoder1: # Model Encoder
53 | type: "transformer"
54 | num_layers: 2 # Number of layers
55 | num_heads: 4 # Number of Heads
56 | embeddings:
57 | embedding_dim: 512 # Embedding Dimension
58 | dropout: 0.0 # Embedding Dropout
59 | hidden_size: 512 # Hidden Size Dimension
60 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
61 | dropout: 0.0 # Encoder Dropout
62 | encoder2: # Model Encoder
63 | type: "transformer"
64 | num_layers: 2 # Number of layers
65 | num_heads: 4 # Number of Heads
66 | embeddings:
67 | embedding_dim: 512 # Embedding Dimension
68 | dropout: 0.0 # Embedding Dropout
69 | hidden_size: 512 # Hidden Size Dimension
70 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
71 | dropout: 0.0 # Encoder Dropout
72 | decoder: # Model Decoder
73 | type: "transformer"
74 | num_layers: 2 # Number of layers
75 | num_heads: 4 # Number of Heads
76 | embeddings:
77 | embedding_dim: 512 # Embedding Dimension
78 | dropout: 0.0 # Embedding Dropout
79 | hidden_size: 512 # Hidden Size Dimension
80 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
81 | dropout: 0.0 # Decoder Dropout
82 |
--------------------------------------------------------------------------------
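For orientation, the `data` block above composes corpus paths from a split prefix plus the field names as file extensions: `train: "./Data/tmp/train"` with `trg: "skels"` resolves to `./Data/tmp/train.skels`, matching the files listed under Data/tmp later in this repo. A minimal sketch of reading those fields, assuming only PyYAML:

```python
import yaml

with open("./Configs/Base.yaml") as f:
    cfg = yaml.safe_load(f)["data"]

# Prefix + "." + field name gives each parallel file of a split.
prefix = cfg["train"]                     # "./Data/tmp/train"
src1_path = f'{prefix}.{cfg["src1"]}'     # "./Data/tmp/train.text"
src2_path = f'{prefix}.{cfg["src2"]}'     # "./Data/tmp/train.gloss"
trg_path = f'{prefix}.{cfg["trg"]}'       # "./Data/tmp/train.skels"
```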
/code/generation-model/dynamic_selection/Configs/Base_train_all.yaml:
--------------------------------------------------------------------------------
1 | data:
2 |
3 | src1: "joint_gloss"
4 | src2: "after_gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
5 | trg: "skels" # Target - 3D body co-ordinates (skels)
6 | files: "files" # Filenames for each sequence
7 |
8 | train: "../Data/Gloss_Dynamic/train"
9 | dev: "../Data/Gloss_Dynamic/dev"
10 | test: "../Data/Gloss_Dynamic/test"
11 |
12 |
13 | features: ["hands+body"]
14 | max_sent_length: 300 # Max Sentence Length
15 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
16 |
17 | src1_vocab: "./Configs/src_train.gloss.joint.no.repeat_vocab.txt"
18 | src2_vocab: "./Configs/src_train.gloss.after.no.repeat_vocab.txt"
19 |
20 | training:
21 | random_seed: 27 # Random seed for initialisation
22 | optimizer: "adam" # Chosen optimiser (adam, ..)
23 | learning_rate: 0.001 # Initial model learning rate
24 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop
25 | weight_decay: 0.0 # Weight Decay
26 | clip_grad_norm: 5.0 # Gradient clipping value
27 | batch_size: 8 # Batch Size for training
28 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
29 | patience: 7 # How many epochs of no improvement causes a LR reduction
30 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
31 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
32 | epochs: 20000 # How many epochs to run for
33 | validation_freq: 10000 # After how many steps to run a validation on the model
34 | logging_freq: 250 # After how many steps to log training progress
35 |   eval_metric: "dtw"  # Evaluation metric during training ('dtw', 'bt')
36 | model_dir: "./Models/Base_train_dynamic_2gloss_BCD_hard_smaller" # Where the model shall be stored
37 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
38 | continue: True # Flag to continue from a previous saved model in the model_dir
39 | shuffle: True # Flag to shuffle the data during training
40 | use_cuda: True # Flag to use GPU cuda capabilities
41 | max_output_length: 300 # Max Output Length
42 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
43 | loss: "MSE" # Loss function (MSE, L1)
44 |   T: 1  # Temperature for attention balance
45 |   balance: True  # Whether to do dynamic attention on the multi-views
46 | BDC_train: True
47 |
48 | model:
49 | initializer: "xavier" # Model initialisation (Xavier, ...)
50 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
51 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
52 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
53 | just_count_in: False # Flag for Just Counter Data Augmentation
54 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation
55 | noise_rate: 5 # Gaussian Noise rate
56 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
57 | encoder1: # Model Encoder
58 | type: "transformer"
59 | num_layers: 2 # Number of layers
60 | num_heads: 4 # Number of Heads
61 | embeddings:
62 | embedding_dim: 512 # Embedding Dimension
63 | dropout: 0.0 # Embedding Dropout
64 | hidden_size: 512 # Hidden Size Dimension
65 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
66 | dropout: 0.0 # Encoder Dropout
67 | encoder2: # Model Encoder
68 | type: "transformer"
69 | num_layers: 2 # Number of layers
70 | num_heads: 4 # Number of Heads
71 | embeddings:
72 | embedding_dim: 512 # Embedding Dimension
73 | dropout: 0.0 # Embedding Dropout
74 | hidden_size: 512 # Hidden Size Dimension
75 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
76 | dropout: 0.0 # Encoder Dropout
77 | encoder3: # Model Encoder
78 | type: "transformer"
79 | num_layers: 2 # Number of layers
80 | num_heads: 4 # Number of Heads
81 | embeddings:
82 | embedding_dim: 256 # Embedding Dimension
83 | dropout: 0.0 # Embedding Dropout
84 | hidden_size: 256 # Hidden Size Dimension
85 | ff_size: 1024 # Feed-forward dimension (4 x hidden_size)
86 | dropout: 0.0 # Encoder Dropout
87 | decoder: # Model Decoder
88 | type: "transformer"
89 | num_layers: 2 # Number of layers
90 | num_heads: 4 # Number of Heads
91 | embeddings:
92 | embedding_dim: 512 # Embedding Dimension
93 | dropout: 0.0 # Embedding Dropout
94 | hidden_size: 512 # Hidden Size Dimension
95 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
96 | dropout: 0.0 # Decoder Dropout
97 |
--------------------------------------------------------------------------------
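The `T`, `balance`, and `BDC_train` options above configure the dynamic attention over the two gloss views (`joint_gloss` and `after_gloss`). The actual mechanism lives in the model code; the following is only an illustrative sketch of what a softmax temperature `T` does when balancing two per-view scores, not the repo's implementation:

```python
import torch
import torch.nn.functional as F

def balance_views(score1, score2, T=1.0):
    # Higher T flattens the weights toward 0.5/0.5;
    # lower T sharpens them toward the stronger view.
    w = F.softmax(torch.stack([score1, score2]) / T, dim=0)
    return w[0], w[1]

w1, w2 = balance_views(torch.tensor(1.2), torch.tensor(0.4), T=1.0)
```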
/code/generation-model/dynamic_selection/Configs/Base_train_all_test.yaml:
--------------------------------------------------------------------------------
1 | data:
2 |
3 | src1: "joint_gloss"
4 | src2: "after_gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
5 | trg: "skels" # Target - 3D body co-ordinates (skels)
6 | files: "files" # Filenames for each sequence
7 |
8 | train: "../Data/Gloss_Dynamic/train"
9 | dev: "../Data/Gloss_Dynamic/dev"
10 | test: "../Data/Gloss_Dynamic/test"
11 |
12 |
13 | features: ["hands+body"]
14 | max_sent_length: 300 # Max Sentence Length
15 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
16 |
17 | src1_vocab: "./Configs/src_train.gloss.joint.no.repeat_vocab.txt"
18 | src2_vocab: "./Configs/src_train.gloss.after.no.repeat_vocab.txt"
19 |
20 | training:
21 | random_seed: 0 # Random seed for initialisation
22 | optimizer: "adam" # Chosen optimiser (adam, ..)
23 | learning_rate: 0.001 # Initial model learning rate
24 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop
25 | weight_decay: 0.0 # Weight Decay
26 | clip_grad_norm: 5.0 # Gradient clipping value
27 | batch_size: 8 # Batch Size for training
28 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
29 | patience: 7 # How many epochs of no improvement causes a LR reduction
30 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
31 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
32 | epochs: 20000 # How many epochs to run for
33 | validation_freq: 100 # After how many steps to run a validation on the model
34 | logging_freq: 250 # After how many steps to log training progress
35 |   eval_metric: "dtw"  # Evaluation metric during training ('dtw', 'bt')
36 | model_dir: "./Models/Base_train_dynamic_gloss_BDC" # Where the model shall be stored
37 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
38 | continue: True # Flag to continue from a previous saved model in the model_dir
39 | shuffle: True # Flag to shuffle the data during training
40 | use_cuda: True # Flag to use GPU cuda capabilities
41 | max_output_length: 300 # Max Output Length
42 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
43 | loss: "MSE" # Loss function (MSE, L1)
44 |   T: 1  # Temperature for attention balance
45 |   balance: True  # Whether to do dynamic attention on the multi-views
46 | BDC_train: True
47 |
48 | model:
49 | initializer: "xavier" # Model initialisation (Xavier, ...)
50 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
51 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
52 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
53 | just_count_in: False # Flag for Just Counter Data Augmentation
54 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation
55 | noise_rate: 5 # Gaussian Noise rate
56 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
57 | encoder1: # Model Encoder
58 | type: "transformer"
59 | num_layers: 2 # Number of layers
60 | num_heads: 4 # Number of Heads
61 | embeddings:
62 | embedding_dim: 512 # Embedding Dimension
63 | dropout: 0.0 # Embedding Dropout
64 | hidden_size: 512 # Hidden Size Dimension
65 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
66 | dropout: 0.0 # Encoder Dropout
67 | encoder2: # Model Encoder
68 | type: "transformer"
69 | num_layers: 2 # Number of layers
70 | num_heads: 4 # Number of Heads
71 | embeddings:
72 | embedding_dim: 512 # Embedding Dimension
73 | dropout: 0.0 # Embedding Dropout
74 | hidden_size: 512 # Hidden Size Dimension
75 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
76 | dropout: 0.0 # Encoder Dropout
77 | decoder: # Model Decoder
78 | type: "transformer"
79 | num_layers: 2 # Number of layers
80 | num_heads: 4 # Number of Heads
81 | embeddings:
82 | embedding_dim: 512 # Embedding Dimension
83 | dropout: 0.0 # Embedding Dropout
84 | hidden_size: 512 # Hidden Size Dimension
85 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
86 | dropout: 0.0 # Decoder Dropout
87 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Configs/Base_train_gloss_after_no.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | src1: "text"
3 | src2: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
4 | trg: "skels" # Target - 3D body co-ordinates (skels)
5 | files: "files" # Filenames for each sequence
6 |
7 | train: "./Data/train_gloss_after_no_repeat/train"
8 | dev: "./Data/train_gloss_after_no_repeat/dev"
9 | test: "./Data/train_gloss_after_no_repeat/test"
10 |
11 | features: ["hands+body"]
12 | max_sent_length: 300 # Max Sentence Length
13 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
14 | src1_vocab: "./Configs/text_vocab.txt"
15 | src2_vocab: "./Configs/src_train.gloss.after.no.repeat_vocab.txt"
16 |
17 | training:
18 | random_seed: 0 # Random seed for initialisation
19 | optimizer: "adam" # Chosen optimiser (adam, ..)
20 | learning_rate: 0.001 # Initial model learning rate
21 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop
22 | weight_decay: 0.0 # Weight Decay
23 | clip_grad_norm: 5.0 # Gradient clipping value
24 | batch_size: 8 # Batch Size for training
25 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
26 | patience: 7 # How many epochs of no improvement causes a LR reduction
27 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
28 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
29 | epochs: 20000 # How many epochs to run for
30 | validation_freq: 10000 # After how many steps to run a validation on the model
31 | logging_freq: 250 # After how many steps to log training progress
32 |   eval_metric: "dtw"  # Evaluation metric during training ('dtw', 'bt')
33 | model_dir: "./Models/Base_train_after_no" # Where the model shall be stored
34 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
35 | continue: True # Flag to continue from a previous saved model in the model_dir
36 | shuffle: True # Flag to shuffle the data during training
37 | use_cuda: True # Flag to use GPU cuda capabilities
38 | max_output_length: 300 # Max Output Length
39 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
40 | loss: "MSE" # Loss function (MSE, L1)
41 |
42 | model:
43 | initializer: "xavier" # Model initialisation (Xavier, ...)
44 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
45 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
46 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
47 | just_count_in: False # Flag for Just Counter Data Augmentation
48 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation
49 | noise_rate: 5 # Gaussian Noise rate
50 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
51 | encoder1: # Model Encoder
52 | type: "transformer"
53 | num_layers: 2 # Number of layers
54 | num_heads: 4 # Number of Heads
55 | embeddings:
56 | embedding_dim: 512 # Embedding Dimension
57 | dropout: 0.0 # Embedding Dropout
58 | hidden_size: 512 # Hidden Size Dimension
59 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
60 | dropout: 0.0 # Encoder Dropout
61 | encoder2: # Model Encoder
62 | type: "transformer"
63 | num_layers: 2 # Number of layers
64 | num_heads: 4 # Number of Heads
65 | embeddings:
66 | embedding_dim: 512 # Embedding Dimension
67 | dropout: 0.0 # Embedding Dropout
68 | hidden_size: 512 # Hidden Size Dimension
69 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
70 | dropout: 0.0 # Encoder Dropout
71 | decoder: # Model Decoder
72 | type: "transformer"
73 | num_layers: 2 # Number of layers
74 | num_heads: 4 # Number of Heads
75 | embeddings:
76 | embedding_dim: 512 # Embedding Dimension
77 | dropout: 0.0 # Embedding Dropout
78 | hidden_size: 512 # Hidden Size Dimension
79 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
80 | dropout: 0.0 # Decoder Dropout
81 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Configs/Base_train_gloss_basic.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | src1: "text"
3 | src2: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
4 | trg: "skels" # Target - 3D body co-ordinates (skels)
5 | files: "files" # Filenames for each sequence
6 |
7 | train: "./Data/PT/train"
8 | dev: "./Data/PT/dev"
9 | test: "./Data/PT/test"
10 |
11 | features: ["hands+body"]
12 | max_sent_length: 300 # Max Sentence Length
13 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
14 | src1_vocab: "./Configs/text_vocab.txt"
15 | src2_vocab: "./Configs/gloss_vocab.txt"
16 |
17 | training:
18 | random_seed: 0 # Random seed for initialisation
19 | optimizer: "adam" # Chosen optimiser (adam, ..)
20 | learning_rate: 0.001 # Initial model learning rate
21 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop
22 | weight_decay: 0.0 # Weight Decay
23 | clip_grad_norm: 5.0 # Gradient clipping value
24 | batch_size: 8 # Batch Size for training
25 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
26 | patience: 7 # How many epochs of no improvement causes a LR reduction
27 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
28 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
29 | epochs: 20000 # How many epochs to run for
30 | validation_freq: 10000 # After how many steps to run a validation on the model
31 | logging_freq: 250 # After how many steps to log training progress
32 |   eval_metric: "dtw"  # Evaluation metric during training ('dtw', 'bt')
33 | model_dir: "./Models/Base_train.gloss" # Where the model shall be stored
34 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
35 | continue: True # Flag to continue from a previous saved model in the model_dir
36 | shuffle: True # Flag to shuffle the data during training
37 | use_cuda: True # Flag to use GPU cuda capabilities
38 | max_output_length: 300 # Max Output Length
39 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
40 | loss: "MSE" # Loss function (MSE, L1)
41 |
42 | model:
43 | initializer: "xavier" # Model initialisation (Xavier, ...)
44 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
45 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
46 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
47 | just_count_in: False # Flag for Just Counter Data Augmentation
48 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation
49 | noise_rate: 5 # Gaussian Noise rate
50 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
51 | encoder1: # Model Encoder
52 | type: "transformer"
53 | num_layers: 2 # Number of layers
54 | num_heads: 4 # Number of Heads
55 | embeddings:
56 | embedding_dim: 512 # Embedding Dimension
57 | dropout: 0.0 # Embedding Dropout
58 | hidden_size: 512 # Hidden Size Dimension
59 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
60 | dropout: 0.0 # Encoder Dropout
61 | encoder2: # Model Encoder
62 | type: "transformer"
63 | num_layers: 2 # Number of layers
64 | num_heads: 4 # Number of Heads
65 | embeddings:
66 | embedding_dim: 512 # Embedding Dimension
67 | dropout: 0.0 # Embedding Dropout
68 | hidden_size: 512 # Hidden Size Dimension
69 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
70 | dropout: 0.0 # Encoder Dropout
71 | decoder: # Model Decoder
72 | type: "transformer"
73 | num_layers: 2 # Number of layers
74 | num_heads: 4 # Number of Heads
75 | embeddings:
76 | embedding_dim: 512 # Embedding Dimension
77 | dropout: 0.0 # Embedding Dropout
78 | hidden_size: 512 # Hidden Size Dimension
79 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
80 | dropout: 0.0 # Decoder Dropout
81 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Configs/Base_train_gloss_joint_yes.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | src1: "text"
3 | src2: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
4 | trg: "skels" # Target - 3D body co-ordinates (skels)
5 | files: "files" # Filenames for each sequence
6 |
7 | train: "./Data/train_gloss_joint_yes_repeat/train"
8 | dev: "./Data/train_gloss_joint_yes_repeat/dev"
9 | test: "./Data/train_gloss_joint_yes_repeat/test"
10 |
11 | features: ["hands+body"]
12 | max_sent_length: 300 # Max Sentence Length
13 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
14 | src1_vocab: "./Configs/text_vocab.txt"
15 | src2_vocab: "./Configs/src_train.gloss.joint.yes.repeat_vocab.txt"
16 |
17 | training:
18 | random_seed: 0 # Random seed for initialisation
19 | optimizer: "adam" # Chosen optimiser (adam, ..)
20 | learning_rate: 0.001 # Initial model learning rate
21 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop
22 | weight_decay: 0.0 # Weight Decay
23 | clip_grad_norm: 5.0 # Gradient clipping value
24 | batch_size: 8 # Batch Size for training
25 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
26 | patience: 7 # How many epochs of no improvement causes a LR reduction
27 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
28 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
29 | epochs: 20000 # How many epochs to run for
30 | validation_freq: 10000 # After how many steps to run a validation on the model
31 | logging_freq: 250 # After how many steps to log training progress
32 |   eval_metric: "dtw"  # Evaluation metric during training ('dtw', 'bt')
33 | model_dir: "./Models/Base_train_joint_yes" # Where the model shall be stored
34 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
35 | continue: True # Flag to continue from a previous saved model in the model_dir
36 | shuffle: True # Flag to shuffle the data during training
37 | use_cuda: True # Flag to use GPU cuda capabilities
38 | max_output_length: 300 # Max Output Length
39 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
40 | loss: "MSE" # Loss function (MSE, L1)
41 |
42 | model:
43 | initializer: "xavier" # Model initialisation (Xavier, ...)
44 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
45 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
46 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
47 | just_count_in: False # Flag for Just Counter Data Augmentation
48 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation
49 | noise_rate: 5 # Gaussian Noise rate
50 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
51 | encoder1: # Model Encoder
52 | type: "transformer"
53 | num_layers: 2 # Number of layers
54 | num_heads: 4 # Number of Heads
55 | embeddings:
56 | embedding_dim: 512 # Embedding Dimension
57 | dropout: 0.0 # Embedding Dropout
58 | hidden_size: 512 # Hidden Size Dimension
59 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
60 | dropout: 0.0 # Encoder Dropout
61 | encoder2: # Model Encoder
62 | type: "transformer"
63 | num_layers: 2 # Number of layers
64 | num_heads: 4 # Number of Heads
65 | embeddings:
66 | embedding_dim: 512 # Embedding Dimension
67 | dropout: 0.0 # Embedding Dropout
68 | hidden_size: 512 # Hidden Size Dimension
69 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
70 | dropout: 0.0 # Encoder Dropout
71 | decoder: # Model Decoder
72 | type: "transformer"
73 | num_layers: 2 # Number of layers
74 | num_heads: 4 # Number of Heads
75 | embeddings:
76 | embedding_dim: 512 # Embedding Dimension
77 | dropout: 0.0 # Embedding Dropout
78 | hidden_size: 512 # Hidden Size Dimension
79 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
80 | dropout: 0.0 # Decoder Dropout
81 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Configs/dynamic.yaml:
--------------------------------------------------------------------------------
1 | data:
2 |
3 | src1: "joint_gloss"
4 | src2: "after_gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
5 | trg: "skels" # Target - 3D body co-ordinates (skels)
6 | files: "files" # Filenames for each sequence
7 |
8 | train: "../Data/Gloss_Dynamic/train"
9 | dev: "../Data/Gloss_Dynamic/dev"
10 | test: "../Data/Gloss_Dynamic/test"
11 |
12 |
13 | features: ["hands+body"]
14 | max_sent_length: 300 # Max Sentence Length
15 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
16 |
17 | src1_vocab: "./Configs/src_train.gloss.joint.no.repeat_vocab_extend.txt"
18 | src2_vocab: "./Configs/src_train.gloss.after.no.repeat_vocab.txt"
19 |
20 | training:
21 | random_seed: 27 # Random seed for initialisation
22 | optimizer: "adam" # Chosen optimiser (adam, ..)
23 | learning_rate: 0.001 # Initial model learning rate
24 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop
25 | weight_decay: 0.0 # Weight Decay
26 | clip_grad_norm: 5.0 # Gradient clipping value
27 | batch_size: 8 # Batch Size for training
28 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
29 | patience: 7 # How many epochs of no improvement causes a LR reduction
30 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
31 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
32 | epochs: 20000 # How many epochs to run for
33 | validation_freq: 10000 # After how many steps to run a validation on the model
34 | logging_freq: 250 # After how many steps to log training progress
35 |   eval_metric: "dtw"  # Evaluation metric during training ('dtw', 'bt')
36 | model_dir: "./Models/Base_train_dynamic_multiview" # Where the model shall be stored
37 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
38 | continue: True # Flag to continue from a previous saved model in the model_dir
39 | shuffle: True # Flag to shuffle the data during training
40 | use_cuda: True # Flag to use GPU cuda capabilities
41 | max_output_length: 300 # Max Output Length
42 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
43 | loss: "MSE" # Loss function (MSE, L1)
44 |   T: 1  # Temperature for attention balance
45 |   balance: True  # Whether to do dynamic attention on the multi-views
46 | BDC_train: True
47 |
48 | model:
49 | initializer: "xavier" # Model initialisation (Xavier, ...)
50 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
51 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
52 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
53 | just_count_in: False # Flag for Just Counter Data Augmentation
54 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation
55 | noise_rate: 5 # Gaussian Noise rate
56 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
57 | encoder1: # Model Encoder
58 | type: "transformer"
59 | num_layers: 2 # Number of layers
60 | num_heads: 4 # Number of Heads
61 | embeddings:
62 | embedding_dim: 512 # Embedding Dimension
63 | dropout: 0.0 # Embedding Dropout
64 | hidden_size: 512 # Hidden Size Dimension
65 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
66 | dropout: 0.0 # Encoder Dropout
67 | decoder: # Model Decoder
68 | type: "transformer"
69 | num_layers: 2 # Number of layers
70 | num_heads: 4 # Number of Heads
71 | embeddings:
72 | embedding_dim: 512 # Embedding Dimension
73 | dropout: 0.0 # Embedding Dropout
74 | hidden_size: 512 # Hidden Size Dimension
75 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
76 | dropout: 0.0 # Decoder Dropout
77 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Data/tmp/dev.files:
--------------------------------------------------------------------------------
1 | dev/11August_2010_Wednesday_tagesschau-2
2 | dev/11August_2010_Wednesday_tagesschau-3
3 | dev/11August_2010_Wednesday_tagesschau-8
4 | dev/25October_2010_Monday_tagesschau-22
5 | dev/05May_2011_Thursday_tagesschau-25
6 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Data/tmp/dev.gloss:
--------------------------------------------------------------------------------
1 | DRUCK TIEF KOMMEN
2 | ES-BEDEUTET VIEL WOLKE UND KOENNEN REGEN GEWITTER KOENNEN
3 | WIND MAESSIG SCHWACH REGION WENN GEWITTER WIND KOENNEN
4 | MITTWOCH REGEN KOENNEN NORDWEST WAHRSCHEINLICH NORD STARK WIND
5 | JETZT WETTER WIE-AUSSEHEN MORGEN FREITAG SECHSTE MAI ZEIGEN-BILDSCHIRM
6 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Data/tmp/dev.text:
--------------------------------------------------------------------------------
1 | tiefer luftdruck bestimmt in den nächsten tagen unser wetter .
2 | das bedeutet viele wolken und immer wieder zum teil kräftige schauer und gewitter .
3 | meist weht nur ein schwacher wind aus unterschiedlichen richtungen der bei schauern und gewittern stark böig sein kann .
4 | am mittwoch hier und da nieselregen in der nordwesthälfte an den küsten kräftiger wind .
5 | und nun die wettervorhersage für morgen freitag den sechsten mai .
6 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Data/tmp/test.files:
--------------------------------------------------------------------------------
1 | test/25October_2010_Monday_tagesschau-17
2 | test/25October_2010_Monday_tagesschau-24
3 | test/15December_2010_Wednesday_tagesschau-37
4 | test/10March_2011_Thursday_heute-58
5 | test/14August_2009_Friday_tagesschau-62
6 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Data/tmp/test.gloss:
--------------------------------------------------------------------------------
1 | REGEN SCHNEE REGION VERSCHWINDEN NORD REGEN KOENNEN REGION STERN KOENNEN SEHEN
2 | DONNERSTAG NORDWEST REGEN REGION SONNE WOLKE WECHSELHAFT DANN FREITAG AEHNLICH WETTER
3 | KRAEFTIG AB MORGEN FRUEH MEISTENS SCHNEE SCHNEIEN KALT REGEN
4 | WOCHENENDE SONNE SAMSTAG SCHOEN TEMPERATUR BIS SIEBZEHN GRAD REGION
5 | DEUTSCH LAND MORGEN HOCH DRUCK KOMMEN WOLKE AUFLOESEN
6 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Data/tmp/test.text:
--------------------------------------------------------------------------------
1 | regen und schnee lassen an den alpen in der nacht nach im norden und nordosten fallen hier und da schauer sonst ist das klar .
2 | am donnerstag regen in der nordhälfte in der südhälfte mal sonne mal wolken ähnliches wetter dann auch am freitag .
3 | vom nordmeer zieht ein kräftiges tief heran und bringt uns ab den morgenstunden heftige schneefälle zum teil auch gefrierenden regen .
4 | sonnig geht es auch ins wochenende samstag ein herrlicher tag mit temperaturen bis siebzehn grad hier im westen .
5 | deutschland liegt morgen unter hochdruckeinfluss der die wolken weitgehend vertreibt .
6 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Data/tmp/train.files:
--------------------------------------------------------------------------------
1 | train/11August_2010_Wednesday_tagesschau-1
2 | train/11August_2010_Wednesday_tagesschau-4
3 | train/11August_2010_Wednesday_tagesschau-5
4 | train/11August_2010_Wednesday_tagesschau-6
5 | train/11August_2010_Wednesday_tagesschau-7
6 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Data/tmp/train.gloss:
--------------------------------------------------------------------------------
1 | JETZT WETTER MORGEN DONNERSTAG ZWOELF FEBRUAR
2 | ORT REGEN DURCH REGEN KOENNEN UEBERSCHWEMMUNG KOENNEN
3 | NORDWEST HEUTE NACHT TROCKEN BLEIBEN SUEDWEST KOENNEN REGEN ORT GEWITTER DAZU
4 | TAGSUEBER OFT REGEN GEWITTER KOENNEN MANCHMAL REGEN VIEL REGEN
5 | WOLKE LOCH SPEZIELL NORDWEST
6 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/Data/tmp/train.text:
--------------------------------------------------------------------------------
1 | und nun die wettervorhersage für morgen donnerstag den zwölften august .
2 | mancherorts regnet es auch länger und ergiebig auch lokale überschwemmungen sind wieder möglich .
3 | im nordwesten bleibt es heute nacht meist trocken sonst muss mit teilweise kräftigen schauern gerechnet werden örtlich mit blitz und donner .
4 | auch am tag gibt es verbreitet zum teil kräftige schauer oder gewitter und in manchen regionen fallen ergiebige regenmengen .
5 | größere wolkenlücken finden sich vor allem im nordwesten .
6 |
--------------------------------------------------------------------------------
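The tmp splits above are line-aligned parallel files: line i of `*.files`, `*.gloss`, and `*.text` describes the same sequence. A minimal sketch of reconstructing the triples for the dev split:

```python
# Zip the line-aligned files back into (file id, gloss, text) triples.
with open("Data/tmp/dev.files") as ids, \
     open("Data/tmp/dev.gloss") as glosses, \
     open("Data/tmp/dev.text") as texts:
    for file_id, gloss, text in zip(ids, glosses, texts):
        print(file_id.strip(), "|", gloss.strip(), "|", text.strip())
```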
/code/generation-model/dynamic_selection/README.md:
--------------------------------------------------------------------------------
1 | # Progressive Transformers for End-to-End Sign Language Production
2 |
3 | Source code for "Progressive Transformers for End-to-End Sign Language Production" (Ben Saunders, Necati Cihan Camgoz, Richard Bowden - ECCV 2020)
4 |
5 | Conference video available at https://twitter.com/BenMSaunders/status/1336638886198521857
6 |
7 | # Usage
8 |
9 | Install required packages using the requirements.txt file.
10 |
11 | `pip install -r requirements.txt`
12 |
13 | To run, start __main__.py with the arguments "train" and "./Configs/Base.yaml":
14 |
15 | `python __main__.py train ./Configs/Base.yaml`
16 |
17 | An example train.log file can be found in "./Configs/train.log" and a validation file at "./Configs/validations.txt"
18 |
19 | Back Translation model created from https://github.com/neccam/slt. Back Translation evaluation code coming soon.
20 |
21 | # Data
22 |
23 | Pre-processed Phoenix14T data can be requested via email at b.saunders@surrey.ac.uk. If you wish to create the data yourself, please follow the steps below:
24 |
25 | Phoenix14T data can be downloaded from https://www-i6.informatik.rwth-aachen.de/~koller/RWTH-PHOENIX-2014-T/ and skeleton joints can be extracted using OpenPose at https://github.com/CMU-Perceptual-Computing-Lab/openpose and lifted to 3D using the 2D to 3D Inverse Kinematics code at https://github.com/gopeith/SignLanguageProcessing under 3DposeEstimator.
26 |
27 | Prepare Phoenix14T (or other sign language dataset) data as .txt files for .skel, .gloss, .txt and .files. Data format should be parallel .txt files for "src", "trg" and "files", with each line representing a new sequence:
28 |
29 | - The "src" file contains source sentences, with each line representing a new sentence.
30 |
31 | - The "trg" file contains skeleton data of each frame, with a space separating frames. The joints should be divided by 3 to match the scaling I used. Each frame contains 150 joint values and a subsequent counter value, all separated by a space. Each sequence should be separated with a new line. If your data contains 150 joints per frame, please ensure that trg_size is set to 150 in the config file.
32 |
33 | - The "files" file should contain the name of each sequence on a new line.
34 |
35 | Examples can be found in /Data/tmp. Data path must be specified in config file.
36 |
37 | # Pre-Trained Model
38 |
39 | A pre-trained Progressive Transformer checkpoint can be downloaded from https://www.dropbox.com/s/l4xmnybp7luz0l3/PreTrained_PTSLP_Model.ckpt?dl=0.
40 |
41 | This model has a size of ```num_layers: 2```, ```num_heads: 4``` and ```embedding_dim: 512```, as outlined in ```./Configs/Base.yaml```. It has been pre-trained on the full PHOENIX14T dataset with the data format as above. The relevant train.log and validations.txt files can be found in ```./Configs```.
42 |
43 | To initialise a model from this checkpoint, pass the ```--ckpt ./PreTrained_PTSLP_Model.ckpt``` argument to either ```train``` or ```test``` modes. Additionally, to initialise the correct src_embed size, the config argument ```src_vocab: "./Configs/src_vocab.txt"``` must be set to the location of the src_vocab.txt, found under ```./Configs```. Please open an issue if this checkpoint cannot be downloaded or loaded.
44 |
45 | # Reference
46 |
47 | If you use this code in your research, please cite the following [papers](https://arxiv.org/abs/2004.14874):
48 |
49 | ```
50 | @inproceedings{saunders2020progressive,
51 | title = {{Progressive Transformers for End-to-End Sign Language Production}},
52 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard},
53 | booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
54 | year = {2020}}
55 |
56 | @inproceedings{saunders2020adversarial,
57 | title = {{Adversarial Training for Multi-Channel Sign Language Production}},
58 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard},
59 | booktitle = {Proceedings of the British Machine Vision Conference (BMVC)},
60 | year = {2020}}
61 |
62 | @inproceedings{saunders2021continuous,
63 | title = {{Continuous 3D Multi-Channel Sign Language Production via Progressive Transformers and Mixture Density Networks}},
64 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard},
65 | booktitle = {International Journal of Computer Vision (IJCV)},
66 | year = {2021}}
67 |
68 | ```
69 |
70 | ## Acknowledgements
71 | This work received funding from the SNSF Sinergia project 'SMILE' (CRSII2 160811), the European Union's Horizon 2020 research and innovation programme under grant agreement no. 762021 'Content4All' and the EPSRC project 'ExTOL' (EP/R03298X/1). This work reflects only the authors' view and the Commission is not responsible for any use that may be made of the information it contains. We would also like to thank NVIDIA Corporation for their GPU grant.
72 |
--------------------------------------------------------------------------------
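To make the `.skels` format above concrete, here is a hedged sketch of serialising one sequence: each frame contributes its 150 joint values (divided by 3, per the scaling note) plus a trailing counter value, with everything space-separated and one sequence per line. The normalized frame index used here is one plausible choice for that counter:

```python
def frames_to_skels_line(frames):
    """frames: list of per-frame lists of 150 raw joint coordinates."""
    parts = []
    for i, joints in enumerate(frames):
        parts.extend(f"{v / 3.0:.5f}" for v in joints)  # scale joints by 1/3
        parts.append(f"{i / len(frames):.5f}")          # trailing counter value
    return " ".join(parts)

# One sequence per line in e.g. train.skels:
# line = frames_to_skels_line(sequence_frames)
```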
/code/generation-model/dynamic_selection/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__main__.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from training import train, test
4 |
5 |
6 | def main():
7 |
8 | # Example options:
9 | # train ./Configs/Base.yaml
10 | # test ./Configs/Base.yaml
11 |
12 | ap = argparse.ArgumentParser("Progressive Transformers")
13 |
14 | # Choose between Train and Test
15 | ap.add_argument("mode", choices=["train", "test"],
16 | help="train a model or test")
17 | # Path to Config
18 | ap.add_argument("config_path", type=str,
19 | help="path to YAML config file")
20 |
21 | # Optional path to checkpoint
22 | ap.add_argument("--ckpt", type=str,
23 | help="path to model checkpoint")
24 |
25 | args = ap.parse_args()
26 |
27 | # If Train
28 | if args.mode == "train":
29 | train(cfg_file=args.config_path, ckpt=args.ckpt)
30 | # If Test
31 | elif args.mode == "test":
32 | test(cfg_file=args.config_path, ckpt=args.ckpt)
33 | else:
34 | raise ValueError("Unknown mode")
35 |
36 | if __name__ == "__main__":
37 | main()
38 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/batch.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/batch.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/builders.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/builders.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/constants.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/constants.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/data.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/data.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/decoders.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/decoders.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/dtw.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/dtw.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/embeddings.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/embeddings.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/encoders.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/encoders.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/helpers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/helpers.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/initialization.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/initialization.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/loss.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/model.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/model.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/plot_videos.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/plot_videos.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/prediction.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/prediction.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/search.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/search.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/training.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/training.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/transformer_layers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/transformer_layers.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/__pycache__/vocabulary.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/vocabulary.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/batch.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | """
4 | Implementation of a mini-batch.
5 | """
6 |
7 | import torch
8 | import torch.nn.functional as F
9 |
10 | from constants import TARGET_PAD
11 |
12 |
13 | class Batch:
14 | """Object for holding a batch of data with mask during training.
15 | Input is a batch from a torch text iterator.
16 | """
17 |
18 | def __init__(self, torch_batch, pad_index, model):
19 | """
20 | Create a new joey batch from a torch batch.
21 | This batch extends torch text's batch attributes with src and trg
22 | length, masks, number of non-padded tokens in trg.
23 | Furthermore, it can be sorted by src length.
24 |
25 |         :param torch_batch: batch produced by the torchtext iterator
26 |         :param pad_index: index of the pad token in the source vocabulary
27 |         :param model: provides use_cuda, just_count_in and future_prediction
28 | """
29 | self.src1, self.src1_lengths = torch_batch.src1
30 | self.src1_mask = (self.src1 != pad_index).unsqueeze(1)
31 | self.src2, self.src2_lengths = torch_batch.src2
32 | self.src2_mask = (self.src2 != pad_index).unsqueeze(1)
33 |
34 | self.nseqs = self.src1.size(0)
35 | self.trg_input = None
36 | self.trg = None
37 | self.trg_mask = None
38 | self.trg_lengths = None
39 | self.ntokens = None
40 |
41 | self.file_paths = torch_batch.file_paths
42 | self.use_cuda = model.use_cuda
43 | self.target_pad = TARGET_PAD
44 | # Just Count
45 | self.just_count_in = model.just_count_in
46 | # Future Prediction
47 | self.future_prediction = model.future_prediction
48 |
49 | if hasattr(torch_batch, "trg"):
50 | trg = torch_batch.trg
51 | trg_lengths = torch_batch.trg.shape[1]
52 | # trg_input is used for teacher forcing, last one is cut off
53 | # Remove the last frame for target input, as inputs are only up to frame N-1
54 | self.trg_input = trg.clone()[:, :-1, :]
55 |
56 | self.trg_lengths = trg_lengths
57 | # trg is used for loss computation, shifted by one since BOS
58 | self.trg = trg.clone()[:, 1:, :]
59 |
60 | # Just Count
61 | if self.just_count_in:
62 |                 # If Just Count, keep only the counter (the last feature) of trg_input
63 | self.trg_input = self.trg_input[:, :, -1:]
64 |
65 | # Future Prediction
66 | if self.future_prediction != 0:
67 | # Loop through the future prediction, concatenating the frames shifted across once each time
68 | future_trg = torch.Tensor()
69 | # Concatenate each frame (Not counter)
70 | for i in range(0, self.future_prediction):
71 | future_trg = torch.cat(
72 | (future_trg, self.trg[:, i:-(self.future_prediction - i), :-1].clone()), dim=2)
73 | # Create the final target using the collected future_trg and original trg
74 | self.trg = torch.cat(
75 | (future_trg, self.trg[:, :-self.future_prediction, -1:]), dim=2)
76 |
77 | # Cut off the last N frames of the trg_input
78 | self.trg_input = self.trg_input[:, :-self.future_prediction, :]
79 |
80 | # Target Pad is dynamic, so we exclude the padded areas from the loss computation
81 | trg_mask = (self.trg_input != self.target_pad).unsqueeze(1)
82 |             # Pad the target mask square, e.g. (16, 1, 120, 120), using
83 |             # replicate padding so the existing True/False values are continued
84 | pad_amount = self.trg_input.shape[1] - self.trg_input.shape[2]
85 | # Create the target mask the same size as target input
86 | self.trg_mask = (F.pad(input=trg_mask.double(), pad=(
87 | pad_amount, 0, 0, 0), mode='replicate') == 1.0)
88 | self.ntokens = (self.trg != pad_index).data.sum().item()
89 |
90 | if self.use_cuda:
91 | self._make_cuda()
92 |
93 | # If using Cuda
94 | def _make_cuda(self):
95 | """
96 | Move the batch to GPU
97 |
98 | :return:
99 | """
100 | self.src1 = self.src1.cuda()
101 | self.src1_mask = self.src1_mask.cuda()
102 | self.src2 = self.src2.cuda()
103 | self.src2_mask = self.src2_mask.cuda()
104 |
105 | if self.trg_input is not None:
106 | self.trg_input = self.trg_input.cuda()
107 | self.trg = self.trg.cuda()
108 | self.trg_mask = self.trg_mask.cuda()
109 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/constants.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Defining global constants
4 | """
5 |
6 | UNK_TOKEN = '<unk>'
7 | PAD_TOKEN = '<pad>'
8 | BOS_TOKEN = '<s>'
9 | EOS_TOKEN = '</s>'
10 |
11 | TARGET_PAD = 0.0
12 |
13 | DEFAULT_UNK_ID = lambda: 0
14 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/dtw.py:
--------------------------------------------------------------------------------
1 | from numpy import array, zeros, full, argmin, inf, ndim
2 | from scipy.spatial.distance import cdist
3 | from math import isinf
4 |
5 | """
6 | Dynamic time warping (DTW) is used as a similarity measure between temporal sequences.
7 | Original DTW code found at https://github.com/pierre-rouanet/dtw
8 |
9 | """
10 |
11 | # Apply DTW
12 | def dtw(x, y, dist, warp=1, w=inf, s=1.0):
13 | """
14 | Computes Dynamic Time Warping (DTW) of two sequences.
15 |
16 | :param array x: N1*M array
17 | :param array y: N2*M array
18 | :param func dist: distance used as cost measure
19 |     :param int warp: how many shifts are computed.
20 |     :param int w: window size limiting the maximal distance between indices of matched entries |i - j|.
21 |     :param float s: weight applied on off-diagonal moves of the path. As s gets larger, the warping path is increasingly biased towards the diagonal.
22 |     Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
23 | """
24 | assert len(x)
25 | assert len(y)
26 | assert isinf(w) or (w >= abs(len(x) - len(y)))
27 | assert s > 0
28 | r, c = len(x), len(y)
29 | if not isinf(w):
30 | D0 = full((r + 1, c + 1), inf)
31 | for i in range(1, r + 1):
32 | D0[i, max(1, i - w):min(c + 1, i + w + 1)] = 0
33 | D0[0, 0] = 0
34 | else:
35 | D0 = zeros((r + 1, c + 1))
36 | D0[0, 1:] = inf
37 | D0[1:, 0] = inf
38 | D1 = D0[1:, 1:] # view
39 | for i in range(r):
40 | for j in range(c):
41 | if (isinf(w) or (max(0, i - w) <= j <= min(c, i + w))):
42 | D1[i, j] = dist(x[i], y[j])
43 | C = D1.copy()
44 | jrange = range(c)
45 | for i in range(r):
46 | if not isinf(w):
47 | jrange = range(max(0, i - w), min(c, i + w + 1))
48 | for j in jrange:
49 | min_list = [D0[i, j]]
50 | for k in range(1, warp + 1):
51 | i_k = min(i + k, r)
52 | j_k = min(j + k, c)
53 | min_list += [D0[i_k, j] * s, D0[i, j_k] * s]
54 | D1[i, j] += min(min_list)
55 | if len(x) == 1:
56 | path = zeros(len(y)), range(len(y))
57 | elif len(y) == 1:
58 | path = range(len(x)), zeros(len(x))
59 | else:
60 | path = _traceback(D0)
61 | return D1[-1, -1], C, D1, path
62 |
63 | def _traceback(D):
64 | i, j = array(D.shape) - 2
65 | p, q = [i], [j]
66 | while (i > 0) or (j > 0):
67 | tb = argmin((D[i, j], D[i, j + 1], D[i + 1, j]))
68 | if tb == 0:
69 | i -= 1
70 | j -= 1
71 | elif tb == 1:
72 | i -= 1
73 | else: # (tb == 2):
74 | j -= 1
75 | p.insert(0, i)
76 | q.insert(0, j)
77 | return array(p), array(q)
78 |
79 |
80 | if __name__ == '__main__':
81 | w = inf
82 | s = 1.0
83 | if 1: # 1-D numeric
84 | from sklearn.metrics.pairwise import manhattan_distances
85 | x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
86 | y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]
87 | dist_fun = manhattan_distances
88 | w = 1
89 | # s = 1.2
90 | elif 0: # 2-D numeric
91 | from sklearn.metrics.pairwise import euclidean_distances
92 | x = [[0, 0], [0, 1], [1, 1], [1, 2], [2, 2], [4, 3], [2, 3], [1, 1], [2, 2], [0, 1]]
93 | y = [[1, 0], [1, 1], [1, 1], [2, 1], [4, 3], [4, 3], [2, 3], [3, 1], [1, 2], [1, 0]]
94 | dist_fun = euclidean_distances
95 | else: # 1-D list of strings
96 | from nltk.metrics.distance import edit_distance
97 | # x = ['we', 'shelled', 'clams', 'for', 'the', 'chowder']
98 | # y = ['class', 'too']
99 | x = ['i', 'soon', 'found', 'myself', 'muttering', 'to', 'the', 'walls']
100 | y = ['see', 'drown', 'himself']
101 | # x = 'we talked about the situation'.split()
102 | # y = 'we talked about the situation'.split()
103 | dist_fun = edit_distance
104 | dist, cost, acc, path = dtw(x, y, dist_fun, w=w, s=s)
105 |
106 |     # Visualize
107 | from matplotlib import pyplot as plt
108 | plt.imshow(cost.T, origin='lower', cmap=plt.cm.Reds, interpolation='nearest')
109 | plt.plot(path[0], path[1], '-o') # relation
110 | plt.xticks(range(len(x)), x)
111 | plt.yticks(range(len(y)), y)
112 | plt.xlabel('x')
113 | plt.ylabel('y')
114 | plt.axis('tight')
115 | if isinf(w):
116 | plt.title('Minimum distance: {}, slope weight: {}'.format(dist, s))
117 | else:
118 |         plt.title('Minimum distance: {}, window width: {}, slope weight: {}'.format(dist, w, s))
119 | plt.show()
120 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/embeddings.py:
--------------------------------------------------------------------------------
1 | import math
2 | from torch import nn, Tensor
3 | from helpers import freeze_params
4 | import torch
5 |
6 | class MaskedNorm(nn.Module):
7 | """
8 | Original Code from:
9 | https://discuss.pytorch.org/t/batchnorm-for-different-sized-samples-in-batch/44251/8
10 | """
11 |
12 | def __init__(self, norm_type, num_groups, num_features):
13 | super().__init__()
14 | self.norm_type = norm_type
15 | if self.norm_type == "batch":
16 | self.norm = nn.BatchNorm1d(num_features=num_features)
17 | elif self.norm_type == "group":
18 | self.norm = nn.GroupNorm(num_groups=num_groups, num_channels=num_features)
19 | elif self.norm_type == "layer":
20 | self.norm = nn.LayerNorm(normalized_shape=num_features)
21 | else:
22 | raise ValueError("Unsupported Normalization Layer")
23 |
24 | self.num_features = num_features
25 |
26 | def forward(self, x: Tensor, mask: Tensor):
27 | if self.training:
28 | reshaped = x.reshape([-1, self.num_features])
29 | reshaped_mask = mask.reshape([-1, 1]) > 0
30 | selected = torch.masked_select(reshaped, reshaped_mask).reshape(
31 | [-1, self.num_features]
32 | )
33 | batch_normed = self.norm(selected)
34 | scattered = reshaped.masked_scatter(reshaped_mask, batch_normed)
35 | return scattered.reshape([x.shape[0], -1, self.num_features])
36 | else:
37 | reshaped = x.reshape([-1, self.num_features])
38 |             batch_normed = self.norm(reshaped)
39 |             return batch_normed.reshape([x.shape[0], -1, self.num_features])
40 |
41 | class Embeddings(nn.Module):
42 |
43 | """
44 | Simple embeddings class
45 | """
46 |
47 | # pylint: disable=unused-argument
48 | def __init__(self,
49 | embedding_dim: int = 64,
50 | scale: bool = False,
51 | vocab_size: int = 0,
52 | padding_idx: int = 1,
53 | freeze: bool = False,
54 | **kwargs):
55 | """
56 | Create new embeddings for the vocabulary.
57 | Use scaling for the Transformer.
58 |
59 | :param embedding_dim:
60 | :param scale:
61 | :param vocab_size:
62 | :param padding_idx:
63 | :param freeze: freeze the embeddings during training
64 | """
65 | super(Embeddings, self).__init__()
66 |
67 | self.embedding_dim = embedding_dim
68 | self.scale = scale
69 | self.vocab_size = vocab_size
70 | self.lut = nn.Embedding(vocab_size, self.embedding_dim,
71 | padding_idx=padding_idx)
72 |
73 | if freeze:
74 | freeze_params(self)
75 |
76 | # pylint: disable=arguments-differ
77 | def forward(self, x: Tensor) -> Tensor:
78 | """
79 | Perform lookup for input `x` in the embedding table.
80 |
81 | :param x: index in the vocabulary
82 | :return: embedded representation for `x`
83 | """
84 | if self.scale:
85 | return self.lut(x) * math.sqrt(self.embedding_dim)
86 | return self.lut(x)
87 |
88 | def __repr__(self):
89 | return "%s(embedding_dim=%d, vocab_size=%d)" % (
90 | self.__class__.__name__, self.embedding_dim, self.vocab_size)
91 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/encoders.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import torch
4 | import torch.nn as nn
5 | from torch import Tensor
6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
7 | import torch.nn.functional as F
8 |
9 | from helpers import freeze_params
10 | from transformer_layers import \
11 | TransformerEncoderLayer, PositionalEncoding
12 |
13 | from embeddings import MaskedNorm
14 |
15 | #pylint: disable=abstract-method
16 | class Encoder(nn.Module):
17 | """
18 | Base encoder class
19 | """
20 | @property
21 | def output_size(self):
22 | """
23 | Return the output size
24 |
25 | :return:
26 | """
27 | return self._output_size
28 |
29 | class TransformerEncoder(Encoder):
30 | """
31 | Transformer Encoder
32 | """
33 |
34 | #pylint: disable=unused-argument
35 | def __init__(self,
36 | hidden_size: int = 512,
37 | ff_size: int = 2048,
38 | num_layers: int = 8,
39 | num_heads: int = 4,
40 | dropout: float = 0.1,
41 | emb_dropout: float = 0.1,
42 | freeze: bool = False,
43 | **kwargs):
44 | """
45 | Initializes the Transformer.
46 | :param hidden_size: hidden size and size of embeddings
47 | :param ff_size: position-wise feed-forward layer size.
48 | (Typically this is 2*hidden_size.)
49 | :param num_layers: number of layers
50 | :param num_heads: number of heads for multi-headed attention
51 | :param dropout: dropout probability for Transformer layers
52 | :param emb_dropout: Is applied to the input (word embeddings).
53 | :param freeze: freeze the parameters of the encoder during training
54 | :param kwargs:
55 | """
56 | super(TransformerEncoder, self).__init__()
57 |
58 | # build all (num_layers) layers
59 | self.layers = nn.ModuleList([
60 | TransformerEncoderLayer(size=hidden_size, ff_size=ff_size,
61 | num_heads=num_heads, dropout=dropout)
62 | for _ in range(num_layers)])
63 |
64 | self.layer_norm = nn.LayerNorm(hidden_size, eps=1e-6)
65 | self.pe = PositionalEncoding(hidden_size)
66 | self.emb_dropout = nn.Dropout(p=emb_dropout)
67 | self._output_size = hidden_size
68 |
69 | if freeze:
70 | freeze_params(self)
71 |
72 | #pylint: disable=arguments-differ
73 | def forward(self,
74 | embed_src: Tensor,
75 | src_length: Tensor,
76 | mask: Tensor) -> (Tensor, Tensor):
77 | """
78 | Pass the input (and mask) through each layer in turn.
79 |         Applies a Transformer encoder to a sequence of embeddings x.
80 |         x has shape [batch, time, dim]; the mask marks, per sequence,
81 |         which time steps are real tokens rather than padding.
82 |
83 | :param embed_src: embedded src inputs,
84 | shape (batch_size, src_len, embed_size)
85 | :param src_length: length of src inputs
86 | (counting tokens before padding), shape (batch_size)
87 |         :param mask: indicates padding areas (zeros where padding), shape
88 |             (batch_size, 1, src_len)
89 |         :return:
90 |             - output: layer-normalised hidden states with
91 |                 shape (batch_size, max_length, hidden_size),
92 |             - None (the Transformer encoder has no separate
93 |                 final hidden state to return)
94 | """
95 |
96 | x = embed_src
97 |
98 | # Add position encoding to word embeddings
99 | x = self.pe(x)
100 | # Add Dropout
101 | x = self.emb_dropout(x)
102 |
103 | # Apply each layer to the input
104 | for layer in self.layers:
105 | x = layer(x, mask)
106 |
107 | return self.layer_norm(x), None
108 |
109 | def __repr__(self):
110 | return "%s(num_layers=%r, num_heads=%r)" % (
111 | self.__class__.__name__, len(self.layers),
112 | self.layers[0].src_src_att.num_heads)
113 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/external_metrics/mscoco_rouge.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # File Name : mscoco_rouge.py
4 | #
5 | # Description : Computes ROUGE-L metric as described by Lin (2004)
6 | #
7 | # Creation Date : 2015-01-07 06:03
8 | # Author : Ramakrishna Vedantam
9 |
10 |
11 | def my_lcs(string, sub):
12 | """
13 | Calculates longest common subsequence for a pair of tokenized strings
14 | :param string : list of str : tokens from a string split using whitespace
15 | :param sub : list of str : shorter string, also split using whitespace
16 |     :returns: length (int): length of the longest common subsequence between the two strings
17 |
18 | Note: my_lcs only gives length of the longest common subsequence, not the actual LCS
19 | """
20 | if len(string) < len(sub):
21 | sub, string = string, sub
22 |
23 | lengths = [[0 for i in range(0, len(sub) + 1)] for j in range(0, len(string) + 1)]
24 |
25 | for j in range(1, len(sub) + 1):
26 | for i in range(1, len(string) + 1):
27 | if string[i - 1] == sub[j - 1]:
28 | lengths[i][j] = lengths[i - 1][j - 1] + 1
29 | else:
30 | lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1])
31 |
32 | return lengths[len(string)][len(sub)]
33 |
34 |
35 | def calc_score(hypotheses, references, beta=1.2):
36 | """
37 | Compute ROUGE-L score given one candidate and references for an image
38 |     :param hypotheses: list of str : candidate sentence(s) to be evaluated; must contain exactly one sentence
39 |     :param references: list of str : COCO reference sentences for the particular image to be evaluated
40 |     :returns score: float (ROUGE-L score for the candidate evaluated against references)
41 | """
42 | assert len(hypotheses) == 1
43 | assert len(references) > 0
44 | prec = []
45 | rec = []
46 |
47 | # split into tokens
48 | token_c = hypotheses[0].split(" ")
49 |
50 | for reference in references:
51 | # split into tokens
52 | token_r = reference.split(" ")
53 | # compute the longest common subsequence
54 | lcs = my_lcs(token_r, token_c)
55 | prec.append(lcs / float(len(token_c)))
56 | rec.append(lcs / float(len(token_r)))
57 |
58 | prec_max = max(prec)
59 | rec_max = max(rec)
60 |
61 | if prec_max != 0 and rec_max != 0:
62 | score = ((1 + beta ** 2) * prec_max * rec_max) / float(
63 | rec_max + beta ** 2 * prec_max
64 | )
65 | else:
66 | score = 0.0
67 | return score
68 |
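69 | 
70 | # Hypothetical usage sketch (not part of the original file): ROUGE-L for a
71 | # single candidate against one reference.
72 | if __name__ == "__main__":
73 |     hyp = ["the cat sat on the mat"]
74 |     ref = ["the cat was sitting on the mat"]
75 |     # The LCS is "the cat on the mat" (5 tokens), giving precision 5/6 and
76 |     # recall 5/7 before the F-beta combination with beta = 1.2.
77 |     print(calc_score(hypotheses=hyp, references=ref))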
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/loss.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Module to implement training loss
4 | """
5 |
6 | from torch import nn, Tensor
7 |
8 | class RegLoss(nn.Module):
9 | """
10 | Regression Loss
11 | """
12 |
13 | def __init__(self, cfg, target_pad=0.0):
14 | super(RegLoss, self).__init__()
15 |
16 | self.loss = cfg["training"]["loss"].lower()
17 |
18 | if self.loss == "l1":
19 | self.criterion = nn.L1Loss()
20 | elif self.loss == "mse":
21 | self.criterion = nn.MSELoss()
22 |
23 | else:
24 | print("Loss not found - revert to default L1 loss")
25 | self.criterion = nn.L1Loss()
26 |
27 | model_cfg = cfg["model"]
28 |
29 | self.target_pad = target_pad
30 | self.loss_scale = model_cfg.get("loss_scale", 1.0)
31 |
32 | # pylint: disable=arguments-differ
33 | def forward(self, preds, targets):
34 |
35 | loss_mask = (targets != self.target_pad)
36 |
37 | # Find the masked predictions and targets using loss mask
38 | preds_masked = preds * loss_mask
39 | targets_masked = targets * loss_mask
40 |
41 | # Calculate loss just over the masked predictions
42 | loss = self.criterion(preds_masked, targets_masked)
43 |
44 | # Multiply loss by the loss scale
45 | if self.loss_scale != 1.0:
46 | loss = loss * self.loss_scale
47 |
48 | return loss
49 |
50 | class XentLoss(nn.Module):
51 | """
52 |     Cross-Entropy Loss (the smoothing argument is stored but not applied here)
53 | """
54 |
55 | def __init__(self, pad_index: int, smoothing: float = 0.0):
56 | super(XentLoss, self).__init__()
57 | self.smoothing = smoothing
58 | self.pad_index = pad_index
59 | # standard xent loss
60 | self.criterion = nn.NLLLoss(ignore_index=self.pad_index,
61 | reduction='sum')
62 |
63 | # pylint: disable=arguments-differ
64 | def forward(self, log_probs, targets):
65 | """
66 | Compute the cross-entropy between logits and targets.
67 | If label smoothing is used, target distributions are not one-hot, but
68 | "1-smoothing" for the correct target token and the rest of the
69 | probability mass is uniformly spread across the other tokens.
70 | :param log_probs: log probabilities as predicted by model
71 | :param targets: target indices
72 | :return:
73 | """
74 | # targets: indices with batch*seq_len
75 | targets = targets.contiguous().view(-1)
76 | loss = self.criterion(
77 | log_probs.contiguous().view(-1, log_probs.size(-1)), targets)
78 |
79 | return loss
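80 | 
81 | 
82 | # A minimal sketch (not part of the original repo) of how label smoothing
83 | # could be wired into XentLoss, in the style of joeynmt: build a smoothed
84 | # target distribution and score it with nn.KLDivLoss(reduction='sum')
85 | # instead of nn.NLLLoss. The helper name and signature are illustrative.
86 | import torch
87 | 
88 | def smooth_targets(targets: Tensor, vocab_size: int, pad_index: int,
89 |                    smoothing: float) -> Tensor:
90 |     """
91 |     Return a (batch*seq_len, vocab_size) distribution with probability
92 |     1-smoothing on the gold token and the remaining mass spread uniformly
93 |     over the non-pad tokens.
94 |     """
95 |     smooth_dist = targets.new_zeros((targets.size(0), vocab_size)).float()
96 |     # spread the smoothing mass over all tokens except gold and pad
97 |     smooth_dist.fill_(smoothing / (vocab_size - 2))
98 |     # place the remaining 1-smoothing mass on the gold indices
99 |     smooth_dist.scatter_(1, targets.unsqueeze(1), 1.0 - smoothing)
100 |     # the pad token never receives probability mass
101 |     smooth_dist[:, pad_index] = 0
102 |     # rows at padding positions are zeroed out entirely
103 |     pad_positions = torch.nonzero(targets == pad_index)
104 |     if len(pad_positions) > 0:
105 |         smooth_dist.index_fill_(0, pad_positions.squeeze(1), 0.0)
106 |     return smooth_dist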
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/metrics.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | This module holds various MT evaluation metrics.
4 | """
5 |
6 | from external_metrics import sacrebleu
7 | from external_metrics import mscoco_rouge
8 | import logging
9 | import numpy as np
10 |
11 | def wer(hypotheses, references):
12 | wer = del_rate = ins_rate = sub_rate = 0
13 | n_seq = len(hypotheses)
14 |
15 | for h, r in zip(hypotheses, references):
16 | res = wer_single(r=r, h=h)
17 | wer += res["wer"] / n_seq
18 | del_rate += res["del"] / n_seq
19 | ins_rate += res["ins"] / n_seq
20 | sub_rate += res["sub"] / n_seq
21 |
22 | return {"wer": wer, "del": del_rate, "ins": ins_rate, "sub": sub_rate}
23 |
24 | def wer_single(r, h):
25 | edit_distance_matrix = editDistance(r=r, h=h)
26 | step_list = getStepList(r=r, h=h, d=edit_distance_matrix)
27 |
28 | min_distance = float(edit_distance_matrix[len(r)][len(h)])
29 | num_del = float(np.sum([s == "d" for s in step_list]))
30 | num_ins = float(np.sum([s == "i" for s in step_list]))
31 | num_sub = float(np.sum([s == "s" for s in step_list]))
32 |
33 | word_error_rate = round((min_distance / len(r) * 100), 4)
34 | del_rate = round((num_del / len(r) * 100), 4)
35 | ins_rate = round((num_ins / len(r) * 100), 4)
36 | sub_rate = round((num_sub / len(r) * 100), 4)
37 |
38 | return {"wer": word_error_rate, "del": del_rate, "ins": ins_rate, "sub": sub_rate}
39 |
40 | # Calculate ROUGE scores
41 | def rouge(hypotheses, references):
42 | rouge_score = 0
43 | n_seq = len(hypotheses)
44 |
45 | for h, r in zip(hypotheses, references):
46 | rouge_score += mscoco_rouge.calc_score(hypotheses=[h], references=[r]) / n_seq
47 |
48 | return rouge_score
49 |
50 | # Calculate CHRF scores
51 | def chrf(hypotheses, references):
52 | """
53 | Character F-score from sacrebleu
54 |
55 | :param hypotheses: list of hypotheses (strings)
56 | :param references: list of references (strings)
57 | :return:
58 | """
59 | return sacrebleu.corpus_chrf(hypotheses=hypotheses, references=references)
60 |
61 | # Calculate BLEU scores
62 | def bleu(hypotheses, references, all=False):
63 | """
64 | Raw corpus BLEU from sacrebleu (without tokenization)
65 |
66 | :param hypotheses: list of hypotheses (strings)
67 | :param references: list of references (strings)
68 |     :return: corpus BLEU-4 score
69 | """
70 | logger = logging.getLogger(__name__)
71 | bleu_scores = sacrebleu.raw_corpus_bleu(
72 | sys_stream=hypotheses, ref_streams=[references]
73 | ).scores
74 | scores = {}
75 | for n in range(len(bleu_scores)):
76 | scores["bleu" + str(n + 1)] = bleu_scores[n]
77 |
78 | rouge_score = rouge(hypotheses, references) * 100
79 | #wer_d = wer(hypotheses, references)
80 | #werStr = "WER: " + str(wer_d["wer"])
81 |
82 | bleu1Str = "BLEU-1: " + str(scores["bleu1"])
83 | bleu2Str = "BLEU-2: " + str(scores["bleu2"])
84 | bleu3Str = "BLEU-3: " + str(scores["bleu3"])
85 | bleu4Str = "BLEU-4: " + str(scores["bleu4"])
86 | rougeStr = "Rouge: " + str(rouge_score)
87 |
88 | logger.info(bleu1Str)
89 | logger.info(bleu2Str)
90 | logger.info(bleu3Str)
91 | logger.info(bleu4Str)
92 | logger.info(rougeStr)
93 |
94 | print("--------------- New Scores ------------")
95 | print(bleu1Str)
96 | print(bleu2Str)
97 | print(bleu3Str)
98 | print(bleu4Str)
99 | print(rougeStr)
100 | #print(werStr)
101 |
102 | return bleu_scores[3]
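103 | 
104 | 
105 | # The two helpers below are called by wer_single() above but were missing
106 | # from this file. What follows is a minimal reconstruction of the standard
107 | # Levenshtein-alignment helpers (a sketch, not the authors' original code).
108 | def editDistance(r, h):
109 |     """
110 |     Edit-distance matrix between reference r and hypothesis h (token lists);
111 |     d[i][j] is the edit distance between r[:i] and h[:j].
112 |     """
113 |     d = np.zeros((len(r) + 1, len(h) + 1), dtype=np.int32)
114 |     d[:, 0] = np.arange(len(r) + 1)
115 |     d[0, :] = np.arange(len(h) + 1)
116 |     for i in range(1, len(r) + 1):
117 |         for j in range(1, len(h) + 1):
118 |             if r[i - 1] == h[j - 1]:
119 |                 d[i][j] = d[i - 1][j - 1]
120 |             else:
121 |                 substitution = d[i - 1][j - 1] + 1
122 |                 insertion = d[i][j - 1] + 1
123 |                 deletion = d[i - 1][j] + 1
124 |                 d[i][j] = min(substitution, insertion, deletion)
125 |     return d
126 | 
127 | def getStepList(r, h, d):
128 |     """
129 |     Backtrace through the matrix from editDistance(), yielding one step per
130 |     alignment position: "e" (equal), "s" (substitution), "i" (insertion)
131 |     or "d" (deletion).
132 |     """
133 |     x, y = len(r), len(h)
134 |     step_list = []
135 |     while x > 0 or y > 0:
136 |         if x > 0 and y > 0 and d[x][y] == d[x - 1][y - 1] and r[x - 1] == h[y - 1]:
137 |             step_list.append("e")
138 |             x, y = x - 1, y - 1
139 |         elif y > 0 and d[x][y] == d[x][y - 1] + 1:
140 |             step_list.append("i")
141 |             y = y - 1
142 |         elif x > 0 and y > 0 and d[x][y] == d[x - 1][y - 1] + 1:
143 |             step_list.append("s")
144 |             x, y = x - 1, y - 1
145 |         else:
146 |             step_list.append("d")
147 |             x = x - 1
148 |     return step_list[::-1]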
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/optim/lookahead.py:
--------------------------------------------------------------------------------
1 | # from https://github.com/mgrankin/over9000/blob/master/lookahead.py
2 | # Lookahead implementation from https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py
3 |
4 | import itertools as it
5 | from torch.optim import Optimizer, Adam
6 |
7 |
8 | # TODO: This code is buggy. It did not support state saving;
9 | # I did some ad-hoc fixes to get it working, but no one should
10 | # use it for training on condor etc.
11 | # Sadly this eliminates over9000 + ranger from the options to train
12 | # on servers.
13 | class Lookahead(Optimizer):
14 | def __init__(self, base_optimizer, alpha=0.5, k=6):
15 | if not 0.0 <= alpha <= 1.0:
16 | raise ValueError(f"Invalid slow update rate: {alpha}")
17 | if not 1 <= k:
18 | raise ValueError(f"Invalid lookahead steps: {k}")
19 |
20 | self.optimizer = base_optimizer
21 | self.state = base_optimizer.state
22 | self.defaults = base_optimizer.defaults
23 |
24 | self.param_groups = self.optimizer.param_groups
25 | self.alpha = alpha
26 | self.k = k
27 | for group in self.param_groups:
28 | group["step_counter"] = 0
29 | self.slow_weights = [
30 | [p.clone().detach() for p in group["params"]] for group in self.param_groups
31 | ]
32 |
33 | for w in it.chain(*self.slow_weights):
34 | w.requires_grad = False
35 |
36 |     def step(self, closure=None):
37 |         # Delegate the parameter update (and the optional closure for
38 |         # re-evaluating the loss) to the wrapped base optimizer,
39 |         # keeping the loss it returns
40 |         loss = self.optimizer.step(closure)
41 | for group, slow_weights in zip(self.param_groups, self.slow_weights):
42 | group["step_counter"] += 1
43 | if group["step_counter"] % self.k != 0:
44 | continue
45 | for p, q in zip(group["params"], slow_weights):
46 | if p.grad is None:
47 | continue
48 | q.data.add_(self.alpha, p.data - q.data)
49 | p.data.copy_(q.data)
50 | return loss
51 |
52 |
53 | def LookaheadAdam(params, alpha=0.5, k=6, *args, **kwargs):
54 | adam = Adam(params, *args, **kwargs)
55 | return Lookahead(adam, alpha, k)
56 |
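57 | 
58 | # Hypothetical usage sketch (not part of the original file): wrap Adam in
59 | # Lookahead and take one optimisation step. The model and hyper-parameters
60 | # below are illustrative only.
61 | if __name__ == "__main__":
62 |     import torch
63 | 
64 |     net = torch.nn.Linear(10, 2)
65 |     opt = LookaheadAdam(net.parameters(), alpha=0.5, k=6, lr=1e-3)
66 |     loss = net(torch.randn(4, 10)).sum()
67 |     loss.backward()
68 |     # Every k-th call also pulls the slow weights towards the fast weights
69 |     opt.step()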
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/optim/over9000.py:
--------------------------------------------------------------------------------
1 | # from https://raw.githubusercontent.com/mgrankin/over9000/master/over9000.py
2 |
3 | from optim.lookahead import Lookahead
4 | from optim.ralamb import Ralamb
5 |
6 | # Lookahead implementation from https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py
7 | # RAdam + LARS implementation from https://gist.github.com/redknightlois/c4023d393eb8f92bb44b2ab582d7ec20
8 |
9 |
10 | # RAdam + LARS + LookAHead
11 | def Over9000(params, alpha=0.5, k=6, *args, **kwargs):
12 | ralamb = Ralamb(params, *args, **kwargs)
13 | return Lookahead(ralamb, alpha, k)
14 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/optim/ranger.py:
--------------------------------------------------------------------------------
1 | # from https://github.com/mgrankin/over9000/blob/master/ranger.py
2 |
3 | from optim.lookahead import Lookahead
4 | from optim.radam import RAdam
5 |
6 |
7 | def Ranger(params, alpha=0.5, k=6, *args, **kwargs):
8 | radam = RAdam(params, *args, **kwargs)
9 | return Lookahead(radam, alpha, k)
10 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/prediction.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import math
4 |
5 | import torch
6 | from torchtext.data import Dataset
7 |
8 | from helpers import bpe_postprocess, load_config, get_latest_checkpoint, \
9 | load_checkpoint, calculate_dtw
10 | from model import build_model, Model
11 | from batch import Batch
12 | from data import load_data, make_data_iter
13 | from constants import UNK_TOKEN, PAD_TOKEN, EOS_TOKEN
14 |
15 | # Validate epoch given a dataset
16 |
17 |
18 | def validate_on_data(model: Model,
19 | data: Dataset,
20 | batch_size: int,
21 | max_output_length: int,
22 | eval_metric: str,
23 | loss_function: torch.nn.Module = None,
24 | batch_type: str = "sentence",
25 | type="val",
26 | BT_model=None,
27 | logger=None,
28 | config=None,
29 | epoch_num=0):
30 |
31 |     valid_iter = make_data_iter(
32 |         dataset=data, batch_size=batch_size, batch_type=batch_type,
33 |         shuffle=True, train=False)
34 |
35 | pad_index = model.src1_vocab.stoi[PAD_TOKEN]
36 | # disable dropout
37 | model.eval()
38 | # don't track gradients during validation
39 | with torch.no_grad():
40 | valid_hypotheses = []
41 | valid_references = []
42 | valid_inputs = []
43 | file_paths = []
44 | all_dtw_scores = []
45 |
46 | valid_loss = 0
47 | total_ntokens = 0
48 | total_nseqs = 0
49 |
50 | batches = 0
51 | for valid_batch in iter(valid_iter):
52 |
53 | batches += 1
54 | # Extract batch
55 | batch = Batch(torch_batch=valid_batch,
56 | pad_index=pad_index,
57 | model=model)
58 | targets = batch.trg
59 |
60 | # run as during training with teacher forcing
61 | if loss_function is not None and batch.trg is not None:
62 | # Get the loss for this batch
63 | batch_loss, _ = model.get_loss_for_batch(
64 | batch, loss_function=loss_function, epoch_num=epoch_num)
65 |
66 | valid_loss += batch_loss
67 | total_ntokens += batch.ntokens
68 | total_nseqs += batch.nseqs
69 |
70 | # If not just count in, run inference to produce translation videos
71 | if not model.just_count_in:
72 | # Run batch through the model in an auto-regressive format
73 | output, attention_scores = model.run_batch(
74 | batch=batch,
75 | max_output_length=max_output_length)
76 |
77 |                 # If future prediction
78 |                 if model.future_prediction != 0:
79 |                     # Cut to only the first frame prediction + add the counter
80 |                     output = torch.cat((output[:, :, :output.shape[2] // (
81 |                         model.future_prediction)], output[:, :, -1:]), dim=2)
82 |                     # Cut to only the first frame prediction + add the counter
83 |                     targets = torch.cat(
84 |                         (targets[:, :, :targets.shape[2] // (model.future_prediction)], targets[:, :, -1:]), dim=2)
85 | 
86 |                 # For just-counter models no frames are generated; the ground-truth targets stand in as the hypothesis
87 |                 if model.just_count_in:
88 |                     output = targets
89 |
90 | # Add references, hypotheses and file paths to list
91 | valid_references.extend(targets)
92 | valid_hypotheses.extend(output)
93 | file_paths.extend(batch.file_paths)
94 | # Add the source sentences to list, by using the model source vocab and batch indices
95 | valid_inputs.extend([[model.src1_vocab.itos[batch.src1[i][j]] for j in range(len(batch.src1[i]))] for i in
96 | range(len(batch.src1))])
97 |
98 | # Calculate the full Dynamic Time Warping score - for evaluation
99 | dtw_score = calculate_dtw(targets, output)
100 | all_dtw_scores.extend(dtw_score)
101 |
102 | # Can set to only run a few batches
103 | # if type is 'val':
104 | # if batches == math.ceil(20/batch_size):
105 | # break
106 | # batches += 1
107 |             # Calculate backtranslation BLEU scores
108 | # if type is 'test':
109 | # calculate_backtranslation_BLUE(valid_hypotheses, config, logger, type)
110 | # Dynamic Time Warping scores
111 | current_valid_score = np.mean(all_dtw_scores)
112 |
113 | return current_valid_score, valid_loss, valid_references, valid_hypotheses, \
114 | valid_inputs, all_dtw_scores, file_paths
115 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.8.1
2 | alabaster==0.7.8
3 | apt-clone==0.2.1
4 | apt-xapian-index==0.47
5 | apturl==0.5.2
6 | asn1crypto==0.24.0
7 | astor==0.8.0
8 | astroid==2.3.1
9 | astropy==3.0
10 | attrs==17.4.0
11 | Babel==2.4.0
12 | bleach==2.1.2
13 | Brlapi==0.6.6
14 | certifi==2019.9.11
15 | chardet==3.0.4
16 | cloudpickle==0.5.2
17 | command-not-found==0.3
18 | configobj==5.0.6
19 | cryptography==2.1.4
20 | cupshelpers==1.0
21 | cycler==0.10.0
22 | Cython==0.26.1
23 | decorator==4.1.2
24 | defer==1.0.6
25 | devscripts===2.17.12ubuntu1.1
26 | distro-info===0.18ubuntu0.18.04.1
27 | docutils==0.14
28 | emcee==2.2.1
29 | entrypoints==0.2.3.post1
30 | fail2ban==0.10.2
31 | future==0.18.1
32 | gast==0.3.2
33 | google-pasta==0.1.7
34 | gpg==1.10.0
35 | grpcio==1.24.1
36 | h5py==2.10.0
37 | html5lib==0.999999999
38 | httplib2==0.9.2
39 | idna==2.8
40 | imagesize==0.7.1
41 | ipykernel==4.8.2
42 | ipython==5.5.0
43 | ipython-genutils==0.2.0
44 | isort==4.3.21
45 | jedi==0.11.1
46 | Jinja2==2.10
47 | joeynmt==0.0.1
48 | jsonschema==2.6.0
49 | jupyter-client==5.2.2
50 | jupyter-core==4.4.0
51 | Keras-Applications==1.0.8
52 | Keras-Preprocessing==1.1.0
53 | keyring==10.6.0
54 | keyrings.alt==3.0
55 | kiwisolver==1.1.0
56 | language-selector==0.1
57 | launchpadlib==1.10.6
58 | lazr.restfulclient==0.13.5
59 | lazr.uri==1.0.3
60 | lazy-object-proxy==1.4.2
61 | logilab-common==1.4.1
62 | louis==3.5.0
63 | macaroonbakery==1.1.3
64 | Mako==1.0.7
65 | Markdown==3.1.1
66 | MarkupSafe==1.0
67 | matplotlib==3.1.1
68 | mccabe==0.6.1
69 | mistune==0.8.3
70 | mpmath==1.0.0
71 | nbconvert==5.3.1
72 | nbformat==4.4.0
73 | netifaces==0.10.4
74 | nose==1.3.7
75 | nose-parameterized==0.3.4
76 | numpy==1.17.3
77 | numpydoc==0.7.0
78 | oauth==1.0.1
79 | olefile==0.45.1
80 | PAM==0.4.2
81 | pandas==0.25.1
82 | pandocfilters==1.4.2
83 | parso==0.1.1
84 | pexpect==4.2.1
85 | pickleshare==0.7.4
86 | Pillow==6.2.0
87 | pluggy==0.6.0
88 | ply==3.11
89 | portalocker==1.5.1
90 | prompt-toolkit==1.0.15
91 | protobuf==3.10.0
92 | psutil==5.4.2
93 | py==1.5.2
94 | pycairo==1.16.2
95 | pycodestyle==2.3.1
96 | pycrypto==2.6.1
97 | pycups==1.9.73
98 | pyds9==1.8.1
99 | pyfits==3.4
100 | pyflakes==1.6.0
101 | Pygments==2.2.0
102 | pygobject==3.26.1
103 | PyICU==1.9.8
104 | pyinotify==0.9.6
105 | pylint==2.4.2
106 | pymacaroons==0.13.0
107 | PyNaCl==1.1.2
108 | PyOpenGL==3.1.0
109 | pyparsing==2.4.2
110 | pyqtgraph==0.10.0
111 | pyRFC3339==1.0
112 | pytest==3.3.2
113 | pytest-arraydiff==0.2
114 | pytest-astropy==0.2.1
115 | pytest-doctestplus==0.1.2
116 | pytest-openfiles==0.2.0
117 | pytest-remotedata==0.2.0
118 | python-apt==1.6.5+ubuntu0.4
119 | python-dateutil==2.8.0
120 | python-debian==0.1.32
121 | python-magic==0.4.16
122 | pytz==2019.3
123 | pyxdg==0.25
124 | PyYAML==5.1.2
125 | pyzmq==16.0.2
126 | QtAwesome==0.4.4
127 | qtconsole==4.3.1
128 | QtPy==1.3.1
129 | reportlab==3.4.0
130 | requests==2.22.0
131 | requests-unixsocket==0.1.5
132 | roman==2.0.0
133 | rope==0.10.5
134 | sacrebleu==1.4.2
135 | scipy==1.3.1
136 | scour==0.36
137 | screen-resolution-extra==0.0.0
138 | seaborn==0.9.0
139 | SecretStorage==2.3.1
140 | simplegeneric==0.8.1
141 | simplejson==3.13.2
142 | six==1.12.0
143 | Sphinx==1.6.7
144 | spyder==3.2.6
145 | ssh-import-id==5.7
146 | subword-nmt==0.3.6
147 | sympy==1.1.1
148 | system-service==0.3
149 | systemd-python==234
150 | tensorboard==1.14.0
151 | tensorflow-estimator==1.14.0
152 | termcolor==1.1.0
153 | testpath==0.3.1
154 | torch==1.3.0
155 | torchtext==0.4.0
156 | tornado==4.5.3
157 | tqdm==4.36.1
158 | traitlets==4.3.2
159 | typed-ast==1.4.0
160 | typing==3.7.4.1
161 | ubuntu-drivers-common==0.0.0
162 | ufw==0.36
163 | unattended-upgrades==0.1
164 | unidiff==0.5.4
165 | urllib3==1.25.6
166 | usb-creator==0.3.3
167 | virtualenv==15.1.0
168 | wadllib==1.3.2
169 | wcwidth==0.1.7
170 | webencodings==0.5
171 | Werkzeug==0.16.0
172 | wrapt==1.11.2
173 | xkit==0.0.0
174 | zope.interface==4.3.2
175 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/run-adj.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH="/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.pycharm_helpers/pydev:/home/carla/MyProjects/SLP-FE-Linguistics:/home/carla/.pycharm_helpers/pycharm_display:/home/carla/.pycharm_helpers/third_party/thriftpy:/home/carla/.pycharm_helpers/pydev:/Users/CarlaViegas/Library/Caches/JetBrains/PyCharm2020.1/cythonExtensions:/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python37.zip:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/lib-dynload:/usr/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages:/home/carla/.pycharm_helpers/pycharm_matplotlib_backend:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages/IPython/extensions:/home/carla/MyProjects/SLP-FE-Linguistics/SLT"
2 |
3 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/330000_every.ckpt
4 |
5 | python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/210000_every.ckpt
6 |
7 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D_same_vocab_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D_same_vocab/train1/320000_every.ckpt
8 |
9 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2Dauc_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2Dauc/train1/360000_every.ckpt
10 |
11 | # TODO add ckpt
12 | #python ~/MyProjects/SLP-FE-Linguistics/PT-T2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Configs/handsbodyface2Daub_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Models/handsbodyface2Daub/train1/0000_every.ckpt
13 |
14 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/run-norm.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH="/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.pycharm_helpers/pydev:/home/carla/MyProjects/SLP-FE-Linguistics:/home/carla/.pycharm_helpers/pycharm_display:/home/carla/.pycharm_helpers/third_party/thriftpy:/home/carla/.pycharm_helpers/pydev:/Users/CarlaViegas/Library/Caches/JetBrains/PyCharm2020.1/cythonExtensions:/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python37.zip:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/lib-dynload:/usr/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages:/home/carla/.pycharm_helpers/pycharm_matplotlib_backend:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages/IPython/extensions:/home/carla/MyProjects/SLP-FE-Linguistics/SLT"
2 | #python ~/MyProjects/SLP-FE-Linguistics/PT-T2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Configs/handsbody.yaml
3 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3D.yaml
4 | python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3Dauc_norm.yaml
5 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml
6 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train1/410000_every.ckpt
7 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D_same_vocab.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D_same_vocab/train1/320000_every.ckpt
8 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/Linguistics/PT-TG2S/Configs/handsbody.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/330000_every.ckpt
9 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train3/20000_best.ckpt
10 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/run.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH="/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.pycharm_helpers/pydev:/home/carla/MyProjects/SLP-FE-Linguistics:/home/carla/.pycharm_helpers/pycharm_display:/home/carla/.pycharm_helpers/third_party/thriftpy:/home/carla/.pycharm_helpers/pydev:/Users/CarlaViegas/Library/Caches/JetBrains/PyCharm2020.1/cythonExtensions:/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python37.zip:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/lib-dynload:/usr/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages:/home/carla/.pycharm_helpers/pycharm_matplotlib_backend:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages/IPython/extensions:/home/carla/MyProjects/SLP-FE-Linguistics/SLT"
2 | #python ~/MyProjects/SLP-FE-Linguistics/PT-T2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Configs/handsbody.yaml
3 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3D.yaml
4 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3Dauc.yaml
5 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml
6 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train1/410000_every.ckpt
7 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D_same_vocab.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D_same_vocab/train1/320000_every.ckpt
8 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/330000_every.ckpt
9 | python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train3/20000_best.ckpt
10 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/run2.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH="/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.pycharm_helpers/pydev:/home/carla/MyProjects/SLP-FE-Linguistics:/home/carla/.pycharm_helpers/pycharm_display:/home/carla/.pycharm_helpers/third_party/thriftpy:/home/carla/.pycharm_helpers/pydev:/Users/CarlaViegas/Library/Caches/JetBrains/PyCharm2020.1/cythonExtensions:/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python37.zip:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/lib-dynload:/usr/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages:/home/carla/.pycharm_helpers/pycharm_matplotlib_backend:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages/IPython/extensions:/home/carla/MyProjects/SLP-FE-Linguistics/SLT"
2 | #python ~/MyProjects/SLP-FE-Linguistics/PT-T2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Configs/handsbody.yaml
3 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3D.yaml
4 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3Dauc.yaml
5 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml
6 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train1/410000_every.ckpt
7 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/best.ckpt
8 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/330000_every.ckpt
9 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train1/180000_every.ckpt
10 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/210000_every.ckpt
11 | python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3Dauc.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface3Dauc/train1/450000_every.ckpt
12 |
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/search.py:
--------------------------------------------------------------------------------
1 |
2 | import torch
3 | import torch.nn.functional as F
4 | from torch import Tensor
5 | import numpy as np
6 |
7 | from decoders import Decoder, TransformerDecoder
8 | from embeddings import Embeddings
9 |
10 | # pylint: disable=unused-argument
11 |
12 |
13 | def greedy(
14 | src1_mask: Tensor,
15 | src2_mask: Tensor,
16 |
17 | embed: Embeddings,
18 | decoder: Decoder,
19 | encoder1_output: Tensor,
20 | encoder2_output: Tensor,
21 |
22 | trg_input: Tensor,
23 | model,
24 | ) -> (np.array, np.array):
25 | """
26 | Special greedy function for transformer, since it works differently.
27 | The transformer remembers all previous states and attends to them.
28 |
29 | :param src_mask: mask for source inputs, 0 for positions after
30 | :param embed: target embedding
31 | :param bos_index: index of in the vocabulary
32 | :param max_output_length: maximum length for the hypotheses
33 | :param decoder: decoder to use for greedy decoding
34 | :param encoder_output: encoder hidden states for attention
35 | :param encoder_hidden: encoder final state (unused in Transformer)
36 | :return:
37 | - stacked_output: output hypotheses (2d array of indices),
38 | - stacked_attention_scores: attention scores (3d array)
39 | """
40 | # Initialise the input
41 | # Extract just the BOS first frame from the target
42 | ys = trg_input[:, :1, :].float()
43 |
44 | # If the counter is coming into the decoder or not
45 | ys_out = ys
46 |
47 | # Set the target mask, by finding the padded rows
48 | trg_mask = trg_input != 0.0
49 | trg_mask = trg_mask.unsqueeze(1)
50 |
51 | # Find the maximum output length for this batch
52 | max_output_length = trg_input.shape[1]
53 |
54 | # If just count in, input is just the counter
55 | if model.just_count_in:
56 | ys = ys[:, :, -1:]
57 |
58 | for i in range(max_output_length):
59 |
60 | # ys here is the input
61 | # Drive the timing by giving the GT timing - add in the counter to the last column
62 |
63 | if model.just_count_in:
64 | # If just counter, drive the input using the GT counter
65 | ys[:, -1] = trg_input[:, i, -1:]
66 |
67 | else:
68 | # Give the GT counter for timing, to drive the timing
69 | ys[:, -1, -1:] = trg_input[:, i, -1:]
70 |
71 | # Embed the target input before passing to the decoder
72 | trg_embed = embed(ys)
73 |
74 | # Cut padding mask to required size (of the size of the input)
75 | padding_mask = trg_mask[:, :, :i+1, :i+1]
76 | # Pad the mask (If required) (To make it square, and used later on correctly)
77 | pad_amount = padding_mask.shape[2] - padding_mask.shape[3]
78 | padding_mask = (F.pad(input=padding_mask.double(), pad=(
79 | pad_amount, 0, 0, 0), mode='replicate') == 1.0)
80 |
81 | # Pass the embedded input and the encoder output into the decoder
82 | with torch.no_grad():
83 | out, _, _, _ = decoder(
84 | trg_embed=trg_embed,
85 | encoder1_output=encoder1_output,
86 | encoder2_output=encoder2_output,
87 |
88 | src1_mask=src1_mask,
89 | src2_mask=src2_mask,
90 |
91 | trg_mask=padding_mask,
92 | )
93 |
94 | if model.future_prediction != 0:
95 | # Cut to only the first frame prediction
96 | out = torch.cat(
97 | (out[:, :, :out.shape[2] // (model.future_prediction)], out[:, :, -1:]), dim=2)
98 |
 99 |         if model.just_count_in:
100 |             # If just the counter is in trg_input, append only the counter column
101 |             ys = torch.cat([ys, out[:, -1:, -1:]], dim=1)
102 |         else:
103 |             # Add this frame prediction to the overall prediction
104 |             ys = torch.cat([ys, out[:, -1:, :]], dim=1)
105 |
106 | # Add this next predicted frame to the full frame output
107 | ys_out = torch.cat([ys_out, out[:, -1:, :]], dim=1)
108 |
109 | return ys_out, None
110 |
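# The mask handling above is the subtle part of the loop: the sliced target
# mask has shape (batch, 1, t, min(t, trg_size)) and must be made square
# before it can act as a self-attention mask. A minimal standalone sketch of
# that padding step (toy shapes assumed; not part of the original module):
if __name__ == "__main__":
    demo_mask = torch.rand(2, 1, 4, 3) > 0.2  # boolean mask, (batch, 1, t, feat)
    pad_amount = demo_mask.shape[2] - demo_mask.shape[3]
    # Replication padding extends the left edge, continuing the True/False pattern
    square_mask = (F.pad(input=demo_mask.double(),
                         pad=(pad_amount, 0, 0, 0), mode='replicate') == 1.0)
    print(square_mask.shape)  # torch.Size([2, 1, 4, 4])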
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Configs/Base.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | src: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
3 | trg: "skels" # Target - 3D body co-ordinates (skels)
4 | files: "files" # Filenames for each sequence
5 |
6 | train: "./Data/tmp/train"
7 | dev: "./Data/tmp/dev"
8 | test: "./Data/tmp/test"
9 |
10 | max_sent_length: 300 # Max Sentence Length
11 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
12 | src_vocab: "./Configs/src_vocab.txt"
13 |
14 | training:
15 | random_seed: 27 # Random seed for initialisation
16 | optimizer: "adam" # Chosen optimiser (adam, ..)
17 | learning_rate: 0.001 # Initial model learning rate
18 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop
19 | weight_decay: 0.0 # Weight Decay
20 | clip_grad_norm: 5.0 # Gradient clipping value
21 | batch_size: 8 # Batch Size for training
22 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
23 | patience: 7 # How many epochs of no improvement causes a LR reduction
24 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
25 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
26 | epochs: 20000 # How many epochs to run for
27 | validation_freq: 10 # After how many steps to run a validation on the model
28 | logging_freq: 250 # After how many steps to log training progress
 29 |     eval_metric: "dtw"                  # Evaluation metric during training (dtw, bt)
30 | model_dir: "./Models/Base" # Where the model shall be stored
31 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
32 | continue: True # Flag to continue from a previous saved model in the model_dir
33 | shuffle: True # Flag to shuffle the data during training
34 | use_cuda: True # Flag to use GPU cuda capabilities
35 | max_output_length: 300 # Max Output Length
36 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
37 | loss: "MSE" # Loss function (MSE, L1)
38 |
39 | model:
40 | initializer: "xavier" # Model initialisation (Xavier, ...)
41 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
42 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
43 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
44 | just_count_in: False # Flag for Just Counter Data Augmentation
45 | gaussian_noise: False # Flag for Gaussian Noise Data Augmentation
46 | noise_rate: 5 # Gaussian Noise rate
47 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
48 | encoder: # Model Encoder
49 | type: "transformer"
50 | num_layers: 2 # Number of layers
51 | num_heads: 4 # Number of Heads
52 | embeddings:
53 | embedding_dim: 512 # Embedding Dimension
54 | dropout: 0.0 # Embedding Dropout
55 | hidden_size: 512 # Hidden Size Dimension
56 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
57 | dropout: 0.0 # Encoder Dropout
58 | decoder: # Model Decoder
59 | type: "transformer"
60 | num_layers: 2 # Number of layers
61 | num_heads: 4 # Number of Heads
62 | embeddings:
63 | embedding_dim: 512 # Embedding Dimension
64 | dropout: 0.0 # Embedding Dropout
65 | hidden_size: 512 # Hidden Size Dimension
66 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
67 | dropout: 0.0 # Decoder Dropout
68 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Configs/Base_real.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | src: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
3 | trg: "skels" # Target - 3D body co-ordinates (skels)
4 | files: "files" # Filenames for each sequence
5 |
6 | train: "./Data/PT/train"
7 | dev: "./Data/PT/dev"
8 | test: "./Data/PT/test"
9 |
10 | max_sent_length: 300 # Max Sentence Length
11 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
12 | src_vocab: "./Configs/src_vocab.txt"
13 |
14 | training:
15 | random_seed: 0 # Random seed for initialisation
16 | optimizer: "adam" # Chosen optimiser (adam, ..)
17 | learning_rate: 0.001 # Initial model learning rate
18 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop
19 | weight_decay: 0.0 # Weight Decay
20 | clip_grad_norm: 5.0 # Gradient clipping value
21 | batch_size: 8 # Batch Size for training
22 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
23 | patience: 7 # How many epochs of no improvement causes a LR reduction
24 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
25 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
26 | epochs: 20000 # How many epochs to run for
27 | validation_freq: 10000 # After how many steps to run a validation on the model
28 | logging_freq: 250 # After how many steps to log training progress
 29 |     eval_metric: "dtw"                  # Evaluation metric during training (dtw, bt)
30 | model_dir: "./Models/Base_Gloss2P_counter" # Where the model shall be stored
31 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
32 | continue: True # Flag to continue from a previous saved model in the model_dir
33 | shuffle: True # Flag to shuffle the data during training
34 | use_cuda: True # Flag to use GPU cuda capabilities
35 | max_output_length: 300 # Max Output Length
36 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
37 | loss: "MSE" # Loss function (MSE, L1)
38 |
39 | model:
40 | initializer: "xavier" # Model initialisation (Xavier, ...)
41 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
42 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
43 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
44 | just_count_in: False # Flag for Just Counter Data Augmentation
45 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation
46 | noise_rate: 5 # Gaussian Noise rate
47 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
48 | encoder: # Model Encoder
49 | type: "transformer"
50 | num_layers: 2 # Number of layers
51 | num_heads: 4 # Number of Heads
52 | embeddings:
53 | embedding_dim: 512 # Embedding Dimension
54 | dropout: 0.0 # Embedding Dropout
55 | hidden_size: 512 # Hidden Size Dimension
56 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
57 | dropout: 0.0 # Encoder Dropout
58 | decoder: # Model Decoder
59 | type: "transformer"
60 | num_layers: 2 # Number of layers
61 | num_heads: 4 # Number of Heads
62 | embeddings:
63 | embedding_dim: 512 # Embedding Dimension
64 | dropout: 0.0 # Embedding Dropout
65 | hidden_size: 512 # Hidden Size Dimension
66 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
67 | dropout: 0.0 # Decoder Dropout
68 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Configs/Base_text2Skeleton.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | src: "text" # Source - Either Gloss->Pose or Text->Pose (gloss,text)
3 | trg: "skels" # Target - 3D body co-ordinates (skels)
4 | files: "files" # Filenames for each sequence
5 |
6 | train: "./Data/PT/train"
7 | dev: "./Data/PT/dev"
8 | test: "./Data/PT/test"
9 |
10 | max_sent_length: 300 # Max Sentence Length
11 | skip_frames: 1 # Skip frames in the data, to reduce the data input size
12 | src_vocab: "./Configs/src_text_vocab.txt"
13 |
14 | training:
15 | random_seed: 0 # Random seed for initialisation
16 | optimizer: "adam" # Chosen optimiser (adam, ..)
17 | learning_rate: 0.001 # Initial model learning rate
18 | learning_rate_min: 0.0001 # Learning rate minimum, when training will stop
19 | weight_decay: 0.0 # Weight Decay
20 | clip_grad_norm: 5.0 # Gradient clipping value
21 | batch_size: 8 # Batch Size for training
22 | scheduling: "plateau" # Scheduling at training time (plateau, ...)
23 | patience: 7 # How many epochs of no improvement causes a LR reduction
24 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs
25 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
26 | epochs: 20000 # How many epochs to run for
27 | validation_freq: 10000 # After how many steps to run a validation on the model
28 | logging_freq: 250 # After how many steps to log training progress
 29 |     eval_metric: "dtw"                  # Evaluation metric during training (dtw, bt)
30 | model_dir: "./Models/Base_T2P8head" # Where the model shall be stored
31 | overwrite: False # Flag to overwrite a previous saved model in the model_dir
32 | continue: True # Flag to continue from a previous saved model in the model_dir
33 | shuffle: True # Flag to shuffle the data during training
34 | use_cuda: True # Flag to use GPU cuda capabilities
35 | max_output_length: 300 # Max Output Length
36 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
37 | loss: "MSE" # Loss function (MSE, L1)
38 |
39 | model:
40 | initializer: "xavier" # Model initialisation (Xavier, ...)
41 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...)
42 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
43 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands)
44 | just_count_in: False # Flag for Just Counter Data Augmentation
45 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation
46 | noise_rate: 5 # Gaussian Noise rate
47 | future_prediction: 0 # Future Prediction Data Augmentation if > 0
48 | encoder: # Model Encoder
49 | type: "transformer"
50 | num_layers: 2 # Number of layers
51 | num_heads: 8 # Number of Heads
52 | embeddings:
53 | embedding_dim: 512 # Embedding Dimension
54 | dropout: 0.0 # Embedding Dropout
55 | hidden_size: 512 # Hidden Size Dimension
56 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
57 | dropout: 0.0 # Encoder Dropout
58 | decoder: # Model Decoder
59 | type: "transformer"
60 | num_layers: 2 # Number of layers
61 | num_heads: 8 # Number of Heads
62 | embeddings:
63 | embedding_dim: 512 # Embedding Dimension
64 | dropout: 0.0 # Embedding Dropout
65 | hidden_size: 512 # Hidden Size Dimension
66 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size)
67 | dropout: 0.0 # Decoder Dropout
68 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Data/tmp/dev.files:
--------------------------------------------------------------------------------
1 | dev/11August_2010_Wednesday_tagesschau-2
2 | dev/11August_2010_Wednesday_tagesschau-3
3 | dev/11August_2010_Wednesday_tagesschau-8
4 | dev/25October_2010_Monday_tagesschau-22
5 | dev/05May_2011_Thursday_tagesschau-25
6 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Data/tmp/dev.gloss:
--------------------------------------------------------------------------------
1 | DRUCK TIEF KOMMEN
2 | ES-BEDEUTET VIEL WOLKE UND KOENNEN REGEN GEWITTER KOENNEN
3 | WIND MAESSIG SCHWACH REGION WENN GEWITTER WIND KOENNEN
4 | MITTWOCH REGEN KOENNEN NORDWEST WAHRSCHEINLICH NORD STARK WIND
5 | JETZT WETTER WIE-AUSSEHEN MORGEN FREITAG SECHSTE MAI ZEIGEN-BILDSCHIRM
6 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Data/tmp/dev.text:
--------------------------------------------------------------------------------
1 | tiefer luftdruck bestimmt in den nächsten tagen unser wetter .
2 | das bedeutet viele wolken und immer wieder zum teil kräftige schauer und gewitter .
3 | meist weht nur ein schwacher wind aus unterschiedlichen richtungen der bei schauern und gewittern stark böig sein kann .
4 | am mittwoch hier und da nieselregen in der nordwesthälfte an den küsten kräftiger wind .
5 | und nun die wettervorhersage für morgen freitag den sechsten mai .
6 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Data/tmp/test.files:
--------------------------------------------------------------------------------
1 | test/25October_2010_Monday_tagesschau-17
2 | test/25October_2010_Monday_tagesschau-24
3 | test/15December_2010_Wednesday_tagesschau-37
4 | test/10March_2011_Thursday_heute-58
5 | test/14August_2009_Friday_tagesschau-62
6 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Data/tmp/test.gloss:
--------------------------------------------------------------------------------
1 | REGEN SCHNEE REGION VERSCHWINDEN NORD REGEN KOENNEN REGION STERN KOENNEN SEHEN
2 | DONNERSTAG NORDWEST REGEN REGION SONNE WOLKE WECHSELHAFT DANN FREITAG AEHNLICH WETTER
3 | KRAEFTIG AB MORGEN FRUEH MEISTENS SCHNEE SCHNEIEN KALT REGEN
4 | WOCHENENDE SONNE SAMSTAG SCHOEN TEMPERATUR BIS SIEBZEHN GRAD REGION
5 | DEUTSCH LAND MORGEN HOCH DRUCK KOMMEN WOLKE AUFLOESEN
6 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Data/tmp/test.text:
--------------------------------------------------------------------------------
1 | regen und schnee lassen an den alpen in der nacht nach im norden und nordosten fallen hier und da schauer sonst ist das klar .
2 | am donnerstag regen in der nordhälfte in der südhälfte mal sonne mal wolken ähnliches wetter dann auch am freitag .
3 | vom nordmeer zieht ein kräftiges tief heran und bringt uns ab den morgenstunden heftige schneefälle zum teil auch gefrierenden regen .
4 | sonnig geht es auch ins wochenende samstag ein herrlicher tag mit temperaturen bis siebzehn grad hier im westen .
5 | deutschland liegt morgen unter hochdruckeinfluss der die wolken weitgehend vertreibt .
6 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Data/tmp/train.files:
--------------------------------------------------------------------------------
1 | train/11August_2010_Wednesday_tagesschau-1
2 | train/11August_2010_Wednesday_tagesschau-4
3 | train/11August_2010_Wednesday_tagesschau-5
4 | train/11August_2010_Wednesday_tagesschau-6
5 | train/11August_2010_Wednesday_tagesschau-7
6 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Data/tmp/train.gloss:
--------------------------------------------------------------------------------
1 | JETZT WETTER MORGEN DONNERSTAG ZWOELF FEBRUAR
2 | ORT REGEN DURCH REGEN KOENNEN UEBERSCHWEMMUNG KOENNEN
3 | NORDWEST HEUTE NACHT TROCKEN BLEIBEN SUEDWEST KOENNEN REGEN ORT GEWITTER DAZU
4 | TAGSUEBER OFT REGEN GEWITTER KOENNEN MANCHMAL REGEN VIEL REGEN
5 | WOLKE LOCH SPEZIELL NORDWEST
6 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Data/tmp/train.text:
--------------------------------------------------------------------------------
1 | und nun die wettervorhersage für morgen donnerstag den zwölften august .
2 | mancherorts regnet es auch länger und ergiebig auch lokale überschwemmungen sind wieder möglich .
3 | im nordwesten bleibt es heute nacht meist trocken sonst muss mit teilweise kräftigen schauern gerechnet werden örtlich mit blitz und donner .
4 | auch am tag gibt es verbreitet zum teil kräftige schauer oder gewitter und in manchen regionen fallen ergiebige regenmengen .
5 | größere wolkenlücken finden sich vor allem im nordwesten .
6 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/README.md:
--------------------------------------------------------------------------------
1 | # Progressive Transformers for End-to-End Sign Language Production
2 |
3 | Source code for "Progressive Transformers for End-to-End Sign Language Production" (Ben Saunders, Necati Cihan Camgoz, Richard Bowden - ECCV 2020)
4 |
5 | Conference video available at https://twitter.com/BenMSaunders/status/1336638886198521857
6 |
7 | # Usage
8 |
9 | Install required packages using the requirements.txt file.
10 |
11 | `pip install -r requirements.txt`
12 |
13 | To run, start __main__.py with arguments "train" and "./Configs/Base.yaml":
14 |
15 | `python __main__.py train ./Configs/Base.yaml`
16 |
17 | An example train.log file can be found at "./Configs/train.log" and a validation file at "./Configs/validations.txt".
18 |
19 | Back Translation model created from https://github.com/neccam/slt. Back Translation evaluation code coming soon.
20 |
21 | # Data
22 |
23 | Pre-processed Phoenix14T data can be requested via email at b.saunders@surrey.ac.uk. If you wish to create the data yourself, please follow the steps below:
24 |
25 | Phoenix14T data can be downloaded from https://www-i6.informatik.rwth-aachen.de/~koller/RWTH-PHOENIX-2014-T/. Skeleton joints can be extracted using OpenPose (https://github.com/CMU-Perceptual-Computing-Lab/openpose) and lifted to 3D using the 2D-to-3D Inverse Kinematics code at https://github.com/gopeith/SignLanguageProcessing, under 3DposeEstimator.
26 |
27 | Prepare Phoenix14T (or another sign language dataset) as .txt files for .skels, .gloss, .text and .files. The data format should be parallel .txt files for "src", "trg" and "files", with each line representing a new sequence:
28 |
29 | - The "src" file contains source sentences, with each line representing new sentence.
30 |
31 | - The "trg" file contains skeleton data of each frame, with a space separating frames. The joints should be divided by 3 to match the scaling I used. Each frame contains 150 joint values and a subsequent counter value, all separated by a space. Each sequence should be separated with a new line. If your data contains 150 joints per frame, please ensure that trg_size is set to 150 in the config file.
32 |
33 | - The "files" file should contain the name of each sequence on a new line.
34 |
35 | Examples can be found in /Data/tmp. The data path must be specified in the config file.
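
As a quick sanity check that the prepared files are parallel, something like the following can be used (a minimal sketch; the `./Data/tmp` path and the extensions follow the examples in this repository):

```python
from pathlib import Path

root = Path("./Data/tmp")
for split in ("train", "dev", "test"):
    # Each parallel file must have one line per sequence
    counts = {ext: len((root / f"{split}.{ext}").read_text(encoding="utf-8").splitlines())
              for ext in ("text", "gloss", "skels", "files")}
    assert len(set(counts.values())) == 1, f"{split} files are not parallel: {counts}"
    print(split, counts)
```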
36 |
37 | # Pre-Trained Model
38 |
39 | A pre-trained Progressive Transformer checkpoint can be downloaded from https://www.dropbox.com/s/l4xmnybp7luz0l3/PreTrained_PTSLP_Model.ckpt?dl=0.
40 |
41 | This model has a size of ```num_layers: 2```, ```num_heads: 4``` and ```embedding_dim: 512```, as outlined in ```./Configs/Base.yaml```. It has been pre-trained on the full PHOENIX14T dataset with the data format as above. The relevant train.log and validations.txt files can be found in ```./Configs```.
42 |
43 | To initialise a model from this checkpoint, pass the ```--ckpt ./PreTrained_PTSLP_Model.ckpt``` argument to either ```train``` or ```test``` modes. Additionally, to initialise the correct src_embed size, the config argument ```src_vocab: "./Configs/src_vocab.txt"``` must be set to the location of the src_vocab.txt, found under ```./Configs```. Please open an issue if this checkpoint cannot be downloaded or loaded.
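
For example, to evaluate this checkpoint with the base config (assuming it has been downloaded to the repository root):

`python __main__.py test ./Configs/Base.yaml --ckpt ./PreTrained_PTSLP_Model.ckpt`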
44 |
45 | # Reference
46 |
47 | If you use this code in your research, please cite the following [papers](https://arxiv.org/abs/2004.14874):
48 |
49 | ```
50 | @inproceedings{saunders2020progressive,
51 | title = {{Progressive Transformers for End-to-End Sign Language Production}},
52 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard},
53 | booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
54 | year = {2020}}
55 |
56 | @inproceedings{saunders2020adversarial,
57 | title = {{Adversarial Training for Multi-Channel Sign Language Production}},
58 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard},
59 | booktitle = {Proceedings of the British Machine Vision Conference (BMVC)},
60 | year = {2020}}
61 |
62 | @inproceedings{saunders2021continuous,
63 | title = {{Continuous 3D Multi-Channel Sign Language Production via Progressive Transformers and Mixture Density Networks}},
64 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard},
65 | booktitle = {International Journal of Computer Vision (IJCV)},
66 | year = {2021}}
67 |
68 | ```
69 |
70 | ## Acknowledgements
71 | This work received funding from the SNSF Sinergia project 'SMILE' (CRSII2 160811), the European Union's Horizon2020 research and innovation programme under grant agreement no. 762021 'Content4All' and the EPSRC project 'ExTOL' (EP/R03298X/1). This work reflects only the authors view and the Commission is not responsible for any use that may be made of the information it contains. We would also like to thank NVIDIA Corporation for their GPU grant.
72 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__main__.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from training import train, test
4 |
5 |
6 | def main():
7 |
8 | # Example options:
9 | # train ./Configs/Base.yaml
10 | # test ./Configs/Base.yaml
11 |
12 | ap = argparse.ArgumentParser("Progressive Transformers")
13 |
14 | # Choose between Train and Test
15 | ap.add_argument("mode", choices=["train", "test"],
16 | help="train a model or test")
17 | # Path to Config
18 | ap.add_argument("config_path", type=str,
19 | help="path to YAML config file")
20 |
21 | # Optional path to checkpoint
22 | ap.add_argument("--ckpt", type=str,
23 | help="path to model checkpoint")
24 |
25 | args = ap.parse_args()
26 |
27 | # If Train
28 | if args.mode == "train":
29 | train(cfg_file=args.config_path, ckpt=args.ckpt)
30 | # If Test
31 | elif args.mode == "test":
32 | test(cfg_file=args.config_path, ckpt=args.ckpt)
33 | else:
34 | raise ValueError("Unknown mode")
35 |
36 | if __name__ == "__main__":
37 | main()
38 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/batch.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/batch.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/builders.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/builders.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/constants.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/constants.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/data.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/data.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/decoders.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/decoders.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/dtw.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/dtw.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/embeddings.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/embeddings.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/encoders.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/encoders.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/helpers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/helpers.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/initialization.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/initialization.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/loss.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/model.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/model.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/plot_videos.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/plot_videos.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/prediction.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/prediction.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/search.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/search.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/training.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/training.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/training.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/training.cpython-39.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/transformer_layers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/transformer_layers.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/__pycache__/vocabulary.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/vocabulary.cpython-37.pyc
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/batch.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | """
4 | Implementation of a mini-batch.
5 | """
6 |
7 | import torch
8 | import torch.nn.functional as F
9 |
10 | from constants import TARGET_PAD
11 |
12 | class Batch:
13 | """Object for holding a batch of data with mask during training.
14 | Input is a batch from a torch text iterator.
15 | """
16 |
17 | def __init__(self, torch_batch, pad_index, model):
18 |
19 | """
20 | Create a new joey batch from a torch batch.
21 | This batch extends torch text's batch attributes with src and trg
22 | length, masks, number of non-padded tokens in trg.
23 | Furthermore, it can be sorted by src length.
24 |
 25 |         :param torch_batch: batch from the torch text iterator
 26 |         :param pad_index: index of the source padding token
 27 |         :param model: model providing use_cuda, just_count_in and future_prediction
28 | """
29 | self.src, self.src_lengths = torch_batch.src
30 | self.src_mask = (self.src != pad_index).unsqueeze(1)
31 | self.nseqs = self.src.size(0)
32 | self.trg_input = None
33 | self.trg = None
34 | self.trg_mask = None
35 | self.trg_lengths = None
36 | self.ntokens = None
37 |
38 | self.file_paths = torch_batch.file_paths
39 | self.use_cuda = model.use_cuda
40 | self.target_pad = TARGET_PAD
41 | # Just Count
42 | self.just_count_in = model.just_count_in
43 | # Future Prediction
44 | self.future_prediction = model.future_prediction
45 |
46 | if hasattr(torch_batch, "trg"):
47 | trg = torch_batch.trg
48 | trg_lengths = torch_batch.trg.shape[1]
49 | # trg_input is used for teacher forcing, last one is cut off
50 | # Remove the last frame for target input, as inputs are only up to frame N-1
51 | self.trg_input = trg.clone()[:, :-1,:]
52 |
53 | self.trg_lengths = trg_lengths
54 | # trg is used for loss computation, shifted by one since BOS
55 | self.trg = trg.clone()[:, 1:, :]
56 |
57 | # Just Count
58 | if self.just_count_in:
 59 |                 # If Just Count, keep only the counter column of trg_input
60 | self.trg_input = self.trg_input[:, :, -1:]
61 |
62 | # Future Prediction
63 | if self.future_prediction != 0:
64 | # Loop through the future prediction, concatenating the frames shifted across once each time
65 | future_trg = torch.Tensor()
66 | # Concatenate each frame (Not counter)
67 | for i in range(0, self.future_prediction):
68 | future_trg = torch.cat((future_trg, self.trg[:, i:-(self.future_prediction - i), :-1].clone()), dim=2)
69 | # Create the final target using the collected future_trg and original trg
70 | self.trg = torch.cat((future_trg, self.trg[:,:-self.future_prediction,-1:]), dim=2)
71 |
72 | # Cut off the last N frames of the trg_input
73 | self.trg_input = self.trg_input[:, :-self.future_prediction, :]
74 |
75 | # Target Pad is dynamic, so we exclude the padded areas from the loss computation
76 | trg_mask = (self.trg_input != self.target_pad).unsqueeze(1)
 77 |             # Pad the target mask so its last two dimensions are square, e.g. (16, 1, 120, 120) -
 78 |             # replication padding simply continues the existing True/False pattern
79 | pad_amount = self.trg_input.shape[1] - self.trg_input.shape[2]
80 | # Create the target mask the same size as target input
81 | self.trg_mask = (F.pad(input=trg_mask.double(), pad=(pad_amount, 0, 0, 0), mode='replicate') == 1.0)
82 | self.ntokens = (self.trg != pad_index).data.sum().item()
83 |
84 | if self.use_cuda:
85 | self._make_cuda()
86 |
87 | # If using Cuda
88 | def _make_cuda(self):
89 | """
90 | Move the batch to GPU
91 |
92 | :return:
93 | """
94 | self.src = self.src.cuda()
95 | self.src_mask = self.src_mask.cuda()
96 |
97 | if self.trg_input is not None:
98 | self.trg_input = self.trg_input.cuda()
99 | self.trg = self.trg.cuda()
100 | self.trg_mask = self.trg_mask.cuda()
101 |
102 |
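# Standalone illustration of the future_prediction target construction above,
# mirroring the loop in Batch.__init__ with assumed toy shapes
# (batch=1, 5 frames, 2 joint values + 1 counter value). After the loop, each
# remaining frame stacks the joints of the next future_prediction frames,
# followed by the original counter column.
if __name__ == "__main__":
    trg = torch.arange(15, dtype=torch.float32).reshape(1, 5, 3)
    future_prediction = 2
    future_trg = torch.Tensor()
    for i in range(future_prediction):
        future_trg = torch.cat(
            (future_trg, trg[:, i:-(future_prediction - i), :-1]), dim=2)
    trg_out = torch.cat((future_trg, trg[:, :-future_prediction, -1:]), dim=2)
    print(trg_out.shape)  # torch.Size([1, 3, 5]): 2 joints x 2 frames + counter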
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/constants.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Defining global constants
4 | """
5 |
 6 | UNK_TOKEN = '<unk>'
 7 | PAD_TOKEN = '<pad>'
 8 | BOS_TOKEN = '<s>'
 9 | EOS_TOKEN = '</s>'
10 |
11 | TARGET_PAD = 0.0
12 |
13 | DEFAULT_UNK_ID = lambda: 0
14 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/decoders.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from torch import Tensor
3 |
4 | from helpers import freeze_params, ConfigurationError, subsequent_mask, uneven_subsequent_mask
5 | from transformer_layers import PositionalEncoding, \
6 | TransformerDecoderLayer
7 |
8 | class Decoder(nn.Module):
9 | """
10 | Base decoder class
11 | """
12 |
13 | @property
14 | def output_size(self):
15 | """
16 | Return the output size (size of the target vocabulary)
17 |
18 | :return:
19 | """
20 | return self._output_size
21 |
22 | class TransformerDecoder(Decoder):
23 | """
24 | A transformer decoder with N masked layers.
25 | Decoder layers are masked so that an attention head cannot see the future.
26 | """
27 |
28 | def __init__(self,
29 | num_layers: int = 4,
30 | num_heads: int = 8,
31 | hidden_size: int = 512,
32 | ff_size: int = 2048,
33 | dropout: float = 0.1,
34 | emb_dropout: float = 0.1,
35 | vocab_size: int = 1,
36 | freeze: bool = False,
37 | trg_size: int = 97,
38 | decoder_trg_trg_: bool = True,
39 | **kwargs):
40 | """
41 | Initialize a Transformer decoder.
42 |
43 | :param num_layers: number of Transformer layers
44 | :param num_heads: number of heads for each layer
45 | :param hidden_size: hidden size
46 | :param ff_size: position-wise feed-forward size
47 | :param dropout: dropout probability (1-keep)
48 | :param emb_dropout: dropout probability for embeddings
49 | :param vocab_size: size of the output vocabulary
 50 |         :param freeze: set to True to keep all decoder parameters fixed
51 | :param kwargs:
52 | """
53 | super(TransformerDecoder, self).__init__()
54 |
55 | self._hidden_size = hidden_size
56 |
57 | # Dynamic output size depending on the target size
58 | self._output_size = trg_size
59 |
60 | # create num_layers decoder layers and put them in a list
61 | self.layers = nn.ModuleList([TransformerDecoderLayer(
62 | size=hidden_size, ff_size=ff_size, num_heads=num_heads,
63 | dropout=dropout, decoder_trg_trg=decoder_trg_trg_) for _ in range(num_layers)])
64 |
65 | self.pe = PositionalEncoding(hidden_size,mask_count=True)
66 | self.layer_norm = nn.LayerNorm(hidden_size, eps=1e-6)
67 |
68 | self.emb_dropout = nn.Dropout(p=emb_dropout)
69 |
70 | # Output layer to be the size of joints vector + 1 for counter (total is trg_size)
71 | self.output_layer = nn.Linear(hidden_size, trg_size, bias=False)
72 |
73 | if freeze:
74 | freeze_params(self)
75 |
76 | def forward(self,
77 | trg_embed: Tensor = None,
78 | encoder_output: Tensor = None,
79 | src_mask: Tensor = None,
80 | trg_mask: Tensor = None,
81 | **kwargs):
82 | """
83 | Transformer decoder forward pass.
84 |
85 | :param trg_embed: embedded targets
86 | :param encoder_output: source representations
87 | :param encoder_hidden: unused
88 | :param src_mask:
89 | :param unroll_steps: unused
90 | :param hidden: unused
91 | :param trg_mask: to mask out target paddings
92 | Note that a subsequent mask is applied here.
93 | :param kwargs:
94 | :return:
95 | """
96 | assert trg_mask is not None, "trg_mask required for Transformer"
97 |
98 | # add position encoding to word embedding
99 | x = self.pe(trg_embed)
100 | # Dropout if given
101 | x = self.emb_dropout(x)
102 |
103 | padding_mask = trg_mask
104 | # Create subsequent mask for decoding
105 | sub_mask = subsequent_mask(
106 | trg_embed.size(1)).type_as(trg_mask)
107 |
108 | # Apply each layer to the input
109 | for layer in self.layers:
110 | x = layer(x=x, memory=encoder_output,
111 | src_mask=src_mask, trg_mask=sub_mask, padding_mask=padding_mask)
112 |
113 | # Apply a layer normalisation
114 | x = self.layer_norm(x)
115 | # Output layer turns it back into vectors of size trg_size
116 | output = self.output_layer(x)
117 |
118 | return output, x, None, None
119 |
120 | def __repr__(self):
121 | return "%s(num_layers=%r, num_heads=%r)" % (
122 | self.__class__.__name__, len(self.layers),
123 | self.layers[0].trg_trg_att.num_heads)
124 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/dtw.py:
--------------------------------------------------------------------------------
1 | from numpy import array, zeros, full, argmin, inf, ndim
2 | from scipy.spatial.distance import cdist
3 | from math import isinf
4 |
5 | """
 6 | Dynamic time warping (DTW) is used as a similarity measure between temporal sequences.
7 | Original DTW code found at https://github.com/pierre-rouanet/dtw
8 |
9 | """
10 |
11 | # Apply DTW
12 | def dtw(x, y, dist, warp=1, w=inf, s=1.0):
13 | """
14 | Computes Dynamic Time Warping (DTW) of two sequences.
15 |
16 | :param array x: N1*M array
17 | :param array y: N2*M array
18 | :param func dist: distance used as cost measure
19 | :param int warp: how many shifts are computed.
20 | :param int w: window size limiting the maximal distance between indices of matched entries |i,j|.
21 | :param float s: weight applied on off-diagonal moves of the path. As s gets larger, the warping path is increasingly biased towards the diagonal
 22 |     Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
23 | """
24 | assert len(x)
25 | assert len(y)
26 | assert isinf(w) or (w >= abs(len(x) - len(y)))
27 | assert s > 0
28 | r, c = len(x), len(y)
29 | if not isinf(w):
30 | D0 = full((r + 1, c + 1), inf)
31 | for i in range(1, r + 1):
32 | D0[i, max(1, i - w):min(c + 1, i + w + 1)] = 0
33 | D0[0, 0] = 0
34 | else:
35 | D0 = zeros((r + 1, c + 1))
36 | D0[0, 1:] = inf
37 | D0[1:, 0] = inf
38 | D1 = D0[1:, 1:] # view
39 | for i in range(r):
40 | for j in range(c):
41 | if (isinf(w) or (max(0, i - w) <= j <= min(c, i + w))):
42 | D1[i, j] = dist(x[i], y[j])
43 | C = D1.copy()
44 | jrange = range(c)
45 | for i in range(r):
46 | if not isinf(w):
47 | jrange = range(max(0, i - w), min(c, i + w + 1))
48 | for j in jrange:
49 | min_list = [D0[i, j]]
50 | for k in range(1, warp + 1):
51 | i_k = min(i + k, r)
52 | j_k = min(j + k, c)
53 | min_list += [D0[i_k, j] * s, D0[i, j_k] * s]
54 | D1[i, j] += min(min_list)
55 | if len(x) == 1:
56 | path = zeros(len(y)), range(len(y))
57 | elif len(y) == 1:
58 | path = range(len(x)), zeros(len(x))
59 | else:
60 | path = _traceback(D0)
61 | return D1[-1, -1], C, D1, path
62 |
63 | def _traceback(D):
64 | i, j = array(D.shape) - 2
65 | p, q = [i], [j]
66 | while (i > 0) or (j > 0):
67 | tb = argmin((D[i, j], D[i, j + 1], D[i + 1, j]))
68 | if tb == 0:
69 | i -= 1
70 | j -= 1
71 | elif tb == 1:
72 | i -= 1
73 | else: # (tb == 2):
74 | j -= 1
75 | p.insert(0, i)
76 | q.insert(0, j)
77 | return array(p), array(q)
78 |
79 |
80 | if __name__ == '__main__':
81 | w = inf
82 | s = 1.0
83 | if 1: # 1-D numeric
84 | from sklearn.metrics.pairwise import manhattan_distances
85 | x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
86 | y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]
87 | dist_fun = manhattan_distances
88 | w = 1
89 | # s = 1.2
90 | elif 0: # 2-D numeric
91 | from sklearn.metrics.pairwise import euclidean_distances
92 | x = [[0, 0], [0, 1], [1, 1], [1, 2], [2, 2], [4, 3], [2, 3], [1, 1], [2, 2], [0, 1]]
93 | y = [[1, 0], [1, 1], [1, 1], [2, 1], [4, 3], [4, 3], [2, 3], [3, 1], [1, 2], [1, 0]]
94 | dist_fun = euclidean_distances
95 | else: # 1-D list of strings
96 | from nltk.metrics.distance import edit_distance
97 | # x = ['we', 'shelled', 'clams', 'for', 'the', 'chowder']
98 | # y = ['class', 'too']
99 | x = ['i', 'soon', 'found', 'myself', 'muttering', 'to', 'the', 'walls']
100 | y = ['see', 'drown', 'himself']
101 | # x = 'we talked about the situation'.split()
102 | # y = 'we talked about the situation'.split()
103 | dist_fun = edit_distance
104 | dist, cost, acc, path = dtw(x, y, dist_fun, w=w, s=s)
105 |
106 |     # Visualize
107 | from matplotlib import pyplot as plt
108 | plt.imshow(cost.T, origin='lower', cmap=plt.cm.Reds, interpolation='nearest')
109 | plt.plot(path[0], path[1], '-o') # relation
110 | plt.xticks(range(len(x)), x)
111 | plt.yticks(range(len(y)), y)
112 | plt.xlabel('x')
113 | plt.ylabel('y')
114 | plt.axis('tight')
115 | if isinf(w):
116 | plt.title('Minimum distance: {}, slope weight: {}'.format(dist, s))
117 | else:
118 |         plt.title('Minimum distance: {}, window width: {}, slope weight: {}'.format(dist, w, s))
119 | plt.show()
120 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/embeddings.py:
--------------------------------------------------------------------------------
1 | import math
2 | from torch import nn, Tensor
3 | from helpers import freeze_params
4 | import torch
5 |
6 | class MaskedNorm(nn.Module):
7 | """
8 | Original Code from:
9 | https://discuss.pytorch.org/t/batchnorm-for-different-sized-samples-in-batch/44251/8
10 | """
11 |
12 | def __init__(self, norm_type, num_groups, num_features):
13 | super().__init__()
14 | self.norm_type = norm_type
15 | if self.norm_type == "batch":
16 | self.norm = nn.BatchNorm1d(num_features=num_features)
17 | elif self.norm_type == "group":
18 | self.norm = nn.GroupNorm(num_groups=num_groups, num_channels=num_features)
19 | elif self.norm_type == "layer":
20 | self.norm = nn.LayerNorm(normalized_shape=num_features)
21 | else:
22 | raise ValueError("Unsupported Normalization Layer")
23 |
24 | self.num_features = num_features
25 |
26 | def forward(self, x: Tensor, mask: Tensor):
27 | if self.training:
28 | reshaped = x.reshape([-1, self.num_features])
29 | reshaped_mask = mask.reshape([-1, 1]) > 0
30 | selected = torch.masked_select(reshaped, reshaped_mask).reshape(
31 | [-1, self.num_features]
32 | )
33 | batch_normed = self.norm(selected)
34 | scattered = reshaped.masked_scatter(reshaped_mask, batch_normed)
35 | return scattered.reshape([x.shape[0], -1, self.num_features])
36 | else:
37 | reshaped = x.reshape([-1, self.num_features])
38 | batched_normed = self.norm(reshaped)
39 | return batched_normed.reshape([x.shape[0], -1, self.num_features])
40 |
41 | class Embeddings(nn.Module):
42 |
43 | """
44 | Simple embeddings class
45 | """
46 |
47 | # pylint: disable=unused-argument
48 | def __init__(self,
49 | embedding_dim: int = 64,
50 | scale: bool = False,
51 | vocab_size: int = 0,
52 | padding_idx: int = 1,
53 | freeze: bool = False,
54 | **kwargs):
55 | """
56 | Create new embeddings for the vocabulary.
57 | Use scaling for the Transformer.
58 |
59 | :param embedding_dim:
60 | :param scale:
61 | :param vocab_size:
62 | :param padding_idx:
63 | :param freeze: freeze the embeddings during training
64 | """
65 | super(Embeddings, self).__init__()
66 |
67 | self.embedding_dim = embedding_dim
68 | self.scale = scale
69 | self.vocab_size = vocab_size
70 | self.lut = nn.Embedding(vocab_size, self.embedding_dim,
71 | padding_idx=padding_idx)
72 |
73 | if freeze:
74 | freeze_params(self)
75 |
76 | # pylint: disable=arguments-differ
77 | def forward(self, x: Tensor) -> Tensor:
78 | """
79 | Perform lookup for input `x` in the embedding table.
80 |
81 | :param x: index in the vocabulary
82 | :return: embedded representation for `x`
83 | """
84 | if self.scale:
85 | return self.lut(x) * math.sqrt(self.embedding_dim)
86 | return self.lut(x)
87 |
88 | def __repr__(self):
89 | return "%s(embedding_dim=%d, vocab_size=%d)" % (
90 | self.__class__.__name__, self.embedding_dim, self.vocab_size)
91 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/encoders.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import torch
4 | import torch.nn as nn
5 | from torch import Tensor
6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
7 | import torch.nn.functional as F
8 |
9 | from helpers import freeze_params
10 | from transformer_layers import \
11 | TransformerEncoderLayer, PositionalEncoding
12 |
13 | from embeddings import MaskedNorm
14 |
15 | #pylint: disable=abstract-method
16 | class Encoder(nn.Module):
17 | """
18 | Base encoder class
19 | """
20 | @property
21 | def output_size(self):
22 | """
23 | Return the output size
24 |
25 | :return:
26 | """
27 | return self._output_size
28 |
29 | class TransformerEncoder(Encoder):
30 | """
31 | Transformer Encoder
32 | """
33 |
34 | #pylint: disable=unused-argument
35 | def __init__(self,
36 | hidden_size: int = 512,
37 | ff_size: int = 2048,
38 | num_layers: int = 8,
39 | num_heads: int = 4,
40 | dropout: float = 0.1,
41 | emb_dropout: float = 0.1,
42 | freeze: bool = False,
43 | **kwargs):
44 | """
45 | Initializes the Transformer.
46 | :param hidden_size: hidden size and size of embeddings
47 | :param ff_size: position-wise feed-forward layer size.
 48 |             (Here this is 4*hidden_size, matching the configs.)
49 | :param num_layers: number of layers
50 | :param num_heads: number of heads for multi-headed attention
51 | :param dropout: dropout probability for Transformer layers
52 | :param emb_dropout: Is applied to the input (word embeddings).
53 | :param freeze: freeze the parameters of the encoder during training
54 | :param kwargs:
55 | """
56 | super(TransformerEncoder, self).__init__()
57 |
58 | # build all (num_layers) layers
59 | self.layers = nn.ModuleList([
60 | TransformerEncoderLayer(size=hidden_size, ff_size=ff_size,
61 | num_heads=num_heads, dropout=dropout)
62 | for _ in range(num_layers)])
63 |
64 | self.layer_norm = nn.LayerNorm(hidden_size, eps=1e-6)
65 | self.pe = PositionalEncoding(hidden_size)
66 | self.emb_dropout = nn.Dropout(p=emb_dropout)
67 | self._output_size = hidden_size
68 |
69 | if freeze:
70 | freeze_params(self)
71 |
72 | #pylint: disable=arguments-differ
73 | def forward(self,
74 | embed_src: Tensor,
75 | src_length: Tensor,
76 | mask: Tensor) -> (Tensor, Tensor):
77 | """
78 | Pass the input (and mask) through each layer in turn.
79 | Applies a Transformer encoder to sequence of embeddings x.
80 | The input mini-batch x needs to be sorted by src length.
81 | x and mask should have the same dimensions [batch, time, dim].
82 |
83 | :param embed_src: embedded src inputs,
84 | shape (batch_size, src_len, embed_size)
85 | :param src_length: length of src inputs
86 | (counting tokens before padding), shape (batch_size)
 87 |         :param mask: indicates padding areas (zeros where padding), shape
 88 |             (batch_size, 1, src_len)
 89 |         :return:
 90 |             - output: hidden states with
 91 |               shape (batch_size, max_length, hidden_size),
 92 |             - None, since the Transformer encoder returns
 93 |               no separate final hidden state
94 | """
95 |
96 | x = embed_src
97 |
98 | # Add position encoding to word embeddings
99 | x = self.pe(x)
100 | # Add Dropout
101 | x = self.emb_dropout(x)
102 |
103 | # Apply each layer to the input
104 | for layer in self.layers:
105 | x = layer(x, mask)
106 |
107 | return self.layer_norm(x), None
108 |
109 | def __repr__(self):
110 | return "%s(num_layers=%r, num_heads=%r)" % (
111 | self.__class__.__name__, len(self.layers),
112 | self.layers[0].src_src_att.num_heads)
113 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/external_metrics/mscoco_rouge.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # File Name : mscoco_rouge.py
4 | #
 5 | # Description : Computes ROUGE-L metric as described by Lin and Hovy (2004)
6 | #
7 | # Creation Date : 2015-01-07 06:03
8 | # Author : Ramakrishna Vedantam
9 |
10 |
11 | def my_lcs(string, sub):
12 | """
13 | Calculates longest common subsequence for a pair of tokenized strings
14 | :param string : list of str : tokens from a string split using whitespace
15 | :param sub : list of str : shorter string, also split using whitespace
 16 |     :returns: length (int): length of the longest common subsequence between the two strings
17 |
18 | Note: my_lcs only gives length of the longest common subsequence, not the actual LCS
19 | """
20 | if len(string) < len(sub):
21 | sub, string = string, sub
22 |
23 | lengths = [[0 for i in range(0, len(sub) + 1)] for j in range(0, len(string) + 1)]
24 |
25 | for j in range(1, len(sub) + 1):
26 | for i in range(1, len(string) + 1):
27 | if string[i - 1] == sub[j - 1]:
28 | lengths[i][j] = lengths[i - 1][j - 1] + 1
29 | else:
30 | lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1])
31 |
32 | return lengths[len(string)][len(sub)]
33 |
34 |
35 | def calc_score(hypotheses, references, beta=1.2):
36 | """
37 | Compute ROUGE-L score given one candidate and references for an image
38 | :param hypotheses: str : candidate sentence to be evaluated
39 | :param references: list of str : COCO reference sentences for the particular image to be evaluated
 40 |     :returns score: float (ROUGE-L score for the candidate evaluated against references)
41 | """
42 | assert len(hypotheses) == 1
43 | assert len(references) > 0
44 | prec = []
45 | rec = []
46 |
47 | # split into tokens
48 | token_c = hypotheses[0].split(" ")
49 |
50 | for reference in references:
51 | # split into tokens
52 | token_r = reference.split(" ")
53 | # compute the longest common subsequence
54 | lcs = my_lcs(token_r, token_c)
55 | prec.append(lcs / float(len(token_c)))
56 | rec.append(lcs / float(len(token_r)))
57 |
58 | prec_max = max(prec)
59 | rec_max = max(rec)
60 |
61 | if prec_max != 0 and rec_max != 0:
62 | score = ((1 + beta ** 2) * prec_max * rec_max) / float(
63 | rec_max + beta ** 2 * prec_max
64 | )
65 | else:
66 | score = 0.0
67 | return score
68 |
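# Example usage with hypothetical sentences: the longest common subsequence of
# the two token lists below is "the cat on the mat" (5 tokens), giving
# precision 5/6 and recall 5/7 before the F-measure with beta=1.2 is applied.
if __name__ == "__main__":
    print(calc_score(hypotheses=["the cat sat on the mat"],
                     references=["the cat was sitting on the mat"]))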
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/loss.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Module to implement training loss
4 | """
5 |
6 | from torch import nn, Tensor
7 |
8 | class RegLoss(nn.Module):
9 | """
10 | Regression Loss
11 | """
12 |
13 | def __init__(self, cfg, target_pad=0.0):
14 | super(RegLoss, self).__init__()
15 |
16 | self.loss = cfg["training"]["loss"].lower()
17 |
18 | if self.loss == "l1":
19 | self.criterion = nn.L1Loss()
20 | elif self.loss == "mse":
21 | self.criterion = nn.MSELoss()
22 |
23 | else:
24 | print("Loss not found - revert to default L1 loss")
25 | self.criterion = nn.L1Loss()
26 |
27 | model_cfg = cfg["model"]
28 |
29 | self.target_pad = target_pad
30 | self.loss_scale = model_cfg.get("loss_scale", 1.0)
31 |
32 | # pylint: disable=arguments-differ
33 | def forward(self, preds, targets):
34 |
35 | loss_mask = (targets != self.target_pad)
36 |
37 | # Find the masked predictions and targets using loss mask
38 | preds_masked = preds * loss_mask
39 | targets_masked = targets * loss_mask
40 |
41 | # Calculate loss just over the masked predictions
42 | loss = self.criterion(preds_masked, targets_masked)
43 |
44 | # Multiply loss by the loss scale
45 | if self.loss_scale != 1.0:
46 | loss = loss * self.loss_scale
47 |
48 | return loss
49 |
50 | class XentLoss(nn.Module):
51 | """
52 | Cross-Entropy Loss with optional label smoothing
53 | """
54 |
55 | def __init__(self, pad_index: int, smoothing: float = 0.0):
56 | super(XentLoss, self).__init__()
57 | self.smoothing = smoothing
58 | self.pad_index = pad_index
59 | # standard xent loss
60 | self.criterion = nn.NLLLoss(ignore_index=self.pad_index,
61 | reduction='sum')
62 |
63 | # pylint: disable=arguments-differ
64 | def forward(self, log_probs, targets):
65 | """
66 | Compute the cross-entropy between logits and targets.
67 | If label smoothing is used, target distributions are not one-hot, but
68 | "1-smoothing" for the correct target token and the rest of the
69 | probability mass is uniformly spread across the other tokens.
70 | :param log_probs: log probabilities as predicted by model
71 | :param targets: target indices
72 | :return:
73 | """
74 | # targets: indices with batch*seq_len
75 | targets = targets.contiguous().view(-1)
76 | loss = self.criterion(
77 | log_probs.contiguous().view(-1, log_probs.size(-1)), targets)
78 |
79 | return loss
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/make_new_vocab.py:
--------------------------------------------------------------------------------
1 | from vocabulary import *
2 |
3 | import os
4 | train_path = "Data/augmented_data/enhanced-gloss/train/"
5 | for fname in os.listdir(train_path):
6 | input_file = open(train_path + fname, "r").readlines()
7 | vocab_ = Vocabulary()
8 |
9 | for line in input_file:
10 | vocab_._from_list(line.split())
11 | vocab_.to_file("Configs/src_%s_vocab.txt"%fname)
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/metrics.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | This module holds various MT evaluation metrics.
4 | """
5 |
6 | from external_metrics import sacrebleu
7 | from external_metrics import mscoco_rouge
8 | import logging
9 | import numpy as np
10 |
11 | def wer(hypotheses, references):
12 | wer = del_rate = ins_rate = sub_rate = 0
13 | n_seq = len(hypotheses)
14 |
15 | for h, r in zip(hypotheses, references):
16 | res = wer_single(r=r, h=h)
17 | wer += res["wer"] / n_seq
18 | del_rate += res["del"] / n_seq
19 | ins_rate += res["ins"] / n_seq
20 | sub_rate += res["sub"] / n_seq
21 |
22 | return {"wer": wer, "del": del_rate, "ins": ins_rate, "sub": sub_rate}
23 |
24 | def wer_single(r, h):
25 | edit_distance_matrix = editDistance(r=r, h=h)
26 | step_list = getStepList(r=r, h=h, d=edit_distance_matrix)
27 |
28 | min_distance = float(edit_distance_matrix[len(r)][len(h)])
29 | num_del = float(np.sum([s == "d" for s in step_list]))
30 | num_ins = float(np.sum([s == "i" for s in step_list]))
31 | num_sub = float(np.sum([s == "s" for s in step_list]))
32 |
33 | word_error_rate = round((min_distance / len(r) * 100), 4)
34 | del_rate = round((num_del / len(r) * 100), 4)
35 | ins_rate = round((num_ins / len(r) * 100), 4)
36 | sub_rate = round((num_sub / len(r) * 100), 4)
37 |
38 | return {"wer": word_error_rate, "del": del_rate, "ins": ins_rate, "sub": sub_rate}
39 |
40 | # Calculate ROUGE scores
41 | def rouge(hypotheses, references):
42 | rouge_score = 0
43 | n_seq = len(hypotheses)
44 |
45 | for h, r in zip(hypotheses, references):
46 | rouge_score += mscoco_rouge.calc_score(hypotheses=[h], references=[r]) / n_seq
47 |
48 | return rouge_score
49 |
50 | # Calculate CHRF scores
51 | def chrf(hypotheses, references):
52 | """
53 | Character F-score from sacrebleu
54 |
55 | :param hypotheses: list of hypotheses (strings)
56 | :param references: list of references (strings)
57 | :return:
58 | """
59 | return sacrebleu.corpus_chrf(hypotheses=hypotheses, references=references)
60 |
61 | # Calculate BLEU scores
62 | def bleu(hypotheses, references, all=False):
63 | """
64 | Raw corpus BLEU from sacrebleu (without tokenization)
65 |
66 | :param hypotheses: list of hypotheses (strings)
67 | :param references: list of references (strings)
68 | :return:
69 | """
70 | logger = logging.getLogger(__name__)
71 | bleu_scores = sacrebleu.raw_corpus_bleu(
72 | sys_stream=hypotheses, ref_streams=[references]
73 | ).scores
74 | scores = {}
75 | for n in range(len(bleu_scores)):
76 | scores["bleu" + str(n + 1)] = bleu_scores[n]
77 |
78 | rouge_score = rouge(hypotheses, references) * 100
79 | #wer_d = wer(hypotheses, references)
80 | #werStr = "WER: " + str(wer_d["wer"])
81 |
82 | bleu1Str = "BLEU-1: " + str(scores["bleu1"])
83 | bleu2Str = "BLEU-2: " + str(scores["bleu2"])
84 | bleu3Str = "BLEU-3: " + str(scores["bleu3"])
85 | bleu4Str = "BLEU-4: " + str(scores["bleu4"])
86 | rougeStr = "Rouge: " + str(rouge_score)
87 |
88 | logger.info(bleu1Str)
89 | logger.info(bleu2Str)
90 | logger.info(bleu3Str)
91 | logger.info(bleu4Str)
92 | logger.info(rougeStr)
93 |
94 | print("--------------- New Scores ------------")
95 | print(bleu1Str)
96 | print(bleu2Str)
97 | print(bleu3Str)
98 | print(bleu4Str)
99 | print(rougeStr)
100 | #print(werStr)
101 |
102 | return bleu_scores[3]
--------------------------------------------------------------------------------
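
`wer_single` above calls `editDistance` and `getStepList`, which are not defined in this file; a minimal sketch of the two helpers, assuming a standard Levenshtein DP table and the "d"/"i"/"s" step codes that `wer_single` counts:

    import numpy as np

    def editDistance(r, h):
        # DP table of edit distances between reference r and hypothesis h
        d = np.zeros((len(r) + 1, len(h) + 1), dtype=np.int32)
        d[:, 0] = np.arange(len(r) + 1)
        d[0, :] = np.arange(len(h) + 1)
        for i in range(1, len(r) + 1):
            for j in range(1, len(h) + 1):
                if r[i - 1] == h[j - 1]:
                    d[i][j] = d[i - 1][j - 1]
                else:
                    d[i][j] = min(d[i - 1][j - 1] + 1,  # substitution
                                  d[i][j - 1] + 1,      # insertion
                                  d[i - 1][j] + 1)      # deletion
        return d

    def getStepList(r, h, d):
        # Backtrace the DP table into step codes: "e" (match), "s", "i", "d"
        x, y = len(r), len(h)
        steps = []
        while x > 0 or y > 0:
            if x > 0 and y > 0 and d[x][y] == d[x - 1][y - 1] and r[x - 1] == h[y - 1]:
                steps.append("e")
                x, y = x - 1, y - 1
            elif y > 0 and d[x][y] == d[x][y - 1] + 1:
                steps.append("i")
                y -= 1
            elif x > 0 and y > 0 and d[x][y] == d[x - 1][y - 1] + 1:
                steps.append("s")
                x, y = x - 1, y - 1
            else:
                steps.append("d")
                x -= 1
        return steps[::-1]
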
/code/generation-model/progressive_transformer/optim/lookahead.py:
--------------------------------------------------------------------------------
1 | # from https://github.com/mgrankin/over9000/blob/master/lookahead.py
2 | # Lookahead implementation from https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py
3 |
4 | import itertools as it
5 | from torch.optim import Optimizer, Adam
6 |
7 |
8 | # TODO: This code is buggy. It did not support state saving;
9 | # some ad-hoc fixes were made to get it working, but no one should
10 | # use it for training on condor etc.
11 | # Sadly this eliminates over9000 + ranger as options for training
12 | # on servers.
13 | class Lookahead(Optimizer):
14 | def __init__(self, base_optimizer, alpha=0.5, k=6):
15 | if not 0.0 <= alpha <= 1.0:
16 | raise ValueError(f"Invalid slow update rate: {alpha}")
17 | if not 1 <= k:
18 | raise ValueError(f"Invalid lookahead steps: {k}")
19 |
20 | self.optimizer = base_optimizer
21 | self.state = base_optimizer.state
22 | self.defaults = base_optimizer.defaults
23 |
24 | self.param_groups = self.optimizer.param_groups
25 | self.alpha = alpha
26 | self.k = k
27 | for group in self.param_groups:
28 | group["step_counter"] = 0
29 | self.slow_weights = [
30 | [p.clone().detach() for p in group["params"]] for group in self.param_groups
31 | ]
32 |
33 | for w in it.chain(*self.slow_weights):
34 | w.requires_grad = False
35 |
36 | def step(self, closure=None):
37 |         loss = None
38 |         if closure is not None:
39 |             loss = closure()
40 |         self.optimizer.step()  # the inner step returns None here, so keep the closure's loss
41 | for group, slow_weights in zip(self.param_groups, self.slow_weights):
42 | group["step_counter"] += 1
43 | if group["step_counter"] % self.k != 0:
44 | continue
45 | for p, q in zip(group["params"], slow_weights):
46 | if p.grad is None:
47 | continue
48 | q.data.add_(self.alpha, p.data - q.data)
49 | p.data.copy_(q.data)
50 | return loss
51 |
52 |
53 | def LookaheadAdam(params, alpha=0.5, k=6, *args, **kwargs):
54 | adam = Adam(params, *args, **kwargs)
55 | return Lookahead(adam, alpha, k)
56 |
--------------------------------------------------------------------------------
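
A minimal usage sketch for `LookaheadAdam` above (the import path is assumed; adjust it to wherever this module lives in your setup):

    import torch
    from optim.lookahead import LookaheadAdam  # import path assumed

    model = torch.nn.Linear(10, 2)
    opt = LookaheadAdam(model.parameters(), alpha=0.5, k=6, lr=1e-3)

    for _ in range(12):
        opt.zero_grad()
        loss = model(torch.randn(8, 10)).pow(2).mean()
        loss.backward()
        opt.step()  # slow weights are synced back into the params every k=6 steps
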
/code/generation-model/progressive_transformer/optim/over9000.py:
--------------------------------------------------------------------------------
1 | # from https://raw.githubusercontent.com/mgrankin/over9000/master/over9000.py
2 |
3 | from optim.lookahead import Lookahead
4 | from optim.ralamb import Ralamb
5 |
6 | # Lookahead implementation from https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py
7 | # RAdam + LARS implementation from https://gist.github.com/redknightlois/c4023d393eb8f92bb44b2ab582d7ec20
8 |
9 |
10 | # RAdam + LARS + LookAHead
11 | def Over9000(params, alpha=0.5, k=6, *args, **kwargs):
12 | ralamb = Ralamb(params, *args, **kwargs)
13 | return Lookahead(ralamb, alpha, k)
14 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/optim/ranger.py:
--------------------------------------------------------------------------------
1 | # from https://github.com/mgrankin/over9000/blob/master/ranger.py
2 |
3 | from optim.lookahead import Lookahead
4 | from optim.radam import RAdam
5 |
6 |
7 | def Ranger(params, alpha=0.5, k=6, *args, **kwargs):
8 | radam = RAdam(params, *args, **kwargs)
9 | return Lookahead(radam, alpha, k)
10 |
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/prediction.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import math
4 |
5 | import torch
6 | from torchtext.data import Dataset
7 |
8 | from helpers import bpe_postprocess, load_config, get_latest_checkpoint, \
9 | load_checkpoint, calculate_dtw
10 | from model import build_model, Model
11 | from batch import Batch
12 | from data import load_data, make_data_iter
13 | from constants import UNK_TOKEN, PAD_TOKEN, EOS_TOKEN
14 |
15 | # Validate epoch given a dataset
16 | def validate_on_data(model: Model,
17 | data: Dataset,
18 | batch_size: int,
19 | max_output_length: int,
20 | eval_metric: str,
21 | loss_function: torch.nn.Module = None,
22 | batch_type: str = "sentence",
23 | type = "val",
24 | BT_model = None):
25 |
26 | valid_iter = make_data_iter(
27 | dataset=data, batch_size=batch_size, batch_type=batch_type,
28 | shuffle=True, train=False)
29 |
30 | pad_index = model.src_vocab.stoi[PAD_TOKEN]
31 | # disable dropout
32 | model.eval()
33 | # don't track gradients during validation
34 | with torch.no_grad():
35 | valid_hypotheses = []
36 | valid_references = []
37 | valid_inputs = []
38 | file_paths = []
39 | all_dtw_scores = []
40 |
41 | valid_loss = 0
42 | total_ntokens = 0
43 | total_nseqs = 0
44 |
45 | batches = 0
46 | for valid_batch in iter(valid_iter):
47 | # Extract batch
48 | batch = Batch(torch_batch=valid_batch,
49 | pad_index = pad_index,
50 | model = model)
51 | targets = batch.trg
52 |
53 | # run as during training with teacher forcing
54 | if loss_function is not None and batch.trg is not None:
55 | # Get the loss for this batch
56 | batch_loss, _ = model.get_loss_for_batch(
57 | batch, loss_function=loss_function)
58 |
59 | valid_loss += batch_loss
60 | total_ntokens += batch.ntokens
61 | total_nseqs += batch.nseqs
62 |
63 | # If not just count in, run inference to produce translation videos
64 | if not model.just_count_in:
65 | # Run batch through the model in an auto-regressive format
66 | output, attention_scores = model.run_batch(
67 | batch=batch,
68 | max_output_length=max_output_length)
69 |
70 | # If future prediction
71 | if model.future_prediction != 0:
72 | # Cut to only the first frame prediction + add the counter
73 |                     output = torch.cat((output[:, :, :output.shape[2] // model.future_prediction], output[:, :, -1:]), dim=2)
74 |                     # Cut to only the first frame prediction + add the counter
75 |                     targets = torch.cat((targets[:, :, :targets.shape[2] // model.future_prediction], targets[:, :, -1:]), dim=2)
76 |
77 |                 # For just counter, the inference is driven by the GT counter, so the targets stand in
78 |                 if model.just_count_in:
79 |                     output = targets
80 |
81 | # Add references, hypotheses and file paths to list
82 | valid_references.extend(targets)
83 | valid_hypotheses.extend(output)
84 | file_paths.extend(batch.file_paths)
85 | # Add the source sentences to list, by using the model source vocab and batch indices
86 | valid_inputs.extend([[model.src_vocab.itos[batch.src[i][j]] for j in range(len(batch.src[i]))] for i in
87 | range(len(batch.src))])
88 |
89 | # Calculate the full Dynamic Time Warping score - for evaluation
90 | dtw_score = calculate_dtw(targets, output)
91 | all_dtw_scores.extend(dtw_score)
92 |
93 | # Can set to only run a few batches
94 | # if batches == math.ceil(20/batch_size):
95 | # break
96 | batches += 1
97 |
98 | # Dynamic Time Warping scores
99 | current_valid_score = np.mean(all_dtw_scores)
100 |
101 | return current_valid_score, valid_loss, valid_references, valid_hypotheses, \
102 | valid_inputs, all_dtw_scores, file_paths
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/requirements (1).txt:
--------------------------------------------------------------------------------
1 | absl-py==0.8.1
2 | alabaster==0.7.8
3 | asn1crypto==0.24.0
4 | astor==0.8.0
5 | astroid==2.3.1
6 | #astropy==3.0
7 | attrs==17.4.0
8 | Babel==2.4.0
9 | bleach==2.1.2
10 | certifi==2019.9.11
11 | chardet==3.0.4
12 | cloudpickle==0.5.2
13 | configobj==5.0.6
14 | cryptography==2.1.4
15 | cycler==0.10.0
16 | #Cython==0.27.3
17 | decorator==4.1.2
18 | docutils==0.14
19 | emcee==2.2.1
20 | future==0.18.1
21 | gast==0.3.2
22 | google-pasta==0.1.7
23 | grpcio==1.24.1
24 | h5py==2.10.0
25 | html5lib==0.999999999
26 | httplib2==0.9.2
27 | idna==2.8
28 | imagesize==0.7.1
29 | ipykernel==4.8.2
30 | ipython==5.5.0
31 | ipython-genutils==0.2.0
32 | isort==4.3.21
33 | jedi==0.11.1
34 | Jinja2==2.10
35 | joeynmt==0.0.1
36 | jsonschema==2.6.0
37 | jupyter-client==5.2.2
38 | jupyter-core==4.4.0
39 | Keras-Applications==1.0.8
40 | Keras-Preprocessing==1.1.0
41 | keyring==10.6.0
42 | keyrings.alt==3.0
43 | kiwisolver==1.1.0
44 | launchpadlib==1.10.6
45 | lazr.restfulclient==0.13.5
46 | lazr.uri==1.0.3
47 | lazy-object-proxy==1.4.2
48 | logilab-common==1.4.1
49 | macaroonbakery==1.1.4
50 | Mako==1.0.7
51 | Markdown==3.1.1
52 | matplotlib==3.1.1
53 | mccabe==0.6.1
54 | mistune==0.8.3
55 | mpmath==1.0.0
56 | nbconvert==5.3.1
57 | nbformat==4.4.0
58 | netifaces==0.10.4
59 | nose==1.3.7
60 | nose-parameterized==0.3.4
61 | numpy==1.17.3
62 | numpydoc==0.7.0
63 | oauth==1.0.1
64 | olefile==0.45.1
65 | pandas==0.25.1
66 | pandocfilters==1.4.2
67 | parso==0.1.1
68 | pexpect==4.2.1
69 | pickleshare==0.7.4
70 | Pillow==6.2.0
71 | pluggy==0.6.0
72 | ply==3.11
73 | portalocker==1.5.1
74 | prompt-toolkit==1.0.15
75 | protobuf==3.10.0
76 | psutil==5.4.2
77 | py==1.5.2
78 | pycairo==1.16.2
79 | pycodestyle==2.3.1
80 | pycrypto==2.6.1
81 | pycups==1.9.73
82 | pyds9==1.8.1
83 | pyfits==3.4
84 | pyflakes==1.6.0
85 | Pygments==2.2.0
86 | #pygobject==3.27.0
87 | PyICU==1.9.8
88 | pyinotify==0.9.6
89 | pylint==2.4.2
90 | pymacaroons==0.13.0
91 | PyNaCl==1.1.2
92 | PyOpenGL==3.1.0
93 | pyparsing==2.4.2
94 | pyqtgraph==0.10.0
95 | pyRFC3339==1.0
96 | pytest==3.3.2
97 | pytest-arraydiff==0.2
98 | pytest-astropy==0.2.1
99 | pytest-doctestplus==0.1.2
100 | pytest-openfiles==0.2.0
101 | pytest-remotedata==0.2.0
102 | python-dateutil==2.8.0
103 | python-debian==0.1.32
104 | pytz==2019.3
105 | pyxdg==0.25
106 | PyYAML==5.1.2
107 | pyzmq==19.0.2
108 | QtAwesome==0.4.4
109 | qtconsole==4.3.1
110 | QtPy==1.3.1
111 | reportlab==3.4.0
112 | requests==2.22.0
113 | requests-unixsocket==0.1.5
114 | roman==2.0.0
115 | rope==0.10.5
116 | sacrebleu==1.4.2
117 | scipy==1.3.1
118 | scour==0.36
119 | #screen-resolution-extra==0.0.0
120 | seaborn==0.9.0
121 | SecretStorage==2.3.1
122 | simplegeneric==0.8.1
123 | simplejson==3.13.2
124 | six==1.12.0
125 | Sphinx==1.6.7
126 | spyder==3.2.6
127 | ssh-import-id==5.10
128 | subword-nmt==0.3.6
129 | sympy==1.1.1
130 | tensorboard==1.14.0
131 | tensorflow==1.14.0
132 | termcolor==1.1.0
133 | testpath==0.3.1
134 | torch==1.4.0
135 | torchtext==0.4.0
136 | tornado==4.5.3
137 | tqdm==4.36.1
138 | traitlets==4.3.2
139 | typed-ast==1.4.0
140 | typing==3.7.4.1
141 | unidiff==0.5.4
142 | urllib3==1.25.6
143 | virtualenv==15.1.0
144 | wadllib==1.3.2
145 | wcwidth==0.1.7
146 | webencodings==0.5
147 | Werkzeug==0.16.0
148 | wrapt==1.11.1
149 | #xkit==0.0.0
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.8.1
2 | alabaster==0.7.8
3 | asn1crypto==0.24.0
4 | astor==0.8.0
5 | astroid==2.3.1
6 | astropy==3.0
7 | attrs==17.4.0
8 | Babel==2.4.0
9 | bleach==2.1.2
10 | certifi==2019.9.11
11 | chardet==3.0.4
12 | cloudpickle==0.5.2
13 | configobj==5.0.6
14 | cryptography==2.1.4
15 | cycler==0.10.0
16 | Cython==0.26.1
17 | decorator==4.1.2
18 | docutils==0.14
19 | emcee==2.2.1
20 | future==0.18.1
21 | gast==0.3.2
22 | google-pasta==0.1.7
23 | grpcio==1.24.1
24 | h5py==2.10.0
25 | html5lib==0.999999999
26 | httplib2==0.9.2
27 | idna==2.8
28 | imagesize==0.7.1
29 | ipykernel==4.8.2
30 | ipython==5.5.0
31 | ipython-genutils==0.2.0
32 | isort==4.3.21
33 | jedi==0.11.1
34 | Jinja2==2.10
35 | joeynmt==0.0.1
36 | jsonschema==2.6.0
37 | jupyter-client==5.2.2
38 | jupyter-core==4.4.0
39 | Keras-Applications==1.0.8
40 | Keras-Preprocessing==1.1.0
41 | keyring==10.6.0
42 | keyrings.alt==3.0
43 | kiwisolver==1.1.0
44 | launchpadlib==1.10.6
45 | lazr.restfulclient==0.13.5
46 | lazr.uri==1.0.3
47 | lazy-object-proxy==1.4.2
48 | logilab-common==1.4.1
49 | macaroonbakery==1.1.3
50 | Mako==1.0.7
51 | Markdown==3.1.1
52 | matplotlib==3.1.1
53 | mccabe==0.6.1
54 | mistune==0.8.3
55 | mpmath==1.0.0
56 | nbconvert==5.3.1
57 | nbformat==4.4.0
58 | netifaces==0.10.4
59 | nose==1.3.7
60 | nose-parameterized==0.3.4
61 | numpy==1.17.3
62 | numpydoc==0.7.0
63 | oauth==1.0.1
64 | olefile==0.45.1
65 | pandas==0.25.1
66 | pandocfilters==1.4.2
67 | parso==0.1.1
68 | pexpect==4.2.1
69 | pickleshare==0.7.4
70 | Pillow==6.2.0
71 | pluggy==0.6.0
72 | ply==3.11
73 | portalocker==1.5.1
74 | prompt-toolkit==1.0.15
75 | protobuf==3.10.0
76 | psutil==5.4.2
77 | py==1.5.2
78 | pycairo==1.16.2
79 | pycodestyle==2.3.1
80 | pycrypto==2.6.1
81 | pycups==1.9.73
82 | pyds9==1.8.1
83 | pyfits==3.4
84 | pyflakes==1.6.0
85 | Pygments==2.2.0
86 | pygobject==3.26.1
87 | PyICU==1.9.8
88 | pyinotify==0.9.6
89 | pylint==2.4.2
90 | pymacaroons==0.13.0
91 | PyNaCl==1.1.2
92 | PyOpenGL==3.1.0
93 | pyparsing==2.4.2
94 | pyqtgraph==0.10.0
95 | pyRFC3339==1.0
96 | pytest==3.3.2
97 | pytest-arraydiff==0.2
98 | pytest-astropy==0.2.1
99 | pytest-doctestplus==0.1.2
100 | pytest-openfiles==0.2.0
101 | pytest-remotedata==0.2.0
102 | python-dateutil==2.8.0
103 | python-debian==0.1.32
104 | pytz==2019.3
105 | pyxdg==0.25
106 | PyYAML==5.1.2
107 | pyzmq==16.0.2
108 | QtAwesome==0.4.4
109 | qtconsole==4.3.1
110 | QtPy==1.3.1
111 | reportlab==3.4.0
112 | requests==2.22.0
113 | requests-unixsocket==0.1.5
114 | roman==2.0.0
115 | rope==0.10.5
116 | sacrebleu==1.4.2
117 | scipy==1.3.1
118 | scour==0.36
119 | screen-resolution-extra==0.0.0
120 | seaborn==0.9.0
121 | SecretStorage==2.3.1
122 | simplegeneric==0.8.1
123 | simplejson==3.13.2
124 | six==1.12.0
125 | Sphinx==1.6.7
126 | spyder==3.2.6
127 | ssh-import-id==5.7
128 | subword-nmt==0.3.6
129 | sympy==1.1.1
130 | tensorflow==1.14.0
131 | tensorboard==1.14.0
132 | tensorflow-estimator==1.14.0
133 | termcolor==1.1.0
134 | testpath==0.3.1
135 | torch==1.3.0
136 | torchtext==0.4.0
137 | tornado==4.5.3
138 | tqdm==4.36.1
139 | traitlets==4.3.2
140 | typed-ast==1.4.0
141 | typing==3.7.4.1
142 | unidiff==0.5.4
143 | urllib3==1.25.6
144 | virtualenv==15.1.0
145 | wadllib==1.3.2
146 | wcwidth==0.1.7
147 | webencodings==0.5
148 | Werkzeug==0.16.0
149 | wrapt==1.11.1
150 | xkit==0.0.0
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/search.py:
--------------------------------------------------------------------------------
1 |
2 | import torch
3 | import torch.nn.functional as F
4 | from torch import Tensor
5 | import numpy as np
6 |
7 | from decoders import Decoder, TransformerDecoder
8 | from embeddings import Embeddings
9 |
10 | # pylint: disable=unused-argument
11 | def greedy(
12 | src_mask: Tensor,
13 | embed: Embeddings,
14 | decoder: Decoder,
15 | encoder_output: Tensor,
16 | trg_input: Tensor,
17 | model,
18 | ) -> (np.array, np.array):
19 |     """
20 |     Special greedy function for the transformer, since it works differently:
21 |     the transformer remembers all previous states and attends to them.
22 |
23 |     :param src_mask: mask for source inputs, 0 for padded positions
24 |     :param embed: target embedding
25 |     :param decoder: decoder to use for greedy decoding
26 |     :param encoder_output: encoder hidden states for attention
27 |     :param trg_input: ground-truth target frames, used to drive the counter
28 |     :param model: full model, queried for just_count_in and future_prediction
29 |     :return:
30 |         - ys_out: predicted frame sequence (batch x frames x features)
31 |         - None in place of attention scores (not collected during
32 |           greedy decoding)
33 |     """
34 | # Initialise the input
35 | # Extract just the BOS first frame from the target
36 | ys = trg_input[:,:1,:].float()
37 |
38 | # If the counter is coming into the decoder or not
39 | ys_out = ys
40 |
41 | # Set the target mask, by finding the padded rows
42 | trg_mask = trg_input != 0.0
43 | trg_mask = trg_mask.unsqueeze(1)
44 |
45 | # Find the maximum output length for this batch
46 | max_output_length = trg_input.shape[1]
47 |
48 | # If just count in, input is just the counter
49 | if model.just_count_in:
50 | ys = ys[:,:,-1:]
51 |
52 | for i in range(max_output_length):
53 |
54 | # ys here is the input
55 | # Drive the timing by giving the GT timing - add in the counter to the last column
56 |
57 | if model.just_count_in:
58 | # If just counter, drive the input using the GT counter
59 | ys[:,-1] = trg_input[:, i, -1:]
60 |
61 | else:
62 | # Give the GT counter for timing, to drive the timing
63 | ys[:,-1,-1:] = trg_input[:, i, -1:]
64 |
65 | # Embed the target input before passing to the decoder
66 | trg_embed = embed(ys)
67 |
68 | # Cut padding mask to required size (of the size of the input)
69 | padding_mask = trg_mask[:, :, :i+1, :i+1]
70 | # Pad the mask (If required) (To make it square, and used later on correctly)
71 | pad_amount = padding_mask.shape[2] - padding_mask.shape[3]
72 | padding_mask = (F.pad(input=padding_mask.double(), pad=(pad_amount, 0, 0, 0), mode='replicate') == 1.0)
73 |
74 | # Pass the embedded input and the encoder output into the decoder
75 | with torch.no_grad():
76 | out, _, _, _ = decoder(
77 | trg_embed=trg_embed,
78 | encoder_output=encoder_output,
79 | src_mask=src_mask,
80 | trg_mask=padding_mask,
81 | )
82 |
83 | if model.future_prediction != 0:
84 | # Cut to only the first frame prediction
85 | out = torch.cat((out[:, :, :out.shape[2] // (model.future_prediction)],out[:,:,-1:]),dim=2)
86 |
87 |         if model.just_count_in:
88 |             # If just counter in trg_input, concatenate counters of output
89 |             ys = torch.cat([ys, out[:,-1:,-1:]], dim=1)
90 |         else:
91 |             # Add this frame prediction to the overall prediction
92 |             ys = torch.cat([ys, out[:,-1:,:]], dim=1)
93 |
94 | # Add this next predicted frame to the full frame output
95 | ys_out = torch.cat([ys_out, out[:,-1:,:]], dim=1)
96 |
97 | return ys_out, None
98 |
99 |
--------------------------------------------------------------------------------
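
The `future_prediction` slicing in `greedy` above keeps only the first of the stacked frame predictions plus the trailing counter column; a standalone check of that tensor surgery, assuming a stacking factor of 2:

    import torch

    future_prediction = 2
    # batch x time x (2 stacked frames of 4 dims each + 1 counter column) = 9 dims
    out = torch.randn(1, 3, 9)

    first_frame = out[:, :, : out.shape[2] // future_prediction]  # first 4 dims
    counter = out[:, :, -1:]                                      # counter column
    out = torch.cat((first_frame, counter), dim=2)
    print(out.shape)  # torch.Size([1, 3, 5])
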
/code/generation-model/slt/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | .idea/
3 |
4 | *.pyc
5 |
--------------------------------------------------------------------------------
/code/generation-model/slt/README.md:
--------------------------------------------------------------------------------
1 | # Sign Language Transformers (CVPR'20)
2 |
3 | This repo contains the training and evaluation code for the paper [Sign Language Transformers: Joint End-to-end Sign Language Recognition and Translation](https://www.cihancamgoz.com/pub/camgoz2020cvpr.pdf).
4 |
5 | This code is based on [Joey NMT](https://github.com/joeynmt/joeynmt) but modified to realize joint continuous sign language recognition and translation. For text-to-text translation experiments, you can use the original Joey NMT framework.
6 |
7 | ## Requirements
8 | * Download the feature files using the `data/download.sh` script.
9 |
10 | * [Optional] Create a conda or python virtual environment.
11 |
12 | * Install required packages using the `requirements.txt` file.
13 |
14 | `pip install -r requirements.txt`
15 |
16 | ## Usage
17 |
18 | `python -m signjoey train configs/sign.yaml`
19 |
20 | Note that the default data directory is `./data`. If you download the features somewhere else, update the `data_path` parameter in your config file.
21 | ## ToDo:
22 |
23 | - [X] *Initial code release.*
24 | - [X] *Release image features for Phoenix2014T.*
25 | - [ ] Share extensive qualitative and quantitative results & config files to generate them.
26 | - [ ] (Nice to have) - Guide to set up conda environment and docker image.
27 |
28 | ## Reference
29 |
30 | Please cite the paper below if you use this code in your research:
31 |
32 | @inproceedings{camgoz2020sign,
33 | author = {Necati Cihan Camgoz and Oscar Koller and Simon Hadfield and Richard Bowden},
34 | title = {Sign Language Transformers: Joint End-to-end Sign Language Recognition and Translation},
35 | booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
36 | year = {2020}
37 | }
38 |
39 | ## Acknowledgements
40 | This work was funded by the SNSF Sinergia project "Scalable Multimodal Sign Language Technology for Sign Language Learning and Assessment" (SMILE) grant agreement number CRSII2 160811 and the European Union’s Horizon2020 research and innovation programme under grant agreement no. 762021 (Content4All). This work reflects only the author’s view and the Commission is not responsible for any use that may be made of the information it contains. We would also like to thank NVIDIA Corporation for their GPU grant.
41 |
--------------------------------------------------------------------------------
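
The `signjoey` entry point (see `signjoey/__main__.py` below) also exposes a `test` mode; a typical invocation, with the checkpoint and output paths assumed:

    python -m signjoey test configs/sign.yaml --ckpt <path/to/checkpoint> --output_path <output/prefix>
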
/code/generation-model/slt/base_annotations/check_base_file.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | import pickle
3 |
4 |
5 | with gzip.open('../new_data/phoenix14t.yang.dev', 'rb') as f_in:
6 |     loaded_object = pickle.load(f_in)
7 | print(type(loaded_object))
8 | print(loaded_object[0])
9 | # Inspect the feature shapes of the first two samples
10 | sign_shape = loaded_object[0]['sign'].shape
11 | print(sign_shape, loaded_object[1]['sign'].shape)
--------------------------------------------------------------------------------
/code/generation-model/slt/base_annotations/file.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/file.txt
--------------------------------------------------------------------------------
/code/generation-model/slt/base_annotations/out_dev:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/out_dev
--------------------------------------------------------------------------------
/code/generation-model/slt/base_annotations/phoenix14t.dev:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/phoenix14t.dev
--------------------------------------------------------------------------------
/code/generation-model/slt/base_annotations/phoenix14t.test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/phoenix14t.test
--------------------------------------------------------------------------------
/code/generation-model/slt/base_annotations/phoenix14t.train:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/phoenix14t.train
--------------------------------------------------------------------------------
/code/generation-model/slt/configs/sign copy.yaml:
--------------------------------------------------------------------------------
1 | name: sign_experiment
2 | data:
3 | data_path: ./new_data/
4 | version: phoenix_2014_trans
5 | sgn: sign
6 | txt: text
7 | gls: gloss
8 | train: phoenix14t.yang.train
9 | dev: phoenix14t.yang.dev
10 | test: phoenix14t.yang.test
11 | feature_size: 150
12 | level: word
13 | txt_lowercase: true
14 | max_sent_length: 400
15 | random_train_subset: -1
16 | random_dev_subset: -1
17 | testing:
18 | recognition_beam_sizes:
19 | - 1
20 | - 2
21 | - 3
22 | - 4
23 | - 5
24 | - 6
25 | - 7
26 | - 8
27 | - 9
28 | - 10
29 | translation_beam_sizes:
30 | - 1
31 | - 2
32 | - 3
33 | - 4
34 | - 5
35 | - 6
36 | - 7
37 | - 8
38 | - 9
39 | - 10
40 | translation_beam_alphas:
41 | - -1
42 | - 0
43 | - 1
44 | - 2
45 | - 3
46 | - 4
47 | - 5
48 | training:
49 | reset_best_ckpt: false
50 | reset_scheduler: false
51 | reset_optimizer: false
52 | random_seed: 0
53 | model_dir: "./sign_sample_model_2layer4head"
54 | recognition_loss_weight: 1.0
55 | translation_loss_weight: 1.0
56 | eval_metric: bleu
57 | optimizer: adam
58 | learning_rate: 0.001
59 | batch_size: 32
60 | num_valid_log: 5
61 | epochs: 5000000
62 | early_stopping_metric: eval_metric
63 | batch_type: sentence
64 | translation_normalization: batch
65 | eval_recognition_beam_size: 1
66 | eval_translation_beam_size: 1
67 | eval_translation_beam_alpha: -1
68 | overwrite: true
69 | shuffle: true
70 | use_cuda: true
71 | translation_max_output_length: 30
72 | keep_last_ckpts: 1
73 | batch_multiplier: 1
74 | logging_freq: 100
75 | validation_freq: 500
76 | betas:
77 | - 0.9
78 | - 0.998
79 | scheduling: plateau
80 | learning_rate_min: 1.0e-07
81 | weight_decay: 0.001
82 | patience: 8
83 | decrease_factor: 0.7
84 | label_smoothing: 0.0
85 | model:
86 | initializer: xavier
87 | bias_initializer: zeros
88 | init_gain: 1.0
89 | embed_initializer: xavier
90 | embed_init_gain: 1.0
91 | tied_softmax: false
92 | encoder:
93 | type: transformer
94 | num_layers: 2
95 | num_heads: 4
96 | embeddings:
97 | embedding_dim: 128
98 | scale: false
99 | dropout: 0.1
100 | norm_type: batch
101 | activation_type: softsign
102 | hidden_size: 128
103 | ff_size: 1024
104 | dropout: 0.1
105 | decoder:
106 | type: transformer
107 | num_layers: 2
108 | num_heads: 4
109 | embeddings:
110 | embedding_dim: 128
111 | # embedding_dim: 128
112 | scale: false
113 | dropout: 0.1
114 | norm_type: batch
115 | activation_type: softsign
116 | hidden_size: 128
117 | ff_size: 1024
118 | dropout: 0.1
119 |
--------------------------------------------------------------------------------
/code/generation-model/slt/configs/sign.yaml:
--------------------------------------------------------------------------------
1 | name: sign_experiment
2 | data:
3 | data_path: ./new_data/
4 | version: phoenix_2014_trans
5 | sgn: sign
6 | txt: text
7 | gls: gloss
8 | train: phoenix14t.yang.train
9 | dev: phoenix14t.yang.dev
10 | test: phoenix14t.yang.test
11 | feature_size: 150
12 | level: word
13 | txt_lowercase: true
14 | max_sent_length: 400
15 | random_train_subset: -1
16 | random_dev_subset: -1
17 | testing:
18 | recognition_beam_sizes:
19 | - 1
20 | - 2
21 | - 3
22 | - 4
23 | - 5
24 | - 6
25 | - 7
26 | - 8
27 | - 9
28 | - 10
29 | translation_beam_sizes:
30 | - 1
31 | - 2
32 | - 3
33 | - 4
34 | - 5
35 | - 6
36 | - 7
37 | - 8
38 | - 9
39 | - 10
40 | translation_beam_alphas:
41 | - -1
42 | - 0
43 | - 1
44 | - 2
45 | - 3
46 | - 4
47 | training:
48 | reset_best_ckpt: false
49 | reset_scheduler: false
50 | reset_optimizer: false
51 | random_seed: 0
52 | model_dir: "./sign_sample_model"
53 | recognition_loss_weight: 1.0
54 | translation_loss_weight: 1.0
55 | eval_metric: bleu
56 | optimizer: adam
57 | learning_rate: 0.001
58 | batch_size: 16
59 | num_valid_log: 5
60 | epochs: 5000000
61 | early_stopping_metric: eval_metric
62 | batch_type: sentence
63 | translation_normalization: batch
64 | eval_recognition_beam_size: 1
65 | eval_translation_beam_size: 1
66 | eval_translation_beam_alpha: -1
67 | overwrite: true
68 | shuffle: true
69 | use_cuda: true
70 | translation_max_output_length: 30
71 | keep_last_ckpts: 1
72 | batch_multiplier: 1
73 | logging_freq: 100
74 | validation_freq: 500
75 | betas:
76 | - 0.9
77 | - 0.998
78 | scheduling: plateau
79 | learning_rate_min: 1.0e-07
80 | weight_decay: 0.001
81 | patience: 8
82 | decrease_factor: 0.7
83 | label_smoothing: 0.0
84 | model:
85 | initializer: xavier
86 | bias_initializer: zeros
87 | init_gain: 1.0
88 | embed_initializer: xavier
89 | embed_init_gain: 1.0
90 | tied_softmax: false
91 | encoder:
92 | type: transformer
93 | num_layers: 1
94 | num_heads: 2
95 | embeddings:
96 | embedding_dim: 128
97 | scale: false
98 | dropout: 0.1
99 | norm_type: batch
100 | activation_type: softsign
101 | hidden_size: 128
102 | ff_size: 2048
103 | dropout: 0.1
104 | decoder:
105 | type: transformer
106 | num_layers: 1
107 | num_heads: 2
108 | embeddings:
109 | embedding_dim: 128
110 | # embedding_dim: 128
111 | scale: false
112 | dropout: 0.1
113 | norm_type: batch
114 | activation_type: softsign
115 | hidden_size: 128
116 | ff_size: 2048
117 | dropout: 0.1
118 |
--------------------------------------------------------------------------------
/code/generation-model/slt/configs/sign_train_gloss_before_no_repeat.yaml:
--------------------------------------------------------------------------------
1 | name: sign_experiment
2 | data:
3 | data_path: ./All_data/train_gloss_before_no_repeat/
4 | version: phoenix_2014_trans
5 | sgn: sign
6 | txt: text
7 | gls: gloss
8 | train: phoenix14t.yang.train
9 | dev: phoenix14t.yang.dev
10 | test: phoenix14t.yang.test
11 | feature_size: 150
12 | level: word
13 | txt_lowercase: true
14 | max_sent_length: 400
15 | random_train_subset: -1
16 | random_dev_subset: -1
17 | testing:
18 | recognition_beam_sizes:
19 | - 1
20 | - 2
21 | - 3
22 | - 4
23 | - 5
24 | - 6
25 | - 7
26 | - 8
27 | - 9
28 | - 10
29 | translation_beam_sizes:
30 | - 1
31 | - 2
32 | - 3
33 | - 4
34 | - 5
35 | - 6
36 | - 7
37 | - 8
38 | - 9
39 | - 10
40 | translation_beam_alphas:
41 | - -1
42 | - 0
43 | - 1
44 | - 2
45 | - 3
46 | - 4
47 | - 5
48 | training:
49 | reset_best_ckpt: false
50 | reset_scheduler: false
51 | reset_optimizer: false
52 | random_seed: 0
53 | model_dir: "./sign_sample_model"
54 | recognition_loss_weight: 1.0
55 | translation_loss_weight: 1.0
56 | eval_metric: bleu
57 | optimizer: adam
58 | learning_rate: 0.001
59 | batch_size: 32
60 | num_valid_log: 5
61 | epochs: 5000000
62 | early_stopping_metric: eval_metric
63 | batch_type: sentence
64 | translation_normalization: batch
65 | eval_recognition_beam_size: 1
66 | eval_translation_beam_size: 1
67 | eval_translation_beam_alpha: -1
68 | overwrite: true
69 | shuffle: true
70 | use_cuda: true
71 | translation_max_output_length: 30
72 | keep_last_ckpts: 1
73 | batch_multiplier: 1
74 | logging_freq: 100
75 | validation_freq: 500
76 | betas:
77 | - 0.9
78 | - 0.998
79 | scheduling: plateau
80 | learning_rate_min: 1.0e-07
81 | weight_decay: 0.001
82 | patience: 8
83 | decrease_factor: 0.7
84 | label_smoothing: 0.0
85 | model:
86 | initializer: xavier
87 | bias_initializer: zeros
88 | init_gain: 1.0
89 | embed_initializer: xavier
90 | embed_init_gain: 1.0
91 | tied_softmax: false
92 | encoder:
93 | type: transformer
94 | num_layers: 1
95 | num_heads: 2
96 | embeddings:
97 | embedding_dim: 128
98 | scale: false
99 | dropout: 0.1
100 | norm_type: batch
101 | activation_type: softsign
102 | hidden_size: 128
103 | ff_size: 2048
104 | dropout: 0.1
105 | decoder:
106 | type: transformer
107 | num_layers: 1
108 | num_heads: 2
109 | embeddings:
110 | embedding_dim: 128
111 | # embedding_dim: 128
112 | scale: false
113 | dropout: 0.1
114 | norm_type: batch
115 | activation_type: softsign
116 | hidden_size: 128
117 | ff_size: 2048
118 | dropout: 0.1
119 |
--------------------------------------------------------------------------------
/code/generation-model/slt/configs/sign_train_gloss_between_no_repeat.yaml:
--------------------------------------------------------------------------------
1 | name: sign_experiment
2 | data:
3 | data_path: ./All_data/train_gloss_between_no_repeat/
4 | version: phoenix_2014_trans
5 | sgn: sign
6 | txt: text
7 | gls: gloss
8 | train: phoenix14t.yang.train
9 | dev: phoenix14t.yang.dev
10 | test: phoenix14t.yang.test
11 | feature_size: 150
12 | level: word
13 | txt_lowercase: true
14 | max_sent_length: 400
15 | random_train_subset: -1
16 | random_dev_subset: -1
17 | testing:
18 | recognition_beam_sizes:
19 | - 1
20 | - 2
21 | - 3
22 | - 4
23 | - 5
24 | - 6
25 | - 7
26 | - 8
27 | - 9
28 | - 10
29 | translation_beam_sizes:
30 | - 1
31 | - 2
32 | - 3
33 | - 4
34 | - 5
35 | - 6
36 | - 7
37 | - 8
38 | - 9
39 | - 10
40 | translation_beam_alphas:
41 | - -1
42 | - 0
43 | - 1
44 | - 2
45 | - 3
46 | - 4
47 | - 5
48 | training:
49 | reset_best_ckpt: false
50 | reset_scheduler: false
51 | reset_optimizer: false
52 | random_seed: 0
53 | model_dir: "./sign_sample_model"
54 | recognition_loss_weight: 1.0
55 | translation_loss_weight: 1.0
56 | eval_metric: bleu
57 | optimizer: adam
58 | learning_rate: 0.001
59 | batch_size: 32
60 | num_valid_log: 5
61 | epochs: 5000000
62 | early_stopping_metric: eval_metric
63 | batch_type: sentence
64 | translation_normalization: batch
65 | eval_recognition_beam_size: 1
66 | eval_translation_beam_size: 1
67 | eval_translation_beam_alpha: -1
68 | overwrite: true
69 | shuffle: true
70 | use_cuda: true
71 | translation_max_output_length: 30
72 | keep_last_ckpts: 1
73 | batch_multiplier: 1
74 | logging_freq: 100
75 | validation_freq: 500
76 | betas:
77 | - 0.9
78 | - 0.998
79 | scheduling: plateau
80 | learning_rate_min: 1.0e-07
81 | weight_decay: 0.001
82 | patience: 8
83 | decrease_factor: 0.7
84 | label_smoothing: 0.0
85 | model:
86 | initializer: xavier
87 | bias_initializer: zeros
88 | init_gain: 1.0
89 | embed_initializer: xavier
90 | embed_init_gain: 1.0
91 | tied_softmax: false
92 | encoder:
93 | type: transformer
94 | num_layers: 1
95 | num_heads: 2
96 | embeddings:
97 | embedding_dim: 128
98 | scale: false
99 | dropout: 0.1
100 | norm_type: batch
101 | activation_type: softsign
102 | hidden_size: 128
103 | ff_size: 2048
104 | dropout: 0.1
105 | decoder:
106 | type: transformer
107 | num_layers: 1
108 | num_heads: 2
109 | embeddings:
110 | embedding_dim: 128
111 | # embedding_dim: 128
112 | scale: false
113 | dropout: 0.1
114 | norm_type: batch
115 | activation_type: softsign
116 | hidden_size: 128
117 | ff_size: 2048
118 | dropout: 0.1
119 |
--------------------------------------------------------------------------------
/code/generation-model/slt/configs/sign_train_gloss_between_yes_repeat.yaml:
--------------------------------------------------------------------------------
1 | name: sign_experiment
2 | data:
3 | data_path: ./All_data/train_gloss_between_yes_repeat/
4 | version: phoenix_2014_trans
5 | sgn: sign
6 | txt: text
7 | gls: gloss
8 | train: phoenix14t.yang.train
9 | dev: phoenix14t.yang.dev
10 | test: phoenix14t.yang.test
11 | feature_size: 150
12 | level: word
13 | txt_lowercase: true
14 | max_sent_length: 400
15 | random_train_subset: -1
16 | random_dev_subset: -1
17 | testing:
18 | recognition_beam_sizes:
19 | - 1
20 | - 2
21 | - 3
22 | - 4
23 | - 5
24 | - 6
25 | - 7
26 | - 8
27 | - 9
28 | - 10
29 | translation_beam_sizes:
30 | - 1
31 | - 2
32 | - 3
33 | - 4
34 | - 5
35 | - 6
36 | - 7
37 | - 8
38 | - 9
39 | - 10
40 | translation_beam_alphas:
41 | - -1
42 | - 0
43 | - 1
44 | - 2
45 | - 3
46 | - 4
47 | - 5
48 | training:
49 | reset_best_ckpt: false
50 | reset_scheduler: false
51 | reset_optimizer: false
52 | random_seed: 0
53 | model_dir: "./sign_sample_model"
54 | recognition_loss_weight: 1.0
55 | translation_loss_weight: 1.0
56 | eval_metric: bleu
57 | optimizer: adam
58 | learning_rate: 0.001
59 | batch_size: 32
60 | num_valid_log: 5
61 | epochs: 5000000
62 | early_stopping_metric: eval_metric
63 | batch_type: sentence
64 | translation_normalization: batch
65 | eval_recognition_beam_size: 1
66 | eval_translation_beam_size: 1
67 | eval_translation_beam_alpha: -1
68 | overwrite: true
69 | shuffle: true
70 | use_cuda: true
71 | translation_max_output_length: 30
72 | keep_last_ckpts: 1
73 | batch_multiplier: 1
74 | logging_freq: 100
75 | validation_freq: 500
76 | betas:
77 | - 0.9
78 | - 0.998
79 | scheduling: plateau
80 | learning_rate_min: 1.0e-07
81 | weight_decay: 0.001
82 | patience: 8
83 | decrease_factor: 0.7
84 | label_smoothing: 0.0
85 | model:
86 | initializer: xavier
87 | bias_initializer: zeros
88 | init_gain: 1.0
89 | embed_initializer: xavier
90 | embed_init_gain: 1.0
91 | tied_softmax: false
92 | encoder:
93 | type: transformer
94 | num_layers: 1
95 | num_heads: 2
96 | embeddings:
97 | embedding_dim: 128
98 | scale: false
99 | dropout: 0.1
100 | norm_type: batch
101 | activation_type: softsign
102 | hidden_size: 128
103 | ff_size: 2048
104 | dropout: 0.1
105 | decoder:
106 | type: transformer
107 | num_layers: 1
108 | num_heads: 2
109 | embeddings:
110 | embedding_dim: 128
111 | # embedding_dim: 128
112 | scale: false
113 | dropout: 0.1
114 | norm_type: batch
115 | activation_type: softsign
116 | hidden_size: 128
117 | ff_size: 2048
118 | dropout: 0.1
119 |
--------------------------------------------------------------------------------
/code/generation-model/slt/configs/sign_train_gloss_joint_no_repeat.yaml:
--------------------------------------------------------------------------------
1 | name: sign_experiment
2 | data:
3 | data_path: ./All_data/train_gloss_joint_no_repeat/
4 | version: phoenix_2014_trans
5 | sgn: sign
6 | txt: text
7 | gls: gloss
8 | train: phoenix14t.yang.train
9 | dev: phoenix14t.yang.dev
10 | test: phoenix14t.yang.test
11 | feature_size: 150
12 | level: word
13 | txt_lowercase: true
14 | max_sent_length: 400
15 | random_train_subset: -1
16 | random_dev_subset: -1
17 | testing:
18 | recognition_beam_sizes:
19 | - 1
20 | - 2
21 | - 3
22 | - 4
23 | - 5
24 | - 6
25 | - 7
26 | - 8
27 | - 9
28 | - 10
29 | translation_beam_sizes:
30 | - 1
31 | - 2
32 | - 3
33 | - 4
34 | - 5
35 | - 6
36 | - 7
37 | - 8
38 | - 9
39 | - 10
40 | translation_beam_alphas:
41 | - -1
42 | - 0
43 | - 1
44 | - 2
45 | - 3
46 | - 4
47 | - 5
48 | training:
49 | reset_best_ckpt: false
50 | reset_scheduler: false
51 | reset_optimizer: false
52 | random_seed: 0
53 | model_dir: "./sign_sample_model"
54 | recognition_loss_weight: 1.0
55 | translation_loss_weight: 1.0
56 | eval_metric: bleu
57 | optimizer: adam
58 | learning_rate: 0.001
59 | batch_size: 32
60 | num_valid_log: 5
61 | epochs: 5000000
62 | early_stopping_metric: eval_metric
63 | batch_type: sentence
64 | translation_normalization: batch
65 | eval_recognition_beam_size: 1
66 | eval_translation_beam_size: 1
67 | eval_translation_beam_alpha: -1
68 | overwrite: true
69 | shuffle: true
70 | use_cuda: true
71 | translation_max_output_length: 30
72 | keep_last_ckpts: 1
73 | batch_multiplier: 1
74 | logging_freq: 100
75 | validation_freq: 500
76 | betas:
77 | - 0.9
78 | - 0.998
79 | scheduling: plateau
80 | learning_rate_min: 1.0e-07
81 | weight_decay: 0.001
82 | patience: 8
83 | decrease_factor: 0.7
84 | label_smoothing: 0.0
85 | model:
86 | initializer: xavier
87 | bias_initializer: zeros
88 | init_gain: 1.0
89 | embed_initializer: xavier
90 | embed_init_gain: 1.0
91 | tied_softmax: false
92 | encoder:
93 | type: transformer
94 | num_layers: 1
95 | num_heads: 2
96 | embeddings:
97 | embedding_dim: 128
98 | scale: false
99 | dropout: 0.1
100 | norm_type: batch
101 | activation_type: softsign
102 | hidden_size: 128
103 | ff_size: 2048
104 | dropout: 0.1
105 | decoder:
106 | type: transformer
107 | num_layers: 1
108 | num_heads: 2
109 | embeddings:
110 | embedding_dim: 128
111 | # embedding_dim: 128
112 | scale: false
113 | dropout: 0.1
114 | norm_type: batch
115 | activation_type: softsign
116 | hidden_size: 128
117 | ff_size: 2048
118 | dropout: 0.1
119 |
--------------------------------------------------------------------------------
/code/generation-model/slt/experiment_results/dev.ph14t:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/code/generation-model/slt/experiment_results/results.md:
--------------------------------------------------------------------------------
1 | TODO: Add table of results with the latest code.
--------------------------------------------------------------------------------
/code/generation-model/slt/experiment_results/test.ph14t:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/code/generation-model/slt/output/output_gold.dev_results.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/output/output_gold.dev_results.pkl
--------------------------------------------------------------------------------
/code/generation-model/slt/output/output_gold.test_results.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/output/output_gold.test_results.pkl
--------------------------------------------------------------------------------
/code/generation-model/slt/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.9.0
2 | asn1crypto==1.2.0
3 | astor==0.8.1
4 | attrs==19.3.0
5 | cachetools==4.0.0
6 | certifi==2019.11.28
7 | cffi==1.13.2
8 | chardet==3.0.4
9 | cryptography==2.8
10 | cycler==0.10.0
11 | gast==0.2.2
12 | google-auth==1.10.2
13 | google-auth-oauthlib==0.4.1
14 | google-pasta==0.1.8
15 | grpcio==1.26.0
16 | h5py==2.10.0
17 | idna==2.8
18 | importlib-metadata==1.5.0
19 | joblib==0.14.1
20 | Keras-Applications==1.0.8
21 | Keras-Preprocessing==1.1.0
22 | keyboard==0.13.5
23 | kiwisolver==1.1.0
24 | Markdown==3.1.1
25 | matplotlib==3.1.2
26 | more-itertools==8.2.0
27 | natsort==7.0.1
28 | numpy==1.18.1
29 | oauthlib==3.1.0
30 | opencv-python==4.2.0.32
31 | opt-einsum==3.1.0
32 | packaging==20.3
33 | pandas==1.0.3
34 | Pillow==7.1.0
35 | plotly==4.5.0
36 | pluggy==0.13.1
37 | portalocker==1.5.2
38 | protobuf==3.11.2
39 | py==1.8.1
40 | pyasn1==0.4.8
41 | pyasn1-modules==0.2.8
42 | pycosat==0.6.3
43 | pycparser==2.19
44 | pyOpenSSL==19.1.0
45 | pyparsing==2.4.6
46 | PySocks==1.7.1
47 | pytest==5.4.0
48 | python-dateutil==2.8.1
49 | pytz==2019.3
50 | PyYAML==5.3
51 | requests==2.22.0
52 | requests-oauthlib==1.3.0
53 | retrying==1.3.3
54 | rsa==4.0
55 | ruamel-yaml==0.15.87
56 | sacrebleu==1.4.4
57 | scikit-learn==0.22.2.post1
58 | scipy==1.4.1
59 | sentencepiece==0.1.85
60 | six==1.13.0
61 | tensorboard==2.1.1
62 | tensorflow==2.1.2
63 | tensorflow-estimator==2.1.0
64 | termcolor==1.1.0
65 | torch==1.4.0
66 | torchtext==0.5.0
67 | torchvision==0.5.0
68 | tqdm==4.40.2
69 | typing==3.7.4.1
70 | urllib3==1.25.7
71 | warmup-scheduler==0.3
72 | wcwidth==0.1.8
73 | Werkzeug==0.16.0
74 | wrapt==1.11.2
75 | xmltodict==0.12.0
76 | zipp==3.1.0
77 |
--------------------------------------------------------------------------------
/code/generation-model/slt/signjoey/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/signjoey/__init__.py
--------------------------------------------------------------------------------
/code/generation-model/slt/signjoey/__main__.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import sys
5 | from signjoey.training import train
6 | from signjoey.prediction import test
7 |
8 | sys.path.append("/vol/research/extol/personal/cihan/code/SignJoey")
9 |
10 |
11 | def main():
12 | ap = argparse.ArgumentParser("Joey NMT")
13 |
14 | ap.add_argument("mode", choices=["train", "test"], help="train a model or test")
15 |
16 | ap.add_argument("config_path", type=str, help="path to YAML config file")
17 |
18 | ap.add_argument("--ckpt", type=str, help="checkpoint for prediction")
19 |
20 | ap.add_argument(
21 | "--output_path", type=str, help="path for saving translation output"
22 | )
23 | ap.add_argument("--gpu_id", type=str, default="0", help="gpu to run your job on")
24 | args = ap.parse_args()
25 |
26 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
27 |
28 | if args.mode == "train":
29 | train(cfg_file=args.config_path)
30 | elif args.mode == "test":
31 | test(cfg_file=args.config_path, ckpt=args.ckpt, output_path=args.output_path)
32 | else:
33 | raise ValueError("Unknown mode")
34 |
35 |
36 | if __name__ == "__main__":
37 | main()
38 |
--------------------------------------------------------------------------------
/code/generation-model/slt/signjoey/dataset.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Data module
4 | """
5 | from torchtext import data
6 | from torchtext.data import Field, RawField
7 | from typing import List, Tuple
8 | import pickle
9 | import gzip
10 | import torch
11 |
12 |
13 | def load_dataset_file(filename):
14 | with gzip.open(filename, "rb") as f:
15 | loaded_object = pickle.load(f)
16 | return loaded_object
17 |
18 |
19 | class SignTranslationDataset(data.Dataset):
20 | """Defines a dataset for machine translation."""
21 |
22 | @staticmethod
23 | def sort_key(ex):
24 | return data.interleave_keys(len(ex.sgn), len(ex.txt))
25 |
26 | def __init__(
27 | self,
28 | path: str,
29 | fields: Tuple[RawField, RawField, Field, Field, Field],
30 | **kwargs
31 | ):
32 | """Create a SignTranslationDataset given paths and fields.
33 |
34 | Arguments:
35 | path: Common prefix of paths to the data files for both languages.
36 | exts: A tuple containing the extension to path for each language.
37 | fields: A tuple containing the fields that will be used for data
38 | in each language.
39 | Remaining keyword arguments: Passed to the constructor of
40 | data.Dataset.
41 | """
42 | if not isinstance(fields[0], (tuple, list)):
43 | fields = [
44 | ("sequence", fields[0]),
45 | ("signer", fields[1]),
46 | ("sgn", fields[2]),
47 | ("gls", fields[3]),
48 | ("txt", fields[4]),
49 | ]
50 |
51 | if not isinstance(path, list):
52 | path = [path]
53 |
54 | samples = {}
55 | for annotation_file in path:
56 | tmp = load_dataset_file(annotation_file)
57 | for s in tmp:
58 | seq_id = s["name"]
59 | if seq_id in samples:
60 | assert samples[seq_id]["name"] == s["name"]
61 | assert samples[seq_id]["signer"] == s["signer"]
62 | assert samples[seq_id]["gloss"] == s["gloss"]
63 | assert samples[seq_id]["text"] == s["text"]
64 |                 samples[seq_id]["sign"] = torch.cat(
65 |                     [samples[seq_id]["sign"], s["sign"]], dim=1
66 |                 )
67 | else:
68 | samples[seq_id] = {
69 | "name": s["name"],
70 | "signer": s["signer"],
71 | "gloss": s["gloss"],
72 | "text": s["text"],
73 | "sign": s["sign"],
74 | }
75 |
76 | examples = []
77 | for s in samples:
78 | sample = samples[s]
79 |
80 |
81 | examples.append(
82 | data.Example.fromlist(
83 | [
84 | sample["name"],
85 | sample["signer"],
86 |                     # This is for numerical stability
87 |                     sample["sign"] + 1e-8,
88 | sample["gloss"].strip(),
89 | sample["text"].strip(),
90 | ],
91 | fields,
92 | )
93 |
94 | )
95 | # print(sample["gloss"].strip())
96 |
97 | super().__init__(examples, fields, **kwargs)
98 |
--------------------------------------------------------------------------------
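
The merging step in `SignTranslationDataset.__init__` above concatenates features along the feature axis whenever the same sequence name appears in several annotation files; a standalone illustration with assumed shapes:

    import torch

    a = {"name": "seq1", "sign": torch.randn(40, 150)}  # 40 frames x 150 features
    b = {"name": "seq1", "sign": torch.randn(40, 150)}  # same clip, second feature file
    merged = torch.cat([a["sign"], b["sign"]], dim=1)   # 40 frames x 300 features
    print(merged.shape)
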
/code/generation-model/slt/signjoey/external_metrics/mscoco_rouge.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # File Name : mscoco_rouge.py
4 | #
5 | # Description : Computes ROUGE-L metric as described by Lin and Hovy (2004)
6 | #
7 | # Creation Date : 2015-01-07 06:03
8 | # Author : Ramakrishna Vedantam
9 |
10 |
11 | def my_lcs(string, sub):
12 | """
13 | Calculates longest common subsequence for a pair of tokenized strings
14 | :param string : list of str : tokens from a string split using whitespace
15 | :param sub : list of str : shorter string, also split using whitespace
16 |     :returns: int : length of the longest common subsequence between the two strings
17 |
18 | Note: my_lcs only gives length of the longest common subsequence, not the actual LCS
19 | """
20 | if len(string) < len(sub):
21 | sub, string = string, sub
22 |
23 | lengths = [[0 for i in range(0, len(sub) + 1)] for j in range(0, len(string) + 1)]
24 |
25 | for j in range(1, len(sub) + 1):
26 | for i in range(1, len(string) + 1):
27 | if string[i - 1] == sub[j - 1]:
28 | lengths[i][j] = lengths[i - 1][j - 1] + 1
29 | else:
30 | lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1])
31 |
32 | return lengths[len(string)][len(sub)]
33 |
34 |
35 | def calc_score(hypotheses, references, beta=1.2):
36 | """
37 | Compute ROUGE-L score given one candidate and references for an image
38 |     :param hypotheses: list of str : the single candidate sentence to be evaluated
39 |     :param references: list of str : COCO reference sentences for the particular image to be evaluated
40 |     :returns score: float (ROUGE-L score for the candidate evaluated against references)
41 | """
42 | assert len(hypotheses) == 1
43 | assert len(references) > 0
44 | prec = []
45 | rec = []
46 |
47 | # split into tokens
48 | token_c = hypotheses[0].split(" ")
49 |
50 | for reference in references:
51 | # split into tokens
52 | token_r = reference.split(" ")
53 | # compute the longest common subsequence
54 | lcs = my_lcs(token_r, token_c)
55 | prec.append(lcs / float(len(token_c)))
56 | rec.append(lcs / float(len(token_r)))
57 |
58 | prec_max = max(prec)
59 | rec_max = max(rec)
60 |
61 | if prec_max != 0 and rec_max != 0:
62 | score = ((1 + beta ** 2) * prec_max * rec_max) / float(
63 | rec_max + beta ** 2 * prec_max
64 | )
65 | else:
66 | score = 0.0
67 | return score
68 |
--------------------------------------------------------------------------------
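
A quick check of `calc_score` above on a single hypothesis/reference pair (import path taken from this repo's layout):

    from signjoey.external_metrics.mscoco_rouge import calc_score

    hyp = ["regen und wind am montag"]
    ref = ["regen und wind am abend"]
    # LCS covers 4 of 5 tokens -> precision = recall = 0.8, so the score is 0.8
    print(calc_score(hypotheses=hyp, references=ref))
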
/code/generation-model/slt/signjoey/loss.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Module to implement training loss
4 | """
5 |
6 | import torch
7 | from torch import nn, Tensor
8 | from torch.autograd import Variable
9 |
10 |
11 | class XentLoss(nn.Module):
12 | """
13 | Cross-Entropy Loss with optional label smoothing
14 | """
15 |
16 | def __init__(self, pad_index: int, smoothing: float = 0.0):
17 | super(XentLoss, self).__init__()
18 | self.smoothing = smoothing
19 | self.pad_index = pad_index
20 | if self.smoothing <= 0.0:
21 | # standard xent loss
22 | self.criterion = nn.NLLLoss(ignore_index=self.pad_index, reduction="sum")
23 | else:
24 | # custom label-smoothed loss, computed with KL divergence loss
25 | self.criterion = nn.KLDivLoss(reduction="sum")
26 |
27 | def _smooth_targets(self, targets: Tensor, vocab_size: int):
28 | """
29 | Smooth target distribution. All non-reference words get uniform
30 | probability mass according to "smoothing".
31 |
32 | :param targets: target indices, batch*seq_len
33 | :param vocab_size: size of the output vocabulary
34 | :return: smoothed target distributions, batch*seq_len x vocab_size
35 | """
36 | # batch*seq_len x vocab_size
37 | smooth_dist = targets.new_zeros((targets.size(0), vocab_size)).float()
38 | # fill distribution uniformly with smoothing
39 | smooth_dist.fill_(self.smoothing / (vocab_size - 2))
40 | # assign true label the probability of 1-smoothing ("confidence")
41 | smooth_dist.scatter_(1, targets.unsqueeze(1).data, 1.0 - self.smoothing)
42 | # give padding probability of 0 everywhere
43 | smooth_dist[:, self.pad_index] = 0
44 | # masking out padding area (sum of probabilities for padding area = 0)
45 | padding_positions = torch.nonzero(targets.data == self.pad_index)
46 | # pylint: disable=len-as-condition
47 | if len(padding_positions) > 0:
48 | smooth_dist.index_fill_(0, padding_positions.squeeze(), 0.0)
49 | return Variable(smooth_dist, requires_grad=False)
50 |
51 | # pylint: disable=arguments-differ
52 | def forward(self, log_probs, targets):
53 | """
54 | Compute the cross-entropy between logits and targets.
55 |
56 | If label smoothing is used, target distributions are not one-hot, but
57 | "1-smoothing" for the correct target token and the rest of the
58 | probability mass is uniformly spread across the other tokens.
59 |
60 | :param log_probs: log probabilities as predicted by model
61 | :param targets: target indices
62 | :return:
63 | """
64 | if self.smoothing > 0:
65 | targets = self._smooth_targets(
66 | targets=targets.contiguous().view(-1), vocab_size=log_probs.size(-1)
67 | )
68 | # targets: distributions with batch*seq_len x vocab_size
69 | assert (
70 | log_probs.contiguous().view(-1, log_probs.size(-1)).shape
71 | == targets.shape
72 | )
73 | else:
74 | # targets: indices with batch*seq_len
75 | targets = targets.contiguous().view(-1)
76 | loss = self.criterion(
77 | log_probs.contiguous().view(-1, log_probs.size(-1)), targets
78 | )
79 | return loss
80 |
--------------------------------------------------------------------------------
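
A quick check of the label-smoothed target construction in `XentLoss` above (pad index, vocabulary size, and smoothing value chosen for illustration):

    import torch
    from signjoey.loss import XentLoss

    crit = XentLoss(pad_index=0, smoothing=0.1)
    targets = torch.tensor([2, 3, 0])  # the last position is padding
    dist = crit._smooth_targets(targets=targets, vocab_size=5)
    print(dist)
    # Real tokens get 0.9 on their own index and 0.1/(5-2) on the others
    # (pad column zeroed); the padded row is all zeros.
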
/code/gloss-tagger-model/README.md:
--------------------------------------------------------------------------------
1 | ### The trained model for gloss tagging will be saved in this directory.
--------------------------------------------------------------------------------
/data/README.MD:
--------------------------------------------------------------------------------
1 | #### gloss-annotation.tsv:
2 | This file contains the manual annotation of glosses from the PHOENIX 14T dataset.
3 |
4 | #### full.train, full.dev, full.test:
5 | These tab-separated files contain the manual annotations along with tags predicted by our classifier.
6 | The first column denotes whether the label is a manual annotation or was predicted by the classifier.
7 | The second column contains gloss data of the PHOENIX 14T dataset infused with the intensification labels. Gloss tokens that are predicted or annotated to have lower intensity are surrounded by and . Tokens with higher intensity are surrounded similarly with and . Tokens that do not have any associated intensity are not surrounded by anything.
8 | Example:
9 | 1. $manual-annotation$ WOLKE LOCH SPEZIELL NORDWEST
10 | - This means the intensity labels for this instance are manually annotated. The token WOLKE has high intensity; the other tokens have no intensity.
11 | 2. $tagged$ SAMSTAG AUCH FREUNDLICH
12 | - This means the intensity labels for this instance were tagged by our classifier. The token FREUNDLICH has low intensity; the other tokens have no intensity.
13 |
--------------------------------------------------------------------------------