├── LICENSE ├── README.md ├── code ├── BERT-gloss-tagger.py ├── README.md ├── generation-model │ ├── README.md │ ├── dynamic_selection │ │ ├── Configs │ │ │ ├── Base.yaml │ │ │ ├── Base_train_all.yaml │ │ │ ├── Base_train_all_test.yaml │ │ │ ├── Base_train_gloss_after_no.yaml │ │ │ ├── Base_train_gloss_basic.yaml │ │ │ ├── Base_train_gloss_joint_yes.yaml │ │ │ ├── dynamic.yaml │ │ │ ├── src_train.gloss.after.no.repeat_vocab.txt │ │ │ ├── src_train.gloss.joint.no.repeat_vocab.txt │ │ │ ├── src_train.gloss.joint.no.repeat_vocab_extend.txt │ │ │ ├── src_train.gloss.joint.yes.repeat_vocab copy.txt │ │ │ ├── src_train.gloss.joint.yes.repeat_vocab.txt │ │ │ ├── src_vocab.txt │ │ │ ├── text_vocab.txt │ │ │ └── validations.txt │ │ ├── Data │ │ │ ├── Gloss_Dynamic │ │ │ │ ├── dev.after_gloss │ │ │ │ ├── dev.files │ │ │ │ ├── dev.gloss │ │ │ │ ├── dev.joint_gloss │ │ │ │ ├── dev.text │ │ │ │ ├── test.after_gloss │ │ │ │ ├── test.files │ │ │ │ ├── test.gloss │ │ │ │ ├── test.joint_gloss │ │ │ │ ├── test.text │ │ │ │ ├── train.after_gloss │ │ │ │ ├── train.files │ │ │ │ ├── train.gloss │ │ │ │ ├── train.joint_gloss │ │ │ │ └── train.text │ │ │ └── tmp │ │ │ │ ├── dev.files │ │ │ │ ├── dev.gloss │ │ │ │ ├── dev.skels │ │ │ │ ├── dev.text │ │ │ │ ├── test.files │ │ │ │ ├── test.gloss │ │ │ │ ├── test.skels │ │ │ │ ├── test.text │ │ │ │ ├── train.files │ │ │ │ ├── train.gloss │ │ │ │ ├── train.skels │ │ │ │ └── train.text │ │ ├── LICENSE.md │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── __pycache__ │ │ │ ├── batch.cpython-37.pyc │ │ │ ├── builders.cpython-37.pyc │ │ │ ├── constants.cpython-37.pyc │ │ │ ├── data.cpython-37.pyc │ │ │ ├── decoders.cpython-37.pyc │ │ │ ├── dtw.cpython-37.pyc │ │ │ ├── embeddings.cpython-37.pyc │ │ │ ├── encoders.cpython-37.pyc │ │ │ ├── helpers.cpython-37.pyc │ │ │ ├── initialization.cpython-37.pyc │ │ │ ├── loss.cpython-37.pyc │ │ │ ├── model.cpython-37.pyc │ │ │ ├── plot_videos.cpython-37.pyc │ │ │ ├── prediction.cpython-37.pyc │ │ │ ├── search.cpython-37.pyc │ │ │ ├── training.cpython-37.pyc │ │ │ ├── transformer_layers.cpython-37.pyc │ │ │ └── vocabulary.cpython-37.pyc │ │ ├── batch.py │ │ ├── builders.py │ │ ├── constants.py │ │ ├── data.py │ │ ├── decoders.py │ │ ├── dtw.py │ │ ├── embeddings.py │ │ ├── encoders.py │ │ ├── external_metrics │ │ │ ├── mscoco_rouge.py │ │ │ └── sacrebleu.py │ │ ├── helpers.py │ │ ├── initialization.py │ │ ├── loss.py │ │ ├── metrics.py │ │ ├── model.py │ │ ├── optim │ │ │ ├── lamb.py │ │ │ ├── lookahead.py │ │ │ ├── novograd.py │ │ │ ├── over9000.py │ │ │ ├── radam.py │ │ │ ├── ralamb.py │ │ │ └── ranger.py │ │ ├── plot_videos.py │ │ ├── prediction.py │ │ ├── requirements.txt │ │ ├── run-adj.sh │ │ ├── run-norm.sh │ │ ├── run.sh │ │ ├── run2.sh │ │ ├── search.py │ │ ├── training.py │ │ ├── transformer_layers.py │ │ ├── vocabulary.py │ │ └── write_file_slt.py │ ├── progressive_transformer │ │ ├── Configs │ │ │ ├── Base.yaml │ │ │ ├── Base_real.yaml │ │ │ ├── Base_text2Skeleton.yaml │ │ │ ├── src_text_vocab.txt │ │ │ ├── src_vocab.txt │ │ │ ├── train.log │ │ │ └── validations.txt │ │ ├── Data │ │ │ └── tmp │ │ │ │ ├── dev.files │ │ │ │ ├── dev.gloss │ │ │ │ ├── dev.skels │ │ │ │ ├── dev.text │ │ │ │ ├── test.files │ │ │ │ ├── test.gloss │ │ │ │ ├── test.skels │ │ │ │ ├── test.text │ │ │ │ ├── train.files │ │ │ │ ├── train.gloss │ │ │ │ ├── train.skels │ │ │ │ └── train.text │ │ ├── LICENSE.md │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── __pycache__ │ │ │ ├── batch.cpython-37.pyc │ │ │ ├── 
builders.cpython-37.pyc │ │ │ ├── constants.cpython-37.pyc │ │ │ ├── data.cpython-37.pyc │ │ │ ├── decoders.cpython-37.pyc │ │ │ ├── dtw.cpython-37.pyc │ │ │ ├── embeddings.cpython-37.pyc │ │ │ ├── encoders.cpython-37.pyc │ │ │ ├── helpers.cpython-37.pyc │ │ │ ├── initialization.cpython-37.pyc │ │ │ ├── loss.cpython-37.pyc │ │ │ ├── model.cpython-37.pyc │ │ │ ├── plot_videos.cpython-37.pyc │ │ │ ├── prediction.cpython-37.pyc │ │ │ ├── search.cpython-37.pyc │ │ │ ├── training.cpython-37.pyc │ │ │ ├── training.cpython-39.pyc │ │ │ ├── transformer_layers.cpython-37.pyc │ │ │ └── vocabulary.cpython-37.pyc │ │ ├── batch.py │ │ ├── builders.py │ │ ├── constants.py │ │ ├── data.py │ │ ├── decoders.py │ │ ├── dtw.py │ │ ├── embeddings.py │ │ ├── encoders.py │ │ ├── external_metrics │ │ │ ├── mscoco_rouge.py │ │ │ └── sacrebleu.py │ │ ├── helpers.py │ │ ├── initialization.py │ │ ├── loss.py │ │ ├── make_new_vocab.py │ │ ├── metrics.py │ │ ├── model.py │ │ ├── optim │ │ │ ├── lamb.py │ │ │ ├── lookahead.py │ │ │ ├── novograd.py │ │ │ ├── over9000.py │ │ │ ├── radam.py │ │ │ ├── ralamb.py │ │ │ └── ranger.py │ │ ├── plot_videos.py │ │ ├── prediction.py │ │ ├── requirements (1).txt │ │ ├── requirements.txt │ │ ├── search.py │ │ ├── training.py │ │ ├── transformer_layers.py │ │ ├── vocabulary.py │ │ └── write_file_slt.py │ └── slt │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── base_annotations │ │ ├── check_base_file.py │ │ ├── file.txt │ │ ├── out_dev │ │ ├── phoenix14t.dev │ │ ├── phoenix14t.test │ │ └── phoenix14t.train │ │ ├── configs │ │ ├── sign copy.yaml │ │ ├── sign.yaml │ │ ├── sign_train_gloss_before_no_repeat.yaml │ │ ├── sign_train_gloss_between_no_repeat.yaml │ │ ├── sign_train_gloss_between_yes_repeat.yaml │ │ └── sign_train_gloss_joint_no_repeat.yaml │ │ ├── experiment_results │ │ ├── dev.ph14t │ │ ├── results.md │ │ └── test.ph14t │ │ ├── output │ │ ├── output_gold.BW_009.dev.gls │ │ ├── output_gold.BW_009.test.gls │ │ ├── output_gold.BW_09.A_3.dev.txt │ │ ├── output_gold.BW_09.A_3.test.txt │ │ ├── output_gold.dev_results.pkl │ │ └── output_gold.test_results.pkl │ │ ├── requirements.txt │ │ └── signjoey │ │ ├── __init__.py │ │ ├── __main__ eval.py │ │ ├── __main__.py │ │ ├── __main__evaluation.py │ │ ├── attention.py │ │ ├── batch.py │ │ ├── builders.py │ │ ├── data.py │ │ ├── dataset.py │ │ ├── decoders.py │ │ ├── embeddings.py │ │ ├── encoders.py │ │ ├── external_metrics │ │ ├── mscoco_rouge.py │ │ └── sacrebleu.py │ │ ├── helpers.py │ │ ├── initialization.py │ │ ├── loss.py │ │ ├── metrics.py │ │ ├── metrics_old.py │ │ ├── model.py │ │ ├── phoenix_utils │ │ └── phoenix_cleanup.py │ │ ├── prediction.py │ │ ├── search.py │ │ ├── training.py │ │ ├── transformer_layers.py │ │ └── vocabulary.py └── gloss-tagger-model │ └── README.md └── data ├── README.MD ├── dev.full ├── gloss-annotation.tsv ├── test.full └── train.full /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Mert Inan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission 
notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Modeling Intensification for Sign Language Generation 2 | Public repo for the paper "Modeling Intensification for Sign Language Generation: A Computational Approach" by Mert Inan*, Yang Zhong*, Sabit Hassan*, Lorna Quandt, Malihe Alikhani 3 | 4 | ## Abstract 5 | End-to-end sign language generation models do not accurately represent the prosody that exists in sign languages. The lack of temporal and spatial variation in the models’ scope leads to poor quality and lower human understanding of generated signs. In this paper, we seek to improve prosody in generated sign languages by modeling intensification in a data-driven manner. We present different strategies, grounded in the linguistics of sign language, that differ in how intensity modifiers are represented in gloss annotations. To employ our strategies, we first annotate a subset of the benchmark PHOENIX14T, a German Sign Language dataset, with different levels of intensification. We then use a supervised intensity tagger to extend the annotated dataset and obtain labels for the remaining portion of the 6 | dataset. This enhanced dataset is then used to train state-of-the-art transformer models for sign language generation. We find that our efforts in intensification modeling yield better results when evaluated with automated metrics. Human evaluation also indicates a higher preference for the videos generated using our model. 7 | 8 | ## Organization of the repo 9 | This repo provides the code and the data required to replicate the results of the paper. The dataset that we provide, with intensifier augmentations to the glosses, can be used for further research. 10 | 11 | Under the /code folder, you can find the code for the main proposed sign language generation model. In addition, our gloss augmentation code is also provided here.
12 | 13 | -------------------------------------------------------------------------------- /code/BERT-gloss-tagger.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer 3 | from sklearn.svm import LinearSVC 4 | from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score 5 | from collections import Counter 6 | import os 7 | import ktrain 8 | from ktrain import text 9 | 10 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 12 | # MODEL_NAME = "bert-base-german-cased" 13 | # MODEL_NAME = "bert-base-multilingual-cased" 14 | MODEL_NAME = "bert-base-multilingual-uncased" 15 | # MODEL_NAME = "dbmdz/bert-base-german-uncased" 16 | 17 | def print_evaluation(gold_labels, predicted_labels): 18 | 19 | '''Computes accuracy, precision, recall and macro F1 (as percentages, rounded to two decimals) and returns them as a list''' 20 | 21 | accuracy = accuracy_score(gold_labels, predicted_labels) * 100 22 | f1 = f1_score(gold_labels, predicted_labels, average="macro") * 100 23 | recall = recall_score(gold_labels, predicted_labels, average="macro") * 100 24 | precision = precision_score(gold_labels, predicted_labels, average="macro") * 100 25 | 26 | 27 | a = [accuracy, precision, recall, f1] 28 | for i in range(4): 29 | a[i] = round(a[i], 2) 30 | 31 | return a 32 | 33 | train_text = [] 34 | train_gloss = [] 35 | train_labels = [] 36 | train_serials = [] 37 | train_inp = [] 38 | 39 | dev_text = [] 40 | dev_gloss = [] 41 | dev_labels = [] 42 | dev_serials = [] 43 | dev_inp = [] 44 | 45 | test_text = [] 46 | test_gloss = [] 47 | test_labels = [] 48 | test_serials = [] 49 | test_inp = [] 50 | 51 | 52 | prev = ""  # carries the last non-empty sentence for rows with an empty text column 53 | check = 0  # row counter, used to skip the header and to report malformed rows 54 | max_len = 0 55 | with open("../data/gloss-annotation.tsv", "r") as f: 56 | for line in f: 57 | check += 1 58 | if check == 1:  # skip the header row 59 | continue 60 | 61 | row = line.lower().strip().split("\t") 62 | 63 | 64 | try: 65 | split = row[0] 66 | serial = row[1] 67 | g_text = row[2] 68 | g_gloss_token = row[6] 69 | label = row[8] 70 | 71 | if g_text == "": 72 | g_text = prev 73 | else: 74 | prev = g_text 75 | except IndexError: 76 | print ("malformed row:", check) 77 | 78 | if (len(g_text) > max_len): 79 | max_len = len(g_text) 80 | 81 | # print (g_text, g_gloss_token) 82 | if split == "train": 83 | train_text.append(g_text) 84 | train_gloss.append(g_gloss_token) 85 | train_labels.append(label) 86 | train_serials.append(serial) 87 | train_inp.append((g_text, g_gloss_token)) 88 | 89 | if split == "dev": 90 | dev_text.append(g_text) 91 | dev_gloss.append(g_gloss_token) 92 | dev_labels.append(label) 93 | dev_serials.append(serial) 94 | dev_inp.append((g_text, g_gloss_token)) 95 | 96 | if split == "test": 97 | test_text.append(g_text) 98 | test_gloss.append(g_gloss_token) 99 | test_labels.append(label) 100 | test_serials.append(serial) 101 | test_inp.append((g_text, g_gloss_token)) 102 | 103 | 104 | print (MODEL_NAME) 105 | t = text.Transformer(MODEL_NAME, maxlen=230, class_names=list(set(train_labels))) 106 | trn = t.preprocess_train(train_inp, train_labels) 107 | val = t.preprocess_test(dev_inp, dev_labels) 108 | tst = t.preprocess_test(test_inp, test_labels) 109 | model = t.get_classifier() 110 | 111 | learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=8) # lower the batch size if OOM occurs 112 | learner.fit_onecycle(5e-5, 5) 113 | clf = ktrain.get_predictor(learner.model, t) 114 | 115 | dev_preds = clf.predict(dev_inp) 116 | test_preds = clf.predict(test_inp) 117 | 118 | print
("dev results", print_evaluation(dev_labels, dev_preds)) 119 | print ("test results", print_evaluation(test_labels, test_preds)) 120 | 121 | print (Counter(test_preds)) 122 | 123 | clf.save('gloss-tagger-model/mert-uncased') 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /code/README.md: -------------------------------------------------------------------------------- 1 | To run BERT-gloss-tagger.py make sure to install sklearn and ktrain:
2 | pip install ktrain==0.28.3
3 | pip install scikit-learn==0.23.2
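After training, the script saves a predictor under `gloss-tagger-model/mert-uncased`. Here is a minimal usage sketch, assuming that saved predictor exists; the example (sentence, gloss) pair below is made up, and it mirrors the lowercased `(text, gloss-token)` tuples the tagger was trained on:

```python
import ktrain

# Load the fine-tuned tagger saved by BERT-gloss-tagger.py via clf.save(...)
clf = ktrain.load_predictor("gloss-tagger-model/mert-uncased")

# The model was trained on (sentence, gloss-token) tuples, so predict on the same shape.
example = ("im norden weht ein kräftiger wind .", "wind")  # made-up pair
print(clf.predict([example]))  # -> one intensity label per input pair
```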
4 | -------------------------------------------------------------------------------- /code/generation-model/README.md: -------------------------------------------------------------------------------- 1 | # Progressive Transformers 2 | 3 | `cd progressive_transformer` 4 | 5 | ## Install 6 | Install the required packages using the requirements.txt file: 7 | 8 | `pip install -r requirements.txt` 9 | 10 | ## Usage 11 | 12 | To run, start __main__.py with the arguments "train" and "./Configs/Base.yaml": 13 | 14 | `python __main__.py train ./Configs/Base.yaml` 15 | 16 | You need to modify the YAML file. 17 | 18 | (1) You can use different versions of the PT dataset by modifying the data paths (train, dev, test, and src_vocab), which are located under the Data folder. 19 | 20 | 21 | ### Dump the skeletons 22 | 23 | First, call the function below in the `training.py` main function: 24 | 25 | `test(cfg_file=PATH_OF_MODEL_CONFIG_FILE, ckpt=get_latest_checkpoint(PATH_OF_TRAINED_MODEL, post_fix="_best"))` 26 | 27 | Then run `python training.py`. 28 | 29 | ### Convert data to SLT format 30 | 31 | `python write_file_slt.py --cur_path Data/MODEL_SKELETON_NAME` 32 | 33 | ### Model checkpoints 34 | You can find the model checkpoints at [this link](https://drive.google.com/drive/folders/10fsw-xSt2Rmupx31FYhxPcfSfiFL5Djf?usp=sharing). 35 | 36 | # SLT 37 | 38 | Make sure you have the corresponding data. 39 | 40 | `cd slt` 41 | 42 | `pip install -r requirements.txt` 43 | 44 | ## Training 45 | `python -m signjoey train configs/YOUR_MODIFIED_YAML` 46 | You only need to modify `data: data_path` in the YAML file. 47 | 48 | ## Test 49 | `python -m signjoey test configs/YOUR_MODIFIED_YAML --ckpt your.ckpt --output_path output/MODEL_NAME` 50 | 51 | 52 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Configs/Base.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | src1: "text" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 3 | src2: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 4 | trg: "skels" # Target - 3D body co-ordinates (skels) 5 | files: "files" # Filenames for each sequence 6 | 7 | train: "./Data/tmp/train" 8 | dev: "./Data/tmp/dev" 9 | test: "./Data/tmp/test" 10 | 11 | max_sent_length: 300 # Max Sentence Length 12 | skip_frames: 1 # Skip frames in the data, to reduce the data input size 13 | src_voc_min_freq: 0 14 | src_voc_limit: 2000 15 | src1_vocab: "./Configs/text_vocab.txt" 16 | src2_vocab: "./Configs/gloss_vocab.txt" 17 | 18 | training: 19 | random_seed: 27 # Random seed for initialisation 20 | optimizer: "adam" # Chosen optimiser (adam, ..) 21 | learning_rate: 0.001 # Initial model learning rate 22 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop 23 | weight_decay: 0.0 # Weight Decay 24 | clip_grad_norm: 5.0 # Gradient clipping value 25 | batch_size: 8 # Batch Size for training 26 | scheduling: "plateau" # Scheduling at training time (plateau, ...) 27 | patience: 7 # How many epochs of no improvement causes a LR reduction 28 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs 29 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
30 | epochs: 20000 # How many epochs to run for 31 | validation_freq: 10 # After how many steps to run a validation on the model 32 | logging_freq: 250 # After how many steps to log training progress 33 | eval_metric: "dtw" # Evaluation metric during training (dtw, bt) 34 | model_dir: "./Models/Base" # Where the model shall be stored 35 | overwrite: False # Flag to overwrite a previous saved model in the model_dir 36 | continue: True # Flag to continue from a previous saved model in the model_dir 37 | shuffle: True # Flag to shuffle the data during training 38 | use_cuda: True # Flag to use GPU cuda capabilities 39 | max_output_length: 300 # Max Output Length 40 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep 41 | loss: "MSE" # Loss function (MSE, L1) 42 | 43 | model: 44 | initializer: "xavier" # Model initialisation (Xavier, ...) 45 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...) 46 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...) 47 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 48 | just_count_in: False # Flag for Just Counter Data Augmentation 49 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation 50 | noise_rate: 5 # Gaussian Noise rate 51 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 52 | encoder1: # Model Encoder 53 | type: "transformer" 54 | num_layers: 2 # Number of layers 55 | num_heads: 4 # Number of Heads 56 | embeddings: 57 | embedding_dim: 512 # Embedding Dimension 58 | dropout: 0.0 # Embedding Dropout 59 | hidden_size: 512 # Hidden Size Dimension 60 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 61 | dropout: 0.0 # Encoder Dropout 62 | encoder2: # Model Encoder 63 | type: "transformer" 64 | num_layers: 2 # Number of layers 65 | num_heads: 4 # Number of Heads 66 | embeddings: 67 | embedding_dim: 512 # Embedding Dimension 68 | dropout: 0.0 # Embedding Dropout 69 | hidden_size: 512 # Hidden Size Dimension 70 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 71 | dropout: 0.0 # Encoder Dropout 72 | decoder: # Model Decoder 73 | type: "transformer" 74 | num_layers: 2 # Number of layers 75 | num_heads: 4 # Number of Heads 76 | embeddings: 77 | embedding_dim: 512 # Embedding Dimension 78 | dropout: 0.0 # Embedding Dropout 79 | hidden_size: 512 # Hidden Size Dimension 80 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 81 | dropout: 0.0 # Decoder Dropout 82 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Configs/Base_train_all.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | 3 | src1: "joint_gloss" 4 | src2: "after_gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 5 | trg: "skels" # Target - 3D body co-ordinates (skels) 6 | files: "files" # Filenames for each sequence 7 | 8 | train: "../Data/Gloss_Dynamic/train" 9 | dev: "../Data/Gloss_Dynamic/dev" 10 | test: "../Data/Gloss_Dynamic/test" 11 | 12 | 13 | features: ["hands+body"] 14 | max_sent_length: 300 # Max Sentence Length 15 | skip_frames: 1 # Skip frames in the data, to reduce the data input size 16 | 17 | src1_vocab: "./Configs/src_train.gloss.joint.no.repeat_vocab.txt" 18 | src2_vocab: "./Configs/src_train.gloss.after.no.repeat_vocab.txt" 19 | 20 | training: 21 | random_seed: 27 # Random seed for initialisation 22 | optimizer: "adam" # Chosen optimiser (adam, ..)
23 | learning_rate: 0.001 # Initial model learning rate 24 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop 25 | weight_decay: 0.0 # Weight Decay 26 | clip_grad_norm: 5.0 # Gradient clipping value 27 | batch_size: 8 # Batch Size for training 28 | scheduling: "plateau" # Scheduling at training time (plateau, ...) 29 | patience: 7 # How many epochs of no improvement causes a LR reduction 30 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs 31 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...) 32 | epochs: 20000 # How many epochs to run for 33 | validation_freq: 10000 # After how many steps to run a validation on the model 34 | logging_freq: 250 # After how many steps to log training progress 35 | eval_metric: "dtw" # Evaluation metric during training (dtw, bt) 36 | model_dir: "./Models/Base_train_dynamic_2gloss_BCD_hard_smaller" # Where the model shall be stored 37 | overwrite: False # Flag to overwrite a previous saved model in the model_dir 38 | continue: True # Flag to continue from a previous saved model in the model_dir 39 | shuffle: True # Flag to shuffle the data during training 40 | use_cuda: True # Flag to use GPU cuda capabilities 41 | max_output_length: 300 # Max Output Length 42 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep 43 | loss: "MSE" # Loss function (MSE, L1) 44 | T: 1 # Temperature for attention balance 45 | balance: True # Whether to apply dynamic attention over the multi-views 46 | BDC_train: True 47 | 48 | model: 49 | initializer: "xavier" # Model initialisation (Xavier, ...) 50 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...) 51 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...) 52 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 53 | just_count_in: False # Flag for Just Counter Data Augmentation 54 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation 55 | noise_rate: 5 # Gaussian Noise rate 56 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 57 | encoder1: # Model Encoder 58 | type: "transformer" 59 | num_layers: 2 # Number of layers 60 | num_heads: 4 # Number of Heads 61 | embeddings: 62 | embedding_dim: 512 # Embedding Dimension 63 | dropout: 0.0 # Embedding Dropout 64 | hidden_size: 512 # Hidden Size Dimension 65 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 66 | dropout: 0.0 # Encoder Dropout 67 | encoder2: # Model Encoder 68 | type: "transformer" 69 | num_layers: 2 # Number of layers 70 | num_heads: 4 # Number of Heads 71 | embeddings: 72 | embedding_dim: 512 # Embedding Dimension 73 | dropout: 0.0 # Embedding Dropout 74 | hidden_size: 512 # Hidden Size Dimension 75 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 76 | dropout: 0.0 # Encoder Dropout 77 | encoder3: # Model Encoder 78 | type: "transformer" 79 | num_layers: 2 # Number of layers 80 | num_heads: 4 # Number of Heads 81 | embeddings: 82 | embedding_dim: 256 # Embedding Dimension 83 | dropout: 0.0 # Embedding Dropout 84 | hidden_size: 256 # Hidden Size Dimension 85 | ff_size: 1024 # Feed-forward dimension (4 x hidden_size) 86 | dropout: 0.0 # Encoder Dropout 87 | decoder: # Model Decoder 88 | type: "transformer" 89 | num_layers: 2 # Number of layers 90 | num_heads: 4 # Number of Heads 91 | embeddings: 92 | embedding_dim: 512 # Embedding Dimension 93 | dropout: 0.0 # Embedding Dropout 94 | hidden_size: 512 # Hidden Size Dimension 95 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 96 | dropout: 0.0 # Decoder Dropout 97 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Configs/Base_train_all_test.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | 3 | src1: "joint_gloss" 4 | src2: "after_gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 5 | trg: "skels" # Target - 3D body co-ordinates (skels) 6 | files: "files" # Filenames for each sequence 7 | 8 | train: "../Data/Gloss_Dynamic/train" 9 | dev: "../Data/Gloss_Dynamic/dev" 10 | test: "../Data/Gloss_Dynamic/test" 11 | 12 | 13 | features: ["hands+body"] 14 | max_sent_length: 300 # Max Sentence Length 15 | skip_frames: 1 # Skip frames in the data, to reduce the data input size 16 | 17 | src1_vocab: "./Configs/src_train.gloss.joint.no.repeat_vocab.txt" 18 | src2_vocab: "./Configs/src_train.gloss.after.no.repeat_vocab.txt" 19 | 20 | training: 21 | random_seed: 0 # Random seed for initialisation 22 | optimizer: "adam" # Chosen optimiser (adam, ..) 23 | learning_rate: 0.001 # Initial model learning rate 24 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop 25 | weight_decay: 0.0 # Weight Decay 26 | clip_grad_norm: 5.0 # Gradient clipping value 27 | batch_size: 8 # Batch Size for training 28 | scheduling: "plateau" # Scheduling at training time (plateau, ...) 29 | patience: 7 # How many epochs of no improvement causes a LR reduction 30 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs 31 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...) 32 | epochs: 20000 # How many epochs to run for 33 | validation_freq: 100 # After how many steps to run a validation on the model 34 | logging_freq: 250 # After how many steps to log training progress 35 | eval_metric: "dtw" # Evaluation metric during training (dtw, bt) 36 | model_dir: "./Models/Base_train_dynamic_gloss_BDC" # Where the model shall be stored 37 | overwrite: False # Flag to overwrite a previous saved model in the model_dir 38 | continue: True # Flag to continue from a previous saved model in the model_dir 39 | shuffle: True # Flag to shuffle the data during training 40 | use_cuda: True # Flag to use GPU cuda capabilities 41 | max_output_length: 300 # Max Output Length 42 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep 43 | loss: "MSE" # Loss function (MSE, L1) 44 | T: 1 # Temperature for attention balance 45 | balance: True # Whether to apply dynamic attention over the multi-views 46 | BDC_train: True 47 | 48 | model: 49 | initializer: "xavier" # Model initialisation (Xavier, ...) 50 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...) 51 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
52 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 53 | just_count_in: False # Flag for Just Counter Data Augmentation 54 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation 55 | noise_rate: 5 # Gaussian Noise rate 56 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 57 | encoder1: # Model Encoder 58 | type: "transformer" 59 | num_layers: 2 # Number of layers 60 | num_heads: 4 # Number of Heads 61 | embeddings: 62 | embedding_dim: 512 # Embedding Dimension 63 | dropout: 0.0 # Embedding Dropout 64 | hidden_size: 512 # Hidden Size Dimension 65 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 66 | dropout: 0.0 # Encoder Dropout 67 | encoder2: # Model Encoder 68 | type: "transformer" 69 | num_layers: 2 # Number of layers 70 | num_heads: 4 # Number of Heads 71 | embeddings: 72 | embedding_dim: 512 # Embedding Dimension 73 | dropout: 0.0 # Embedding Dropout 74 | hidden_size: 512 # Hidden Size Dimension 75 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 76 | dropout: 0.0 # Encoder Dropout 77 | decoder: # Model Decoder 78 | type: "transformer" 79 | num_layers: 2 # Number of layers 80 | num_heads: 4 # Number of Heads 81 | embeddings: 82 | embedding_dim: 512 # Embedding Dimension 83 | dropout: 0.0 # Embedding Dropout 84 | hidden_size: 512 # Hidden Size Dimension 85 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 86 | dropout: 0.0 # Decoder Dropout 87 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Configs/Base_train_gloss_after_no.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | src1: "text" 3 | src2: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 4 | trg: "skels" # Target - 3D body co-ordinates (skels) 5 | files: "files" # Filenames for each sequence 6 | 7 | train: "./Data/train_gloss_after_no_repeat/train" 8 | dev: "./Data/train_gloss_after_no_repeat/dev" 9 | test: "./Data/train_gloss_after_no_repeat/test" 10 | 11 | features: ["hands+body"] 12 | max_sent_length: 300 # Max Sentence Length 13 | skip_frames: 1 # Skip frames in the data, to reduce the data input size 14 | src1_vocab: "./Configs/text_vocab.txt" 15 | src2_vocab: "./Configs/src_train.gloss.after.no.repeat_vocab.txt" 16 | 17 | training: 18 | random_seed: 0 # Random seed for initialisation 19 | optimizer: "adam" # Chosen optimiser (adam, ..) 20 | learning_rate: 0.001 # Initial model learning rate 21 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop 22 | weight_decay: 0.0 # Weight Decay 23 | clip_grad_norm: 5.0 # Gradient clipping value 24 | batch_size: 8 # Batch Size for training 25 | scheduling: "plateau" # Scheduling at training time (plateau, ...) 26 | patience: 7 # How many epochs of no improvement causes a LR reduction 27 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs 28 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...) 
29 | epochs: 20000 # How many epochs to run for 30 | validation_freq: 10000 # After how many steps to run a validation on the model 31 | logging_freq: 250 # After how many steps to log training progress 32 | eval_metric: "dtw" # Evaluation metric during training (dtw, bt) 33 | model_dir: "./Models/Base_train_after_no" # Where the model shall be stored 34 | overwrite: False # Flag to overwrite a previous saved model in the model_dir 35 | continue: True # Flag to continue from a previous saved model in the model_dir 36 | shuffle: True # Flag to shuffle the data during training 37 | use_cuda: True # Flag to use GPU cuda capabilities 38 | max_output_length: 300 # Max Output Length 39 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep 40 | loss: "MSE" # Loss function (MSE, L1) 41 | 42 | model: 43 | initializer: "xavier" # Model initialisation (Xavier, ...) 44 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...) 45 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...) 46 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 47 | just_count_in: False # Flag for Just Counter Data Augmentation 48 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation 49 | noise_rate: 5 # Gaussian Noise rate 50 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 51 | encoder1: # Model Encoder 52 | type: "transformer" 53 | num_layers: 2 # Number of layers 54 | num_heads: 4 # Number of Heads 55 | embeddings: 56 | embedding_dim: 512 # Embedding Dimension 57 | dropout: 0.0 # Embedding Dropout 58 | hidden_size: 512 # Hidden Size Dimension 59 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 60 | dropout: 0.0 # Encoder Dropout 61 | encoder2: # Model Encoder 62 | type: "transformer" 63 | num_layers: 2 # Number of layers 64 | num_heads: 4 # Number of Heads 65 | embeddings: 66 | embedding_dim: 512 # Embedding Dimension 67 | dropout: 0.0 # Embedding Dropout 68 | hidden_size: 512 # Hidden Size Dimension 69 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 70 | dropout: 0.0 # Encoder Dropout 71 | decoder: # Model Decoder 72 | type: "transformer" 73 | num_layers: 2 # Number of layers 74 | num_heads: 4 # Number of Heads 75 | embeddings: 76 | embedding_dim: 512 # Embedding Dimension 77 | dropout: 0.0 # Embedding Dropout 78 | hidden_size: 512 # Hidden Size Dimension 79 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 80 | dropout: 0.0 # Decoder Dropout 81 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Configs/Base_train_gloss_basic.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | src1: "text" 3 | src2: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 4 | trg: "skels" # Target - 3D body co-ordinates (skels) 5 | files: "files" # Filenames for each sequence 6 | 7 | train: "./Data/PT/train" 8 | dev: "./Data/PT/dev" 9 | test: "./Data/PT/test" 10 | 11 | features: ["hands+body"] 12 | max_sent_length: 300 # Max Sentence Length 13 | skip_frames: 1 # Skip frames in the data, to reduce the data input size 14 | src1_vocab: "./Configs/text_vocab.txt" 15 | src2_vocab: "./Configs/gloss_vocab.txt" 16 | 17 | training: 18 | random_seed: 0 # Random seed for initialisation 19 | optimizer: "adam" # Chosen optimiser (adam, ..)
20 | learning_rate: 0.001 # Initial model learning rate 21 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop 22 | weight_decay: 0.0 # Weight Decay 23 | clip_grad_norm: 5.0 # Gradient clipping value 24 | batch_size: 8 # Batch Size for training 25 | scheduling: "plateau" # Scheduling at training time (plateau, ...) 26 | patience: 7 # How many epochs of no improvement causes a LR reduction 27 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs 28 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...) 29 | epochs: 20000 # How many epochs to run for 30 | validation_freq: 10000 # After how many steps to run a validation on the model 31 | logging_freq: 250 # After how many steps to log training progress 32 | eval_metric: "dtw" # Evaluation metric during training (dtw, bt) 33 | model_dir: "./Models/Base_train.gloss" # Where the model shall be stored 34 | overwrite: False # Flag to overwrite a previous saved model in the model_dir 35 | continue: True # Flag to continue from a previous saved model in the model_dir 36 | shuffle: True # Flag to shuffle the data during training 37 | use_cuda: True # Flag to use GPU cuda capabilities 38 | max_output_length: 300 # Max Output Length 39 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep 40 | loss: "MSE" # Loss function (MSE, L1) 41 | 42 | model: 43 | initializer: "xavier" # Model initialisation (Xavier, ...) 44 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...) 45 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...) 46 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 47 | just_count_in: False # Flag for Just Counter Data Augmentation 48 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation 49 | noise_rate: 5 # Gaussian Noise rate 50 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 51 | encoder1: # Model Encoder 52 | type: "transformer" 53 | num_layers: 2 # Number of layers 54 | num_heads: 4 # Number of Heads 55 | embeddings: 56 | embedding_dim: 512 # Embedding Dimension 57 | dropout: 0.0 # Embedding Dropout 58 | hidden_size: 512 # Hidden Size Dimension 59 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 60 | dropout: 0.0 # Encoder Dropout 61 | encoder2: # Model Encoder 62 | type: "transformer" 63 | num_layers: 2 # Number of layers 64 | num_heads: 4 # Number of Heads 65 | embeddings: 66 | embedding_dim: 512 # Embedding Dimension 67 | dropout: 0.0 # Embedding Dropout 68 | hidden_size: 512 # Hidden Size Dimension 69 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 70 | dropout: 0.0 # Encoder Dropout 71 | decoder: # Model Decoder 72 | type: "transformer" 73 | num_layers: 2 # Number of layers 74 | num_heads: 4 # Number of Heads 75 | embeddings: 76 | embedding_dim: 512 # Embedding Dimension 77 | dropout: 0.0 # Embedding Dropout 78 | hidden_size: 512 # Hidden Size Dimension 79 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 80 | dropout: 0.0 # Decoder Dropout 81 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Configs/Base_train_gloss_joint_yes.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | src1: "text" 3 | src2: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 4 | trg: "skels" # Target - 3D body co-ordinates (skels) 5 | files: "files" # Filenames for each sequence 6 | 7 | train:
"./Data/train_gloss_joint_yes_repeat/train" 8 | dev: "./Data/train_gloss_joint_yes_repeat/dev" 9 | test: "./Data/train_gloss_joint_yes_repeat/test" 10 | 11 | features: ["hands+body"] 12 | max_sent_length: 300 # Max Sentence Length 13 | skip_frames: 1 # Skip frames in the data, to reduce the data input size 14 | src1_vocab: "./Configs/text_vocab.txt" 15 | src2_vocab: "./Configs/src_train.gloss.joint.yes.repeat_vocab.txt" 16 | 17 | training: 18 | random_seed: 0 # Random seed for initialisation 19 | optimizer: "adam" # Chosen optimiser (adam, ..) 20 | learning_rate: 0.001 # Initial model learning rate 21 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop 22 | weight_decay: 0.0 # Weight Decay 23 | clip_grad_norm: 5.0 # Gradient clipping value 24 | batch_size: 8 # Batch Size for training 25 | scheduling: "plateau" # Scheduling at training time (plateau, ...) 26 | patience: 7 # How many epochs of no improvement causes a LR reduction 27 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs 28 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...) 29 | epochs: 20000 # How many epochs to run for 30 | validation_freq: 10000 # After how many steps to run a validation on the model 31 | logging_freq: 250 # After how many steps to log training progress 32 | eval_metric: "dtw" # Evaluation metric during training (dtw','bt') 33 | model_dir: "./Models/Base_train_joint_yes" # Where the model shall be stored 34 | overwrite: False # Flag to overwrite a previous saved model in the model_dir 35 | continue: True # Flag to continue from a previous saved model in the model_dir 36 | shuffle: True # Flag to shuffle the data during training 37 | use_cuda: True # Flag to use GPU cuda capabilities 38 | max_output_length: 300 # Max Output Length 39 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep 40 | loss: "MSE" # Loss function (MSE, L1) 41 | 42 | model: 43 | initializer: "xavier" # Model initialisation (Xavier, ...) 44 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...) 45 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...) 
46 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 47 | just_count_in: False # Flag for Just Counter Data Augmentation 48 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation 49 | noise_rate: 5 # Gaussian Noise rate 50 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 51 | encoder1: # Model Encoder 52 | type: "transformer" 53 | num_layers: 2 # Number of layers 54 | num_heads: 4 # Number of Heads 55 | embeddings: 56 | embedding_dim: 512 # Embedding Dimension 57 | dropout: 0.0 # Embedding Dropout 58 | hidden_size: 512 # Hidden Size Dimension 59 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 60 | dropout: 0.0 # Encoder Dropout 61 | encoder2: # Model Encoder 62 | type: "transformer" 63 | num_layers: 2 # Number of layers 64 | num_heads: 4 # Number of Heads 65 | embeddings: 66 | embedding_dim: 512 # Embedding Dimension 67 | dropout: 0.0 # Embedding Dropout 68 | hidden_size: 512 # Hidden Size Dimension 69 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 70 | dropout: 0.0 # Encoder Dropout 71 | decoder: # Model Decoder 72 | type: "transformer" 73 | num_layers: 2 # Number of layers 74 | num_heads: 4 # Number of Heads 75 | embeddings: 76 | embedding_dim: 512 # Embedding Dimension 77 | dropout: 0.0 # Embedding Dropout 78 | hidden_size: 512 # Hidden Size Dimension 79 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 80 | dropout: 0.0 # Decoder Dropout 81 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Configs/dynamic.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | 3 | src1: "joint_gloss" 4 | src2: "after_gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 5 | trg: "skels" # Target - 3D body co-ordinates (skels) 6 | files: "files" # Filenames for each sequence 7 | 8 | train: "../Data/Gloss_Dynamic/train" 9 | dev: "../Data/Gloss_Dynamic/dev" 10 | test: "../Data/Gloss_Dynamic/test" 11 | 12 | 13 | features: ["hands+body"] 14 | max_sent_length: 300 # Max Sentence Length 15 | skip_frames: 1 # Skip frames in the data, to reduce the data input size 16 | 17 | src1_vocab: "./Configs/src_train.gloss.joint.no.repeat_vocab_extend.txt" 18 | src2_vocab: "./Configs/src_train.gloss.after.no.repeat_vocab.txt" 19 | 20 | training: 21 | random_seed: 27 # Random seed for initialisation 22 | optimizer: "adam" # Chosen optimiser (adam, ..) 23 | learning_rate: 0.001 # Initial model learning rate 24 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop 25 | weight_decay: 0.0 # Weight Decay 26 | clip_grad_norm: 5.0 # Gradient clipping value 27 | batch_size: 8 # Batch Size for training 28 | scheduling: "plateau" # Scheduling at training time (plateau, ...) 29 | patience: 7 # How many epochs of no improvement causes a LR reduction 30 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs 31 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...) 
32 | epochs: 20000 # How many epochs to run for 33 | validation_freq: 10000 # After how many steps to run a validation on the model 34 | logging_freq: 250 # After how many steps to log training progress 35 | eval_metric: "dtw" # Evaluation metric during training (dtw, bt) 36 | model_dir: "./Models/Base_train_dynamic_multiview" # Where the model shall be stored 37 | overwrite: False # Flag to overwrite a previous saved model in the model_dir 38 | continue: True # Flag to continue from a previous saved model in the model_dir 39 | shuffle: True # Flag to shuffle the data during training 40 | use_cuda: True # Flag to use GPU cuda capabilities 41 | max_output_length: 300 # Max Output Length 42 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep 43 | loss: "MSE" # Loss function (MSE, L1) 44 | T: 1 # Temperature for attention balance 45 | balance: True # Whether to apply dynamic attention over the multi-views 46 | BDC_train: True 47 | 48 | model: 49 | initializer: "xavier" # Model initialisation (Xavier, ...) 50 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...) 51 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...) 52 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 53 | just_count_in: False # Flag for Just Counter Data Augmentation 54 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation 55 | noise_rate: 5 # Gaussian Noise rate 56 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 57 | encoder1: # Model Encoder 58 | type: "transformer" 59 | num_layers: 2 # Number of layers 60 | num_heads: 4 # Number of Heads 61 | embeddings: 62 | embedding_dim: 512 # Embedding Dimension 63 | dropout: 0.0 # Embedding Dropout 64 | hidden_size: 512 # Hidden Size Dimension 65 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 66 | dropout: 0.0 # Encoder Dropout 67 | decoder: # Model Decoder 68 | type: "transformer" 69 | num_layers: 2 # Number of layers 70 | num_heads: 4 # Number of Heads 71 | embeddings: 72 | embedding_dim: 512 # Embedding Dimension 73 | dropout: 0.0 # Embedding Dropout 74 | hidden_size: 512 # Hidden Size Dimension 75 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 76 | dropout: 0.0 # Decoder Dropout 77 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Data/tmp/dev.files: -------------------------------------------------------------------------------- 1 | dev/11August_2010_Wednesday_tagesschau-2 2 | dev/11August_2010_Wednesday_tagesschau-3 3 | dev/11August_2010_Wednesday_tagesschau-8 4 | dev/25October_2010_Monday_tagesschau-22 5 | dev/05May_2011_Thursday_tagesschau-25 6 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Data/tmp/dev.gloss: -------------------------------------------------------------------------------- 1 | DRUCK TIEF KOMMEN 2 | ES-BEDEUTET VIEL WOLKE UND KOENNEN REGEN GEWITTER KOENNEN 3 | WIND MAESSIG SCHWACH REGION WENN GEWITTER WIND KOENNEN 4 | MITTWOCH REGEN KOENNEN NORDWEST WAHRSCHEINLICH NORD STARK WIND 5 | JETZT WETTER WIE-AUSSEHEN MORGEN FREITAG SECHSTE MAI ZEIGEN-BILDSCHIRM 6 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Data/tmp/dev.text: -------------------------------------------------------------------------------- 1 | tiefer luftdruck bestimmt in den nächsten tagen unser wetter .
2 | das bedeutet viele wolken und immer wieder zum teil kräftige schauer und gewitter . 3 | meist weht nur ein schwacher wind aus unterschiedlichen richtungen der bei schauern und gewittern stark böig sein kann . 4 | am mittwoch hier und da nieselregen in der nordwesthälfte an den küsten kräftiger wind . 5 | und nun die wettervorhersage für morgen freitag den sechsten mai . 6 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Data/tmp/test.files: -------------------------------------------------------------------------------- 1 | test/25October_2010_Monday_tagesschau-17 2 | test/25October_2010_Monday_tagesschau-24 3 | test/15December_2010_Wednesday_tagesschau-37 4 | test/10March_2011_Thursday_heute-58 5 | test/14August_2009_Friday_tagesschau-62 6 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Data/tmp/test.gloss: -------------------------------------------------------------------------------- 1 | REGEN SCHNEE REGION VERSCHWINDEN NORD REGEN KOENNEN REGION STERN KOENNEN SEHEN 2 | DONNERSTAG NORDWEST REGEN REGION SONNE WOLKE WECHSELHAFT DANN FREITAG AEHNLICH WETTER 3 | KRAEFTIG AB MORGEN FRUEH MEISTENS SCHNEE SCHNEIEN KALT REGEN 4 | WOCHENENDE SONNE SAMSTAG SCHOEN TEMPERATUR BIS SIEBZEHN GRAD REGION 5 | DEUTSCH LAND MORGEN HOCH DRUCK KOMMEN WOLKE AUFLOESEN 6 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Data/tmp/test.text: -------------------------------------------------------------------------------- 1 | regen und schnee lassen an den alpen in der nacht nach im norden und nordosten fallen hier und da schauer sonst ist das klar . 2 | am donnerstag regen in der nordhälfte in der südhälfte mal sonne mal wolken ähnliches wetter dann auch am freitag . 3 | vom nordmeer zieht ein kräftiges tief heran und bringt uns ab den morgenstunden heftige schneefälle zum teil auch gefrierenden regen . 4 | sonnig geht es auch ins wochenende samstag ein herrlicher tag mit temperaturen bis siebzehn grad hier im westen . 5 | deutschland liegt morgen unter hochdruckeinfluss der die wolken weitgehend vertreibt . 6 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Data/tmp/train.files: -------------------------------------------------------------------------------- 1 | train/11August_2010_Wednesday_tagesschau-1 2 | train/11August_2010_Wednesday_tagesschau-4 3 | train/11August_2010_Wednesday_tagesschau-5 4 | train/11August_2010_Wednesday_tagesschau-6 5 | train/11August_2010_Wednesday_tagesschau-7 6 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Data/tmp/train.gloss: -------------------------------------------------------------------------------- 1 | JETZT WETTER MORGEN DONNERSTAG ZWOELF FEBRUAR 2 | ORT REGEN DURCH REGEN KOENNEN UEBERSCHWEMMUNG KOENNEN 3 | NORDWEST HEUTE NACHT TROCKEN BLEIBEN SUEDWEST KOENNEN REGEN ORT GEWITTER DAZU 4 | TAGSUEBER OFT REGEN GEWITTER KOENNEN MANCHMAL REGEN VIEL REGEN 5 | WOLKE LOCH SPEZIELL NORDWEST 6 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/Data/tmp/train.text: -------------------------------------------------------------------------------- 1 | und nun die wettervorhersage für morgen donnerstag den zwölften august . 
2 | mancherorts regnet es auch länger und ergiebig auch lokale überschwemmungen sind wieder möglich . 3 | im nordwesten bleibt es heute nacht meist trocken sonst muss mit teilweise kräftigen schauern gerechnet werden örtlich mit blitz und donner . 4 | auch am tag gibt es verbreitet zum teil kräftige schauer oder gewitter und in manchen regionen fallen ergiebige regenmengen . 5 | größere wolkenlücken finden sich vor allem im nordwesten . 6 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/README.md: -------------------------------------------------------------------------------- 1 | # Progressive Transformers for End-to-End Sign Language Production 2 | 3 | Source code for "Progressive Transformers for End-to-End Sign Language Production" (Ben Saunders, Necati Cihan Camgoz, Richard Bowden - ECCV 2020) 4 | 5 | Conference video available at https://twitter.com/BenMSaunders/status/1336638886198521857 6 | 7 | # Usage 8 | 9 | Install the required packages using the requirements.txt file: 10 | 11 | `pip install -r requirements.txt` 12 | 13 | To run, start __main__.py with the arguments "train" and "./Configs/Base.yaml": 14 | 15 | `python __main__.py train ./Configs/Base.yaml` 16 | 17 | An example train.log file can be found in ".\Configs\train.log" and a validation file at ".\Configs\validations.txt" 18 | 19 | Back Translation model created from https://github.com/neccam/slt. Back Translation evaluation code coming soon. 20 | 21 | # Data 22 | 23 | Pre-processed Phoenix14T data can be requested via email at b.saunders@surrey.ac.uk. If you wish to create the data yourself, please follow the steps below: 24 | 25 | Phoenix14T data can be downloaded from https://www-i6.informatik.rwth-aachen.de/~koller/RWTH-PHOENIX-2014-T/ and skeleton joints can be extracted using OpenPose at https://github.com/CMU-Perceptual-Computing-Lab/openpose and lifted to 3D using the 2D to 3D Inverse Kinematics code at https://github.com/gopeith/SignLanguageProcessing under 3DposeEstimator. 26 | 27 | Prepare Phoenix14T (or other sign language dataset) data as .txt files for .skel, .gloss, .txt and .files. The data format should be parallel .txt files for "src", "trg" and "files", with each line representing a new sequence: 28 | 29 | - The "src" file contains source sentences, with each line representing a new sentence. 30 | 31 | - The "trg" file contains the skeleton data of each frame, with a space separating frames. The joints should be divided by 3 to match the scaling I used. Each frame contains 150 joint values and a subsequent counter value, all separated by a space. Each sequence should be separated with a new line. If your data contains 150 joints per frame, please ensure that trg_size is set to 150 in the config file. (A short parsing sketch for this format is included at the end of this README.) 32 | 33 | - The "files" file should contain the name of each sequence on a new line. 34 | 35 | Examples can be found in /Data/tmp. The data path must be specified in the config file. 36 | 37 | # Pre-Trained Model 38 | 39 | A pre-trained Progressive Transformer checkpoint can be downloaded from https://www.dropbox.com/s/l4xmnybp7luz0l3/PreTrained_PTSLP_Model.ckpt?dl=0. 40 | 41 | This model has a size of ```num_layers: 2```, ```num_heads: 4``` and ```embedding_dim: 512```, as outlined in ```./Configs/Base.yaml```. It has been pre-trained on the full PHOENIX14T dataset with the data format as above. The relevant train.log and validations.txt files can be found in ```.\Configs```.
42 | 43 | To initialise a model from this checkpoint, pass the ```--ckpt ./PreTrained_PTSLP_Model.ckpt``` argument to either the ```train``` or ```test``` mode. Additionally, to initialise the correct src_embed size, the config argument ```src_vocab: "./Configs/src_vocab.txt"``` must be set to the location of src_vocab.txt, found under ```./Configs```. Please open an issue if this checkpoint cannot be downloaded or loaded. 44 | 45 | # Reference 46 | 47 | If you use this code in your research, please cite the following [papers](https://arxiv.org/abs/2004.14874): 48 | 49 | ``` 50 | @inproceedings{saunders2020progressive, 51 | title = {{Progressive Transformers for End-to-End Sign Language Production}}, 52 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard}, 53 | booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}, 54 | year = {2020}} 55 | 56 | @inproceedings{saunders2020adversarial, 57 | title = {{Adversarial Training for Multi-Channel Sign Language Production}}, 58 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard}, 59 | booktitle = {Proceedings of the British Machine Vision Conference (BMVC)}, 60 | year = {2020}} 61 | 62 | @article{saunders2021continuous, 63 | title = {{Continuous 3D Multi-Channel Sign Language Production via Progressive Transformers and Mixture Density Networks}}, 64 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard}, 65 | journal = {International Journal of Computer Vision (IJCV)}, 66 | year = {2021}} 67 | 68 | ``` 69 | 70 | ## Acknowledgements 71 | This work received funding from the SNSF Sinergia project 'SMILE' (CRSII2 160811), the European Union's Horizon2020 research and innovation programme under grant agreement no. 762021 'Content4All' and the EPSRC project 'ExTOL' (EP/R03298X/1). This work reflects only the authors' view and the Commission is not responsible for any use that may be made of the information it contains. We would also like to thank NVIDIA Corporation for their GPU grant.
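As referenced in the Data section above, here is a minimal sketch of reading one sequence line from a .skels file under the stated format (150 joint values plus one trailing counter value per frame, all space-separated; one sequence per line). The file path below is illustrative:

```python
TRG_SIZE = 150            # joint values per frame, matching trg_size in the config
FRAME_LEN = TRG_SIZE + 1  # plus the trailing counter value

with open("Data/tmp/train.skels") as f:
    for line in f:
        values = [float(v) for v in line.split()]
        assert len(values) % FRAME_LEN == 0, "unexpected frame length"
        frames = [values[i:i + FRAME_LEN]
                  for i in range(0, len(values), FRAME_LEN)]
        for frame in frames:
            joints, counter = frame[:TRG_SIZE], frame[TRG_SIZE]
            # joints are the (already divided-by-3) 3D coordinates of one frame
```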
72 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from training import train, test 4 | 5 | 6 | def main(): 7 | 8 | # Example options: 9 | # train ./Configs/Base.yaml 10 | # test ./Configs/Base.yaml 11 | 12 | ap = argparse.ArgumentParser("Progressive Transformers") 13 | 14 | # Choose between Train and Test 15 | ap.add_argument("mode", choices=["train", "test"], 16 | help="train a model or test") 17 | # Path to Config 18 | ap.add_argument("config_path", type=str, 19 | help="path to YAML config file") 20 | 21 | # Optional path to checkpoint 22 | ap.add_argument("--ckpt", type=str, 23 | help="path to model checkpoint") 24 | 25 | args = ap.parse_args() 26 | 27 | # If Train 28 | if args.mode == "train": 29 | train(cfg_file=args.config_path, ckpt=args.ckpt) 30 | # If Test 31 | elif args.mode == "test": 32 | test(cfg_file=args.config_path, ckpt=args.ckpt) 33 | else: 34 | raise ValueError("Unknown mode") 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/__pycache__/batch.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/batch.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/__pycache__/builders.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/builders.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/__pycache__/constants.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/constants.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/__pycache__/data.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/data.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/__pycache__/decoders.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/dynamic_selection/__pycache__/decoders.cpython-37.pyc 
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/batch.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | """
4 | Implementation of a mini-batch.
5 | """
6 | 
7 | import torch
8 | import torch.nn.functional as F
9 | 
10 | from constants import TARGET_PAD
11 | 
12 | 
13 | class Batch:
14 |     """Object for holding a batch of data with mask during training.
15 |     Input is a batch from a torch text iterator.
16 |     """
17 | 
18 |     def __init__(self, torch_batch, pad_index, model):
19 |         """
20 |         Create a new joey batch from a torch batch.
21 |         This batch extends torch text's batch attributes with src and trg
22 |         length, masks, number of non-padded tokens in trg.
23 |         Furthermore, it can be sorted by src length.
24 | 
25 |         :param torch_batch:
26 |         :param pad_index:
27 |         :param model: model providing the use_cuda, just_count_in and future_prediction settings
28 |         """
29 |         self.src1, self.src1_lengths = torch_batch.src1
30 |         self.src1_mask = (self.src1 != pad_index).unsqueeze(1)
31 |         self.src2, self.src2_lengths = torch_batch.src2
32 |         self.src2_mask = (self.src2 != pad_index).unsqueeze(1)
33 | 
34 |         self.nseqs = self.src1.size(0)
35 |         self.trg_input = None
36 |         self.trg = None
37 |         self.trg_mask = None
38 |         self.trg_lengths = None
39 |         self.ntokens = None
40 | 
41 |         self.file_paths = torch_batch.file_paths
42 |         self.use_cuda = model.use_cuda
43 |         self.target_pad = TARGET_PAD
44 |         # Just Count
45 |         self.just_count_in = model.just_count_in
46 |         # Future Prediction
47 |         self.future_prediction = model.future_prediction
48 | 
49 |         if hasattr(torch_batch, "trg"):
50 |             trg = torch_batch.trg
51 |             trg_lengths = torch_batch.trg.shape[1]
52 |             # trg_input is used for teacher forcing, last one is cut off
53 |             # Remove the last frame for target input, as inputs are only up to frame N-1
54 |             self.trg_input = trg.clone()[:, :-1, :]
55 | 
56 |             self.trg_lengths = trg_lengths
57 |             # trg is used for loss computation, shifted by one since BOS
58 |             self.trg = trg.clone()[:, 1:, :]
59 | 
60 |             # Just Count
61 |             if self.just_count_in:
62 |                 # If Just Count, keep only the counter (last) channel of trg_input
63 |                 self.trg_input = self.trg_input[:, :, -1:]
64 | 
65 |             # Future Prediction
66 |             if self.future_prediction != 0:
67 |                 # Loop through the future prediction, concatenating the frames shifted across once each time
68 |                 future_trg = torch.Tensor()
69 |                 # Concatenate each frame (Not counter)
70 |                 for i in range(0, self.future_prediction):
71 |                     future_trg = torch.cat(
72 |                         (future_trg, self.trg[:, i:-(self.future_prediction - i), :-1].clone()), dim=2)
73 |                 # Create the final target using the collected future_trg and original trg
74 |                 self.trg = torch.cat(
75 |                     (future_trg, self.trg[:, :-self.future_prediction, -1:]), dim=2)
76 | 
77 |                 # Cut off the last N frames of the trg_input
78 |                 self.trg_input = self.trg_input[:, :-self.future_prediction, :]
79 | 
80 |             # Target Pad is dynamic, so we exclude the padded areas from the loss computation
81 |             trg_mask = (self.trg_input != self.target_pad).unsqueeze(1)
82 |             # This increases the shape of the target mask to be even (16,1,120,120) -
83 |             # adding padding that replicates - so just continues the False's or True's
84 |             pad_amount = self.trg_input.shape[1] - self.trg_input.shape[2]
85 |             # Create the target mask the same size as target input
86 |             self.trg_mask = (F.pad(input=trg_mask.double(), pad=(
87 |                 pad_amount, 0, 0, 0), mode='replicate') == 1.0)
88 |             self.ntokens = (self.trg != pad_index).data.sum().item()
89 | 
90 |         if self.use_cuda:
91 |             self._make_cuda()
92 | 
93 |     # If using Cuda
94 |     def _make_cuda(self):
95 |         """
96 |         Move the batch to GPU
97 | 
98 |         :return:
99 |         """
100 |         self.src1 = self.src1.cuda()
101 |         self.src1_mask = self.src1_mask.cuda()
102 |         self.src2 = self.src2.cuda()
103 |         self.src2_mask = self.src2_mask.cuda()
104 | 
105 |         if self.trg_input is not None:
106 |             self.trg_input = self.trg_input.cuda()
107 |             self.trg = self.trg.cuda()
108 |             self.trg_mask = self.trg_mask.cuda()
109 | 
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/constants.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | """
3 | Defining global constants
4 | """
5 | 
6 | UNK_TOKEN = '<unk>'
7 | PAD_TOKEN = '<pad>'
8 | BOS_TOKEN = '<s>'
9 | EOS_TOKEN = '</s>'
10 | 
11 | TARGET_PAD = 0.0
12 | 
13 | DEFAULT_UNK_ID = lambda: 0
14 | 
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/dtw.py:
--------------------------------------------------------------------------------
1 | from numpy import array, zeros, full, argmin, inf, ndim
2 | from scipy.spatial.distance import cdist
3 | from math import isinf
4 | 
5 | """
6 | Dynamic time warping (DTW) is used as a similarity measure between temporal sequences.
7 | Original DTW code found at https://github.com/pierre-rouanet/dtw
8 | 
9 | """
10 | 
11 | # Apply DTW
12 | def dtw(x, y, dist, warp=1, w=inf, s=1.0):
13 |     """
14 |     Computes Dynamic Time Warping (DTW) of two sequences.
15 | 
16 |     :param array x: N1*M array
17 |     :param array y: N2*M array
18 |     :param func dist: distance used as cost measure
19 |     :param int warp: how many shifts are computed.
20 |     :param int w: window size limiting the maximal distance between indices of matched entries |i,j|.
21 |     :param float s: weight applied on off-diagonal moves of the path. As s gets larger, the warping path is increasingly biased towards the diagonal
22 |     Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
23 |     """
24 |     assert len(x)
25 |     assert len(y)
26 |     assert isinf(w) or (w >= abs(len(x) - len(y)))
27 |     assert s > 0
28 |     r, c = len(x), len(y)
29 |     if not isinf(w):
30 |         D0 = full((r + 1, c + 1), inf)
31 |         for i in range(1, r + 1):
32 |             D0[i, max(1, i - w):min(c + 1, i + w + 1)] = 0
33 |         D0[0, 0] = 0
34 |     else:
35 |         D0 = zeros((r + 1, c + 1))
36 |         D0[0, 1:] = inf
37 |         D0[1:, 0] = inf
38 |     D1 = D0[1:, 1:]  # view
39 |     for i in range(r):
40 |         for j in range(c):
41 |             if (isinf(w) or (max(0, i - w) <= j <= min(c, i + w))):
42 |                 D1[i, j] = dist(x[i], y[j])
43 |     C = D1.copy()
44 |     jrange = range(c)
45 |     for i in range(r):
46 |         if not isinf(w):
47 |             jrange = range(max(0, i - w), min(c, i + w + 1))
48 |         for j in jrange:
49 |             min_list = [D0[i, j]]
50 |             for k in range(1, warp + 1):
51 |                 i_k = min(i + k, r)
52 |                 j_k = min(j + k, c)
53 |                 min_list += [D0[i_k, j] * s, D0[i, j_k] * s]
54 |             D1[i, j] += min(min_list)
55 |     if len(x) == 1:
56 |         path = zeros(len(y)), range(len(y))
57 |     elif len(y) == 1:
58 |         path = range(len(x)), zeros(len(x))
59 |     else:
60 |         path = _traceback(D0)
61 |     return D1[-1, -1], C, D1, path
62 | 
63 | def _traceback(D):
64 |     i, j = array(D.shape) - 2
65 |     p, q = [i], [j]
66 |     while (i > 0) or (j > 0):
67 |         tb = argmin((D[i, j], D[i, j + 1], D[i + 1, j]))
68 |         if tb == 0:
69 |             i -= 1
70 |             j -= 1
71 |         elif tb == 1:
72 |             i -= 1
73 |         else:  # (tb == 2):
74 |             j -= 1
75 |         p.insert(0, i)
76 |         q.insert(0, j)
77 |     return array(p), array(q)
78 | 
79 | 
80 | if __name__ == '__main__':
81 |     w = inf
82 |     s = 1.0
83 |     if 1:  # 1-D numeric
84 |         from sklearn.metrics.pairwise import manhattan_distances
85 |         x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
86 |         y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]
87 |         dist_fun = manhattan_distances
88 |         w = 1
89 |         # s = 1.2
90 |     elif 0:  # 2-D numeric
91 |         from sklearn.metrics.pairwise import euclidean_distances
92 |         x = [[0, 0], [0, 1], [1, 1], [1, 2], [2, 2], [4, 3], [2, 3], [1, 1], [2, 2], [0, 1]]
93 |         y = [[1, 0], [1, 1], [1, 1], [2, 1], [4, 3], [4, 3], [2, 3], [3, 1], [1, 2], [1, 0]]
94 |         dist_fun = euclidean_distances
95 |     else:  # 1-D list of strings
96 |         from nltk.metrics.distance import edit_distance
97 |         # x = ['we', 'shelled', 'clams', 'for', 'the', 'chowder']
98 |         # y = ['class', 'too']
99 |         x = ['i', 'soon', 'found', 'myself', 'muttering', 'to', 'the', 'walls']
100 |         y = ['see', 'drown', 'himself']
101 |         # x = 'we talked about the situation'.split()
102 |         # y = 'we talked about the situation'.split()
103 |         dist_fun = edit_distance
104 |     dist, cost, acc, path = dtw(x, y, dist_fun, w=w, s=s)
105 | 
106 |     # Visualize
107 |     from matplotlib import pyplot as plt
108 |     plt.imshow(cost.T, origin='lower', cmap=plt.cm.Reds, interpolation='nearest')
109 |     plt.plot(path[0], path[1], '-o')  # relation
110 |     plt.xticks(range(len(x)), x)
111 |     plt.yticks(range(len(y)), y)
112 |     plt.xlabel('x')
113 |     plt.ylabel('y')
114 |     plt.axis('tight')
115 |     if isinf(w):
116 |         plt.title('Minimum distance: {}, slope weight: {}'.format(dist, s))
117 |     else:
118 |         plt.title('Minimum distance: {}, window width: {}, slope weight: {}'.format(dist, w, s))
119 |     plt.show()
120 | 
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/embeddings.py:
--------------------------------------------------------------------------------
1 | import math
2 | from torch import nn, Tensor
3 | from helpers import freeze_params
4 | import torch
5 | 
6 | class MaskedNorm(nn.Module):
7 |     """
8 |     Original Code from:
9 |     https://discuss.pytorch.org/t/batchnorm-for-different-sized-samples-in-batch/44251/8
10 |     """
11 | 
12 |     def __init__(self, norm_type, num_groups, num_features):
13 |         super().__init__()
14 |         self.norm_type = norm_type
15 |         if self.norm_type == "batch":
16 |             self.norm = nn.BatchNorm1d(num_features=num_features)
17 |         elif self.norm_type == "group":
18 |             self.norm = nn.GroupNorm(num_groups=num_groups, num_channels=num_features)
19 |         elif self.norm_type == "layer":
20 |             self.norm = nn.LayerNorm(normalized_shape=num_features)
21 |         else:
22 |             raise ValueError("Unsupported Normalization Layer")
23 | 
24 |         self.num_features = num_features
25 | 
26 |     def forward(self, x: Tensor, mask: Tensor):
27 |         if self.training:
28 |             reshaped = x.reshape([-1, self.num_features])
29 |             reshaped_mask = mask.reshape([-1, 1]) > 0
30 |             selected = torch.masked_select(reshaped, reshaped_mask).reshape(
31 |                 [-1, self.num_features]
32 |             )
33 |             batch_normed = self.norm(selected)
34 |             scattered = reshaped.masked_scatter(reshaped_mask, batch_normed)
35 |             return scattered.reshape([x.shape[0], -1, self.num_features])
36 |         else:
37 |             reshaped = x.reshape([-1, self.num_features])
38 |             batched_normed = self.norm(reshaped)
39 |             return batched_normed.reshape([x.shape[0], -1, self.num_features])
40 | 
41 | class Embeddings(nn.Module):
42 | 
43 |     """
44 |     Simple embeddings class
45 |     """
46 | 
47 |     # pylint: disable=unused-argument
48 |     def __init__(self,
49 |                  embedding_dim: int = 64,
50 |                  scale: bool = False,
51 |                  vocab_size: int = 0,
52 |                  padding_idx: int = 1,
53 |                  freeze: bool = False,
54 |                  **kwargs):
55 |         """
56 |         Create new embeddings for the vocabulary.
57 |         Use scaling for the Transformer.
58 | 
59 |         :param embedding_dim:
60 |         :param scale:
61 |         :param vocab_size:
62 |         :param padding_idx:
63 |         :param freeze: freeze the embeddings during training
64 |         """
65 |         super(Embeddings, self).__init__()
66 | 
67 |         self.embedding_dim = embedding_dim
68 |         self.scale = scale
69 |         self.vocab_size = vocab_size
70 |         self.lut = nn.Embedding(vocab_size, self.embedding_dim,
71 |                                 padding_idx=padding_idx)
72 | 
73 |         if freeze:
74 |             freeze_params(self)
75 | 
76 |     # pylint: disable=arguments-differ
77 |     def forward(self, x: Tensor) -> Tensor:
78 |         """
79 |         Perform lookup for input `x` in the embedding table.
80 | 81 | :param x: index in the vocabulary 82 | :return: embedded representation for `x` 83 | """ 84 | if self.scale: 85 | return self.lut(x) * math.sqrt(self.embedding_dim) 86 | return self.lut(x) 87 | 88 | def __repr__(self): 89 | return "%s(embedding_dim=%d, vocab_size=%d)" % ( 90 | self.__class__.__name__, self.embedding_dim, self.vocab_size) 91 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/encoders.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch import Tensor 6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 7 | import torch.nn.functional as F 8 | 9 | from helpers import freeze_params 10 | from transformer_layers import \ 11 | TransformerEncoderLayer, PositionalEncoding 12 | 13 | from embeddings import MaskedNorm 14 | 15 | #pylint: disable=abstract-method 16 | class Encoder(nn.Module): 17 | """ 18 | Base encoder class 19 | """ 20 | @property 21 | def output_size(self): 22 | """ 23 | Return the output size 24 | 25 | :return: 26 | """ 27 | return self._output_size 28 | 29 | class TransformerEncoder(Encoder): 30 | """ 31 | Transformer Encoder 32 | """ 33 | 34 | #pylint: disable=unused-argument 35 | def __init__(self, 36 | hidden_size: int = 512, 37 | ff_size: int = 2048, 38 | num_layers: int = 8, 39 | num_heads: int = 4, 40 | dropout: float = 0.1, 41 | emb_dropout: float = 0.1, 42 | freeze: bool = False, 43 | **kwargs): 44 | """ 45 | Initializes the Transformer. 46 | :param hidden_size: hidden size and size of embeddings 47 | :param ff_size: position-wise feed-forward layer size. 48 | (Typically this is 2*hidden_size.) 49 | :param num_layers: number of layers 50 | :param num_heads: number of heads for multi-headed attention 51 | :param dropout: dropout probability for Transformer layers 52 | :param emb_dropout: Is applied to the input (word embeddings). 53 | :param freeze: freeze the parameters of the encoder during training 54 | :param kwargs: 55 | """ 56 | super(TransformerEncoder, self).__init__() 57 | 58 | # build all (num_layers) layers 59 | self.layers = nn.ModuleList([ 60 | TransformerEncoderLayer(size=hidden_size, ff_size=ff_size, 61 | num_heads=num_heads, dropout=dropout) 62 | for _ in range(num_layers)]) 63 | 64 | self.layer_norm = nn.LayerNorm(hidden_size, eps=1e-6) 65 | self.pe = PositionalEncoding(hidden_size) 66 | self.emb_dropout = nn.Dropout(p=emb_dropout) 67 | self._output_size = hidden_size 68 | 69 | if freeze: 70 | freeze_params(self) 71 | 72 | #pylint: disable=arguments-differ 73 | def forward(self, 74 | embed_src: Tensor, 75 | src_length: Tensor, 76 | mask: Tensor) -> (Tensor, Tensor): 77 | """ 78 | Pass the input (and mask) through each layer in turn. 79 | Applies a Transformer encoder to sequence of embeddings x. 80 | The input mini-batch x needs to be sorted by src length. 81 | x and mask should have the same dimensions [batch, time, dim]. 
82 | 83 | :param embed_src: embedded src inputs, 84 | shape (batch_size, src_len, embed_size) 85 | :param src_length: length of src inputs 86 | (counting tokens before padding), shape (batch_size) 87 | :param mask: indicates padding areas (zeros where padding), shape 88 | (batch_size, src_len, embed_size) 89 | :return: 90 | - output: hidden states with 91 | shape (batch_size, max_length, directions*hidden), 92 | - hidden_concat: last hidden state with 93 | shape (batch_size, directions*hidden) 94 | """ 95 | 96 | x = embed_src 97 | 98 | # Add position encoding to word embeddings 99 | x = self.pe(x) 100 | # Add Dropout 101 | x = self.emb_dropout(x) 102 | 103 | # Apply each layer to the input 104 | for layer in self.layers: 105 | x = layer(x, mask) 106 | 107 | return self.layer_norm(x), None 108 | 109 | def __repr__(self): 110 | return "%s(num_layers=%r, num_heads=%r)" % ( 111 | self.__class__.__name__, len(self.layers), 112 | self.layers[0].src_src_att.num_heads) 113 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/external_metrics/mscoco_rouge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # File Name : mscoco_rouge.py 4 | # 5 | # Description : Computes ROUGE-L metric as described by Lin and Hovey (2004) 6 | # 7 | # Creation Date : 2015-01-07 06:03 8 | # Author : Ramakrishna Vedantam 9 | 10 | 11 | def my_lcs(string, sub): 12 | """ 13 | Calculates longest common subsequence for a pair of tokenized strings 14 | :param string : list of str : tokens from a string split using whitespace 15 | :param sub : list of str : shorter string, also split using whitespace 16 | :returns: length (list of int): length of the longest common subsequence between the two strings 17 | 18 | Note: my_lcs only gives length of the longest common subsequence, not the actual LCS 19 | """ 20 | if len(string) < len(sub): 21 | sub, string = string, sub 22 | 23 | lengths = [[0 for i in range(0, len(sub) + 1)] for j in range(0, len(string) + 1)] 24 | 25 | for j in range(1, len(sub) + 1): 26 | for i in range(1, len(string) + 1): 27 | if string[i - 1] == sub[j - 1]: 28 | lengths[i][j] = lengths[i - 1][j - 1] + 1 29 | else: 30 | lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1]) 31 | 32 | return lengths[len(string)][len(sub)] 33 | 34 | 35 | def calc_score(hypotheses, references, beta=1.2): 36 | """ 37 | Compute ROUGE-L score given one candidate and references for an image 38 | :param hypotheses: str : candidate sentence to be evaluated 39 | :param references: list of str : COCO reference sentences for the particular image to be evaluated 40 | :returns score: int (ROUGE-L score for the candidate evaluated against references) 41 | """ 42 | assert len(hypotheses) == 1 43 | assert len(references) > 0 44 | prec = [] 45 | rec = [] 46 | 47 | # split into tokens 48 | token_c = hypotheses[0].split(" ") 49 | 50 | for reference in references: 51 | # split into tokens 52 | token_r = reference.split(" ") 53 | # compute the longest common subsequence 54 | lcs = my_lcs(token_r, token_c) 55 | prec.append(lcs / float(len(token_c))) 56 | rec.append(lcs / float(len(token_r))) 57 | 58 | prec_max = max(prec) 59 | rec_max = max(rec) 60 | 61 | if prec_max != 0 and rec_max != 0: 62 | score = ((1 + beta ** 2) * prec_max * rec_max) / float( 63 | rec_max + beta ** 2 * prec_max 64 | ) 65 | else: 66 | score = 0.0 67 | return score 68 | 
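A quick, illustrative sanity check of the ROUGE-L scorer above (the sentences are made up; it assumes you run from the dynamic_selection directory so that ```external_metrics``` is importable):

```python
# Illustrative only: score a single hypothesis against a single reference.
from external_metrics import mscoco_rouge

hypothesis = ["the weather will be very cold tomorrow"]   # exactly one candidate
references = ["tomorrow the weather will be bitterly cold"]
score = mscoco_rouge.calc_score(hypotheses=hypothesis, references=references)
print(score)  # F-measure-style ROUGE-L in [0, 1] (beta = 1.2 by default)
```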
-------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/loss.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Module to implement training loss 4 | """ 5 | 6 | from torch import nn, Tensor 7 | 8 | class RegLoss(nn.Module): 9 | """ 10 | Regression Loss 11 | """ 12 | 13 | def __init__(self, cfg, target_pad=0.0): 14 | super(RegLoss, self).__init__() 15 | 16 | self.loss = cfg["training"]["loss"].lower() 17 | 18 | if self.loss == "l1": 19 | self.criterion = nn.L1Loss() 20 | elif self.loss == "mse": 21 | self.criterion = nn.MSELoss() 22 | 23 | else: 24 | print("Loss not found - revert to default L1 loss") 25 | self.criterion = nn.L1Loss() 26 | 27 | model_cfg = cfg["model"] 28 | 29 | self.target_pad = target_pad 30 | self.loss_scale = model_cfg.get("loss_scale", 1.0) 31 | 32 | # pylint: disable=arguments-differ 33 | def forward(self, preds, targets): 34 | 35 | loss_mask = (targets != self.target_pad) 36 | 37 | # Find the masked predictions and targets using loss mask 38 | preds_masked = preds * loss_mask 39 | targets_masked = targets * loss_mask 40 | 41 | # Calculate loss just over the masked predictions 42 | loss = self.criterion(preds_masked, targets_masked) 43 | 44 | # Multiply loss by the loss scale 45 | if self.loss_scale != 1.0: 46 | loss = loss * self.loss_scale 47 | 48 | return loss 49 | 50 | class XentLoss(nn.Module): 51 | """ 52 | Cross-Entropy Loss with optional label smoothing 53 | """ 54 | 55 | def __init__(self, pad_index: int, smoothing: float = 0.0): 56 | super(XentLoss, self).__init__() 57 | self.smoothing = smoothing 58 | self.pad_index = pad_index 59 | # standard xent loss 60 | self.criterion = nn.NLLLoss(ignore_index=self.pad_index, 61 | reduction='sum') 62 | 63 | # pylint: disable=arguments-differ 64 | def forward(self, log_probs, targets): 65 | """ 66 | Compute the cross-entropy between logits and targets. 67 | If label smoothing is used, target distributions are not one-hot, but 68 | "1-smoothing" for the correct target token and the rest of the 69 | probability mass is uniformly spread across the other tokens. 70 | :param log_probs: log probabilities as predicted by model 71 | :param targets: target indices 72 | :return: 73 | """ 74 | # targets: indices with batch*seq_len 75 | targets = targets.contiguous().view(-1) 76 | loss = self.criterion( 77 | log_probs.contiguous().view(-1, log_probs.size(-1)), targets) 78 | 79 | return loss -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/metrics.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | This module holds various MT evaluation metrics. 
4 | """ 5 | 6 | from external_metrics import sacrebleu 7 | from external_metrics import mscoco_rouge 8 | import logging 9 | import numpy as np 10 | 11 | def wer(hypotheses, references): 12 | wer = del_rate = ins_rate = sub_rate = 0 13 | n_seq = len(hypotheses) 14 | 15 | for h, r in zip(hypotheses, references): 16 | res = wer_single(r=r, h=h) 17 | wer += res["wer"] / n_seq 18 | del_rate += res["del"] / n_seq 19 | ins_rate += res["ins"] / n_seq 20 | sub_rate += res["sub"] / n_seq 21 | 22 | return {"wer": wer, "del": del_rate, "ins": ins_rate, "sub": sub_rate} 23 | 24 | def wer_single(r, h): 25 | edit_distance_matrix = editDistance(r=r, h=h) 26 | step_list = getStepList(r=r, h=h, d=edit_distance_matrix) 27 | 28 | min_distance = float(edit_distance_matrix[len(r)][len(h)]) 29 | num_del = float(np.sum([s == "d" for s in step_list])) 30 | num_ins = float(np.sum([s == "i" for s in step_list])) 31 | num_sub = float(np.sum([s == "s" for s in step_list])) 32 | 33 | word_error_rate = round((min_distance / len(r) * 100), 4) 34 | del_rate = round((num_del / len(r) * 100), 4) 35 | ins_rate = round((num_ins / len(r) * 100), 4) 36 | sub_rate = round((num_sub / len(r) * 100), 4) 37 | 38 | return {"wer": word_error_rate, "del": del_rate, "ins": ins_rate, "sub": sub_rate} 39 | 40 | # Calculate ROUGE scores 41 | def rouge(hypotheses, references): 42 | rouge_score = 0 43 | n_seq = len(hypotheses) 44 | 45 | for h, r in zip(hypotheses, references): 46 | rouge_score += mscoco_rouge.calc_score(hypotheses=[h], references=[r]) / n_seq 47 | 48 | return rouge_score 49 | 50 | # Calculate CHRF scores 51 | def chrf(hypotheses, references): 52 | """ 53 | Character F-score from sacrebleu 54 | 55 | :param hypotheses: list of hypotheses (strings) 56 | :param references: list of references (strings) 57 | :return: 58 | """ 59 | return sacrebleu.corpus_chrf(hypotheses=hypotheses, references=references) 60 | 61 | # Calculate BLEU scores 62 | def bleu(hypotheses, references, all=False): 63 | """ 64 | Raw corpus BLEU from sacrebleu (without tokenization) 65 | 66 | :param hypotheses: list of hypotheses (strings) 67 | :param references: list of references (strings) 68 | :return: 69 | """ 70 | logger = logging.getLogger(__name__) 71 | bleu_scores = sacrebleu.raw_corpus_bleu( 72 | sys_stream=hypotheses, ref_streams=[references] 73 | ).scores 74 | scores = {} 75 | for n in range(len(bleu_scores)): 76 | scores["bleu" + str(n + 1)] = bleu_scores[n] 77 | 78 | rouge_score = rouge(hypotheses, references) * 100 79 | #wer_d = wer(hypotheses, references) 80 | #werStr = "WER: " + str(wer_d["wer"]) 81 | 82 | bleu1Str = "BLEU-1: " + str(scores["bleu1"]) 83 | bleu2Str = "BLEU-2: " + str(scores["bleu2"]) 84 | bleu3Str = "BLEU-3: " + str(scores["bleu3"]) 85 | bleu4Str = "BLEU-4: " + str(scores["bleu4"]) 86 | rougeStr = "Rouge: " + str(rouge_score) 87 | 88 | logger.info(bleu1Str) 89 | logger.info(bleu2Str) 90 | logger.info(bleu3Str) 91 | logger.info(bleu4Str) 92 | logger.info(rougeStr) 93 | 94 | print("--------------- New Scores ------------") 95 | print(bleu1Str) 96 | print(bleu2Str) 97 | print(bleu3Str) 98 | print(bleu4Str) 99 | print(rougeStr) 100 | #print(werStr) 101 | 102 | return bleu_scores[3] -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/optim/lookahead.py: -------------------------------------------------------------------------------- 1 | # from https://github.com/mgrankin/over9000/blob/master/lookahead.py 2 | # Lookahead implementation from 
https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py
3 | 
4 | import itertools as it
5 | from torch.optim import Optimizer, Adam
6 | 
7 | 
8 | # TODO: This code is buggy. It does not support state saving.
9 | # I did some ad-hoc fixes to get it working, but no one should
10 | # use it for training on condor etc.
11 | # Sadly this eliminates over9000 + ranger from options to train
12 | # on servers.
13 | class Lookahead(Optimizer):
14 |     def __init__(self, base_optimizer, alpha=0.5, k=6):
15 |         if not 0.0 <= alpha <= 1.0:
16 |             raise ValueError(f"Invalid slow update rate: {alpha}")
17 |         if not 1 <= k:
18 |             raise ValueError(f"Invalid lookahead steps: {k}")
19 | 
20 |         self.optimizer = base_optimizer
21 |         self.state = base_optimizer.state
22 |         self.defaults = base_optimizer.defaults
23 | 
24 |         self.param_groups = self.optimizer.param_groups
25 |         self.alpha = alpha
26 |         self.k = k
27 |         for group in self.param_groups:
28 |             group["step_counter"] = 0
29 |         self.slow_weights = [
30 |             [p.clone().detach() for p in group["params"]] for group in self.param_groups
31 |         ]
32 | 
33 |         for w in it.chain(*self.slow_weights):
34 |             w.requires_grad = False
35 | 
36 |     def step(self, closure=None):
37 |         loss = None
38 |         if closure is not None:
39 |             loss = closure()
40 |         loss = self.optimizer.step()
41 |         for group, slow_weights in zip(self.param_groups, self.slow_weights):
42 |             group["step_counter"] += 1
43 |             if group["step_counter"] % self.k != 0:
44 |                 continue
45 |             for p, q in zip(group["params"], slow_weights):
46 |                 if p.grad is None:
47 |                     continue
48 |                 q.data.add_(self.alpha, p.data - q.data)
49 |                 p.data.copy_(q.data)
50 |         return loss
51 | 
52 | 
53 | def LookaheadAdam(params, alpha=0.5, k=6, *args, **kwargs):
54 |     adam = Adam(params, *args, **kwargs)
55 |     return Lookahead(adam, alpha, k)
56 | 
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/optim/over9000.py:
--------------------------------------------------------------------------------
1 | # from https://raw.githubusercontent.com/mgrankin/over9000/master/over9000.py
2 | 
3 | from optim.lookahead import Lookahead
4 | from optim.ralamb import Ralamb
5 | 
6 | # Lookahead implementation from https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py
7 | # RAdam + LARS implementation from https://gist.github.com/redknightlois/c4023d393eb8f92bb44b2ab582d7ec20
8 | 
9 | 
10 | # RAdam + LARS + LookAHead
11 | def Over9000(params, alpha=0.5, k=6, *args, **kwargs):
12 |     ralamb = Ralamb(params, *args, **kwargs)
13 |     return Lookahead(ralamb, alpha, k)
14 | 
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/optim/ranger.py:
--------------------------------------------------------------------------------
1 | # from https://github.com/mgrankin/over9000/blob/master/ranger.py
2 | 
3 | from optim.lookahead import Lookahead
4 | from optim.radam import RAdam
5 | 
6 | 
7 | def Ranger(params, alpha=0.5, k=6, *args, **kwargs):
8 |     radam = RAdam(params, *args, **kwargs)
9 |     return Lookahead(radam, alpha, k)
10 | 
--------------------------------------------------------------------------------
/code/generation-model/dynamic_selection/prediction.py:
--------------------------------------------------------------------------------
1 | 
2 | import numpy as np
3 | import math
4 | 
5 | import torch
6 | from torchtext.data import Dataset
7 | 
8 | from helpers import bpe_postprocess, load_config, get_latest_checkpoint, \
9 |     load_checkpoint, calculate_dtw
10 | from model import build_model, Model
11 | from batch import Batch
12 | from data import load_data, make_data_iter
13 | from constants import UNK_TOKEN, PAD_TOKEN, EOS_TOKEN
14 | 
15 | # Validate epoch given a dataset
16 | 
17 | 
18 | def validate_on_data(model: Model,
19 |                      data: Dataset,
20 |                      batch_size: int,
21 |                      max_output_length: int,
22 |                      eval_metric: str,
23 |                      loss_function: torch.nn.Module = None,
24 |                      batch_type: str = "sentence",
25 |                      type="val",
26 |                      BT_model=None,
27 |                      logger=None,
28 |                      config=None,
29 |                      epoch_num=0):
30 | 
31 |     valid_iter = make_data_iter(
32 |         dataset=data, batch_size=batch_size, batch_type=batch_type,
33 |         shuffle=True, train=False)
34 | 
35 |     pad_index = model.src1_vocab.stoi[PAD_TOKEN]
36 |     # disable dropout
37 |     model.eval()
38 |     # don't track gradients during validation
39 |     with torch.no_grad():
40 |         valid_hypotheses = []
41 |         valid_references = []
42 |         valid_inputs = []
43 |         file_paths = []
44 |         all_dtw_scores = []
45 | 
46 |         valid_loss = 0
47 |         total_ntokens = 0
48 |         total_nseqs = 0
49 | 
50 |         batches = 0
51 |         for valid_batch in iter(valid_iter):
52 | 
53 |             batches += 1
54 |             # Extract batch
55 |             batch = Batch(torch_batch=valid_batch,
56 |                           pad_index=pad_index,
57 |                           model=model)
58 |             targets = batch.trg
59 | 
60 |             # run as during training with teacher forcing
61 |             if loss_function is not None and batch.trg is not None:
62 |                 # Get the loss for this batch
63 |                 batch_loss, _ = model.get_loss_for_batch(
64 |                     batch, loss_function=loss_function, epoch_num=epoch_num)
65 | 
66 |                 valid_loss += batch_loss
67 |                 total_ntokens += batch.ntokens
68 |                 total_nseqs += batch.nseqs
69 | 
70 |             # If not just count in, run inference to produce translation videos
71 |             if not model.just_count_in:
72 |                 # Run batch through the model in an auto-regressive format
73 |                 output, attention_scores = model.run_batch(
74 |                     batch=batch,
75 |                     max_output_length=max_output_length)
76 | 
77 |             # If future prediction
78 |             if model.future_prediction != 0:
79 |                 # Cut to only the first frame prediction + add the counter
80 |                 output = torch.cat((output[:, :, :output.shape[2] // (
81 |                     model.future_prediction)], output[:, :, -1:]), dim=2)
82 |                 # Cut to only the first frame prediction + add the counter
83 |                 targets = torch.cat(
84 |                     (targets[:, :, :targets.shape[2] // (model.future_prediction)], targets[:, :, -1:]), dim=2)
85 | 
86 |             # For just counter, use the ground truth as the inference output
87 |             if model.just_count_in:
88 |                 output = targets
89 | 
90 |             # Add references, hypotheses and file paths to list
91 |             valid_references.extend(targets)
92 |             valid_hypotheses.extend(output)
93 |             file_paths.extend(batch.file_paths)
94 |             # Add the source sentences to list, by using the model source vocab and batch indices
95 |             valid_inputs.extend([[model.src1_vocab.itos[batch.src1[i][j]] for j in range(len(batch.src1[i]))] for i in
96 |                                  range(len(batch.src1))])
97 | 
98 |             # Calculate the full Dynamic Time Warping score - for evaluation
99 |             dtw_score = calculate_dtw(targets, output)
100 |             all_dtw_scores.extend(dtw_score)
101 | 
102 |             # Can set to only run a few batches
103 |             # if type is 'val':
104 |             #     if batches == math.ceil(20/batch_size):
105 |             #         break
106 |             #     batches += 1
107 |         # Calculate back-translation BLEU scores
108 |         # if type is 'test':
109 |         #     calculate_backtranslation_BLUE(valid_hypotheses, config, logger, type)
110 |         # Dynamic Time Warping scores
111 |         current_valid_score = np.mean(all_dtw_scores)
112 | 
113 | 
return current_valid_score, valid_loss, valid_references, valid_hypotheses, \ 114 | valid_inputs, all_dtw_scores, file_paths 115 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.8.1 2 | alabaster==0.7.8 3 | apt-clone==0.2.1 4 | apt-xapian-index==0.47 5 | apturl==0.5.2 6 | asn1crypto==0.24.0 7 | astor==0.8.0 8 | astroid==2.3.1 9 | astropy==3.0 10 | attrs==17.4.0 11 | Babel==2.4.0 12 | bleach==2.1.2 13 | Brlapi==0.6.6 14 | certifi==2019.9.11 15 | chardet==3.0.4 16 | cloudpickle==0.5.2 17 | command-not-found==0.3 18 | configobj==5.0.6 19 | cryptography==2.1.4 20 | cupshelpers==1.0 21 | cycler==0.10.0 22 | Cython==0.26.1 23 | decorator==4.1.2 24 | defer==1.0.6 25 | devscripts===2.17.12ubuntu1.1 26 | distro-info===0.18ubuntu0.18.04.1 27 | docutils==0.14 28 | emcee==2.2.1 29 | entrypoints==0.2.3.post1 30 | fail2ban==0.10.2 31 | future==0.18.1 32 | gast==0.3.2 33 | google-pasta==0.1.7 34 | gpg==1.10.0 35 | grpcio==1.24.1 36 | h5py==2.10.0 37 | html5lib==0.999999999 38 | httplib2==0.9.2 39 | idna==2.8 40 | imagesize==0.7.1 41 | ipykernel==4.8.2 42 | ipython==5.5.0 43 | ipython-genutils==0.2.0 44 | isort==4.3.21 45 | jedi==0.11.1 46 | Jinja2==2.10 47 | joeynmt==0.0.1 48 | jsonschema==2.6.0 49 | jupyter-client==5.2.2 50 | jupyter-core==4.4.0 51 | Keras-Applications==1.0.8 52 | Keras-Preprocessing==1.1.0 53 | keyring==10.6.0 54 | keyrings.alt==3.0 55 | kiwisolver==1.1.0 56 | language-selector==0.1 57 | launchpadlib==1.10.6 58 | lazr.restfulclient==0.13.5 59 | lazr.uri==1.0.3 60 | lazy-object-proxy==1.4.2 61 | logilab-common==1.4.1 62 | louis==3.5.0 63 | macaroonbakery==1.1.3 64 | Mako==1.0.7 65 | Markdown==3.1.1 66 | MarkupSafe==1.0 67 | matplotlib==3.1.1 68 | mccabe==0.6.1 69 | mistune==0.8.3 70 | mpmath==1.0.0 71 | nbconvert==5.3.1 72 | nbformat==4.4.0 73 | netifaces==0.10.4 74 | nose==1.3.7 75 | nose-parameterized==0.3.4 76 | numpy==1.17.3 77 | numpydoc==0.7.0 78 | oauth==1.0.1 79 | olefile==0.45.1 80 | PAM==0.4.2 81 | pandas==0.25.1 82 | pandocfilters==1.4.2 83 | parso==0.1.1 84 | pexpect==4.2.1 85 | pickleshare==0.7.4 86 | Pillow==6.2.0 87 | pluggy==0.6.0 88 | ply==3.11 89 | portalocker==1.5.1 90 | prompt-toolkit==1.0.15 91 | protobuf==3.10.0 92 | psutil==5.4.2 93 | py==1.5.2 94 | pycairo==1.16.2 95 | pycodestyle==2.3.1 96 | pycrypto==2.6.1 97 | pycups==1.9.73 98 | pyds9==1.8.1 99 | pyfits==3.4 100 | pyflakes==1.6.0 101 | Pygments==2.2.0 102 | pygobject==3.26.1 103 | PyICU==1.9.8 104 | pyinotify==0.9.6 105 | pylint==2.4.2 106 | pymacaroons==0.13.0 107 | PyNaCl==1.1.2 108 | PyOpenGL==3.1.0 109 | pyparsing==2.4.2 110 | pyqtgraph==0.10.0 111 | pyRFC3339==1.0 112 | pytest==3.3.2 113 | pytest-arraydiff==0.2 114 | pytest-astropy==0.2.1 115 | pytest-doctestplus==0.1.2 116 | pytest-openfiles==0.2.0 117 | pytest-remotedata==0.2.0 118 | python-apt==1.6.5+ubuntu0.4 119 | python-dateutil==2.8.0 120 | python-debian==0.1.32 121 | python-magic==0.4.16 122 | pytz==2019.3 123 | pyxdg==0.25 124 | PyYAML==5.1.2 125 | pyzmq==16.0.2 126 | QtAwesome==0.4.4 127 | qtconsole==4.3.1 128 | QtPy==1.3.1 129 | reportlab==3.4.0 130 | requests==2.22.0 131 | requests-unixsocket==0.1.5 132 | roman==2.0.0 133 | rope==0.10.5 134 | sacrebleu==1.4.2 135 | scipy==1.3.1 136 | scour==0.36 137 | screen-resolution-extra==0.0.0 138 | seaborn==0.9.0 139 | SecretStorage==2.3.1 140 | simplegeneric==0.8.1 141 | simplejson==3.13.2 142 | six==1.12.0 143 | 
Sphinx==1.6.7 144 | spyder==3.2.6 145 | ssh-import-id==5.7 146 | subword-nmt==0.3.6 147 | sympy==1.1.1 148 | system-service==0.3 149 | systemd-python==234 150 | tensorboard==1.14.0 151 | tensorflow-estimator==1.14.0 152 | termcolor==1.1.0 153 | testpath==0.3.1 154 | torch==1.3.0 155 | torchtext==0.4.0 156 | tornado==4.5.3 157 | tqdm==4.36.1 158 | traitlets==4.3.2 159 | typed-ast==1.4.0 160 | typing==3.7.4.1 161 | ubuntu-drivers-common==0.0.0 162 | ufw==0.36 163 | unattended-upgrades==0.1 164 | unidiff==0.5.4 165 | urllib3==1.25.6 166 | usb-creator==0.3.3 167 | virtualenv==15.1.0 168 | wadllib==1.3.2 169 | wcwidth==0.1.7 170 | webencodings==0.5 171 | Werkzeug==0.16.0 172 | wrapt==1.11.2 173 | xkit==0.0.0 174 | zope.interface==4.3.2 175 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/run-adj.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH="/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.pycharm_helpers/pydev:/home/carla/MyProjects/SLP-FE-Linguistics:/home/carla/.pycharm_helpers/pycharm_display:/home/carla/.pycharm_helpers/third_party/thriftpy:/home/carla/.pycharm_helpers/pydev:/Users/CarlaViegas/Library/Caches/JetBrains/PyCharm2020.1/cythonExtensions:/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python37.zip:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/lib-dynload:/usr/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages:/home/carla/.pycharm_helpers/pycharm_matplotlib_backend:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages/IPython/extensions:/home/carla/MyProjects/SLP-FE-Linguistics/SLT" 2 | 3 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/330000_every.ckpt 4 | 5 | python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/210000_every.ckpt 6 | 7 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D_same_vocab_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D_same_vocab/train1/320000_every.ckpt 8 | 9 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2Dauc_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2Dauc/train1/360000_every.ckpt 10 | 11 | # TODO add ckpt 12 | #python ~/MyProjects/SLP-FE-Linguistics/PT-T2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Configs/handsbodyface2Daub_adj.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Models/handsbodyface2Daub/train1/0000_every.ckpt 13 | 14 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/run-norm.sh: -------------------------------------------------------------------------------- 1 | export 
PYTHONPATH="/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.pycharm_helpers/pydev:/home/carla/MyProjects/SLP-FE-Linguistics:/home/carla/.pycharm_helpers/pycharm_display:/home/carla/.pycharm_helpers/third_party/thriftpy:/home/carla/.pycharm_helpers/pydev:/Users/CarlaViegas/Library/Caches/JetBrains/PyCharm2020.1/cythonExtensions:/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python37.zip:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/lib-dynload:/usr/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages:/home/carla/.pycharm_helpers/pycharm_matplotlib_backend:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages/IPython/extensions:/home/carla/MyProjects/SLP-FE-Linguistics/SLT" 2 | #python ~/MyProjects/SLP-FE-Linguistics/PT-T2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Configs/handsbody.yaml 3 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3D.yaml 4 | python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3Dauc_norm.yaml 5 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml 6 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train1/410000_every.ckpt 7 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D_same_vocab.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D_same_vocab/train1/320000_every.ckpt 8 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/Linguistics/PT-TG2S/Configs/handsbody.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/330000_every.ckpt 9 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train3/20000_best.ckpt 10 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/run.sh: -------------------------------------------------------------------------------- 1 | export 
PYTHONPATH="/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.pycharm_helpers/pydev:/home/carla/MyProjects/SLP-FE-Linguistics:/home/carla/.pycharm_helpers/pycharm_display:/home/carla/.pycharm_helpers/third_party/thriftpy:/home/carla/.pycharm_helpers/pydev:/Users/CarlaViegas/Library/Caches/JetBrains/PyCharm2020.1/cythonExtensions:/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python37.zip:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/lib-dynload:/usr/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages:/home/carla/.pycharm_helpers/pycharm_matplotlib_backend:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages/IPython/extensions:/home/carla/MyProjects/SLP-FE-Linguistics/SLT" 2 | #python ~/MyProjects/SLP-FE-Linguistics/PT-T2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Configs/handsbody.yaml 3 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3D.yaml 4 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3Dauc.yaml 5 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml 6 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train1/410000_every.ckpt 7 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D_same_vocab.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D_same_vocab/train1/320000_every.ckpt 8 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/330000_every.ckpt 9 | python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train3/20000_best.ckpt 10 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/run2.sh: -------------------------------------------------------------------------------- 1 | export 
PYTHONPATH="/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.pycharm_helpers/pydev:/home/carla/MyProjects/SLP-FE-Linguistics:/home/carla/.pycharm_helpers/pycharm_display:/home/carla/.pycharm_helpers/third_party/thriftpy:/home/carla/.pycharm_helpers/pydev:/Users/CarlaViegas/Library/Caches/JetBrains/PyCharm2020.1/cythonExtensions:/home/carla/MyProjects/SLP-FE-Linguistics/PT-G2S:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python37.zip:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/lib-dynload:/usr/lib/python3.7:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages:/home/carla/.pycharm_helpers/pycharm_matplotlib_backend:/home/carla/.local/share/virtualenvs/ProgressiveTransformersSLP-f7T2SrO1/lib/python3.7/site-packages/IPython/extensions:/home/carla/MyProjects/SLP-FE-Linguistics/SLT" 2 | #python ~/MyProjects/SLP-FE-Linguistics/PT-T2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-T2S/Configs/handsbody.yaml 3 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3D.yaml 4 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3Dauc.yaml 5 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml 6 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train1/410000_every.ckpt 7 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/best.ckpt 8 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbody.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbody/train1/330000_every.ckpt 9 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py train ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/train1/180000_every.ckpt 10 | #python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface2D.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface2D/210000_every.ckpt 11 | python ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/__main__.py test ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Configs/handsbodyface3Dauc.yaml --ckpt ~/MyProjects/SLP-FE-Linguistics/PT-TG2S/Models/handsbodyface3Dauc/train1/450000_every.ckpt 12 | -------------------------------------------------------------------------------- /code/generation-model/dynamic_selection/search.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import Tensor 5 | import numpy as np 6 | 7 | from decoders import Decoder, TransformerDecoder 8 | from embeddings import Embeddings 9 | 10 | # pylint: disable=unused-argument 11 | 12 | 13 | def greedy( 14 | src1_mask: Tensor, 15 | src2_mask: Tensor, 16 | 17 | embed: Embeddings, 18 | decoder: Decoder, 19 | encoder1_output: Tensor, 20 | 
        encoder2_output: Tensor,
21 | 
22 |         trg_input: Tensor,
23 |         model,
24 | ) -> (np.array, np.array):
25 |     """
26 |     Special greedy function for transformer, since it works differently.
27 |     The transformer remembers all previous states and attends to them.
28 | 
29 |     :param src1_mask: mask for source-1 inputs, 0 for positions after </s>
30 |     :param src2_mask: mask for source-2 inputs, 0 for positions after </s>
31 |     :param embed: target embedding
32 |     :param decoder: decoder to use for greedy decoding
33 |     :param encoder1_output: encoder hidden states for attention (source 1)
34 |     :param encoder2_output: encoder hidden states for attention (source 2)
35 |     :param trg_input: ground-truth target frames, used to drive the counter
36 |     :param model: model whose just_count_in / future_prediction settings are used
37 |     :return:
38 |         - ys_out: stacked frame predictions,
39 |         - None: no attention scores are returned
40 |     """
41 |     # Initialise the input
42 |     # Extract just the BOS first frame from the target
43 |     ys = trg_input[:, :1, :].float()
44 | 
45 |     # If the counter is coming into the decoder or not
46 |     ys_out = ys
47 | 
48 |     # Set the target mask, by finding the padded rows
49 |     trg_mask = trg_input != 0.0
50 |     trg_mask = trg_mask.unsqueeze(1)
51 | 
52 |     # Find the maximum output length for this batch
53 |     max_output_length = trg_input.shape[1]
54 | 
55 |     # If just count in, input is just the counter
56 |     if model.just_count_in:
57 |         ys = ys[:, :, -1:]
58 | 
59 |     for i in range(max_output_length):
60 | 
61 |         # ys here is the input
62 |         # Drive the timing by giving the GT timing - add in the counter to the last column
63 | 
64 |         if model.just_count_in:
65 |             # If just counter, drive the input using the GT counter
66 |             ys[:, -1] = trg_input[:, i, -1:]
67 | 
68 |         else:
69 |             # Give the GT counter for timing, to drive the timing
70 |             ys[:, -1, -1:] = trg_input[:, i, -1:]
71 | 
72 |         # Embed the target input before passing to the decoder
73 |         trg_embed = embed(ys)
74 | 
75 |         # Cut padding mask to required size (of the size of the input)
76 |         padding_mask = trg_mask[:, :, :i+1, :i+1]
77 |         # Pad the mask (If required) (To make it square, and used later on correctly)
78 |         pad_amount = padding_mask.shape[2] - padding_mask.shape[3]
79 |         padding_mask = (F.pad(input=padding_mask.double(), pad=(
80 |             pad_amount, 0, 0, 0), mode='replicate') == 1.0)
81 | 
82 |         # Pass the embedded input and the encoder output into the decoder
83 |         with torch.no_grad():
84 |             out, _, _, _ = decoder(
85 |                 trg_embed=trg_embed,
86 |                 encoder1_output=encoder1_output,
87 |                 encoder2_output=encoder2_output,
88 | 
89 |                 src1_mask=src1_mask,
90 |                 src2_mask=src2_mask,
91 | 
92 |                 trg_mask=padding_mask,
93 |             )
94 | 
95 |         if model.future_prediction != 0:
96 |             # Cut to only the first frame prediction
97 |             out = torch.cat(
98 |                 (out[:, :, :out.shape[2] // (model.future_prediction)], out[:, :, -1:]), dim=2)
99 | 
100 |         if model.just_count_in:
101 |             # If just counter in trg_input, concatenate counters of output
102 |             ys = torch.cat([ys, out[:, -1:, -1:]], dim=1)
103 |         else:
104 |             # Add this frame prediction to the overall prediction
105 |             ys = torch.cat([ys, out[:, -1:, :]], dim=1)
106 | 
107 |         # Add this next predicted frame to the full frame output
108 |         ys_out = torch.cat([ys_out, out[:, -1:, :]], dim=1)
109 | 
110 |     return ys_out, None
111 | 
--------------------------------------------------------------------------------
/code/generation-model/progressive_transformer/Configs/Base.yaml:
--------------------------------------------------------------------------------
1 | data:
2 |   src: "gloss"    # Source - Either Gloss->Pose or Text->Pose (gloss,text)
3 |   trg: "skels"    # Target - 3D body co-ordinates (skels)
4 |   files: "files"    # Filenames for each sequence
5 | 
6 |   train: "./Data/tmp/train"
7 |   dev: "./Data/tmp/dev"
8 |   test: "./Data/tmp/test"
9 | 
10 |   max_sent_length: 300 # Max Sentence Length
11 |   skip_frames: 1   # Skip frames in the data, to reduce the data input size
12 |   src_vocab: "./Configs/src_vocab.txt"
13 | 
14 | training:
15 |   random_seed: 27   # Random seed for initialisation
16 |   optimizer: "adam"   # Chosen optimiser (adam, ..)
17 |   learning_rate: 0.001   # Initial model learning rate
18 |   learning_rate_min: 0.0002  # Learning rate minimum, when training will stop
19 |   weight_decay: 0.0   # Weight Decay
20 |   clip_grad_norm: 5.0   # Gradient clipping value
21 |   batch_size: 8    # Batch Size for training
22 |   scheduling: "plateau"   # Scheduling at training time (plateau, ...)
23 |   patience: 7  # How many epochs of no improvement causes a LR reduction
24 |   decrease_factor: 0.7  # LR reduction factor, after the # of patience epochs
25 |   early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...)
26 |   epochs: 20000  # How many epochs to run for
27 |   validation_freq: 10  # After how many steps to run a validation on the model
28 |   logging_freq: 250  # After how many steps to log training progress
29 |   eval_metric: "dtw"  # Evaluation metric during training ('dtw','bt')
30 |   model_dir: "./Models/Base" # Where the model shall be stored
31 |   overwrite: False # Flag to overwrite a previous saved model in the model_dir
32 |   continue: True  # Flag to continue from a previous saved model in the model_dir
33 |   shuffle: True  # Flag to shuffle the data during training
34 |   use_cuda: True  # Flag to use GPU cuda capabilities
35 |   max_output_length: 300 # Max Output Length
36 |   keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep
37 |   loss: "MSE"  # Loss function (MSE, L1)
38 | 
39 | model:
40 |   initializer: "xavier" # Model initialisation (Xavier, ...)
41 |   bias_initializer: "zeros"  # Bias initialiser (Zeros, ...)
42 |   embed_initializer: "xavier" # Embedding initialiser (Xavier, ...)
43 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 44 | just_count_in: False # Flag for Just Counter Data Augmentation 45 | gaussian_noise: False # Flag for Gaussian Noise Data Augmentation 46 | noise_rate: 5 # Gaussian Noise rate 47 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 48 | encoder: # Model Encoder 49 | type: "transformer" 50 | num_layers: 2 # Number of layers 51 | num_heads: 4 # Number of Heads 52 | embeddings: 53 | embedding_dim: 512 # Embedding Dimension 54 | dropout: 0.0 # Embedding Dropout 55 | hidden_size: 512 # Hidden Size Dimension 56 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 57 | dropout: 0.0 # Encoder Dropout 58 | decoder: # Model Decoder 59 | type: "transformer" 60 | num_layers: 2 # Number of layers 61 | num_heads: 4 # Number of Heads 62 | embeddings: 63 | embedding_dim: 512 # Embedding Dimension 64 | dropout: 0.0 # Embedding Dropout 65 | hidden_size: 512 # Hidden Size Dimension 66 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 67 | dropout: 0.0 # Decoder Dropout 68 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Configs/Base_real.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | src: "gloss" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 3 | trg: "skels" # Target - 3D body co-ordinates (skels) 4 | files: "files" # Filenames for each sequence 5 | 6 | train: "./Data/PT/train" 7 | dev: "./Data/PT/dev" 8 | test: "./Data/PT/test" 9 | 10 | max_sent_length: 300 # Max Sentence Length 11 | skip_frames: 1 # Skip frames in the data, to reduce the data input size 12 | src_vocab: "./Configs/src_vocab.txt" 13 | 14 | training: 15 | random_seed: 0 # Random seed for initialisation 16 | optimizer: "adam" # Chosen optimiser (adam, ..) 17 | learning_rate: 0.001 # Initial model learning rate 18 | learning_rate_min: 0.0002 # Learning rate minimum, when training will stop 19 | weight_decay: 0.0 # Weight Decay 20 | clip_grad_norm: 5.0 # Gradient clipping value 21 | batch_size: 8 # Batch Size for training 22 | scheduling: "plateau" # Scheduling at training time (plateau, ...) 23 | patience: 7 # How many epochs of no improvement causes a LR reduction 24 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs 25 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...) 26 | epochs: 20000 # How many epochs to run for 27 | validation_freq: 10000 # After how many steps to run a validation on the model 28 | logging_freq: 250 # After how many steps to log training progress 29 | eval_metric: "dtw" # Evaluation metric during training (dtw','bt') 30 | model_dir: "./Models/Base_Gloss2P_counter" # Where the model shall be stored 31 | overwrite: False # Flag to overwrite a previous saved model in the model_dir 32 | continue: True # Flag to continue from a previous saved model in the model_dir 33 | shuffle: True # Flag to shuffle the data during training 34 | use_cuda: True # Flag to use GPU cuda capabilities 35 | max_output_length: 300 # Max Output Length 36 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep 37 | loss: "MSE" # Loss function (MSE, L1) 38 | 39 | model: 40 | initializer: "xavier" # Model initialisation (Xavier, ...) 41 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...) 42 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...) 
43 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 44 | just_count_in: False # Flag for Just Counter Data Augmentation 45 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation 46 | noise_rate: 5 # Gaussian Noise rate 47 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 48 | encoder: # Model Encoder 49 | type: "transformer" 50 | num_layers: 2 # Number of layers 51 | num_heads: 4 # Number of Heads 52 | embeddings: 53 | embedding_dim: 512 # Embedding Dimension 54 | dropout: 0.0 # Embedding Dropout 55 | hidden_size: 512 # Hidden Size Dimension 56 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 57 | dropout: 0.0 # Encoder Dropout 58 | decoder: # Model Decoder 59 | type: "transformer" 60 | num_layers: 2 # Number of layers 61 | num_heads: 4 # Number of Heads 62 | embeddings: 63 | embedding_dim: 512 # Embedding Dimension 64 | dropout: 0.0 # Embedding Dropout 65 | hidden_size: 512 # Hidden Size Dimension 66 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 67 | dropout: 0.0 # Decoder Dropout 68 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Configs/Base_text2Skeleton.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | src: "text" # Source - Either Gloss->Pose or Text->Pose (gloss,text) 3 | trg: "skels" # Target - 3D body co-ordinates (skels) 4 | files: "files" # Filenames for each sequence 5 | 6 | train: "./Data/PT/train" 7 | dev: "./Data/PT/dev" 8 | test: "./Data/PT/test" 9 | 10 | max_sent_length: 300 # Max Sentence Length 11 | skip_frames: 1 # Skip frames in the data, to reduce the data input size 12 | src_vocab: "./Configs/src_text_vocab.txt" 13 | 14 | training: 15 | random_seed: 0 # Random seed for initialisation 16 | optimizer: "adam" # Chosen optimiser (adam, ..) 17 | learning_rate: 0.001 # Initial model learning rate 18 | learning_rate_min: 0.0001 # Learning rate minimum, when training will stop 19 | weight_decay: 0.0 # Weight Decay 20 | clip_grad_norm: 5.0 # Gradient clipping value 21 | batch_size: 8 # Batch Size for training 22 | scheduling: "plateau" # Scheduling at training time (plateau, ...) 23 | patience: 7 # How many epochs of no improvement causes a LR reduction 24 | decrease_factor: 0.7 # LR reduction factor, after the # of patience epochs 25 | early_stopping_metric: "dtw" # Which metric determines scheduling (DTW, loss, BT...) 26 | epochs: 20000 # How many epochs to run for 27 | validation_freq: 10000 # After how many steps to run a validation on the model 28 | logging_freq: 250 # After how many steps to log training progress 29 | eval_metric: "dtw" # Evaluation metric during training (dtw','bt') 30 | model_dir: "./Models/Base_T2P8head" # Where the model shall be stored 31 | overwrite: False # Flag to overwrite a previous saved model in the model_dir 32 | continue: True # Flag to continue from a previous saved model in the model_dir 33 | shuffle: True # Flag to shuffle the data during training 34 | use_cuda: True # Flag to use GPU cuda capabilities 35 | max_output_length: 300 # Max Output Length 36 | keep_last_ckpts: 1 # How many previous best/latest checkpoints to keep 37 | loss: "MSE" # Loss function (MSE, L1) 38 | 39 | model: 40 | initializer: "xavier" # Model initialisation (Xavier, ...) 41 | bias_initializer: "zeros" # Bias initialiser (Zeros, ...) 42 | embed_initializer: "xavier" # Embedding initialiser (Xavier, ...) 
43 | trg_size: 150 # Size of target skeleton coordinates (150 for Inverse Kinematics body/hands) 44 | just_count_in: False # Flag for Just Counter Data Augmentation 45 | gaussian_noise: True # Flag for Gaussian Noise Data Augmentation 46 | noise_rate: 5 # Gaussian Noise rate 47 | future_prediction: 0 # Future Prediction Data Augmentation if > 0 48 | encoder: # Model Encoder 49 | type: "transformer" 50 | num_layers: 2 # Number of layers 51 | num_heads: 8 # Number of Heads 52 | embeddings: 53 | embedding_dim: 512 # Embedding Dimension 54 | dropout: 0.0 # Embedding Dropout 55 | hidden_size: 512 # Hidden Size Dimension 56 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 57 | dropout: 0.0 # Encoder Dropout 58 | decoder: # Model Decoder 59 | type: "transformer" 60 | num_layers: 2 # Number of layers 61 | num_heads: 8 # Number of Heads 62 | embeddings: 63 | embedding_dim: 512 # Embedding Dimension 64 | dropout: 0.0 # Embedding Dropout 65 | hidden_size: 512 # Hidden Size Dimension 66 | ff_size: 2048 # Feed-forward dimension (4 x hidden_size) 67 | dropout: 0.0 # Decoder Dropout 68 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Data/tmp/dev.files: -------------------------------------------------------------------------------- 1 | dev/11August_2010_Wednesday_tagesschau-2 2 | dev/11August_2010_Wednesday_tagesschau-3 3 | dev/11August_2010_Wednesday_tagesschau-8 4 | dev/25October_2010_Monday_tagesschau-22 5 | dev/05May_2011_Thursday_tagesschau-25 6 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Data/tmp/dev.gloss: -------------------------------------------------------------------------------- 1 | DRUCK TIEF KOMMEN 2 | ES-BEDEUTET VIEL WOLKE UND KOENNEN REGEN GEWITTER KOENNEN 3 | WIND MAESSIG SCHWACH REGION WENN GEWITTER WIND KOENNEN 4 | MITTWOCH REGEN KOENNEN NORDWEST WAHRSCHEINLICH NORD STARK WIND 5 | JETZT WETTER WIE-AUSSEHEN MORGEN FREITAG SECHSTE MAI ZEIGEN-BILDSCHIRM 6 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Data/tmp/dev.text: -------------------------------------------------------------------------------- 1 | tiefer luftdruck bestimmt in den nächsten tagen unser wetter . 2 | das bedeutet viele wolken und immer wieder zum teil kräftige schauer und gewitter . 3 | meist weht nur ein schwacher wind aus unterschiedlichen richtungen der bei schauern und gewittern stark böig sein kann . 4 | am mittwoch hier und da nieselregen in der nordwesthälfte an den küsten kräftiger wind . 5 | und nun die wettervorhersage für morgen freitag den sechsten mai . 
6 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Data/tmp/test.files: -------------------------------------------------------------------------------- 1 | test/25October_2010_Monday_tagesschau-17 2 | test/25October_2010_Monday_tagesschau-24 3 | test/15December_2010_Wednesday_tagesschau-37 4 | test/10March_2011_Thursday_heute-58 5 | test/14August_2009_Friday_tagesschau-62 6 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Data/tmp/test.gloss: -------------------------------------------------------------------------------- 1 | REGEN SCHNEE REGION VERSCHWINDEN NORD REGEN KOENNEN REGION STERN KOENNEN SEHEN 2 | DONNERSTAG NORDWEST REGEN REGION SONNE WOLKE WECHSELHAFT DANN FREITAG AEHNLICH WETTER 3 | KRAEFTIG AB MORGEN FRUEH MEISTENS SCHNEE SCHNEIEN KALT REGEN 4 | WOCHENENDE SONNE SAMSTAG SCHOEN TEMPERATUR BIS SIEBZEHN GRAD REGION 5 | DEUTSCH LAND MORGEN HOCH DRUCK KOMMEN WOLKE AUFLOESEN 6 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Data/tmp/test.text: -------------------------------------------------------------------------------- 1 | regen und schnee lassen an den alpen in der nacht nach im norden und nordosten fallen hier und da schauer sonst ist das klar . 2 | am donnerstag regen in der nordhälfte in der südhälfte mal sonne mal wolken ähnliches wetter dann auch am freitag . 3 | vom nordmeer zieht ein kräftiges tief heran und bringt uns ab den morgenstunden heftige schneefälle zum teil auch gefrierenden regen . 4 | sonnig geht es auch ins wochenende samstag ein herrlicher tag mit temperaturen bis siebzehn grad hier im westen . 5 | deutschland liegt morgen unter hochdruckeinfluss der die wolken weitgehend vertreibt . 6 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Data/tmp/train.files: -------------------------------------------------------------------------------- 1 | train/11August_2010_Wednesday_tagesschau-1 2 | train/11August_2010_Wednesday_tagesschau-4 3 | train/11August_2010_Wednesday_tagesschau-5 4 | train/11August_2010_Wednesday_tagesschau-6 5 | train/11August_2010_Wednesday_tagesschau-7 6 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Data/tmp/train.gloss: -------------------------------------------------------------------------------- 1 | JETZT WETTER MORGEN DONNERSTAG ZWOELF FEBRUAR 2 | ORT REGEN DURCH REGEN KOENNEN UEBERSCHWEMMUNG KOENNEN 3 | NORDWEST HEUTE NACHT TROCKEN BLEIBEN SUEDWEST KOENNEN REGEN ORT GEWITTER DAZU 4 | TAGSUEBER OFT REGEN GEWITTER KOENNEN MANCHMAL REGEN VIEL REGEN 5 | WOLKE LOCH SPEZIELL NORDWEST 6 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/Data/tmp/train.text: -------------------------------------------------------------------------------- 1 | und nun die wettervorhersage für morgen donnerstag den zwölften august . 2 | mancherorts regnet es auch länger und ergiebig auch lokale überschwemmungen sind wieder möglich . 3 | im nordwesten bleibt es heute nacht meist trocken sonst muss mit teilweise kräftigen schauern gerechnet werden örtlich mit blitz und donner . 
4 | auch am tag gibt es verbreitet zum teil kräftige schauer oder gewitter und in manchen regionen fallen ergiebige regenmengen . 5 | größere wolkenlücken finden sich vor allem im nordwesten . 6 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/README.md: -------------------------------------------------------------------------------- 1 | # Progressive Transformers for End-to-End Sign Language Production 2 | 3 | Source code for "Progressive Transformers for End-to-End Sign Language Production" (Ben Saunders, Necati Cihan Camgoz, Richard Bowden - ECCV 2020) 4 | 5 | Conference video available at https://twitter.com/BenMSaunders/status/1336638886198521857 6 | 7 | # Usage 8 | 9 | Install the required packages using the requirements.txt file: 10 | 11 | `pip install -r requirements.txt` 12 | 13 | To run, start __main__.py with the arguments "train" and "./Configs/Base.yaml": 14 | 15 | `python __main__.py train ./Configs/Base.yaml` 16 | 17 | An example train.log file can be found at "./Configs/train.log" and a validation file at "./Configs/validations.txt". 18 | 19 | Back Translation model created from https://github.com/neccam/slt. Back Translation evaluation code coming soon. 20 | 21 | # Data 22 | 23 | Pre-processed Phoenix14T data can be requested via email at b.saunders@surrey.ac.uk. If you wish to create the data yourself, please follow the steps below: 24 | 25 | Phoenix14T data can be downloaded from https://www-i6.informatik.rwth-aachen.de/~koller/RWTH-PHOENIX-2014-T/ and skeleton joints can be extracted using OpenPose at https://github.com/CMU-Perceptual-Computing-Lab/openpose and lifted to 3D using the 2D to 3D Inverse Kinematics code at https://github.com/gopeith/SignLanguageProcessing under 3DposeEstimator. 26 | 27 | Prepare the Phoenix14T (or another sign language dataset) data as .txt files for .skel, .gloss, .txt and .files. The data format should be parallel .txt files for "src", "trg" and "files", with each line representing a new sequence: 28 | 29 | - The "src" file contains source sentences, with each line representing a new sentence. 30 | 31 | - The "trg" file contains the skeleton data of each frame, with a space separating frames. The joints should be divided by 3 to match the scaling I used. Each frame contains 150 joint values and a subsequent counter value, all separated by a space. Each sequence should be separated with a new line. If your data contains 150 joints per frame, please ensure that trg_size is set to 150 in the config file. 32 | 33 | - The "files" file should contain the name of each sequence on a new line. 34 | 35 | Examples can be found in /Data/tmp. The data paths must be specified in the config file. 36 | 37 | # Pre-Trained Model 38 | 39 | A pre-trained Progressive Transformer checkpoint can be downloaded from https://www.dropbox.com/s/l4xmnybp7luz0l3/PreTrained_PTSLP_Model.ckpt?dl=0. 40 | 41 | This model has a size of ```num_layers: 2```, ```num_heads: 4``` and ```embedding_dim: 512```, as outlined in ```./Configs/Base.yaml```. It has been pre-trained on the full PHOENIX14T dataset with the data format as above. The relevant train.log and validations.txt files can be found in ```./Configs```. 42 | 43 | To initialise a model from this checkpoint, pass the ```--ckpt ./PreTrained_PTSLP_Model.ckpt``` argument to either ```train``` or ```test``` modes.
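For example, to evaluate with the pre-trained weights (an illustrative invocation, assuming the checkpoint has been downloaded to the repository root):

`python __main__.py test ./Configs/Base.yaml --ckpt ./PreTrained_PTSLP_Model.ckpt`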
Additionally, to initialise the correct src_embed size, the config argument ```src_vocab: "./Configs/src_vocab.txt"``` must be set to the location of the src_vocab.txt, found under ```./Configs```. Please open an issue if this checkpoint cannot be downloaded or loaded. 44 | 45 | # Reference 46 | 47 | If you use this code in your research, please cite the following [papers](https://arxiv.org/abs/2004.14874): 48 | 49 | ``` 50 | @inproceedings{saunders2020progressive, 51 | title = {{Progressive Transformers for End-to-End Sign Language Production}}, 52 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard}, 53 | booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)}, 54 | year = {2020}} 55 | 56 | @inproceedings{saunders2020adversarial, 57 | title = {{Adversarial Training for Multi-Channel Sign Language Production}}, 58 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard}, 59 | booktitle = {Proceedings of the British Machine Vision Conference (BMVC)}, 60 | year = {2020}} 61 | 62 | @article{saunders2021continuous, 63 | title = {{Continuous 3D Multi-Channel Sign Language Production via Progressive Transformers and Mixture Density Networks}}, 64 | author = {Saunders, Ben and Camgoz, Necati Cihan and Bowden, Richard}, 65 | journal = {International Journal of Computer Vision (IJCV)}, 66 | year = {2021}} 67 | 68 | ``` 69 | 70 | ## Acknowledgements 71 | This work received funding from the SNSF Sinergia project 'SMILE' (CRSII2 160811), the European Union's Horizon2020 research and innovation programme under grant agreement no. 762021 'Content4All' and the EPSRC project 'ExTOL' (EP/R03298X/1). This work reflects only the authors' view and the Commission is not responsible for any use that may be made of the information it contains. We would also like to thank NVIDIA Corporation for their GPU grant.
72 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from training import train, test 4 | 5 | 6 | def main(): 7 | 8 | # Example options: 9 | # train ./Configs/Base.yaml 10 | # test ./Configs/Base.yaml 11 | 12 | ap = argparse.ArgumentParser("Progressive Transformers") 13 | 14 | # Choose between Train and Test 15 | ap.add_argument("mode", choices=["train", "test"], 16 | help="train a model or test") 17 | # Path to Config 18 | ap.add_argument("config_path", type=str, 19 | help="path to YAML config file") 20 | 21 | # Optional path to checkpoint 22 | ap.add_argument("--ckpt", type=str, 23 | help="path to model checkpoint") 24 | 25 | args = ap.parse_args() 26 | 27 | # If Train 28 | if args.mode == "train": 29 | train(cfg_file=args.config_path, ckpt=args.ckpt) 30 | # If Test 31 | elif args.mode == "test": 32 | test(cfg_file=args.config_path, ckpt=args.ckpt) 33 | else: 34 | raise ValueError("Unknown mode") 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/batch.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/batch.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/builders.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/builders.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/constants.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/constants.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/data.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/data.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/decoders.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/decoders.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/dtw.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/dtw.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/embeddings.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/embeddings.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/encoders.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/encoders.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/helpers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/helpers.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/initialization.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/initialization.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/loss.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/plot_videos.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/plot_videos.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/prediction.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/prediction.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/search.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/search.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/training.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/training.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/training.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/training.cpython-39.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/transformer_layers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/transformer_layers.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/__pycache__/vocabulary.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/progressive_transformer/__pycache__/vocabulary.cpython-37.pyc -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/batch.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """ 4 | Implementation of a mini-batch. 5 | """ 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from constants import TARGET_PAD 11 | 12 | class Batch: 13 | """Object for holding a batch of data with mask during training. 14 | Input is a batch from a torch text iterator. 
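The source mask is boolean with shape (batch, 1, src_len); the target mask is built from the padded target frames and replicated into a square shape for the decoder self-attention (see __init__ below).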
15 | """ 16 | 17 | def __init__(self, torch_batch, pad_index, model): 18 | 19 | """ 20 | Create a new joey batch from a torch batch. 21 | This batch extends torch text's batch attributes with src and trg 22 | length, masks, number of non-padded tokens in trg. 23 | Furthermore, it can be sorted by src length. 24 | 25 | :param torch_batch: 26 | :param pad_index: 27 | :param use_cuda: 28 | """ 29 | self.src, self.src_lengths = torch_batch.src 30 | self.src_mask = (self.src != pad_index).unsqueeze(1) 31 | self.nseqs = self.src.size(0) 32 | self.trg_input = None 33 | self.trg = None 34 | self.trg_mask = None 35 | self.trg_lengths = None 36 | self.ntokens = None 37 | 38 | self.file_paths = torch_batch.file_paths 39 | self.use_cuda = model.use_cuda 40 | self.target_pad = TARGET_PAD 41 | # Just Count 42 | self.just_count_in = model.just_count_in 43 | # Future Prediction 44 | self.future_prediction = model.future_prediction 45 | 46 | if hasattr(torch_batch, "trg"): 47 | trg = torch_batch.trg 48 | trg_lengths = torch_batch.trg.shape[1] 49 | # trg_input is used for teacher forcing, last one is cut off 50 | # Remove the last frame for target input, as inputs are only up to frame N-1 51 | self.trg_input = trg.clone()[:, :-1,:] 52 | 53 | self.trg_lengths = trg_lengths 54 | # trg is used for loss computation, shifted by one since BOS 55 | self.trg = trg.clone()[:, 1:, :] 56 | 57 | # Just Count 58 | if self.just_count_in: 59 | # If Just Count, cut off the first frame of trg_input 60 | self.trg_input = self.trg_input[:, :, -1:] 61 | 62 | # Future Prediction 63 | if self.future_prediction != 0: 64 | # Loop through the future prediction, concatenating the frames shifted across once each time 65 | future_trg = torch.Tensor() 66 | # Concatenate each frame (Not counter) 67 | for i in range(0, self.future_prediction): 68 | future_trg = torch.cat((future_trg, self.trg[:, i:-(self.future_prediction - i), :-1].clone()), dim=2) 69 | # Create the final target using the collected future_trg and original trg 70 | self.trg = torch.cat((future_trg, self.trg[:,:-self.future_prediction,-1:]), dim=2) 71 | 72 | # Cut off the last N frames of the trg_input 73 | self.trg_input = self.trg_input[:, :-self.future_prediction, :] 74 | 75 | # Target Pad is dynamic, so we exclude the padded areas from the loss computation 76 | trg_mask = (self.trg_input != self.target_pad).unsqueeze(1) 77 | # This increases the shape of the target mask to be even (16,1,120,120) - 78 | # adding padding that replicates - so just continues the False's or True's 79 | pad_amount = self.trg_input.shape[1] - self.trg_input.shape[2] 80 | # Create the target mask the same size as target input 81 | self.trg_mask = (F.pad(input=trg_mask.double(), pad=(pad_amount, 0, 0, 0), mode='replicate') == 1.0) 82 | self.ntokens = (self.trg != pad_index).data.sum().item() 83 | 84 | if self.use_cuda: 85 | self._make_cuda() 86 | 87 | # If using Cuda 88 | def _make_cuda(self): 89 | """ 90 | Move the batch to GPU 91 | 92 | :return: 93 | """ 94 | self.src = self.src.cuda() 95 | self.src_mask = self.src_mask.cuda() 96 | 97 | if self.trg_input is not None: 98 | self.trg_input = self.trg_input.cuda() 99 | self.trg = self.trg.cuda() 100 | self.trg_mask = self.trg_mask.cuda() 101 | 102 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/constants.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Defining global constants 4 | """ 5 | 6 | 
UNK_TOKEN = '<unk>' 7 | PAD_TOKEN = '<pad>' 8 | BOS_TOKEN = '<s>' 9 | EOS_TOKEN = '</s>' 10 | 11 | TARGET_PAD = 0.0 12 | 13 | DEFAULT_UNK_ID = lambda: 0 14 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/decoders.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch import Tensor 3 | 4 | from helpers import freeze_params, ConfigurationError, subsequent_mask, uneven_subsequent_mask 5 | from transformer_layers import PositionalEncoding, \ 6 | TransformerDecoderLayer 7 | 8 | class Decoder(nn.Module): 9 | """ 10 | Base decoder class 11 | """ 12 | 13 | @property 14 | def output_size(self): 15 | """ 16 | Return the output size (the size of the output vectors produced by the decoder) 17 | 18 | :return: 19 | """ 20 | return self._output_size 21 | 22 | class TransformerDecoder(Decoder): 23 | """ 24 | A transformer decoder with N masked layers. 25 | Decoder layers are masked so that an attention head cannot see the future. 26 | """ 27 | 28 | def __init__(self, 29 | num_layers: int = 4, 30 | num_heads: int = 8, 31 | hidden_size: int = 512, 32 | ff_size: int = 2048, 33 | dropout: float = 0.1, 34 | emb_dropout: float = 0.1, 35 | vocab_size: int = 1, 36 | freeze: bool = False, 37 | trg_size: int = 97, 38 | decoder_trg_trg_: bool = True, 39 | **kwargs): 40 | """ 41 | Initialize a Transformer decoder. 42 | 43 | :param num_layers: number of Transformer layers 44 | :param num_heads: number of heads for each layer 45 | :param hidden_size: hidden size 46 | :param ff_size: position-wise feed-forward size 47 | :param dropout: dropout probability (1-keep) 48 | :param emb_dropout: dropout probability for embeddings 49 | :param vocab_size: size of the output vocabulary 50 | :param freeze: set to True to keep all decoder parameters fixed 51 | :param kwargs: 52 | """ 53 | super(TransformerDecoder, self).__init__() 54 | 55 | self._hidden_size = hidden_size 56 | 57 | # Dynamic output size depending on the target size 58 | self._output_size = trg_size 59 | 60 | # create num_layers decoder layers and put them in a list 61 | self.layers = nn.ModuleList([TransformerDecoderLayer( 62 | size=hidden_size, ff_size=ff_size, num_heads=num_heads, 63 | dropout=dropout, decoder_trg_trg=decoder_trg_trg_) for _ in range(num_layers)]) 64 | 65 | self.pe = PositionalEncoding(hidden_size,mask_count=True) 66 | self.layer_norm = nn.LayerNorm(hidden_size, eps=1e-6) 67 | 68 | self.emb_dropout = nn.Dropout(p=emb_dropout) 69 | 70 | # Output layer to be the size of joints vector + 1 for counter (total is trg_size) 71 | self.output_layer = nn.Linear(hidden_size, trg_size, bias=False) 72 | 73 | if freeze: 74 | freeze_params(self) 75 | 76 | def forward(self, 77 | trg_embed: Tensor = None, 78 | encoder_output: Tensor = None, 79 | src_mask: Tensor = None, 80 | trg_mask: Tensor = None, 81 | **kwargs): 82 | """ 83 | Transformer decoder forward pass. 84 | 85 | :param trg_embed: embedded targets 86 | :param encoder_output: source representations 87 | :param encoder_hidden: unused 88 | :param src_mask: 89 | :param unroll_steps: unused 90 | :param hidden: unused 91 | :param trg_mask: to mask out target paddings 92 | Note that a subsequent mask is applied here.
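(The subsequent mask stops each position attending to future frames; it is combined with this padding mask in the layer calls below.)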
93 | :param kwargs: 94 | :return: 95 | """ 96 | assert trg_mask is not None, "trg_mask required for Transformer" 97 | 98 | # add position encoding to word embedding 99 | x = self.pe(trg_embed) 100 | # Dropout if given 101 | x = self.emb_dropout(x) 102 | 103 | padding_mask = trg_mask 104 | # Create subsequent mask for decoding 105 | sub_mask = subsequent_mask( 106 | trg_embed.size(1)).type_as(trg_mask) 107 | 108 | # Apply each layer to the input 109 | for layer in self.layers: 110 | x = layer(x=x, memory=encoder_output, 111 | src_mask=src_mask, trg_mask=sub_mask, padding_mask=padding_mask) 112 | 113 | # Apply a layer normalisation 114 | x = self.layer_norm(x) 115 | # Output layer turns it back into vectors of size trg_size 116 | output = self.output_layer(x) 117 | 118 | return output, x, None, None 119 | 120 | def __repr__(self): 121 | return "%s(num_layers=%r, num_heads=%r)" % ( 122 | self.__class__.__name__, len(self.layers), 123 | self.layers[0].trg_trg_att.num_heads) 124 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/dtw.py: -------------------------------------------------------------------------------- 1 | from numpy import array, zeros, full, argmin, inf, ndim 2 | from scipy.spatial.distance import cdist 3 | from math import isinf 4 | 5 | """ 6 | Dynamic time warping (DTW) is used as a similarity measure between temporal sequences. 7 | Original DTW code found at https://github.com/pierre-rouanet/dtw 8 | 9 | """ 10 | 11 | # Apply DTW 12 | def dtw(x, y, dist, warp=1, w=inf, s=1.0): 13 | """ 14 | Computes Dynamic Time Warping (DTW) of two sequences. 15 | 16 | :param array x: N1*M array 17 | :param array y: N2*M array 18 | :param func dist: distance used as cost measure 19 | :param int warp: how many shifts are computed. 20 | :param int w: window size limiting the maximal distance between indices of matched entries |i,j|. 21 | :param float s: weight applied on off-diagonal moves of the path. As s gets larger, the warping path is increasingly biased towards the diagonal 22 | Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
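Example (a minimal sketch; 1-D sequences with an absolute-difference cost):

    d, cost, acc, path = dtw([0, 1, 2], [0, 0, 1, 2, 2],
                             dist=lambda a, b: abs(a - b))
    # d == 0.0 here, since y is just x with repeated samples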
23 | """ 24 | assert len(x) 25 | assert len(y) 26 | assert isinf(w) or (w >= abs(len(x) - len(y))) 27 | assert s > 0 28 | r, c = len(x), len(y) 29 | if not isinf(w): 30 | D0 = full((r + 1, c + 1), inf) 31 | for i in range(1, r + 1): 32 | D0[i, max(1, i - w):min(c + 1, i + w + 1)] = 0 33 | D0[0, 0] = 0 34 | else: 35 | D0 = zeros((r + 1, c + 1)) 36 | D0[0, 1:] = inf 37 | D0[1:, 0] = inf 38 | D1 = D0[1:, 1:] # view 39 | for i in range(r): 40 | for j in range(c): 41 | if (isinf(w) or (max(0, i - w) <= j <= min(c, i + w))): 42 | D1[i, j] = dist(x[i], y[j]) 43 | C = D1.copy() 44 | jrange = range(c) 45 | for i in range(r): 46 | if not isinf(w): 47 | jrange = range(max(0, i - w), min(c, i + w + 1)) 48 | for j in jrange: 49 | min_list = [D0[i, j]] 50 | for k in range(1, warp + 1): 51 | i_k = min(i + k, r) 52 | j_k = min(j + k, c) 53 | min_list += [D0[i_k, j] * s, D0[i, j_k] * s] 54 | D1[i, j] += min(min_list) 55 | if len(x) == 1: 56 | path = zeros(len(y)), range(len(y)) 57 | elif len(y) == 1: 58 | path = range(len(x)), zeros(len(x)) 59 | else: 60 | path = _traceback(D0) 61 | return D1[-1, -1], C, D1, path 62 | 63 | def _traceback(D): 64 | i, j = array(D.shape) - 2 65 | p, q = [i], [j] 66 | while (i > 0) or (j > 0): 67 | tb = argmin((D[i, j], D[i, j + 1], D[i + 1, j])) 68 | if tb == 0: 69 | i -= 1 70 | j -= 1 71 | elif tb == 1: 72 | i -= 1 73 | else: # (tb == 2): 74 | j -= 1 75 | p.insert(0, i) 76 | q.insert(0, j) 77 | return array(p), array(q) 78 | 79 | 80 | if __name__ == '__main__': 81 | w = inf 82 | s = 1.0 83 | if 1: # 1-D numeric 84 | from sklearn.metrics.pairwise import manhattan_distances 85 | x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0] 86 | y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0] 87 | dist_fun = manhattan_distances 88 | w = 1 89 | # s = 1.2 90 | elif 0: # 2-D numeric 91 | from sklearn.metrics.pairwise import euclidean_distances 92 | x = [[0, 0], [0, 1], [1, 1], [1, 2], [2, 2], [4, 3], [2, 3], [1, 1], [2, 2], [0, 1]] 93 | y = [[1, 0], [1, 1], [1, 1], [2, 1], [4, 3], [4, 3], [2, 3], [3, 1], [1, 2], [1, 0]] 94 | dist_fun = euclidean_distances 95 | else: # 1-D list of strings 96 | from nltk.metrics.distance import edit_distance 97 | # x = ['we', 'shelled', 'clams', 'for', 'the', 'chowder'] 98 | # y = ['class', 'too'] 99 | x = ['i', 'soon', 'found', 'myself', 'muttering', 'to', 'the', 'walls'] 100 | y = ['see', 'drown', 'himself'] 101 | # x = 'we talked about the situation'.split() 102 | # y = 'we talked about the situation'.split() 103 | dist_fun = edit_distance 104 | dist, cost, acc, path = dtw(x, y, dist_fun, w=w, s=s) 105 | 106 | # Vizualize 107 | from matplotlib import pyplot as plt 108 | plt.imshow(cost.T, origin='lower', cmap=plt.cm.Reds, interpolation='nearest') 109 | plt.plot(path[0], path[1], '-o') # relation 110 | plt.xticks(range(len(x)), x) 111 | plt.yticks(range(len(y)), y) 112 | plt.xlabel('x') 113 | plt.ylabel('y') 114 | plt.axis('tight') 115 | if isinf(w): 116 | plt.title('Minimum distance: {}, slope weight: {}'.format(dist, s)) 117 | else: 118 | plt.title('Minimum distance: {}, window widht: {}, slope weight: {}'.format(dist, w, s)) 119 | plt.show() 120 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/embeddings.py: -------------------------------------------------------------------------------- 1 | import math 2 | from torch import nn, Tensor 3 | from helpers import freeze_params 4 | import torch 5 | 6 | class MaskedNorm(nn.Module): 7 | """ 8 | Original Code from: 9 | 
https://discuss.pytorch.org/t/batchnorm-for-different-sized-samples-in-batch/44251/8 10 | """ 11 | 12 | def __init__(self, norm_type, num_groups, num_features): 13 | super().__init__() 14 | self.norm_type = norm_type 15 | if self.norm_type == "batch": 16 | self.norm = nn.BatchNorm1d(num_features=num_features) 17 | elif self.norm_type == "group": 18 | self.norm = nn.GroupNorm(num_groups=num_groups, num_channels=num_features) 19 | elif self.norm_type == "layer": 20 | self.norm = nn.LayerNorm(normalized_shape=num_features) 21 | else: 22 | raise ValueError("Unsupported Normalization Layer") 23 | 24 | self.num_features = num_features 25 | 26 | def forward(self, x: Tensor, mask: Tensor): 27 | if self.training: 28 | reshaped = x.reshape([-1, self.num_features]) 29 | reshaped_mask = mask.reshape([-1, 1]) > 0 30 | selected = torch.masked_select(reshaped, reshaped_mask).reshape( 31 | [-1, self.num_features] 32 | ) 33 | batch_normed = self.norm(selected) 34 | scattered = reshaped.masked_scatter(reshaped_mask, batch_normed) 35 | return scattered.reshape([x.shape[0], -1, self.num_features]) 36 | else: 37 | reshaped = x.reshape([-1, self.num_features]) 38 | batched_normed = self.norm(reshaped) 39 | return batched_normed.reshape([x.shape[0], -1, self.num_features]) 40 | 41 | class Embeddings(nn.Module): 42 | 43 | """ 44 | Simple embeddings class 45 | """ 46 | 47 | # pylint: disable=unused-argument 48 | def __init__(self, 49 | embedding_dim: int = 64, 50 | scale: bool = False, 51 | vocab_size: int = 0, 52 | padding_idx: int = 1, 53 | freeze: bool = False, 54 | **kwargs): 55 | """ 56 | Create new embeddings for the vocabulary. 57 | Use scaling for the Transformer. 58 | 59 | :param embedding_dim: 60 | :param scale: 61 | :param vocab_size: 62 | :param padding_idx: 63 | :param freeze: freeze the embeddings during training 64 | """ 65 | super(Embeddings, self).__init__() 66 | 67 | self.embedding_dim = embedding_dim 68 | self.scale = scale 69 | self.vocab_size = vocab_size 70 | self.lut = nn.Embedding(vocab_size, self.embedding_dim, 71 | padding_idx=padding_idx) 72 | 73 | if freeze: 74 | freeze_params(self) 75 | 76 | # pylint: disable=arguments-differ 77 | def forward(self, x: Tensor) -> Tensor: 78 | """ 79 | Perform lookup for input `x` in the embedding table. 
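Shapes: `x` holds vocabulary indices with shape (batch, seq_len); the returned tensor has shape (batch, seq_len, embedding_dim).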
80 | 81 | :param x: index in the vocabulary 82 | :return: embedded representation for `x` 83 | """ 84 | if self.scale: 85 | return self.lut(x) * math.sqrt(self.embedding_dim) 86 | return self.lut(x) 87 | 88 | def __repr__(self): 89 | return "%s(embedding_dim=%d, vocab_size=%d)" % ( 90 | self.__class__.__name__, self.embedding_dim, self.vocab_size) 91 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/encoders.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch import Tensor 6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 7 | import torch.nn.functional as F 8 | 9 | from helpers import freeze_params 10 | from transformer_layers import \ 11 | TransformerEncoderLayer, PositionalEncoding 12 | 13 | from embeddings import MaskedNorm 14 | 15 | #pylint: disable=abstract-method 16 | class Encoder(nn.Module): 17 | """ 18 | Base encoder class 19 | """ 20 | @property 21 | def output_size(self): 22 | """ 23 | Return the output size 24 | 25 | :return: 26 | """ 27 | return self._output_size 28 | 29 | class TransformerEncoder(Encoder): 30 | """ 31 | Transformer Encoder 32 | """ 33 | 34 | #pylint: disable=unused-argument 35 | def __init__(self, 36 | hidden_size: int = 512, 37 | ff_size: int = 2048, 38 | num_layers: int = 8, 39 | num_heads: int = 4, 40 | dropout: float = 0.1, 41 | emb_dropout: float = 0.1, 42 | freeze: bool = False, 43 | **kwargs): 44 | """ 45 | Initializes the Transformer. 46 | :param hidden_size: hidden size and size of embeddings 47 | :param ff_size: position-wise feed-forward layer size. 48 | (Typically this is 2*hidden_size.) 49 | :param num_layers: number of layers 50 | :param num_heads: number of heads for multi-headed attention 51 | :param dropout: dropout probability for Transformer layers 52 | :param emb_dropout: Is applied to the input (word embeddings). 53 | :param freeze: freeze the parameters of the encoder during training 54 | :param kwargs: 55 | """ 56 | super(TransformerEncoder, self).__init__() 57 | 58 | # build all (num_layers) layers 59 | self.layers = nn.ModuleList([ 60 | TransformerEncoderLayer(size=hidden_size, ff_size=ff_size, 61 | num_heads=num_heads, dropout=dropout) 62 | for _ in range(num_layers)]) 63 | 64 | self.layer_norm = nn.LayerNorm(hidden_size, eps=1e-6) 65 | self.pe = PositionalEncoding(hidden_size) 66 | self.emb_dropout = nn.Dropout(p=emb_dropout) 67 | self._output_size = hidden_size 68 | 69 | if freeze: 70 | freeze_params(self) 71 | 72 | #pylint: disable=arguments-differ 73 | def forward(self, 74 | embed_src: Tensor, 75 | src_length: Tensor, 76 | mask: Tensor) -> (Tensor, Tensor): 77 | """ 78 | Pass the input (and mask) through each layer in turn. 79 | Applies a Transformer encoder to sequence of embeddings x. 80 | The input mini-batch x needs to be sorted by src length. 81 | x and mask should have the same dimensions [batch, time, dim]. 
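(In this code base the padding mask built in batch.py is boolean with shape (batch, 1, src_len) and is broadcast over the attention scores.)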
82 | 83 | :param embed_src: embedded src inputs, 84 | shape (batch_size, src_len, embed_size) 85 | :param src_length: length of src inputs 86 | (counting tokens before padding), shape (batch_size) 87 | :param mask: indicates padding areas (zeros where padding), shape 88 | (batch_size, src_len, embed_size) 89 | :return: 90 | - output: hidden states with 91 | shape (batch_size, max_length, directions*hidden), 92 | - hidden_concat: last hidden state with 93 | shape (batch_size, directions*hidden) 94 | """ 95 | 96 | x = embed_src 97 | 98 | # Add position encoding to word embeddings 99 | x = self.pe(x) 100 | # Add Dropout 101 | x = self.emb_dropout(x) 102 | 103 | # Apply each layer to the input 104 | for layer in self.layers: 105 | x = layer(x, mask) 106 | 107 | return self.layer_norm(x), None 108 | 109 | def __repr__(self): 110 | return "%s(num_layers=%r, num_heads=%r)" % ( 111 | self.__class__.__name__, len(self.layers), 112 | self.layers[0].src_src_att.num_heads) 113 | -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/external_metrics/mscoco_rouge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # File Name : mscoco_rouge.py 4 | # 5 | # Description : Computes ROUGE-L metric as described by Lin and Hovey (2004) 6 | # 7 | # Creation Date : 2015-01-07 06:03 8 | # Author : Ramakrishna Vedantam 9 | 10 | 11 | def my_lcs(string, sub): 12 | """ 13 | Calculates longest common subsequence for a pair of tokenized strings 14 | :param string : list of str : tokens from a string split using whitespace 15 | :param sub : list of str : shorter string, also split using whitespace 16 | :returns: length (list of int): length of the longest common subsequence between the two strings 17 | 18 | Note: my_lcs only gives length of the longest common subsequence, not the actual LCS 19 | """ 20 | if len(string) < len(sub): 21 | sub, string = string, sub 22 | 23 | lengths = [[0 for i in range(0, len(sub) + 1)] for j in range(0, len(string) + 1)] 24 | 25 | for j in range(1, len(sub) + 1): 26 | for i in range(1, len(string) + 1): 27 | if string[i - 1] == sub[j - 1]: 28 | lengths[i][j] = lengths[i - 1][j - 1] + 1 29 | else: 30 | lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1]) 31 | 32 | return lengths[len(string)][len(sub)] 33 | 34 | 35 | def calc_score(hypotheses, references, beta=1.2): 36 | """ 37 | Compute ROUGE-L score given one candidate and references for an image 38 | :param hypotheses: str : candidate sentence to be evaluated 39 | :param references: list of str : COCO reference sentences for the particular image to be evaluated 40 | :returns score: int (ROUGE-L score for the candidate evaluated against references) 41 | """ 42 | assert len(hypotheses) == 1 43 | assert len(references) > 0 44 | prec = [] 45 | rec = [] 46 | 47 | # split into tokens 48 | token_c = hypotheses[0].split(" ") 49 | 50 | for reference in references: 51 | # split into tokens 52 | token_r = reference.split(" ") 53 | # compute the longest common subsequence 54 | lcs = my_lcs(token_r, token_c) 55 | prec.append(lcs / float(len(token_c))) 56 | rec.append(lcs / float(len(token_r))) 57 | 58 | prec_max = max(prec) 59 | rec_max = max(rec) 60 | 61 | if prec_max != 0 and rec_max != 0: 62 | score = ((1 + beta ** 2) * prec_max * rec_max) / float( 63 | rec_max + beta ** 2 * prec_max 64 | ) 65 | else: 66 | score = 0.0 67 | return score 68 | 
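A quick usage sketch for the ROUGE-L helper above (illustrative, not part of the original file; assumes `external_metrics` is on the import path, as in metrics.py):

```python
from external_metrics import mscoco_rouge

# calc_score expects exactly one hypothesis string and one or more references.
hyp = ["REGEN SCHNEE REGION VERSCHWINDEN"]
refs = ["REGEN SCHNEE REGION NORD VERSCHWINDEN"]

score = mscoco_rouge.calc_score(hypotheses=hyp, references=refs)
print(f"ROUGE-L: {score:.3f}")  # F-beta of LCS precision/recall (beta=1.2)
```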
-------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/loss.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Module to implement training loss 4 | """ 5 | 6 | from torch import nn, Tensor 7 | 8 | class RegLoss(nn.Module): 9 | """ 10 | Regression Loss 11 | """ 12 | 13 | def __init__(self, cfg, target_pad=0.0): 14 | super(RegLoss, self).__init__() 15 | 16 | self.loss = cfg["training"]["loss"].lower() 17 | 18 | if self.loss == "l1": 19 | self.criterion = nn.L1Loss() 20 | elif self.loss == "mse": 21 | self.criterion = nn.MSELoss() 22 | 23 | else: 24 | print("Loss not found - revert to default L1 loss") 25 | self.criterion = nn.L1Loss() 26 | 27 | model_cfg = cfg["model"] 28 | 29 | self.target_pad = target_pad 30 | self.loss_scale = model_cfg.get("loss_scale", 1.0) 31 | 32 | # pylint: disable=arguments-differ 33 | def forward(self, preds, targets): 34 | 35 | loss_mask = (targets != self.target_pad) 36 | 37 | # Find the masked predictions and targets using loss mask 38 | preds_masked = preds * loss_mask 39 | targets_masked = targets * loss_mask 40 | 41 | # Calculate loss just over the masked predictions 42 | loss = self.criterion(preds_masked, targets_masked) 43 | 44 | # Multiply loss by the loss scale 45 | if self.loss_scale != 1.0: 46 | loss = loss * self.loss_scale 47 | 48 | return loss 49 | 50 | class XentLoss(nn.Module): 51 | """ 52 | Cross-Entropy Loss with optional label smoothing 53 | """ 54 | 55 | def __init__(self, pad_index: int, smoothing: float = 0.0): 56 | super(XentLoss, self).__init__() 57 | self.smoothing = smoothing 58 | self.pad_index = pad_index 59 | # standard xent loss 60 | self.criterion = nn.NLLLoss(ignore_index=self.pad_index, 61 | reduction='sum') 62 | 63 | # pylint: disable=arguments-differ 64 | def forward(self, log_probs, targets): 65 | """ 66 | Compute the cross-entropy between logits and targets. 67 | If label smoothing is used, target distributions are not one-hot, but 68 | "1-smoothing" for the correct target token and the rest of the 69 | probability mass is uniformly spread across the other tokens. 70 | :param log_probs: log probabilities as predicted by model 71 | :param targets: target indices 72 | :return: 73 | """ 74 | # targets: indices with batch*seq_len 75 | targets = targets.contiguous().view(-1) 76 | loss = self.criterion( 77 | log_probs.contiguous().view(-1, log_probs.size(-1)), targets) 78 | 79 | return loss -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/make_new_vocab.py: -------------------------------------------------------------------------------- 1 | from vocabulary import * 2 | 3 | import os 4 | train_path = "Data/augmented_data/enhanced-gloss/train/" 5 | for fname in os.listdir(train_path): 6 | input_file = open(train_path + fname, "r").readlines() 7 | vocab_ = Vocabulary() 8 | 9 | for line in input_file: 10 | vocab_._from_list(line.split()) 11 | vocab_.to_file("Configs/src_%s_vocab.txt"%fname) -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/metrics.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | This module holds various MT evaluation metrics. 
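It provides wer, rouge (ROUGE-L), chrf and bleu helpers, wrapping the bundled sacrebleu and mscoco_rouge implementations.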
4 | """ 5 | 6 | from external_metrics import sacrebleu 7 | from external_metrics import mscoco_rouge 8 | import logging 9 | import numpy as np 10 | 11 | def wer(hypotheses, references): 12 | wer = del_rate = ins_rate = sub_rate = 0 13 | n_seq = len(hypotheses) 14 | 15 | for h, r in zip(hypotheses, references): 16 | res = wer_single(r=r, h=h) 17 | wer += res["wer"] / n_seq 18 | del_rate += res["del"] / n_seq 19 | ins_rate += res["ins"] / n_seq 20 | sub_rate += res["sub"] / n_seq 21 | 22 | return {"wer": wer, "del": del_rate, "ins": ins_rate, "sub": sub_rate} 23 | 24 | def wer_single(r, h): 25 | edit_distance_matrix = editDistance(r=r, h=h) 26 | step_list = getStepList(r=r, h=h, d=edit_distance_matrix) 27 | 28 | min_distance = float(edit_distance_matrix[len(r)][len(h)]) 29 | num_del = float(np.sum([s == "d" for s in step_list])) 30 | num_ins = float(np.sum([s == "i" for s in step_list])) 31 | num_sub = float(np.sum([s == "s" for s in step_list])) 32 | 33 | word_error_rate = round((min_distance / len(r) * 100), 4) 34 | del_rate = round((num_del / len(r) * 100), 4) 35 | ins_rate = round((num_ins / len(r) * 100), 4) 36 | sub_rate = round((num_sub / len(r) * 100), 4) 37 | 38 | return {"wer": word_error_rate, "del": del_rate, "ins": ins_rate, "sub": sub_rate} 39 | 40 | # Calculate ROUGE scores 41 | def rouge(hypotheses, references): 42 | rouge_score = 0 43 | n_seq = len(hypotheses) 44 | 45 | for h, r in zip(hypotheses, references): 46 | rouge_score += mscoco_rouge.calc_score(hypotheses=[h], references=[r]) / n_seq 47 | 48 | return rouge_score 49 | 50 | # Calculate CHRF scores 51 | def chrf(hypotheses, references): 52 | """ 53 | Character F-score from sacrebleu 54 | 55 | :param hypotheses: list of hypotheses (strings) 56 | :param references: list of references (strings) 57 | :return: 58 | """ 59 | return sacrebleu.corpus_chrf(hypotheses=hypotheses, references=references) 60 | 61 | # Calculate BLEU scores 62 | def bleu(hypotheses, references, all=False): 63 | """ 64 | Raw corpus BLEU from sacrebleu (without tokenization) 65 | 66 | :param hypotheses: list of hypotheses (strings) 67 | :param references: list of references (strings) 68 | :return: 69 | """ 70 | logger = logging.getLogger(__name__) 71 | bleu_scores = sacrebleu.raw_corpus_bleu( 72 | sys_stream=hypotheses, ref_streams=[references] 73 | ).scores 74 | scores = {} 75 | for n in range(len(bleu_scores)): 76 | scores["bleu" + str(n + 1)] = bleu_scores[n] 77 | 78 | rouge_score = rouge(hypotheses, references) * 100 79 | #wer_d = wer(hypotheses, references) 80 | #werStr = "WER: " + str(wer_d["wer"]) 81 | 82 | bleu1Str = "BLEU-1: " + str(scores["bleu1"]) 83 | bleu2Str = "BLEU-2: " + str(scores["bleu2"]) 84 | bleu3Str = "BLEU-3: " + str(scores["bleu3"]) 85 | bleu4Str = "BLEU-4: " + str(scores["bleu4"]) 86 | rougeStr = "Rouge: " + str(rouge_score) 87 | 88 | logger.info(bleu1Str) 89 | logger.info(bleu2Str) 90 | logger.info(bleu3Str) 91 | logger.info(bleu4Str) 92 | logger.info(rougeStr) 93 | 94 | print("--------------- New Scores ------------") 95 | print(bleu1Str) 96 | print(bleu2Str) 97 | print(bleu3Str) 98 | print(bleu4Str) 99 | print(rougeStr) 100 | #print(werStr) 101 | 102 | return bleu_scores[3] -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/optim/lookahead.py: -------------------------------------------------------------------------------- 1 | # from https://github.com/mgrankin/over9000/blob/master/lookahead.py 2 | # Lookahead implementation from 
https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py

import itertools as it
from torch.optim import Optimizer, Adam


# TODO: This code is buggy. It did not support state saving.
# I did some ad-hoc fixes to get it working, but no one should
# use it for training on condor etc.
# Sadly this eliminates over9000 + ranger from the options to train
# on servers.
class Lookahead(Optimizer):
    def __init__(self, base_optimizer, alpha=0.5, k=6):
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f"Invalid slow update rate: {alpha}")
        if not 1 <= k:
            raise ValueError(f"Invalid lookahead steps: {k}")

        self.optimizer = base_optimizer
        self.state = base_optimizer.state
        self.defaults = base_optimizer.defaults

        self.param_groups = self.optimizer.param_groups
        self.alpha = alpha
        self.k = k
        for group in self.param_groups:
            group["step_counter"] = 0
        self.slow_weights = [
            [p.clone().detach() for p in group["params"]] for group in self.param_groups
        ]

        for w in it.chain(*self.slow_weights):
            w.requires_grad = False

    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()
        loss = self.optimizer.step()
        for group, slow_weights in zip(self.param_groups, self.slow_weights):
            group["step_counter"] += 1
            if group["step_counter"] % self.k != 0:
                continue
            for p, q in zip(group["params"], slow_weights):
                if p.grad is None:
                    continue
                # Interpolate the slow weights towards the fast weights and
                # copy the result back into the live parameters
                q.data.add_(self.alpha, p.data - q.data)
                p.data.copy_(q.data)
        return loss


def LookaheadAdam(params, alpha=0.5, k=6, *args, **kwargs):
    adam = Adam(params, *args, **kwargs)
    return Lookahead(adam, alpha, k)
-------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/optim/over9000.py: --------------------------------------------------------------------------------
# from https://raw.githubusercontent.com/mgrankin/over9000/master/over9000.py

from SignProdJoey.optim.lookahead import Lookahead
from SignProdJoey.optim.ralamb import Ralamb

# Lookahead implementation from https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py
# RAdam + LARS implementation from https://gist.github.com/redknightlois/c4023d393eb8f92bb44b2ab582d7ec20


# RAdam + LARS + Lookahead
def Over9000(params, alpha=0.5, k=6, *args, **kwargs):
    ralamb = Ralamb(params, *args, **kwargs)
    return Lookahead(ralamb, alpha, k)
-------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/optim/ranger.py: --------------------------------------------------------------------------------
# from https://github.com/mgrankin/over9000/blob/master/ranger.py

from SignProdJoey.optim.lookahead import Lookahead
from SignProdJoey.optim.radam import RAdam


def Ranger(params, alpha=0.5, k=6, *args, **kwargs):
    radam = RAdam(params, *args, **kwargs)
    return Lookahead(radam, alpha, k)
-------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/prediction.py: --------------------------------------------------------------------------------

import numpy as np
import math

import torch
from torchtext.data import Dataset

from helpers import bpe_postprocess, load_config, get_latest_checkpoint, \
    load_checkpoint, calculate_dtw
from model import build_model, Model
from batch import Batch
from data import load_data, make_data_iter
from constants import UNK_TOKEN, PAD_TOKEN, EOS_TOKEN

# Validate epoch given a dataset
def validate_on_data(model: Model,
                     data: Dataset,
                     batch_size: int,
                     max_output_length: int,
                     eval_metric: str,
                     loss_function: torch.nn.Module = None,
                     batch_type: str = "sentence",
                     type="val",
                     BT_model=None):

    valid_iter = make_data_iter(
        dataset=data, batch_size=batch_size, batch_type=batch_type,
        shuffle=True, train=False)

    pad_index = model.src_vocab.stoi[PAD_TOKEN]
    # disable dropout
    model.eval()
    # don't track gradients during validation
    with torch.no_grad():
        valid_hypotheses = []
        valid_references = []
        valid_inputs = []
        file_paths = []
        all_dtw_scores = []

        valid_loss = 0
        total_ntokens = 0
        total_nseqs = 0

        batches = 0
        for valid_batch in iter(valid_iter):
            # Extract batch
            batch = Batch(torch_batch=valid_batch,
                          pad_index=pad_index,
                          model=model)
            targets = batch.trg

            # run as during training with teacher forcing
            if loss_function is not None and batch.trg is not None:
                # Get the loss for this batch
                batch_loss, _ = model.get_loss_for_batch(
                    batch, loss_function=loss_function)

                valid_loss += batch_loss
                total_ntokens += batch.ntokens
                total_nseqs += batch.nseqs

            # If not just count in, run inference to produce translation videos
            if not model.just_count_in:
                # Run batch through the model in an auto-regressive format
                output, attention_scores = model.run_batch(
                    batch=batch,
                    max_output_length=max_output_length)
            else:
                # For just counter, the inference is the same as ground-truthing,
                # so fall back to the targets as the output
                output = targets

            # If future prediction
            if model.future_prediction != 0:
                # Cut to only the first frame prediction + add the counter
                output = torch.cat(
                    (output[:, :, :output.shape[2] // model.future_prediction],
                     output[:, :, -1:]), dim=2)
                # Cut to only the first frame prediction + add the counter
                targets = torch.cat(
                    (targets[:, :, :targets.shape[2] // model.future_prediction],
                     targets[:, :, -1:]), dim=2)

            # Add references, hypotheses and file paths to list
            valid_references.extend(targets)
            valid_hypotheses.extend(output)
            file_paths.extend(batch.file_paths)
            # Add the source sentences to list, by using the model source vocab and batch indices
            valid_inputs.extend([[model.src_vocab.itos[batch.src[i][j]] for j in range(len(batch.src[i]))]
                                 for i in range(len(batch.src))])

            # Calculate the full Dynamic Time Warping score - for evaluation
            dtw_score = calculate_dtw(targets, output)
            all_dtw_scores.extend(dtw_score)

            # Can set to only run a few batches
            # if batches == math.ceil(20/batch_size):
            #     break
            batches += 1

        # Dynamic Time Warping scores
        current_valid_score = np.mean(all_dtw_scores)

    return current_valid_score, valid_loss, valid_references, valid_hypotheses, \
        valid_inputs, all_dtw_scores, file_paths
-------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/requirements (1).txt:
-------------------------------------------------------------------------------- 1 | absl-py==0.8.1 2 | alabaster==0.7.8 3 | asn1crypto==0.24.0 4 | astor==0.8.0 5 | astroid==2.3.1 6 | #astropy==3.0 7 | attrs==17.4.0 8 | Babel==2.4.0 9 | bleach==2.1.2 10 | certifi==2019.9.11 11 | chardet==3.0.4 12 | cloudpickle==0.5.2 13 | configobj==5.0.6 14 | cryptography==2.1.4 15 | cycler==0.10.0 16 | #Cython==0.27.3 17 | decorator==4.1.2 18 | docutils==0.14 19 | emcee==2.2.1 20 | future==0.18.1 21 | gast==0.3.2 22 | google-pasta==0.1.7 23 | grpcio==1.24.1 24 | h5py==2.10.0 25 | html5lib==0.999999999 26 | httplib2==0.9.2 27 | idna==2.8 28 | imagesize==0.7.1 29 | ipykernel==4.8.2 30 | ipython==5.5.0 31 | ipython-genutils==0.2.0 32 | isort==4.3.21 33 | jedi==0.11.1 34 | Jinja2==2.10 35 | joeynmt==0.0.1 36 | jsonschema==2.6.0 37 | jupyter-client==5.2.2 38 | jupyter-core==4.4.0 39 | Keras-Applications==1.0.8 40 | Keras-Preprocessing==1.1.0 41 | keyring==10.6.0 42 | keyrings.alt==3.0 43 | kiwisolver==1.1.0 44 | launchpadlib==1.10.6 45 | lazr.restfulclient==0.13.5 46 | lazr.uri==1.0.3 47 | lazy-object-proxy==1.4.2 48 | logilab-common==1.4.1 49 | macaroonbakery==1.1.4 50 | Mako==1.0.7 51 | Markdown==3.1.1 52 | matplotlib==3.1.1 53 | mccabe==0.6.1 54 | mistune==0.8.3 55 | mpmath==1.0.0 56 | nbconvert==5.3.1 57 | nbformat==4.4.0 58 | netifaces==0.10.4 59 | nose==1.3.7 60 | nose-parameterized==0.3.4 61 | numpy==1.17.3 62 | numpydoc==0.7.0 63 | oauth==1.0.1 64 | olefile==0.45.1 65 | pandas==0.25.1 66 | pandocfilters==1.4.2 67 | parso==0.1.1 68 | pexpect==4.2.1 69 | pickleshare==0.7.4 70 | Pillow==6.2.0 71 | pluggy==0.6.0 72 | ply==3.11 73 | portalocker==1.5.1 74 | prompt-toolkit==1.0.15 75 | protobuf==3.10.0 76 | psutil==5.4.2 77 | py==1.5.2 78 | pycairo==1.16.2 79 | pycodestyle==2.3.1 80 | pycrypto==2.6.1 81 | pycups==1.9.73 82 | pyds9==1.8.1 83 | pyfits==3.4 84 | pyflakes==1.6.0 85 | Pygments==2.2.0 86 | #pygobject==3.27.0 87 | PyICU==1.9.8 88 | pyinotify==0.9.6 89 | pylint==2.4.2 90 | pymacaroons==0.13.0 91 | PyNaCl==1.1.2 92 | PyOpenGL==3.1.0 93 | pyparsing==2.4.2 94 | pyqtgraph==0.10.0 95 | pyRFC3339==1.0 96 | pytest==3.3.2 97 | pytest-arraydiff==0.2 98 | pytest-astropy==0.2.1 99 | pytest-doctestplus==0.1.2 100 | pytest-openfiles==0.2.0 101 | pytest-remotedata==0.2.0 102 | python-dateutil==2.8.0 103 | python-debian==0.1.32 104 | pytz==2019.3 105 | pyxdg==0.25 106 | PyYAML==5.1.2 107 | pyzmq==19.0.2 108 | QtAwesome==0.4.4 109 | qtconsole==4.3.1 110 | QtPy==1.3.1 111 | reportlab==3.4.0 112 | requests==2.22.0 113 | requests-unixsocket==0.1.5 114 | roman==2.0.0 115 | rope==0.10.5 116 | sacrebleu==1.4.2 117 | scipy==1.3.1 118 | scour==0.36 119 | #screen-resolution-extra==0.0.0 120 | seaborn==0.9.0 121 | SecretStorage==2.3.1 122 | simplegeneric==0.8.1 123 | simplejson==3.13.2 124 | six==1.12.0 125 | Sphinx==1.6.7 126 | spyder==3.2.6 127 | ssh-import-id==5.10 128 | subword-nmt==0.3.6 129 | sympy==1.1.1 130 | tensorboard==1.14.0 131 | tensorflow==1.14.0 132 | termcolor==1.1.0 133 | testpath==0.3.1 134 | torch==1.4.0 135 | torchtext==0.4.0 136 | tornado==4.5.3 137 | tqdm==4.36.1 138 | traitlets==4.3.2 139 | typed-ast==1.4.0 140 | typing==3.7.4.1 141 | unidiff==0.5.4 142 | urllib3==1.25.6 143 | virtualenv==15.1.0 144 | wadllib==1.3.2 145 | wcwidth==0.1.7 146 | webencodings==0.5 147 | Werkzeug==0.16.0 148 | wrapt==1.11.1 149 | #xkit==0.0.0 -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/requirements.txt: 
-------------------------------------------------------------------------------- 1 | absl-py==0.8.1 2 | alabaster==0.7.8 3 | asn1crypto==0.24.0 4 | astor==0.8.0 5 | astroid==2.3.1 6 | astropy==3.0 7 | attrs==17.4.0 8 | Babel==2.4.0 9 | bleach==2.1.2 10 | certifi==2019.9.11 11 | chardet==3.0.4 12 | cloudpickle==0.5.2 13 | configobj==5.0.6 14 | cryptography==2.1.4 15 | cycler==0.10.0 16 | Cython==0.26.1 17 | decorator==4.1.2 18 | docutils==0.14 19 | emcee==2.2.1 20 | future==0.18.1 21 | gast==0.3.2 22 | google-pasta==0.1.7 23 | grpcio==1.24.1 24 | h5py==2.10.0 25 | html5lib==0.999999999 26 | httplib2==0.9.2 27 | idna==2.8 28 | imagesize==0.7.1 29 | ipykernel==4.8.2 30 | ipython==5.5.0 31 | ipython-genutils==0.2.0 32 | isort==4.3.21 33 | jedi==0.11.1 34 | Jinja2==2.10 35 | joeynmt==0.0.1 36 | jsonschema==2.6.0 37 | jupyter-client==5.2.2 38 | jupyter-core==4.4.0 39 | Keras-Applications==1.0.8 40 | Keras-Preprocessing==1.1.0 41 | keyring==10.6.0 42 | keyrings.alt==3.0 43 | kiwisolver==1.1.0 44 | launchpadlib==1.10.6 45 | lazr.restfulclient==0.13.5 46 | lazr.uri==1.0.3 47 | lazy-object-proxy==1.4.2 48 | logilab-common==1.4.1 49 | macaroonbakery==1.1.3 50 | Mako==1.0.7 51 | Markdown==3.1.1 52 | matplotlib==3.1.1 53 | mccabe==0.6.1 54 | mistune==0.8.3 55 | mpmath==1.0.0 56 | nbconvert==5.3.1 57 | nbformat==4.4.0 58 | netifaces==0.10.4 59 | nose==1.3.7 60 | nose-parameterized==0.3.4 61 | numpy==1.17.3 62 | numpydoc==0.7.0 63 | oauth==1.0.1 64 | olefile==0.45.1 65 | pandas==0.25.1 66 | pandocfilters==1.4.2 67 | parso==0.1.1 68 | pexpect==4.2.1 69 | pickleshare==0.7.4 70 | Pillow==6.2.0 71 | pluggy==0.6.0 72 | ply==3.11 73 | portalocker==1.5.1 74 | prompt-toolkit==1.0.15 75 | protobuf==3.10.0 76 | psutil==5.4.2 77 | py==1.5.2 78 | pycairo==1.16.2 79 | pycodestyle==2.3.1 80 | pycrypto==2.6.1 81 | pycups==1.9.73 82 | pyds9==1.8.1 83 | pyfits==3.4 84 | pyflakes==1.6.0 85 | Pygments==2.2.0 86 | pygobject==3.26.1 87 | PyICU==1.9.8 88 | pyinotify==0.9.6 89 | pylint==2.4.2 90 | pymacaroons==0.13.0 91 | PyNaCl==1.1.2 92 | PyOpenGL==3.1.0 93 | pyparsing==2.4.2 94 | pyqtgraph==0.10.0 95 | pyRFC3339==1.0 96 | pytest==3.3.2 97 | pytest-arraydiff==0.2 98 | pytest-astropy==0.2.1 99 | pytest-doctestplus==0.1.2 100 | pytest-openfiles==0.2.0 101 | pytest-remotedata==0.2.0 102 | python-dateutil==2.8.0 103 | python-debian==0.1.32 104 | pytz==2019.3 105 | pyxdg==0.25 106 | PyYAML==5.1.2 107 | pyzmq==16.0.2 108 | QtAwesome==0.4.4 109 | qtconsole==4.3.1 110 | QtPy==1.3.1 111 | reportlab==3.4.0 112 | requests==2.22.0 113 | requests-unixsocket==0.1.5 114 | roman==2.0.0 115 | rope==0.10.5 116 | sacrebleu==1.4.2 117 | scipy==1.3.1 118 | scour==0.36 119 | screen-resolution-extra==0.0.0 120 | seaborn==0.9.0 121 | SecretStorage==2.3.1 122 | simplegeneric==0.8.1 123 | simplejson==3.13.2 124 | six==1.12.0 125 | Sphinx==1.6.7 126 | spyder==3.2.6 127 | ssh-import-id==5.7 128 | subword-nmt==0.3.6 129 | sympy==1.1.1 130 | tensorflow==1.14.0 131 | tensorboard==1.14.0 132 | tensorflow-estimator==1.14.0 133 | termcolor==1.1.0 134 | testpath==0.3.1 135 | torch==1.3.0 136 | torchtext==0.4.0 137 | tornado==4.5.3 138 | tqdm==4.36.1 139 | traitlets==4.3.2 140 | typed-ast==1.4.0 141 | typing==3.7.4.1 142 | unidiff==0.5.4 143 | urllib3==1.25.6 144 | virtualenv==15.1.0 145 | wadllib==1.3.2 146 | wcwidth==0.1.7 147 | webencodings==0.5 148 | Werkzeug==0.16.0 149 | wrapt==1.11.1 150 | xkit==0.0.0 -------------------------------------------------------------------------------- /code/generation-model/progressive_transformer/search.py: 
--------------------------------------------------------------------------------

import torch
import torch.nn.functional as F
from torch import Tensor
import numpy as np

from decoders import Decoder, TransformerDecoder
from embeddings import Embeddings

# pylint: disable=unused-argument
def greedy(
        src_mask: Tensor,
        embed: Embeddings,
        decoder: Decoder,
        encoder_output: Tensor,
        trg_input: Tensor,
        model,
) -> (np.array, np.array):
    """
    Special greedy function for the transformer, since it works differently.
    The transformer remembers all previous states and attends to them.

    :param src_mask: mask for source inputs, 0 for positions after </s>
    :param embed: target embedding
    :param decoder: decoder to use for greedy decoding
    :param encoder_output: encoder hidden states for attention
    :param trg_input: ground-truth target frames, used to drive the timing
    :param model: the model, consulted for the counter and future-prediction settings
    :return:
        - stacked_output: output hypotheses (2d array of indices),
        - stacked_attention_scores: attention scores (3d array)
    """
    # Initialise the input
    # Extract just the BOS first frame from the target
    ys = trg_input[:, :1, :].float()

    # If the counter is coming into the decoder or not
    ys_out = ys

    # Set the target mask, by finding the padded rows
    trg_mask = trg_input != 0.0
    trg_mask = trg_mask.unsqueeze(1)

    # Find the maximum output length for this batch
    max_output_length = trg_input.shape[1]

    # If just count in, input is just the counter
    if model.just_count_in:
        ys = ys[:, :, -1:]

    for i in range(max_output_length):

        # ys here is the input
        # Drive the timing by giving the GT timing - add in the counter to the last column
        if model.just_count_in:
            # If just counter, drive the input using the GT counter
            ys[:, -1] = trg_input[:, i, -1:]
        else:
            # Give the GT counter for timing, to drive the timing
            ys[:, -1, -1:] = trg_input[:, i, -1:]

        # Embed the target input before passing to the decoder
        trg_embed = embed(ys)

        # Cut padding mask to required size (of the size of the input)
        padding_mask = trg_mask[:, :, :i + 1, :i + 1]
        # Pad the mask (if required) to make it square, so it is applied correctly later on
        pad_amount = padding_mask.shape[2] - padding_mask.shape[3]
        padding_mask = (F.pad(input=padding_mask.double(), pad=(pad_amount, 0, 0, 0), mode='replicate') == 1.0)

        # Pass the embedded input and the encoder output into the decoder
        with torch.no_grad():
            out, _, _, _ = decoder(
                trg_embed=trg_embed,
                encoder_output=encoder_output,
                src_mask=src_mask,
                trg_mask=padding_mask,
            )

            if model.future_prediction != 0:
                # Cut to only the first frame prediction
                out = torch.cat((out[:, :, :out.shape[2] // model.future_prediction], out[:, :, -1:]), dim=2)

            if model.just_count_in:
                # If just counter in trg_input, concatenate counters of output
                ys = torch.cat([ys, out[:, -1:, -1:]], dim=1)
            else:
                # Add this frame prediction to the overall prediction
                ys = torch.cat([ys, out[:, -1:, :]], dim=1)

            # Add this next predicted frame to the full frame output
            ys_out = torch.cat([ys_out, out[:, -1:, :]], dim=1)

    return ys_out, None
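Stripped of the counter and masking details, the control flow above is a standard auto-regressive frame loop: seed with the BOS frame, decode one frame, append it, and feed the growing sequence back in. Below is a minimal, self-contained sketch of just that skeleton; `toy_decoder` is a hypothetical stand-in for the real transformer decoder.

    import torch

    def toy_decoder(prev_frames):
        # Stand-in for the real decoder: predict the next frame from
        # everything generated so far (here, simply their mean).
        return prev_frames.mean(dim=1, keepdim=True)

    trg_input = torch.randn(2, 6, 4)      # (batch, frames, joints); GT drives the timing
    ys = trg_input[:, :1, :]              # seed with the BOS frame, as in greedy()
    for _ in range(trg_input.shape[1]):
        out = toy_decoder(ys)             # predict one new frame
        ys = torch.cat([ys, out], dim=1)  # append it and feed it back in
    print(ys.shape)                       # torch.Size([2, 7, 4]): BOS + one frame per step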
-------------------------------------------------------------------------------- /code/generation-model/slt/.gitignore: --------------------------------------------------------------------------------

.idea/

*.pyc
-------------------------------------------------------------------------------- /code/generation-model/slt/README.md: --------------------------------------------------------------------------------
# Sign Language Transformers (CVPR'20)

This repo contains the training and evaluation code for the paper [Sign Language Transformers: Joint End-to-end Sign Language Recognition and Translation](https://www.cihancamgoz.com/pub/camgoz2020cvpr.pdf).

This code is based on [Joey NMT](https://github.com/joeynmt/joeynmt) but modified to realize joint continuous sign language recognition and translation. For text-to-text translation experiments, you can use the original Joey NMT framework.

## Requirements
* Download the feature files using the `data/download.sh` script.

* [Optional] Create a conda or python virtual environment.

* Install required packages using the `requirements.txt` file.

    `pip install -r requirements.txt`

## Usage

`python -m signjoey train configs/sign.yaml`

**Note:** the default data directory is `./data`. If you download the feature files somewhere else, you need to update the `data_path` parameter in your config file.

## ToDo:

- [X] *Initial code release.*
- [X] *Release image features for Phoenix2014T.*
- [ ] Share extensive qualitative and quantitative results & config files to generate them.
- [ ] (Nice to have) - Guide to set up conda environment and docker image.

## Reference

Please cite the paper below if you use this code in your research:

    @inproceedings{camgoz2020sign,
      author = {Necati Cihan Camgoz and Oscar Koller and Simon Hadfield and Richard Bowden},
      title = {Sign Language Transformers: Joint End-to-end Sign Language Recognition and Translation},
      booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
      year = {2020}
    }

## Acknowledgements
This work was funded by the SNSF Sinergia project "Scalable Multimodal Sign Language Technology for Sign Language Learning and Assessment" (SMILE) grant agreement number CRSII2 160811 and the European Union's Horizon2020 research and innovation programme under grant agreement no. 762021 (Content4All). This work reflects only the author's view and the Commission is not responsible for any use that may be made of the information it contains. We would also like to thank NVIDIA Corporation for their GPU grant.
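The same entry point also supports a `test` mode for evaluation (see `signjoey/__main__.py` further below); a typical invocation, with placeholder checkpoint and output paths, would be:

`python -m signjoey test configs/sign.yaml --ckpt <checkpoint> --output_path <output_prefix>`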
41 | -------------------------------------------------------------------------------- /code/generation-model/slt/base_annotations/check_base_file.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import pickle 3 | 4 | import shutil 5 | with gzip.open('../new_data/phoenix14t.yang.dev', 'rb') as f_in: 6 | loaded_object = pickle.load(f_in) 7 | print(type(loaded_object)) 8 | print(loaded_object[0]) 9 | sign_shape = loaded_object[0]['sign'].shape 10 | sign_shape = loaded_object[1]['sign'].shape 11 | print(sign_shape) -------------------------------------------------------------------------------- /code/generation-model/slt/base_annotations/file.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/file.txt -------------------------------------------------------------------------------- /code/generation-model/slt/base_annotations/out_dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/out_dev -------------------------------------------------------------------------------- /code/generation-model/slt/base_annotations/phoenix14t.dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/phoenix14t.dev -------------------------------------------------------------------------------- /code/generation-model/slt/base_annotations/phoenix14t.test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/phoenix14t.test -------------------------------------------------------------------------------- /code/generation-model/slt/base_annotations/phoenix14t.train: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/base_annotations/phoenix14t.train -------------------------------------------------------------------------------- /code/generation-model/slt/configs/sign copy.yaml: -------------------------------------------------------------------------------- 1 | name: sign_experiment 2 | data: 3 | data_path: ./new_data/ 4 | version: phoenix_2014_trans 5 | sgn: sign 6 | txt: text 7 | gls: gloss 8 | train: phoenix14t.yang.train 9 | dev: phoenix14t.yang.dev 10 | test: phoenix14t.yang.test 11 | feature_size: 150 12 | level: word 13 | txt_lowercase: true 14 | max_sent_length: 400 15 | random_train_subset: -1 16 | random_dev_subset: -1 17 | testing: 18 | recognition_beam_sizes: 19 | - 1 20 | - 2 21 | - 3 22 | - 4 23 | - 5 24 | - 6 25 | - 7 26 | - 8 27 | - 9 28 | - 10 29 | translation_beam_sizes: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | - 5 35 | - 6 36 | - 7 37 | - 8 38 | - 9 39 | - 10 40 | translation_beam_alphas: 41 | - -1 42 | - 0 43 | - 1 44 | - 2 45 | - 3 46 | - 4 47 | - 5 48 | training: 49 | reset_best_ckpt: false 50 | reset_scheduler: false 51 | 
reset_optimizer: false 52 | random_seed: 0 53 | model_dir: "./sign_sample_model_2layer4head" 54 | recognition_loss_weight: 1.0 55 | translation_loss_weight: 1.0 56 | eval_metric: bleu 57 | optimizer: adam 58 | learning_rate: 0.001 59 | batch_size: 32 60 | num_valid_log: 5 61 | epochs: 5000000 62 | early_stopping_metric: eval_metric 63 | batch_type: sentence 64 | translation_normalization: batch 65 | eval_recognition_beam_size: 1 66 | eval_translation_beam_size: 1 67 | eval_translation_beam_alpha: -1 68 | overwrite: true 69 | shuffle: true 70 | use_cuda: true 71 | translation_max_output_length: 30 72 | keep_last_ckpts: 1 73 | batch_multiplier: 1 74 | logging_freq: 100 75 | validation_freq: 500 76 | betas: 77 | - 0.9 78 | - 0.998 79 | scheduling: plateau 80 | learning_rate_min: 1.0e-07 81 | weight_decay: 0.001 82 | patience: 8 83 | decrease_factor: 0.7 84 | label_smoothing: 0.0 85 | model: 86 | initializer: xavier 87 | bias_initializer: zeros 88 | init_gain: 1.0 89 | embed_initializer: xavier 90 | embed_init_gain: 1.0 91 | tied_softmax: false 92 | encoder: 93 | type: transformer 94 | num_layers: 2 95 | num_heads: 4 96 | embeddings: 97 | embedding_dim: 128 98 | scale: false 99 | dropout: 0.1 100 | norm_type: batch 101 | activation_type: softsign 102 | hidden_size: 128 103 | ff_size: 1024 104 | dropout: 0.1 105 | decoder: 106 | type: transformer 107 | num_layers: 2 108 | num_heads: 4 109 | embeddings: 110 | embedding_dim: 128 111 | # embedding_dim: 128 112 | scale: false 113 | dropout: 0.1 114 | norm_type: batch 115 | activation_type: softsign 116 | hidden_size: 128 117 | ff_size: 1024 118 | dropout: 0.1 119 | -------------------------------------------------------------------------------- /code/generation-model/slt/configs/sign.yaml: -------------------------------------------------------------------------------- 1 | name: sign_experiment 2 | data: 3 | data_path: ./new_data/ 4 | version: phoenix_2014_trans 5 | sgn: sign 6 | txt: text 7 | gls: gloss 8 | train: phoenix14t.yang.train 9 | dev: phoenix14t.yang.dev 10 | test: phoenix14t.yang.test 11 | feature_size: 150 12 | level: word 13 | txt_lowercase: true 14 | max_sent_length: 400 15 | random_train_subset: -1 16 | random_dev_subset: -1 17 | testing: 18 | recognition_beam_sizes: 19 | - 1 20 | - 2 21 | - 3 22 | - 4 23 | - 5 24 | - 6 25 | - 7 26 | - 8 27 | - 9 28 | - 10 29 | translation_beam_sizes: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | - 5 35 | - 6 36 | - 7 37 | - 8 38 | - 9 39 | - 10 40 | translation_beam_alphas: 41 | - -1 42 | - 0 43 | - 1 44 | - 2 45 | - 3 46 | - 4 47 | training: 48 | reset_best_ckpt: false 49 | reset_scheduler: false 50 | reset_optimizer: false 51 | random_seed: 0 52 | model_dir: "./sign_sample_model" 53 | recognition_loss_weight: 1.0 54 | translation_loss_weight: 1.0 55 | eval_metric: bleu 56 | optimizer: adam 57 | learning_rate: 0.001 58 | batch_size: 16 59 | num_valid_log: 5 60 | epochs: 5000000 61 | early_stopping_metric: eval_metric 62 | batch_type: sentence 63 | translation_normalization: batch 64 | eval_recognition_beam_size: 1 65 | eval_translation_beam_size: 1 66 | eval_translation_beam_alpha: -1 67 | overwrite: true 68 | shuffle: true 69 | use_cuda: true 70 | translation_max_output_length: 30 71 | keep_last_ckpts: 1 72 | batch_multiplier: 1 73 | logging_freq: 100 74 | validation_freq: 500 75 | betas: 76 | - 0.9 77 | - 0.998 78 | scheduling: plateau 79 | learning_rate_min: 1.0e-07 80 | weight_decay: 0.001 81 | patience: 8 82 | decrease_factor: 0.7 83 | label_smoothing: 0.0 84 | model: 85 | initializer: xavier 86 | 
bias_initializer: zeros 87 | init_gain: 1.0 88 | embed_initializer: xavier 89 | embed_init_gain: 1.0 90 | tied_softmax: false 91 | encoder: 92 | type: transformer 93 | num_layers: 1 94 | num_heads: 2 95 | embeddings: 96 | embedding_dim: 128 97 | scale: false 98 | dropout: 0.1 99 | norm_type: batch 100 | activation_type: softsign 101 | hidden_size: 128 102 | ff_size: 2048 103 | dropout: 0.1 104 | decoder: 105 | type: transformer 106 | num_layers: 1 107 | num_heads: 2 108 | embeddings: 109 | embedding_dim: 128 110 | # embedding_dim: 128 111 | scale: false 112 | dropout: 0.1 113 | norm_type: batch 114 | activation_type: softsign 115 | hidden_size: 128 116 | ff_size: 2048 117 | dropout: 0.1 118 | -------------------------------------------------------------------------------- /code/generation-model/slt/configs/sign_train_gloss_before_no_repeat.yaml: -------------------------------------------------------------------------------- 1 | name: sign_experiment 2 | data: 3 | data_path: ./All_data/train_gloss_before_no_repeat/ 4 | version: phoenix_2014_trans 5 | sgn: sign 6 | txt: text 7 | gls: gloss 8 | train: phoenix14t.yang.train 9 | dev: phoenix14t.yang.dev 10 | test: phoenix14t.yang.test 11 | feature_size: 150 12 | level: word 13 | txt_lowercase: true 14 | max_sent_length: 400 15 | random_train_subset: -1 16 | random_dev_subset: -1 17 | testing: 18 | recognition_beam_sizes: 19 | - 1 20 | - 2 21 | - 3 22 | - 4 23 | - 5 24 | - 6 25 | - 7 26 | - 8 27 | - 9 28 | - 10 29 | translation_beam_sizes: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | - 5 35 | - 6 36 | - 7 37 | - 8 38 | - 9 39 | - 10 40 | translation_beam_alphas: 41 | - -1 42 | - 0 43 | - 1 44 | - 2 45 | - 3 46 | - 4 47 | - 5 48 | training: 49 | reset_best_ckpt: false 50 | reset_scheduler: false 51 | reset_optimizer: false 52 | random_seed: 0 53 | model_dir: "./sign_sample_model" 54 | recognition_loss_weight: 1.0 55 | translation_loss_weight: 1.0 56 | eval_metric: bleu 57 | optimizer: adam 58 | learning_rate: 0.001 59 | batch_size: 32 60 | num_valid_log: 5 61 | epochs: 5000000 62 | early_stopping_metric: eval_metric 63 | batch_type: sentence 64 | translation_normalization: batch 65 | eval_recognition_beam_size: 1 66 | eval_translation_beam_size: 1 67 | eval_translation_beam_alpha: -1 68 | overwrite: true 69 | shuffle: true 70 | use_cuda: true 71 | translation_max_output_length: 30 72 | keep_last_ckpts: 1 73 | batch_multiplier: 1 74 | logging_freq: 100 75 | validation_freq: 500 76 | betas: 77 | - 0.9 78 | - 0.998 79 | scheduling: plateau 80 | learning_rate_min: 1.0e-07 81 | weight_decay: 0.001 82 | patience: 8 83 | decrease_factor: 0.7 84 | label_smoothing: 0.0 85 | model: 86 | initializer: xavier 87 | bias_initializer: zeros 88 | init_gain: 1.0 89 | embed_initializer: xavier 90 | embed_init_gain: 1.0 91 | tied_softmax: false 92 | encoder: 93 | type: transformer 94 | num_layers: 1 95 | num_heads: 2 96 | embeddings: 97 | embedding_dim: 128 98 | scale: false 99 | dropout: 0.1 100 | norm_type: batch 101 | activation_type: softsign 102 | hidden_size: 128 103 | ff_size: 2048 104 | dropout: 0.1 105 | decoder: 106 | type: transformer 107 | num_layers: 1 108 | num_heads: 2 109 | embeddings: 110 | embedding_dim: 128 111 | # embedding_dim: 128 112 | scale: false 113 | dropout: 0.1 114 | norm_type: batch 115 | activation_type: softsign 116 | hidden_size: 128 117 | ff_size: 2048 118 | dropout: 0.1 119 | -------------------------------------------------------------------------------- /code/generation-model/slt/configs/sign_train_gloss_between_no_repeat.yaml: 
-------------------------------------------------------------------------------- 1 | name: sign_experiment 2 | data: 3 | data_path: ./All_data/train_gloss_between_no_repeat/ 4 | version: phoenix_2014_trans 5 | sgn: sign 6 | txt: text 7 | gls: gloss 8 | train: phoenix14t.yang.train 9 | dev: phoenix14t.yang.dev 10 | test: phoenix14t.yang.test 11 | feature_size: 150 12 | level: word 13 | txt_lowercase: true 14 | max_sent_length: 400 15 | random_train_subset: -1 16 | random_dev_subset: -1 17 | testing: 18 | recognition_beam_sizes: 19 | - 1 20 | - 2 21 | - 3 22 | - 4 23 | - 5 24 | - 6 25 | - 7 26 | - 8 27 | - 9 28 | - 10 29 | translation_beam_sizes: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | - 5 35 | - 6 36 | - 7 37 | - 8 38 | - 9 39 | - 10 40 | translation_beam_alphas: 41 | - -1 42 | - 0 43 | - 1 44 | - 2 45 | - 3 46 | - 4 47 | - 5 48 | training: 49 | reset_best_ckpt: false 50 | reset_scheduler: false 51 | reset_optimizer: false 52 | random_seed: 0 53 | model_dir: "./sign_sample_model" 54 | recognition_loss_weight: 1.0 55 | translation_loss_weight: 1.0 56 | eval_metric: bleu 57 | optimizer: adam 58 | learning_rate: 0.001 59 | batch_size: 32 60 | num_valid_log: 5 61 | epochs: 5000000 62 | early_stopping_metric: eval_metric 63 | batch_type: sentence 64 | translation_normalization: batch 65 | eval_recognition_beam_size: 1 66 | eval_translation_beam_size: 1 67 | eval_translation_beam_alpha: -1 68 | overwrite: true 69 | shuffle: true 70 | use_cuda: true 71 | translation_max_output_length: 30 72 | keep_last_ckpts: 1 73 | batch_multiplier: 1 74 | logging_freq: 100 75 | validation_freq: 500 76 | betas: 77 | - 0.9 78 | - 0.998 79 | scheduling: plateau 80 | learning_rate_min: 1.0e-07 81 | weight_decay: 0.001 82 | patience: 8 83 | decrease_factor: 0.7 84 | label_smoothing: 0.0 85 | model: 86 | initializer: xavier 87 | bias_initializer: zeros 88 | init_gain: 1.0 89 | embed_initializer: xavier 90 | embed_init_gain: 1.0 91 | tied_softmax: false 92 | encoder: 93 | type: transformer 94 | num_layers: 1 95 | num_heads: 2 96 | embeddings: 97 | embedding_dim: 128 98 | scale: false 99 | dropout: 0.1 100 | norm_type: batch 101 | activation_type: softsign 102 | hidden_size: 128 103 | ff_size: 2048 104 | dropout: 0.1 105 | decoder: 106 | type: transformer 107 | num_layers: 1 108 | num_heads: 2 109 | embeddings: 110 | embedding_dim: 128 111 | # embedding_dim: 128 112 | scale: false 113 | dropout: 0.1 114 | norm_type: batch 115 | activation_type: softsign 116 | hidden_size: 128 117 | ff_size: 2048 118 | dropout: 0.1 119 | -------------------------------------------------------------------------------- /code/generation-model/slt/configs/sign_train_gloss_between_yes_repeat.yaml: -------------------------------------------------------------------------------- 1 | name: sign_experiment 2 | data: 3 | data_path: ./All_data/train_gloss_between_yes_repeat/ 4 | version: phoenix_2014_trans 5 | sgn: sign 6 | txt: text 7 | gls: gloss 8 | train: phoenix14t.yang.train 9 | dev: phoenix14t.yang.dev 10 | test: phoenix14t.yang.test 11 | feature_size: 150 12 | level: word 13 | txt_lowercase: true 14 | max_sent_length: 400 15 | random_train_subset: -1 16 | random_dev_subset: -1 17 | testing: 18 | recognition_beam_sizes: 19 | - 1 20 | - 2 21 | - 3 22 | - 4 23 | - 5 24 | - 6 25 | - 7 26 | - 8 27 | - 9 28 | - 10 29 | translation_beam_sizes: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | - 5 35 | - 6 36 | - 7 37 | - 8 38 | - 9 39 | - 10 40 | translation_beam_alphas: 41 | - -1 42 | - 0 43 | - 1 44 | - 2 45 | - 3 46 | - 4 47 | - 5 48 | training: 49 | 
reset_best_ckpt: false 50 | reset_scheduler: false 51 | reset_optimizer: false 52 | random_seed: 0 53 | model_dir: "./sign_sample_model" 54 | recognition_loss_weight: 1.0 55 | translation_loss_weight: 1.0 56 | eval_metric: bleu 57 | optimizer: adam 58 | learning_rate: 0.001 59 | batch_size: 32 60 | num_valid_log: 5 61 | epochs: 5000000 62 | early_stopping_metric: eval_metric 63 | batch_type: sentence 64 | translation_normalization: batch 65 | eval_recognition_beam_size: 1 66 | eval_translation_beam_size: 1 67 | eval_translation_beam_alpha: -1 68 | overwrite: true 69 | shuffle: true 70 | use_cuda: true 71 | translation_max_output_length: 30 72 | keep_last_ckpts: 1 73 | batch_multiplier: 1 74 | logging_freq: 100 75 | validation_freq: 500 76 | betas: 77 | - 0.9 78 | - 0.998 79 | scheduling: plateau 80 | learning_rate_min: 1.0e-07 81 | weight_decay: 0.001 82 | patience: 8 83 | decrease_factor: 0.7 84 | label_smoothing: 0.0 85 | model: 86 | initializer: xavier 87 | bias_initializer: zeros 88 | init_gain: 1.0 89 | embed_initializer: xavier 90 | embed_init_gain: 1.0 91 | tied_softmax: false 92 | encoder: 93 | type: transformer 94 | num_layers: 1 95 | num_heads: 2 96 | embeddings: 97 | embedding_dim: 128 98 | scale: false 99 | dropout: 0.1 100 | norm_type: batch 101 | activation_type: softsign 102 | hidden_size: 128 103 | ff_size: 2048 104 | dropout: 0.1 105 | decoder: 106 | type: transformer 107 | num_layers: 1 108 | num_heads: 2 109 | embeddings: 110 | embedding_dim: 128 111 | # embedding_dim: 128 112 | scale: false 113 | dropout: 0.1 114 | norm_type: batch 115 | activation_type: softsign 116 | hidden_size: 128 117 | ff_size: 2048 118 | dropout: 0.1 119 | -------------------------------------------------------------------------------- /code/generation-model/slt/configs/sign_train_gloss_joint_no_repeat.yaml: -------------------------------------------------------------------------------- 1 | name: sign_experiment 2 | data: 3 | data_path: ./All_data/train_gloss_joint_no_repeat/ 4 | version: phoenix_2014_trans 5 | sgn: sign 6 | txt: text 7 | gls: gloss 8 | train: phoenix14t.yang.train 9 | dev: phoenix14t.yang.dev 10 | test: phoenix14t.yang.test 11 | feature_size: 150 12 | level: word 13 | txt_lowercase: true 14 | max_sent_length: 400 15 | random_train_subset: -1 16 | random_dev_subset: -1 17 | testing: 18 | recognition_beam_sizes: 19 | - 1 20 | - 2 21 | - 3 22 | - 4 23 | - 5 24 | - 6 25 | - 7 26 | - 8 27 | - 9 28 | - 10 29 | translation_beam_sizes: 30 | - 1 31 | - 2 32 | - 3 33 | - 4 34 | - 5 35 | - 6 36 | - 7 37 | - 8 38 | - 9 39 | - 10 40 | translation_beam_alphas: 41 | - -1 42 | - 0 43 | - 1 44 | - 2 45 | - 3 46 | - 4 47 | - 5 48 | training: 49 | reset_best_ckpt: false 50 | reset_scheduler: false 51 | reset_optimizer: false 52 | random_seed: 0 53 | model_dir: "./sign_sample_model" 54 | recognition_loss_weight: 1.0 55 | translation_loss_weight: 1.0 56 | eval_metric: bleu 57 | optimizer: adam 58 | learning_rate: 0.001 59 | batch_size: 32 60 | num_valid_log: 5 61 | epochs: 5000000 62 | early_stopping_metric: eval_metric 63 | batch_type: sentence 64 | translation_normalization: batch 65 | eval_recognition_beam_size: 1 66 | eval_translation_beam_size: 1 67 | eval_translation_beam_alpha: -1 68 | overwrite: true 69 | shuffle: true 70 | use_cuda: true 71 | translation_max_output_length: 30 72 | keep_last_ckpts: 1 73 | batch_multiplier: 1 74 | logging_freq: 100 75 | validation_freq: 500 76 | betas: 77 | - 0.9 78 | - 0.998 79 | scheduling: plateau 80 | learning_rate_min: 1.0e-07 81 | weight_decay: 0.001 
82 | patience: 8 83 | decrease_factor: 0.7 84 | label_smoothing: 0.0 85 | model: 86 | initializer: xavier 87 | bias_initializer: zeros 88 | init_gain: 1.0 89 | embed_initializer: xavier 90 | embed_init_gain: 1.0 91 | tied_softmax: false 92 | encoder: 93 | type: transformer 94 | num_layers: 1 95 | num_heads: 2 96 | embeddings: 97 | embedding_dim: 128 98 | scale: false 99 | dropout: 0.1 100 | norm_type: batch 101 | activation_type: softsign 102 | hidden_size: 128 103 | ff_size: 2048 104 | dropout: 0.1 105 | decoder: 106 | type: transformer 107 | num_layers: 1 108 | num_heads: 2 109 | embeddings: 110 | embedding_dim: 128 111 | # embedding_dim: 128 112 | scale: false 113 | dropout: 0.1 114 | norm_type: batch 115 | activation_type: softsign 116 | hidden_size: 128 117 | ff_size: 2048 118 | dropout: 0.1 119 | -------------------------------------------------------------------------------- /code/generation-model/slt/experiment_results/dev.ph14t: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /code/generation-model/slt/experiment_results/results.md: -------------------------------------------------------------------------------- 1 | TODO: Add table of results with the latest code. -------------------------------------------------------------------------------- /code/generation-model/slt/experiment_results/test.ph14t: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /code/generation-model/slt/output/output_gold.dev_results.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/output/output_gold.dev_results.pkl -------------------------------------------------------------------------------- /code/generation-model/slt/output/output_gold.test_results.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/output/output_gold.test_results.pkl -------------------------------------------------------------------------------- /code/generation-model/slt/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.9.0 2 | asn1crypto==1.2.0 3 | astor==0.8.1 4 | attrs==19.3.0 5 | cachetools==4.0.0 6 | certifi==2019.11.28 7 | cffi==1.13.2 8 | chardet==3.0.4 9 | cryptography==2.8 10 | cycler==0.10.0 11 | gast==0.2.2 12 | google-auth==1.10.2 13 | google-auth-oauthlib==0.4.1 14 | google-pasta==0.1.8 15 | grpcio==1.26.0 16 | h5py==2.10.0 17 | idna==2.8 18 | importlib-metadata==1.5.0 19 | joblib==0.14.1 20 | Keras-Applications==1.0.8 21 | Keras-Preprocessing==1.1.0 22 | keyboard==0.13.5 23 | kiwisolver==1.1.0 24 | Markdown==3.1.1 25 | matplotlib==3.1.2 26 | more-itertools==8.2.0 27 | natsort==7.0.1 28 | numpy==1.18.1 29 | oauthlib==3.1.0 30 | opencv-python==4.2.0.32 31 | opt-einsum==3.1.0 32 | packaging==20.3 33 | pandas==1.0.3 34 | Pillow==7.1.0 35 | plotly==4.5.0 36 | pluggy==0.13.1 37 | portalocker==1.5.2 38 | protobuf==3.11.2 39 | py==1.8.1 40 | pyasn1==0.4.8 41 | pyasn1-modules==0.2.8 42 | pycosat==0.6.3 43 | pycparser==2.19 44 | pyOpenSSL==19.1.0 45 | pyparsing==2.4.6 46 | 
PySocks==1.7.1 47 | pytest==5.4.0 48 | python-dateutil==2.8.1 49 | pytz==2019.3 50 | PyYAML==5.3 51 | requests==2.22.0 52 | requests-oauthlib==1.3.0 53 | retrying==1.3.3 54 | rsa==4.0 55 | ruamel-yaml==0.15.87 56 | sacrebleu==1.4.4 57 | scikit-learn==0.22.2.post1 58 | scipy==1.4.1 59 | sentencepiece==0.1.85 60 | six==1.13.0 61 | tensorboard==2.1.1 62 | tensorflow==2.1.2 63 | tensorflow-estimator==2.1.0 64 | termcolor==1.1.0 65 | torch==1.4.0 66 | torchtext==0.5.0 67 | torchvision==0.5.0 68 | tqdm==4.40.2 69 | typing==3.7.4.1 70 | urllib3==1.25.7 71 | warmup-scheduler==0.3 72 | wcwidth==0.1.8 73 | Werkzeug==0.16.0 74 | wrapt==1.11.2 75 | xmltodict==0.12.0 76 | zipp==3.1.0 77 | -------------------------------------------------------------------------------- /code/generation-model/slt/signjoey/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merterm/Modeling-Intensification-for-SLG/800fff3d3c7bacc86c1db8382f7c2e68d2f0c074/code/generation-model/slt/signjoey/__init__.py -------------------------------------------------------------------------------- /code/generation-model/slt/signjoey/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import sys 5 | from signjoey.training import train 6 | from signjoey.prediction import test 7 | 8 | sys.path.append("/vol/research/extol/personal/cihan/code/SignJoey") 9 | 10 | 11 | def main(): 12 | ap = argparse.ArgumentParser("Joey NMT") 13 | 14 | ap.add_argument("mode", choices=["train", "test"], help="train a model or test") 15 | 16 | ap.add_argument("config_path", type=str, help="path to YAML config file") 17 | 18 | ap.add_argument("--ckpt", type=str, help="checkpoint for prediction") 19 | 20 | ap.add_argument( 21 | "--output_path", type=str, help="path for saving translation output" 22 | ) 23 | ap.add_argument("--gpu_id", type=str, default="0", help="gpu to run your job on") 24 | args = ap.parse_args() 25 | 26 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id 27 | 28 | if args.mode == "train": 29 | train(cfg_file=args.config_path) 30 | elif args.mode == "test": 31 | test(cfg_file=args.config_path, ckpt=args.ckpt, output_path=args.output_path) 32 | else: 33 | raise ValueError("Unknown mode") 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /code/generation-model/slt/signjoey/dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Data module 4 | """ 5 | from torchtext import data 6 | from torchtext.data import Field, RawField 7 | from typing import List, Tuple 8 | import pickle 9 | import gzip 10 | import torch 11 | 12 | 13 | def load_dataset_file(filename): 14 | with gzip.open(filename, "rb") as f: 15 | loaded_object = pickle.load(f) 16 | return loaded_object 17 | 18 | 19 | class SignTranslationDataset(data.Dataset): 20 | """Defines a dataset for machine translation.""" 21 | 22 | @staticmethod 23 | def sort_key(ex): 24 | return data.interleave_keys(len(ex.sgn), len(ex.txt)) 25 | 26 | def __init__( 27 | self, 28 | path: str, 29 | fields: Tuple[RawField, RawField, Field, Field, Field], 30 | **kwargs 31 | ): 32 | """Create a SignTranslationDataset given paths and fields. 33 | 34 | Arguments: 35 | path: Common prefix of paths to the data files for both languages. 36 | exts: A tuple containing the extension to path for each language. 
            fields: A tuple containing the fields that will be used for data
                in each language.
        Remaining keyword arguments: Passed to the constructor of
            data.Dataset.
        """
        if not isinstance(fields[0], (tuple, list)):
            fields = [
                ("sequence", fields[0]),
                ("signer", fields[1]),
                ("sgn", fields[2]),
                ("gls", fields[3]),
                ("txt", fields[4]),
            ]

        if not isinstance(path, list):
            path = [path]

        samples = {}
        for annotation_file in path:
            tmp = load_dataset_file(annotation_file)
            for s in tmp:
                seq_id = s["name"]
                if seq_id in samples:
                    assert samples[seq_id]["name"] == s["name"]
                    assert samples[seq_id]["signer"] == s["signer"]
                    assert samples[seq_id]["gloss"] == s["gloss"]
                    assert samples[seq_id]["text"] == s["text"]
                    samples[seq_id]["sign"] = torch.cat(
                        [samples[seq_id]["sign"], s["sign"]], axis=1
                    )
                else:
                    samples[seq_id] = {
                        "name": s["name"],
                        "signer": s["signer"],
                        "gloss": s["gloss"],
                        "text": s["text"],
                        "sign": s["sign"],
                    }

        examples = []
        for s in samples:
            sample = samples[s]

            examples.append(
                data.Example.fromlist(
                    [
                        sample["name"],
                        sample["signer"],
                        # This is for numerical stability
                        sample["sign"],
                        sample["gloss"].strip(),
                        sample["text"].strip(),
                    ],
                    fields,
                )
            )
            # print(sample["gloss"].strip())

        super().__init__(examples, fields, **kwargs)
-------------------------------------------------------------------------------- /code/generation-model/slt/signjoey/external_metrics/mscoco_rouge.py: --------------------------------------------------------------------------------
#!/usr/bin/env python
#
# File Name : mscoco_rouge.py
#
# Description : Computes ROUGE-L metric as described by Lin and Hovy (2004)
#
# Creation Date : 2015-01-07 06:03
# Author : Ramakrishna Vedantam


def my_lcs(string, sub):
    """
    Calculates longest common subsequence for a pair of tokenized strings
    :param string : list of str : tokens from a string split using whitespace
    :param sub : list of str : shorter string, also split using whitespace
    :returns: length (int): length of the longest common subsequence between the two strings

    Note: my_lcs only gives the length of the longest common subsequence, not the actual LCS
    """
    if len(string) < len(sub):
        sub, string = string, sub

    lengths = [[0 for i in range(0, len(sub) + 1)] for j in range(0, len(string) + 1)]

    for j in range(1, len(sub) + 1):
        for i in range(1, len(string) + 1):
            if string[i - 1] == sub[j - 1]:
                lengths[i][j] = lengths[i - 1][j - 1] + 1
            else:
                lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1])

    return lengths[len(string)][len(sub)]


def calc_score(hypotheses, references, beta=1.2):
    """
    Compute ROUGE-L score given one candidate and references for an image
    :param hypotheses: list of str : single candidate sentence to be evaluated
    :param references: list of str : COCO reference sentences for the particular image to be evaluated
    :returns score: float (ROUGE-L score for the candidate evaluated against references)
    """
    assert len(hypotheses) == 1
    assert len(references) > 0
    prec = []
    rec = []

    # split into tokens
    token_c = hypotheses[0].split(" ")

    for reference in references:
        #
split into tokens 52 | token_r = reference.split(" ") 53 | # compute the longest common subsequence 54 | lcs = my_lcs(token_r, token_c) 55 | prec.append(lcs / float(len(token_c))) 56 | rec.append(lcs / float(len(token_r))) 57 | 58 | prec_max = max(prec) 59 | rec_max = max(rec) 60 | 61 | if prec_max != 0 and rec_max != 0: 62 | score = ((1 + beta ** 2) * prec_max * rec_max) / float( 63 | rec_max + beta ** 2 * prec_max 64 | ) 65 | else: 66 | score = 0.0 67 | return score 68 | -------------------------------------------------------------------------------- /code/generation-model/slt/signjoey/loss.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Module to implement training loss 4 | """ 5 | 6 | import torch 7 | from torch import nn, Tensor 8 | from torch.autograd import Variable 9 | 10 | 11 | class XentLoss(nn.Module): 12 | """ 13 | Cross-Entropy Loss with optional label smoothing 14 | """ 15 | 16 | def __init__(self, pad_index: int, smoothing: float = 0.0): 17 | super(XentLoss, self).__init__() 18 | self.smoothing = smoothing 19 | self.pad_index = pad_index 20 | if self.smoothing <= 0.0: 21 | # standard xent loss 22 | self.criterion = nn.NLLLoss(ignore_index=self.pad_index, reduction="sum") 23 | else: 24 | # custom label-smoothed loss, computed with KL divergence loss 25 | self.criterion = nn.KLDivLoss(reduction="sum") 26 | 27 | def _smooth_targets(self, targets: Tensor, vocab_size: int): 28 | """ 29 | Smooth target distribution. All non-reference words get uniform 30 | probability mass according to "smoothing". 31 | 32 | :param targets: target indices, batch*seq_len 33 | :param vocab_size: size of the output vocabulary 34 | :return: smoothed target distributions, batch*seq_len x vocab_size 35 | """ 36 | # batch*seq_len x vocab_size 37 | smooth_dist = targets.new_zeros((targets.size(0), vocab_size)).float() 38 | # fill distribution uniformly with smoothing 39 | smooth_dist.fill_(self.smoothing / (vocab_size - 2)) 40 | # assign true label the probability of 1-smoothing ("confidence") 41 | smooth_dist.scatter_(1, targets.unsqueeze(1).data, 1.0 - self.smoothing) 42 | # give padding probability of 0 everywhere 43 | smooth_dist[:, self.pad_index] = 0 44 | # masking out padding area (sum of probabilities for padding area = 0) 45 | padding_positions = torch.nonzero(targets.data == self.pad_index) 46 | # pylint: disable=len-as-condition 47 | if len(padding_positions) > 0: 48 | smooth_dist.index_fill_(0, padding_positions.squeeze(), 0.0) 49 | return Variable(smooth_dist, requires_grad=False) 50 | 51 | # pylint: disable=arguments-differ 52 | def forward(self, log_probs, targets): 53 | """ 54 | Compute the cross-entropy between logits and targets. 55 | 56 | If label smoothing is used, target distributions are not one-hot, but 57 | "1-smoothing" for the correct target token and the rest of the 58 | probability mass is uniformly spread across the other tokens. 
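        A worked example (illustrative): with smoothing = 0.1 and a vocabulary of
        size 5, each smoothed target row assigns 1 - 0.1 = 0.9 to the reference
        token, 0 to the pad token, and 0.1 / (5 - 2) ≈ 0.033 to each of the three
        remaining tokens, so every row still sums to 1 (see _smooth_targets above).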

        :param log_probs: log probabilities as predicted by model
        :param targets: target indices
        :return:
        """
        if self.smoothing > 0:
            targets = self._smooth_targets(
                targets=targets.contiguous().view(-1), vocab_size=log_probs.size(-1)
            )
            # targets: distributions with batch*seq_len x vocab_size
            assert (
                log_probs.contiguous().view(-1, log_probs.size(-1)).shape
                == targets.shape
            )
        else:
            # targets: indices with batch*seq_len
            targets = targets.contiguous().view(-1)
        loss = self.criterion(
            log_probs.contiguous().view(-1, log_probs.size(-1)), targets
        )
        return loss
-------------------------------------------------------------------------------- /code/gloss-tagger-model/README.md: --------------------------------------------------------------------------------
### The trained model for gloss tagging will be saved in this directory.
-------------------------------------------------------------------------------- /data/README.MD: --------------------------------------------------------------------------------
#### gloss-annotation.tsv:
This file contains the manual intensity annotations for glosses from the PHOENIX 14T dataset.

#### train.full, dev.full, test.full:
These tab-separated files contain the manual annotations along with tags predicted by our classifier.
The first column denotes whether the labels are manual annotations or were predicted by the classifier.
The second column contains the gloss data of the PHOENIX 14T dataset infused with the intensification labels. Gloss tokens that are predicted/annotated to have lower intensity are surrounded by an opening and a closing low-intensity tag; tokens with higher intensity are surrounded similarly by high-intensity tags. Tokens that do not have any associated intensity are not surrounded by anything.
Example:
1. $manual-annotation$ WOLKE LOCH SPEZIELL NORDWEST
   - This means the intensity labels for this instance are manually annotated. The token WOLKE has high intensity; the other tokens have no intensity.
2. $tagged$ SAMSTAG AUCH FREUNDLICH
   - This means the intensity labels for this instance were tagged by our classifier. The token FREUNDLICH has low intensity; the other tokens have no intensity.
--------------------------------------------------------------------------------
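A minimal reader for these files might look like the following sketch. The helper `read_full_file` is hypothetical, it assumes the tab separation described above, and the intensity tag strings are left inline in the gloss column (substitute whatever markers the data files actually use when post-processing them).

    # Hypothetical reader for train.full / dev.full / test.full.
    def read_full_file(path):
        rows = []
        with open(path, encoding="utf-8") as f:
            for line in f:
                if not line.strip():
                    continue
                source, gloss = line.rstrip("\n").split("\t", 1)
                rows.append({
                    "manual": source == "$manual-annotation$",  # otherwise "$tagged$"
                    "gloss": gloss,  # gloss tokens with inline intensity tags
                })
        return rows

    for row in read_full_file("data/train.full")[:3]:
        print(row["manual"], row["gloss"])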