├── .gitignore
├── LICENSE
├── README.md
├── benchmarks
    ├── CMLR
    │   ├── labels
    │   │   └── test.ref
    │   ├── language_models
    │   │   └── README.md
    │   └── models
    │   │   └── README.md
    ├── CMUMOSEAS
    │   ├── labels
    │   │   ├── es
    │   │   │   ├── test.ref
    │   │   │   └── train.ref
    │   │   ├── fr
    │   │   │   ├── test.ref
    │   │   │   └── train.ref
    │   │   └── pt
    │   │   │   ├── test.ref
    │   │   │   └── train.ref
    │   ├── language_models
    │   │   ├── es
    │   │   │   └── README.md
    │   │   ├── fr
    │   │   │   └── README.md
    │   │   └── pt
    │   │   │   └── README.md
    │   └── models
    │   │   ├── es
    │   │       └── README.md
    │   │   ├── fr
    │   │       └── README.md
    │   │   └── pt
    │   │       └── README.md
    ├── GRID
    │   ├── labels
    │   │   ├── overlapped_test.ref
    │   │   ├── overlapped_train.ref
    │   │   ├── unseen_test.ref
    │   │   └── unseen_train.ref
    │   └── models
    │   │   └── README.md
    ├── LRS2
    │   ├── labels
    │   │   └── test.ref
    │   ├── language_models
    │   │   └── README.md
    │   └── models
    │   │   └── README.md
    ├── LRS3
    │   ├── labels
    │   │   └── test.ref
    │   ├── language_models
    │   │   └── README.md
    │   └── models
    │   │   └── README.md
    ├── LombardGRID
    │   ├── labels
    │   │   ├── unseen_fp_test.ref
    │   │   ├── unseen_fp_train.ref
    │   │   ├── unseen_fp_valid.ref
    │   │   ├── unseen_sp_test.ref
    │   │   ├── unseen_sp_train.ref
    │   │   └── unseen_sp_valid.ref
    │   └── models
    │   │   └── README.md
    ├── MultilingualTEDx
    │   └── labels
    │   │   ├── README.md
    │   │   ├── es
    │   │       ├── statistics
    │   │       ├── test.txt
    │   │       ├── train.txt
    │   │       └── valid.txt
    │   │   ├── fr
    │   │       ├── statistics
    │   │       ├── test.txt
    │   │       ├── train.txt
    │   │       └── valid.txt
    │   │   ├── it
    │   │       ├── statistics
    │   │       ├── test.txt
    │   │       ├── train.txt
    │   │       └── valid.txt
    │   │   └── pt
    │   │       ├── statistics
    │   │       ├── test.txt
    │   │       ├── train.txt
    │   │       └── valid.txt
    └── TCDTIMIT
    │   ├── labels
    │       ├── overlapped_test.ref
    │       ├── overlapped_train.ref
    │       ├── unseen_test.ref
    │       └── unseen_train.ref
    │   ├── language_models
    │       └── README.md
    │   └── models
    │       └── README.md
├── configs
    ├── CMLR_V_WER8.0.ini
    ├── CMUMOSEAS_V_ES_WER44.5.ini
    ├── CMUMOSEAS_V_FR_WER58.6.ini
    ├── CMUMOSEAS_V_PT_WER51.4.ini
    ├── GRID_V_WER1.2.ini
    ├── GRID_V_WER4.8.ini
    ├── LRS2_V_WER26.1.ini
    ├── LRS3_AV_WER0.9.ini
    ├── LRS3_A_WER1.0.ini
    ├── LRS3_V_WER19.1.ini
    ├── LRS3_V_WER32.3.ini
    ├── LombardGRID_V_WER4.9.ini
    ├── LombardGRID_V_WER8.0.ini
    ├── TCDTIMIT_V_WER16.9.ini
    └── TCDTIMIT_V_WER21.8.ini
├── crop_mouth.py
├── doc
    ├── lip_white.png
    ├── vsr_1.gif
    └── vsr_2.gif
├── espnet
    ├── asr
    │   └── asr_utils.py
    ├── nets
    │   ├── batch_beam_search.py
    │   ├── beam_search.py
    │   ├── ctc_prefix_score.py
    │   ├── e2e_asr_common.py
    │   ├── lm_interface.py
    │   ├── pytorch_backend
    │   │   ├── backbones
    │   │   │   ├── conv1d_extractor.py
    │   │   │   ├── conv3d_extractor.py
    │   │   │   └── modules
    │   │   │   │   ├── resnet.py
    │   │   │   │   ├── resnet1d.py
    │   │   │   │   └── shufflenetv2.py
    │   │   ├── ctc.py
    │   │   ├── e2e_asr_transformer.py
    │   │   ├── e2e_asr_transformer_av.py
    │   │   ├── lm
    │   │   │   ├── __init__.py
    │   │   │   ├── default.py
    │   │   │   ├── seq_rnn.py
    │   │   │   └── transformer.py
    │   │   ├── nets_utils.py
    │   │   └── transformer
    │   │   │   ├── __init__.py
    │   │   │   ├── add_sos_eos.py
    │   │   │   ├── attention.py
    │   │   │   ├── convolution.py
    │   │   │   ├── decoder.py
    │   │   │   ├── decoder_layer.py
    │   │   │   ├── embedding.py
    │   │   │   ├── encoder.py
    │   │   │   ├── encoder_layer.py
    │   │   │   ├── label_smoothing_loss.py
    │   │   │   ├── layer_norm.py
    │   │   │   ├── mask.py
    │   │   │   ├── multi_layer_conv.py
    │   │   │   ├── optimizer.py
    │   │   │   ├── plot.py
    │   │   │   ├── positionwise_feed_forward.py
    │   │   │   ├── raw_embeddings.py
    │   │   │   ├── repeat.py
    │   │   │   └── subsampling.py
    │   ├── scorer_interface.py
    │   └── scorers
    │   │   ├── __init__.py
    │   │   ├── ctc.py
    │   │   └── length_bonus.py
    └── utils
    │   ├── cli_utils.py
    │   ├── dynamic_import.py
    │   └── fill_missing_args.py
├── eval.py
├── hydra_configs
    └── default.yaml
├── infer.py
├── pipelines
    ├── data
    │   ├── data_module.py
    │   ├── noise
    │   │   ├── babble_noise.wav
    │   │   ├── pink_noise.wav
    │   │   └── white_noise.wav
    │   └── transforms.py
    ├── detectors
    │   ├── mediapipe
    │   │   ├── 20words_mean_face.npy
    │   │   ├── detector.py
    │   │   └── video_process.py
    │   └── retinaface
    │   │   ├── 20words_mean_face.npy
    │   │   ├── detector.py
    │   │   └── video_process.py
    ├── metrics
    │   └── measures.py
    ├── model.py
    ├── pipeline.py
    └── tokens
    │   └── unigram5000_units.txt
├── requirements.txt
└── tools
    └── README.md


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/.gitignore


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/README.md


--------------------------------------------------------------------------------
/benchmarks/CMLR/labels/test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMLR/labels/test.ref


--------------------------------------------------------------------------------
/benchmarks/CMLR/language_models/README.md:
--------------------------------------------------------------------------------
1 | Put pretrained language model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/CMLR/models/README.md:
--------------------------------------------------------------------------------
1 | Put model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/labels/es/test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/es/test.ref


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/labels/es/train.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/es/train.ref


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/labels/fr/test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/fr/test.ref


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/labels/fr/train.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/fr/train.ref


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/labels/pt/test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/pt/test.ref


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/labels/pt/train.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/pt/train.ref


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/language_models/es/README.md:
--------------------------------------------------------------------------------
1 | Put pretrained language model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/language_models/fr/README.md:
--------------------------------------------------------------------------------
1 | Put pretrained language model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/language_models/pt/README.md:
--------------------------------------------------------------------------------
1 | Put pretrained language model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/models/es/README.md:
--------------------------------------------------------------------------------
1 | Put model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/models/fr/README.md:
--------------------------------------------------------------------------------
1 | Put model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/CMUMOSEAS/models/pt/README.md:
--------------------------------------------------------------------------------
1 | Put model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/GRID/labels/overlapped_test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/GRID/labels/overlapped_test.ref


--------------------------------------------------------------------------------
/benchmarks/GRID/labels/overlapped_train.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/GRID/labels/overlapped_train.ref


--------------------------------------------------------------------------------
/benchmarks/GRID/labels/unseen_test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/GRID/labels/unseen_test.ref


--------------------------------------------------------------------------------
/benchmarks/GRID/labels/unseen_train.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/GRID/labels/unseen_train.ref


--------------------------------------------------------------------------------
/benchmarks/GRID/models/README.md:
--------------------------------------------------------------------------------
1 | Put model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/LRS2/labels/test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LRS2/labels/test.ref


--------------------------------------------------------------------------------
/benchmarks/LRS2/language_models/README.md:
--------------------------------------------------------------------------------
1 | Put pretrained language model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/LRS2/models/README.md:
--------------------------------------------------------------------------------
1 | Put model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/LRS3/labels/test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LRS3/labels/test.ref


--------------------------------------------------------------------------------
/benchmarks/LRS3/language_models/README.md:
--------------------------------------------------------------------------------
1 | Put pretrained language model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/LRS3/models/README.md:
--------------------------------------------------------------------------------
1 | Put model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/LombardGRID/labels/unseen_fp_test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_fp_test.ref


--------------------------------------------------------------------------------
/benchmarks/LombardGRID/labels/unseen_fp_train.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_fp_train.ref


--------------------------------------------------------------------------------
/benchmarks/LombardGRID/labels/unseen_fp_valid.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_fp_valid.ref


--------------------------------------------------------------------------------
/benchmarks/LombardGRID/labels/unseen_sp_test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_sp_test.ref


--------------------------------------------------------------------------------
/benchmarks/LombardGRID/labels/unseen_sp_train.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_sp_train.ref


--------------------------------------------------------------------------------
/benchmarks/LombardGRID/labels/unseen_sp_valid.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_sp_valid.ref


--------------------------------------------------------------------------------
/benchmarks/LombardGRID/models/README.md:
--------------------------------------------------------------------------------
1 | Put model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/README.md


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/es/statistics:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/es/statistics


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/es/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/es/test.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/es/train.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/es/train.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/es/valid.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/es/valid.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/fr/statistics:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/fr/statistics


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/fr/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/fr/test.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/fr/train.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/fr/train.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/fr/valid.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/fr/valid.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/it/statistics:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/it/statistics


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/it/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/it/test.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/it/train.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/it/train.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/it/valid.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/it/valid.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/pt/statistics:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/pt/statistics


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/pt/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/pt/test.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/pt/train.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/pt/train.txt


--------------------------------------------------------------------------------
/benchmarks/MultilingualTEDx/labels/pt/valid.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/pt/valid.txt


--------------------------------------------------------------------------------
/benchmarks/TCDTIMIT/labels/overlapped_test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/TCDTIMIT/labels/overlapped_test.ref


--------------------------------------------------------------------------------
/benchmarks/TCDTIMIT/labels/overlapped_train.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/TCDTIMIT/labels/overlapped_train.ref


--------------------------------------------------------------------------------
/benchmarks/TCDTIMIT/labels/unseen_test.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/TCDTIMIT/labels/unseen_test.ref


--------------------------------------------------------------------------------
/benchmarks/TCDTIMIT/labels/unseen_train.ref:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/TCDTIMIT/labels/unseen_train.ref


--------------------------------------------------------------------------------
/benchmarks/TCDTIMIT/language_models/README.md:
--------------------------------------------------------------------------------
1 | Put pretrained language model folders here.
2 | 


--------------------------------------------------------------------------------
/benchmarks/TCDTIMIT/models/README.md:
--------------------------------------------------------------------------------
1 | Put model folders here.
2 | 


--------------------------------------------------------------------------------
/configs/CMLR_V_WER8.0.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/CMLR_V_WER8.0.ini


--------------------------------------------------------------------------------
/configs/CMUMOSEAS_V_ES_WER44.5.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/CMUMOSEAS_V_ES_WER44.5.ini


--------------------------------------------------------------------------------
/configs/CMUMOSEAS_V_FR_WER58.6.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/CMUMOSEAS_V_FR_WER58.6.ini


--------------------------------------------------------------------------------
/configs/CMUMOSEAS_V_PT_WER51.4.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/CMUMOSEAS_V_PT_WER51.4.ini


--------------------------------------------------------------------------------
/configs/GRID_V_WER1.2.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/GRID_V_WER1.2.ini


--------------------------------------------------------------------------------
/configs/GRID_V_WER4.8.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/GRID_V_WER4.8.ini


--------------------------------------------------------------------------------
/configs/LRS2_V_WER26.1.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS2_V_WER26.1.ini


--------------------------------------------------------------------------------
/configs/LRS3_AV_WER0.9.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS3_AV_WER0.9.ini


--------------------------------------------------------------------------------
/configs/LRS3_A_WER1.0.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS3_A_WER1.0.ini


--------------------------------------------------------------------------------
/configs/LRS3_V_WER19.1.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS3_V_WER19.1.ini


--------------------------------------------------------------------------------
/configs/LRS3_V_WER32.3.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS3_V_WER32.3.ini


--------------------------------------------------------------------------------
/configs/LombardGRID_V_WER4.9.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LombardGRID_V_WER4.9.ini


--------------------------------------------------------------------------------
/configs/LombardGRID_V_WER8.0.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LombardGRID_V_WER8.0.ini


--------------------------------------------------------------------------------
/configs/TCDTIMIT_V_WER16.9.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/TCDTIMIT_V_WER16.9.ini


--------------------------------------------------------------------------------
/configs/TCDTIMIT_V_WER21.8.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/TCDTIMIT_V_WER21.8.ini


--------------------------------------------------------------------------------
/crop_mouth.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/crop_mouth.py


--------------------------------------------------------------------------------
/doc/lip_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/doc/lip_white.png


--------------------------------------------------------------------------------
/doc/vsr_1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/doc/vsr_1.gif


--------------------------------------------------------------------------------
/doc/vsr_2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/doc/vsr_2.gif


--------------------------------------------------------------------------------
/espnet/asr/asr_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/asr/asr_utils.py


--------------------------------------------------------------------------------
/espnet/nets/batch_beam_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/batch_beam_search.py


--------------------------------------------------------------------------------
/espnet/nets/beam_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/beam_search.py


--------------------------------------------------------------------------------
/espnet/nets/ctc_prefix_score.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/ctc_prefix_score.py


--------------------------------------------------------------------------------
/espnet/nets/e2e_asr_common.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/e2e_asr_common.py


--------------------------------------------------------------------------------
/espnet/nets/lm_interface.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/lm_interface.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/backbones/conv1d_extractor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/conv1d_extractor.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/backbones/conv3d_extractor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/conv3d_extractor.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/backbones/modules/resnet.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/modules/resnet.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/backbones/modules/resnet1d.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/modules/resnet1d.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/ctc.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/ctc.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/e2e_asr_transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/e2e_asr_transformer.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/e2e_asr_transformer_av.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/e2e_asr_transformer_av.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/lm/__init__.py:
--------------------------------------------------------------------------------
1 | """Initialize sub package."""
2 | 


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/lm/default.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/lm/default.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/lm/seq_rnn.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/lm/seq_rnn.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/lm/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/lm/transformer.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/nets_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/nets_utils.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/__init__.py:
--------------------------------------------------------------------------------
1 | """Initialize sub package."""
2 | 


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/add_sos_eos.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/add_sos_eos.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/attention.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/convolution.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/convolution.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/decoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/decoder.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/decoder_layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/decoder_layer.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/embedding.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/encoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/encoder.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/encoder_layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/encoder_layer.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/layer_norm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/layer_norm.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/mask.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/mask.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/multi_layer_conv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/multi_layer_conv.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/optimizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/optimizer.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/plot.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/plot.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/raw_embeddings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/raw_embeddings.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/repeat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/repeat.py


--------------------------------------------------------------------------------
/espnet/nets/pytorch_backend/transformer/subsampling.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/subsampling.py


--------------------------------------------------------------------------------
/espnet/nets/scorer_interface.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/scorer_interface.py


--------------------------------------------------------------------------------
/espnet/nets/scorers/__init__.py:
--------------------------------------------------------------------------------
1 | """Initialize sub package."""
2 | 


--------------------------------------------------------------------------------
/espnet/nets/scorers/ctc.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/scorers/ctc.py


--------------------------------------------------------------------------------
/espnet/nets/scorers/length_bonus.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/scorers/length_bonus.py


--------------------------------------------------------------------------------
/espnet/utils/cli_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/utils/cli_utils.py


--------------------------------------------------------------------------------
/espnet/utils/dynamic_import.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/utils/dynamic_import.py


--------------------------------------------------------------------------------
/espnet/utils/fill_missing_args.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/utils/fill_missing_args.py


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/eval.py


--------------------------------------------------------------------------------
/hydra_configs/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/hydra_configs/default.yaml


--------------------------------------------------------------------------------
/infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/infer.py


--------------------------------------------------------------------------------
/pipelines/data/data_module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/data_module.py


--------------------------------------------------------------------------------
/pipelines/data/noise/babble_noise.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/noise/babble_noise.wav


--------------------------------------------------------------------------------
/pipelines/data/noise/pink_noise.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/noise/pink_noise.wav


--------------------------------------------------------------------------------
/pipelines/data/noise/white_noise.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/noise/white_noise.wav


--------------------------------------------------------------------------------
/pipelines/data/transforms.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/transforms.py


--------------------------------------------------------------------------------
/pipelines/detectors/mediapipe/20words_mean_face.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/mediapipe/20words_mean_face.npy


--------------------------------------------------------------------------------
/pipelines/detectors/mediapipe/detector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/mediapipe/detector.py


--------------------------------------------------------------------------------
/pipelines/detectors/mediapipe/video_process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/mediapipe/video_process.py


--------------------------------------------------------------------------------
/pipelines/detectors/retinaface/20words_mean_face.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/retinaface/20words_mean_face.npy


--------------------------------------------------------------------------------
/pipelines/detectors/retinaface/detector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/retinaface/detector.py


--------------------------------------------------------------------------------
/pipelines/detectors/retinaface/video_process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/retinaface/video_process.py


--------------------------------------------------------------------------------
/pipelines/metrics/measures.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/metrics/measures.py


--------------------------------------------------------------------------------
/pipelines/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/model.py


--------------------------------------------------------------------------------
/pipelines/pipeline.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/pipeline.py


--------------------------------------------------------------------------------
/pipelines/tokens/unigram5000_units.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/tokens/unigram5000_units.txt


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/requirements.txt


--------------------------------------------------------------------------------
/tools/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/tools/README.md


--------------------------------------------------------------------------------