├── .gitignore ├── LICENSE ├── README.md ├── benchmarks ├── CMLR │ ├── labels │ │ └── test.ref │ ├── language_models │ │ └── README.md │ └── models │ │ └── README.md ├── CMUMOSEAS │ ├── labels │ │ ├── es │ │ │ ├── test.ref │ │ │ └── train.ref │ │ ├── fr │ │ │ ├── test.ref │ │ │ └── train.ref │ │ └── pt │ │ │ ├── test.ref │ │ │ └── train.ref │ ├── language_models │ │ ├── es │ │ │ └── README.md │ │ ├── fr │ │ │ └── README.md │ │ └── pt │ │ │ └── README.md │ └── models │ │ ├── es │ │ └── README.md │ │ ├── fr │ │ └── README.md │ │ └── pt │ │ └── README.md ├── GRID │ ├── labels │ │ ├── overlapped_test.ref │ │ ├── overlapped_train.ref │ │ ├── unseen_test.ref │ │ └── unseen_train.ref │ └── models │ │ └── README.md ├── LRS2 │ ├── labels │ │ └── test.ref │ ├── language_models │ │ └── README.md │ └── models │ │ └── README.md ├── LRS3 │ ├── labels │ │ └── test.ref │ ├── language_models │ │ └── README.md │ └── models │ │ └── README.md ├── LombardGRID │ ├── labels │ │ ├── unseen_fp_test.ref │ │ ├── unseen_fp_train.ref │ │ ├── unseen_fp_valid.ref │ │ ├── unseen_sp_test.ref │ │ ├── unseen_sp_train.ref │ │ └── unseen_sp_valid.ref │ └── models │ │ └── README.md ├── MultilingualTEDx │ └── labels │ │ ├── README.md │ │ ├── es │ │ ├── statistics │ │ ├── test.txt │ │ ├── train.txt │ │ └── valid.txt │ │ ├── fr │ │ ├── statistics │ │ ├── test.txt │ │ ├── train.txt │ │ └── valid.txt │ │ ├── it │ │ ├── statistics │ │ ├── test.txt │ │ ├── train.txt │ │ └── valid.txt │ │ └── pt │ │ ├── statistics │ │ ├── test.txt │ │ ├── train.txt │ │ └── valid.txt └── TCDTIMIT │ ├── labels │ ├── overlapped_test.ref │ ├── overlapped_train.ref │ ├── unseen_test.ref │ └── unseen_train.ref │ ├── language_models │ └── README.md │ └── models │ └── README.md ├── configs ├── CMLR_V_WER8.0.ini ├── CMUMOSEAS_V_ES_WER44.5.ini ├── CMUMOSEAS_V_FR_WER58.6.ini ├── CMUMOSEAS_V_PT_WER51.4.ini ├── GRID_V_WER1.2.ini ├── GRID_V_WER4.8.ini ├── LRS2_V_WER26.1.ini ├── LRS3_AV_WER0.9.ini ├── LRS3_A_WER1.0.ini ├── LRS3_V_WER19.1.ini ├── LRS3_V_WER32.3.ini ├── LombardGRID_V_WER4.9.ini ├── LombardGRID_V_WER8.0.ini ├── TCDTIMIT_V_WER16.9.ini └── TCDTIMIT_V_WER21.8.ini ├── crop_mouth.py ├── doc ├── lip_white.png ├── vsr_1.gif └── vsr_2.gif ├── espnet ├── asr │ └── asr_utils.py ├── nets │ ├── batch_beam_search.py │ ├── beam_search.py │ ├── ctc_prefix_score.py │ ├── e2e_asr_common.py │ ├── lm_interface.py │ ├── pytorch_backend │ │ ├── backbones │ │ │ ├── conv1d_extractor.py │ │ │ ├── conv3d_extractor.py │ │ │ └── modules │ │ │ │ ├── resnet.py │ │ │ │ ├── resnet1d.py │ │ │ │ └── shufflenetv2.py │ │ ├── ctc.py │ │ ├── e2e_asr_transformer.py │ │ ├── e2e_asr_transformer_av.py │ │ ├── lm │ │ │ ├── __init__.py │ │ │ ├── default.py │ │ │ ├── seq_rnn.py │ │ │ └── transformer.py │ │ ├── nets_utils.py │ │ └── transformer │ │ │ ├── __init__.py │ │ │ ├── add_sos_eos.py │ │ │ ├── attention.py │ │ │ ├── convolution.py │ │ │ ├── decoder.py │ │ │ ├── decoder_layer.py │ │ │ ├── embedding.py │ │ │ ├── encoder.py │ │ │ ├── encoder_layer.py │ │ │ ├── label_smoothing_loss.py │ │ │ ├── layer_norm.py │ │ │ ├── mask.py │ │ │ ├── multi_layer_conv.py │ │ │ ├── optimizer.py │ │ │ ├── plot.py │ │ │ ├── positionwise_feed_forward.py │ │ │ ├── raw_embeddings.py │ │ │ ├── repeat.py │ │ │ └── subsampling.py │ ├── scorer_interface.py │ └── scorers │ │ ├── __init__.py │ │ ├── ctc.py │ │ └── length_bonus.py └── utils │ ├── cli_utils.py │ ├── dynamic_import.py │ └── fill_missing_args.py ├── eval.py ├── hydra_configs └── default.yaml ├── infer.py ├── pipelines ├── data │ ├── data_module.py │ ├── noise │ │ ├── babble_noise.wav │ │ ├── pink_noise.wav │ │ └── white_noise.wav │ └── transforms.py ├── detectors │ ├── mediapipe │ │ ├── 20words_mean_face.npy │ │ ├── detector.py │ │ └── video_process.py │ └── retinaface │ │ ├── 20words_mean_face.npy │ │ ├── detector.py │ │ └── video_process.py ├── metrics │ └── measures.py ├── model.py ├── pipeline.py └── tokens │ └── unigram5000_units.txt ├── requirements.txt └── tools └── README.md /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/CMLR/labels/test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMLR/labels/test.ref -------------------------------------------------------------------------------- /benchmarks/CMLR/language_models/README.md: -------------------------------------------------------------------------------- 1 | Put pretrained language model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/CMLR/models/README.md: -------------------------------------------------------------------------------- 1 | Put model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/labels/es/test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/es/test.ref -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/labels/es/train.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/es/train.ref -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/labels/fr/test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/fr/test.ref -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/labels/fr/train.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/fr/train.ref -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/labels/pt/test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/pt/test.ref -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/labels/pt/train.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/CMUMOSEAS/labels/pt/train.ref -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/language_models/es/README.md: -------------------------------------------------------------------------------- 1 | Put pretrained language model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/language_models/fr/README.md: -------------------------------------------------------------------------------- 1 | Put pretrained language model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/language_models/pt/README.md: -------------------------------------------------------------------------------- 1 | Put pretrained language model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/models/es/README.md: -------------------------------------------------------------------------------- 1 | Put model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/models/fr/README.md: -------------------------------------------------------------------------------- 1 | Put model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/CMUMOSEAS/models/pt/README.md: -------------------------------------------------------------------------------- 1 | Put model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/GRID/labels/overlapped_test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/GRID/labels/overlapped_test.ref -------------------------------------------------------------------------------- /benchmarks/GRID/labels/overlapped_train.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/GRID/labels/overlapped_train.ref -------------------------------------------------------------------------------- /benchmarks/GRID/labels/unseen_test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/GRID/labels/unseen_test.ref -------------------------------------------------------------------------------- /benchmarks/GRID/labels/unseen_train.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/GRID/labels/unseen_train.ref -------------------------------------------------------------------------------- /benchmarks/GRID/models/README.md: -------------------------------------------------------------------------------- 1 | Put model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/LRS2/labels/test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LRS2/labels/test.ref -------------------------------------------------------------------------------- /benchmarks/LRS2/language_models/README.md: -------------------------------------------------------------------------------- 1 | Put pretrained language model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/LRS2/models/README.md: -------------------------------------------------------------------------------- 1 | Put model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/LRS3/labels/test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LRS3/labels/test.ref -------------------------------------------------------------------------------- /benchmarks/LRS3/language_models/README.md: -------------------------------------------------------------------------------- 1 | Put pretrained language model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/LRS3/models/README.md: -------------------------------------------------------------------------------- 1 | Put model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/LombardGRID/labels/unseen_fp_test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_fp_test.ref -------------------------------------------------------------------------------- /benchmarks/LombardGRID/labels/unseen_fp_train.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_fp_train.ref -------------------------------------------------------------------------------- /benchmarks/LombardGRID/labels/unseen_fp_valid.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_fp_valid.ref -------------------------------------------------------------------------------- /benchmarks/LombardGRID/labels/unseen_sp_test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_sp_test.ref -------------------------------------------------------------------------------- /benchmarks/LombardGRID/labels/unseen_sp_train.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_sp_train.ref -------------------------------------------------------------------------------- /benchmarks/LombardGRID/labels/unseen_sp_valid.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/LombardGRID/labels/unseen_sp_valid.ref -------------------------------------------------------------------------------- /benchmarks/LombardGRID/models/README.md: -------------------------------------------------------------------------------- 1 | Put model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/README.md -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/es/statistics: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/es/statistics -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/es/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/es/test.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/es/train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/es/train.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/es/valid.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/es/valid.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/fr/statistics: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/fr/statistics -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/fr/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/fr/test.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/fr/train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/fr/train.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/fr/valid.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/fr/valid.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/it/statistics: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/it/statistics -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/it/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/it/test.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/it/train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/it/train.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/it/valid.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/it/valid.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/pt/statistics: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/pt/statistics -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/pt/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/pt/test.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/pt/train.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/pt/train.txt -------------------------------------------------------------------------------- /benchmarks/MultilingualTEDx/labels/pt/valid.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/MultilingualTEDx/labels/pt/valid.txt -------------------------------------------------------------------------------- /benchmarks/TCDTIMIT/labels/overlapped_test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/TCDTIMIT/labels/overlapped_test.ref -------------------------------------------------------------------------------- /benchmarks/TCDTIMIT/labels/overlapped_train.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/TCDTIMIT/labels/overlapped_train.ref -------------------------------------------------------------------------------- /benchmarks/TCDTIMIT/labels/unseen_test.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/TCDTIMIT/labels/unseen_test.ref -------------------------------------------------------------------------------- /benchmarks/TCDTIMIT/labels/unseen_train.ref: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/benchmarks/TCDTIMIT/labels/unseen_train.ref -------------------------------------------------------------------------------- /benchmarks/TCDTIMIT/language_models/README.md: -------------------------------------------------------------------------------- 1 | Put pretrained language model folders here. 2 | -------------------------------------------------------------------------------- /benchmarks/TCDTIMIT/models/README.md: -------------------------------------------------------------------------------- 1 | Put model folders here. 2 | -------------------------------------------------------------------------------- /configs/CMLR_V_WER8.0.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/CMLR_V_WER8.0.ini -------------------------------------------------------------------------------- /configs/CMUMOSEAS_V_ES_WER44.5.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/CMUMOSEAS_V_ES_WER44.5.ini -------------------------------------------------------------------------------- /configs/CMUMOSEAS_V_FR_WER58.6.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/CMUMOSEAS_V_FR_WER58.6.ini -------------------------------------------------------------------------------- /configs/CMUMOSEAS_V_PT_WER51.4.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/CMUMOSEAS_V_PT_WER51.4.ini -------------------------------------------------------------------------------- /configs/GRID_V_WER1.2.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/GRID_V_WER1.2.ini -------------------------------------------------------------------------------- /configs/GRID_V_WER4.8.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/GRID_V_WER4.8.ini -------------------------------------------------------------------------------- /configs/LRS2_V_WER26.1.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS2_V_WER26.1.ini -------------------------------------------------------------------------------- /configs/LRS3_AV_WER0.9.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS3_AV_WER0.9.ini -------------------------------------------------------------------------------- /configs/LRS3_A_WER1.0.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS3_A_WER1.0.ini -------------------------------------------------------------------------------- /configs/LRS3_V_WER19.1.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS3_V_WER19.1.ini -------------------------------------------------------------------------------- /configs/LRS3_V_WER32.3.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LRS3_V_WER32.3.ini -------------------------------------------------------------------------------- /configs/LombardGRID_V_WER4.9.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LombardGRID_V_WER4.9.ini -------------------------------------------------------------------------------- /configs/LombardGRID_V_WER8.0.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/LombardGRID_V_WER8.0.ini -------------------------------------------------------------------------------- /configs/TCDTIMIT_V_WER16.9.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/TCDTIMIT_V_WER16.9.ini -------------------------------------------------------------------------------- /configs/TCDTIMIT_V_WER21.8.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/configs/TCDTIMIT_V_WER21.8.ini -------------------------------------------------------------------------------- /crop_mouth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/crop_mouth.py -------------------------------------------------------------------------------- /doc/lip_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/doc/lip_white.png -------------------------------------------------------------------------------- /doc/vsr_1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/doc/vsr_1.gif -------------------------------------------------------------------------------- /doc/vsr_2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/doc/vsr_2.gif -------------------------------------------------------------------------------- /espnet/asr/asr_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/asr/asr_utils.py -------------------------------------------------------------------------------- /espnet/nets/batch_beam_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/batch_beam_search.py -------------------------------------------------------------------------------- /espnet/nets/beam_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/beam_search.py -------------------------------------------------------------------------------- /espnet/nets/ctc_prefix_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/ctc_prefix_score.py -------------------------------------------------------------------------------- /espnet/nets/e2e_asr_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/e2e_asr_common.py -------------------------------------------------------------------------------- /espnet/nets/lm_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/lm_interface.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/backbones/conv1d_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/conv1d_extractor.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/backbones/conv3d_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/conv3d_extractor.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/backbones/modules/resnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/modules/resnet.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/backbones/modules/resnet1d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/modules/resnet1d.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/ctc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/ctc.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/e2e_asr_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/e2e_asr_transformer.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/e2e_asr_transformer_av.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/e2e_asr_transformer_av.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/lm/__init__.py: -------------------------------------------------------------------------------- 1 | """Initialize sub package.""" 2 | -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/lm/default.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/lm/default.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/lm/seq_rnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/lm/seq_rnn.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/lm/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/lm/transformer.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/nets_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/nets_utils.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | """Initialize sub package.""" 2 | -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/add_sos_eos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/add_sos_eos.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/attention.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/convolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/convolution.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/decoder.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/decoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/decoder_layer.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/embedding.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/encoder.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/encoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/encoder_layer.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/layer_norm.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/mask.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/multi_layer_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/multi_layer_conv.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/optimizer.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/plot.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/raw_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/raw_embeddings.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/repeat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/repeat.py -------------------------------------------------------------------------------- /espnet/nets/pytorch_backend/transformer/subsampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/pytorch_backend/transformer/subsampling.py -------------------------------------------------------------------------------- /espnet/nets/scorer_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/scorer_interface.py -------------------------------------------------------------------------------- /espnet/nets/scorers/__init__.py: -------------------------------------------------------------------------------- 1 | """Initialize sub package.""" 2 | -------------------------------------------------------------------------------- /espnet/nets/scorers/ctc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/scorers/ctc.py -------------------------------------------------------------------------------- /espnet/nets/scorers/length_bonus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/nets/scorers/length_bonus.py -------------------------------------------------------------------------------- /espnet/utils/cli_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/utils/cli_utils.py -------------------------------------------------------------------------------- /espnet/utils/dynamic_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/utils/dynamic_import.py -------------------------------------------------------------------------------- /espnet/utils/fill_missing_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/espnet/utils/fill_missing_args.py -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/eval.py -------------------------------------------------------------------------------- /hydra_configs/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/hydra_configs/default.yaml -------------------------------------------------------------------------------- /infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/infer.py -------------------------------------------------------------------------------- /pipelines/data/data_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/data_module.py -------------------------------------------------------------------------------- /pipelines/data/noise/babble_noise.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/noise/babble_noise.wav -------------------------------------------------------------------------------- /pipelines/data/noise/pink_noise.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/noise/pink_noise.wav -------------------------------------------------------------------------------- /pipelines/data/noise/white_noise.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/noise/white_noise.wav -------------------------------------------------------------------------------- /pipelines/data/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/data/transforms.py -------------------------------------------------------------------------------- /pipelines/detectors/mediapipe/20words_mean_face.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/mediapipe/20words_mean_face.npy -------------------------------------------------------------------------------- /pipelines/detectors/mediapipe/detector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/mediapipe/detector.py -------------------------------------------------------------------------------- /pipelines/detectors/mediapipe/video_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/mediapipe/video_process.py -------------------------------------------------------------------------------- /pipelines/detectors/retinaface/20words_mean_face.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/retinaface/20words_mean_face.npy -------------------------------------------------------------------------------- /pipelines/detectors/retinaface/detector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/retinaface/detector.py -------------------------------------------------------------------------------- /pipelines/detectors/retinaface/video_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/detectors/retinaface/video_process.py -------------------------------------------------------------------------------- /pipelines/metrics/measures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/metrics/measures.py -------------------------------------------------------------------------------- /pipelines/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/model.py -------------------------------------------------------------------------------- /pipelines/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/pipeline.py -------------------------------------------------------------------------------- /pipelines/tokens/unigram5000_units.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/pipelines/tokens/unigram5000_units.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/requirements.txt -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mpc001/Visual_Speech_Recognition_for_Multiple_Languages/HEAD/tools/README.md --------------------------------------------------------------------------------