├── .clang-format ├── .github └── workflows │ ├── CI.yml │ ├── copyright-update.yml │ ├── deploy.yml │ └── links_fail_fast.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── docs ├── NLP-Format.md ├── Synonyms-Format.md ├── Usage.md └── json_log_schema.json ├── sample_data ├── aaddf.txt ├── abcdef.txt └── synonyms.rules.txt ├── src ├── AdaptedComposition.cpp ├── AdaptedComposition.h ├── AlignmentTraversor.cpp ├── AlignmentTraversor.h ├── Ctm.cpp ├── Ctm.h ├── FstFileLoader.cpp ├── FstFileLoader.h ├── FstLoader.cpp ├── FstLoader.h ├── IComposition.h ├── Nlp.cpp ├── Nlp.h ├── OneBestFstLoader.cpp ├── OneBestFstLoader.h ├── PathHeap.cpp ├── PathHeap.h ├── StandardComposition.cpp ├── StandardComposition.h ├── SynonymEngine.cpp ├── SynonymEngine.h ├── Walker.cpp ├── Walker.h ├── fast-d.cpp ├── fast-d.h ├── fstalign.cpp ├── fstalign.h ├── json_logging.h ├── logging.cpp ├── logging.h ├── main.cpp ├── utilities.cpp ├── utilities.h ├── version.h ├── wer.cpp └── wer.h ├── test ├── CMakeLists.txt ├── compose-tests-utils.h ├── compose-tests.cc ├── data │ ├── align_1.aligned.punc_case.nlp │ ├── align_1.hyp.ctm │ ├── align_1.hyp.punc_case.ctm │ ├── align_1.norm.json │ ├── align_1.ref.aligned.nlp │ ├── align_1.ref.nlp │ ├── align_2.hyp.ctm │ ├── align_2.norm.json │ ├── align_2.ref.aligned.nlp │ ├── align_2.ref.aligned.std.nlp │ ├── align_2.ref.nlp │ ├── align_3.hyp.ctm │ ├── align_3.norm.json │ ├── align_3.ref.aligned.nlp │ ├── align_3.ref.nlp │ ├── align_4.hyp1.ctm │ ├── align_4.hyp2.ctm │ ├── align_4.norm.json │ ├── align_4.ref.aligned1.nlp │ ├── align_4.ref.aligned2.nlp │ ├── align_4.ref.nlp │ ├── align_5.hyp1.ctm │ ├── align_5.hyp2.ctm │ ├── align_5.ref.aligned1-2.nlp │ ├── align_5.ref.aligned1.nlp │ ├── align_5.ref.aligned2-a2.nlp │ ├── align_5.ref.aligned2.nlp │ ├── align_5.ref.nlp │ ├── align_6.hyp.ctm │ ├── align_6.ref.aligned.nlp │ ├── align_6.ref.nlp │ ├── empty.hyp.ctm │ ├── empty.hyp.nlp │ ├── empty.hyp.txt │ ├── empty.ref.txt │ ├── fstalign-50.hyp.txt │ ├── fstalign-50.new.sbs.txt │ ├── fstalign-50.ref.txt │ ├── noise.hyp1.ctm │ ├── noise.hyp2.ctm │ ├── noise_1.hyp1.aligned │ ├── noise_1.hyp2.aligned │ ├── noise_1.ref.nlp │ ├── oracle_1.hyp.fst │ ├── oracle_1.ref.txt │ ├── oracle_1.symbols.txt │ ├── short.aligned.case.nlp │ ├── short.aligned.nlp │ ├── short.aligned.punc.nlp │ ├── short.aligned.punc_case.nlp │ ├── short.aligned.strict.nlp │ ├── short.hyp.nlp │ ├── short.hyp.txt │ ├── short.ref.nlp │ ├── short.sbs.txt │ ├── short_punc.hyp.nlp │ ├── short_punc.ref.nlp │ ├── short_punc.wer_tag.json │ ├── speaker_1.hyp.txt │ ├── speaker_1.ref.nlp │ ├── speaker_2.hyp.txt │ ├── speaker_2.ref.nlp │ ├── syn_1.hyp.adapted.sbs │ ├── syn_1.hyp.sbs │ ├── syn_1.hyp.txt │ ├── syn_1.ref.txt │ ├── syn_10.hyp.txt │ ├── syn_10.ref.txt │ ├── syn_2.hyp.txt │ ├── syn_2.ref.txt │ ├── syn_3.hyp.txt │ ├── syn_3.ref.txt │ ├── syn_4.hyp.txt │ ├── syn_4.ref.txt │ ├── syn_5.hyp.txt │ ├── syn_5.ref.txt │ ├── syn_6.hyp.txt │ ├── syn_6.ref.txt │ ├── syn_7.hyp.txt │ ├── syn_7.hyp2.txt │ ├── syn_7.hyp3.txt │ ├── syn_7.hyp4.txt │ ├── syn_7.norm.json │ ├── syn_7.ref.nlp │ ├── syn_7.synonym.rules.txt │ ├── syn_7_ref4.nlp │ ├── syn_8.hyp.ctm │ ├── syn_8.ref.nlp │ ├── syn_9.hyp.txt │ ├── syn_9.ref.txt │ ├── syn_9.synonym.rules.txt │ ├── syn_compound_1.hyp.txt │ ├── syn_compound_1.ref.txt │ ├── syn_compound_2.hyp.txt │ ├── syn_compound_2.ref.txt │ ├── test1.hyp.txt │ ├── test1.ref.txt │ ├── twenty.aligned.punc_case.nlp │ ├── twenty.hyp-a2.sbs │ ├── twenty.hyp.punc_case.txt │ ├── twenty.hyp.sbs │ ├── twenty.hyp.txt │ ├── twenty.norm.json │ ├── twenty.ref.nlp │ ├── twenty.ref.testing.nlp │ ├── twenty.ref.testing.norm.json │ ├── wer_utf.hyp.txt │ └── wer_utf.ref.txt ├── fast-d-tests.cc ├── fstalign_Test.cc └── test-utilties.h └── tools ├── README.md ├── gather_runtime_metrics.sh ├── generate_wer_test_data.pl ├── images ├── 120_short_files.png ├── 120_vs_130_ram.png ├── 120_vs_130_runtime.png └── 130_short_files.png └── sbs2fst.py /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/.clang-format -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/.github/workflows/CI.yml -------------------------------------------------------------------------------- /.github/workflows/copyright-update.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/.github/workflows/copyright-update.yml -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/.github/workflows/deploy.yml -------------------------------------------------------------------------------- /.github/workflows/links_fail_fast.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/.github/workflows/links_fail_fast.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/.gitmodules -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/README.md -------------------------------------------------------------------------------- /docs/NLP-Format.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/docs/NLP-Format.md -------------------------------------------------------------------------------- /docs/Synonyms-Format.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/docs/Synonyms-Format.md -------------------------------------------------------------------------------- /docs/Usage.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/docs/Usage.md -------------------------------------------------------------------------------- /docs/json_log_schema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/docs/json_log_schema.json -------------------------------------------------------------------------------- /sample_data/aaddf.txt: -------------------------------------------------------------------------------- 1 | a a d d f 2 | -------------------------------------------------------------------------------- /sample_data/abcdef.txt: -------------------------------------------------------------------------------- 1 | a b c d e f 2 | -------------------------------------------------------------------------------- /sample_data/synonyms.rules.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/sample_data/synonyms.rules.txt -------------------------------------------------------------------------------- /src/AdaptedComposition.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/AdaptedComposition.cpp -------------------------------------------------------------------------------- /src/AdaptedComposition.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/AdaptedComposition.h -------------------------------------------------------------------------------- /src/AlignmentTraversor.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/AlignmentTraversor.cpp -------------------------------------------------------------------------------- /src/AlignmentTraversor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/AlignmentTraversor.h -------------------------------------------------------------------------------- /src/Ctm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/Ctm.cpp -------------------------------------------------------------------------------- /src/Ctm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/Ctm.h -------------------------------------------------------------------------------- /src/FstFileLoader.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/FstFileLoader.cpp -------------------------------------------------------------------------------- /src/FstFileLoader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/FstFileLoader.h -------------------------------------------------------------------------------- /src/FstLoader.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/FstLoader.cpp -------------------------------------------------------------------------------- /src/FstLoader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/FstLoader.h -------------------------------------------------------------------------------- /src/IComposition.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/IComposition.h -------------------------------------------------------------------------------- /src/Nlp.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/Nlp.cpp -------------------------------------------------------------------------------- /src/Nlp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/Nlp.h -------------------------------------------------------------------------------- /src/OneBestFstLoader.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/OneBestFstLoader.cpp -------------------------------------------------------------------------------- /src/OneBestFstLoader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/OneBestFstLoader.h -------------------------------------------------------------------------------- /src/PathHeap.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/PathHeap.cpp -------------------------------------------------------------------------------- /src/PathHeap.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/PathHeap.h -------------------------------------------------------------------------------- /src/StandardComposition.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/StandardComposition.cpp -------------------------------------------------------------------------------- /src/StandardComposition.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/StandardComposition.h -------------------------------------------------------------------------------- /src/SynonymEngine.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/SynonymEngine.cpp -------------------------------------------------------------------------------- /src/SynonymEngine.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/SynonymEngine.h -------------------------------------------------------------------------------- /src/Walker.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/Walker.cpp -------------------------------------------------------------------------------- /src/Walker.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/Walker.h -------------------------------------------------------------------------------- /src/fast-d.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/fast-d.cpp -------------------------------------------------------------------------------- /src/fast-d.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/fast-d.h -------------------------------------------------------------------------------- /src/fstalign.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/fstalign.cpp -------------------------------------------------------------------------------- /src/fstalign.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/fstalign.h -------------------------------------------------------------------------------- /src/json_logging.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/json_logging.h -------------------------------------------------------------------------------- /src/logging.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/logging.cpp -------------------------------------------------------------------------------- /src/logging.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/logging.h -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/main.cpp -------------------------------------------------------------------------------- /src/utilities.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/utilities.cpp -------------------------------------------------------------------------------- /src/utilities.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/utilities.h -------------------------------------------------------------------------------- /src/version.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/version.h -------------------------------------------------------------------------------- /src/wer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/wer.cpp -------------------------------------------------------------------------------- /src/wer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/src/wer.h -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/CMakeLists.txt -------------------------------------------------------------------------------- /test/compose-tests-utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/compose-tests-utils.h -------------------------------------------------------------------------------- /test/compose-tests.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/compose-tests.cc -------------------------------------------------------------------------------- /test/data/align_1.aligned.punc_case.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_1.aligned.punc_case.nlp -------------------------------------------------------------------------------- /test/data/align_1.hyp.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_1.hyp.ctm -------------------------------------------------------------------------------- /test/data/align_1.hyp.punc_case.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_1.hyp.punc_case.ctm -------------------------------------------------------------------------------- /test/data/align_1.norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_1.norm.json -------------------------------------------------------------------------------- /test/data/align_1.ref.aligned.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_1.ref.aligned.nlp -------------------------------------------------------------------------------- /test/data/align_1.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_1.ref.nlp -------------------------------------------------------------------------------- /test/data/align_2.hyp.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_2.hyp.ctm -------------------------------------------------------------------------------- /test/data/align_2.norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_2.norm.json -------------------------------------------------------------------------------- /test/data/align_2.ref.aligned.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_2.ref.aligned.nlp -------------------------------------------------------------------------------- /test/data/align_2.ref.aligned.std.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_2.ref.aligned.std.nlp -------------------------------------------------------------------------------- /test/data/align_2.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_2.ref.nlp -------------------------------------------------------------------------------- /test/data/align_3.hyp.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_3.hyp.ctm -------------------------------------------------------------------------------- /test/data/align_3.norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_3.norm.json -------------------------------------------------------------------------------- /test/data/align_3.ref.aligned.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_3.ref.aligned.nlp -------------------------------------------------------------------------------- /test/data/align_3.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_3.ref.nlp -------------------------------------------------------------------------------- /test/data/align_4.hyp1.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_4.hyp1.ctm -------------------------------------------------------------------------------- /test/data/align_4.hyp2.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_4.hyp2.ctm -------------------------------------------------------------------------------- /test/data/align_4.norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_4.norm.json -------------------------------------------------------------------------------- /test/data/align_4.ref.aligned1.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_4.ref.aligned1.nlp -------------------------------------------------------------------------------- /test/data/align_4.ref.aligned2.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_4.ref.aligned2.nlp -------------------------------------------------------------------------------- /test/data/align_4.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_4.ref.nlp -------------------------------------------------------------------------------- /test/data/align_5.hyp1.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_5.hyp1.ctm -------------------------------------------------------------------------------- /test/data/align_5.hyp2.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_5.hyp2.ctm -------------------------------------------------------------------------------- /test/data/align_5.ref.aligned1-2.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_5.ref.aligned1-2.nlp -------------------------------------------------------------------------------- /test/data/align_5.ref.aligned1.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_5.ref.aligned1.nlp -------------------------------------------------------------------------------- /test/data/align_5.ref.aligned2-a2.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_5.ref.aligned2-a2.nlp -------------------------------------------------------------------------------- /test/data/align_5.ref.aligned2.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_5.ref.aligned2.nlp -------------------------------------------------------------------------------- /test/data/align_5.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_5.ref.nlp -------------------------------------------------------------------------------- /test/data/align_6.hyp.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_6.hyp.ctm -------------------------------------------------------------------------------- /test/data/align_6.ref.aligned.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_6.ref.aligned.nlp -------------------------------------------------------------------------------- /test/data/align_6.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/align_6.ref.nlp -------------------------------------------------------------------------------- /test/data/empty.hyp.ctm: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/data/empty.hyp.nlp: -------------------------------------------------------------------------------- 1 | token|speaker|ts|endTs|punctuation|case|tags 2 | -------------------------------------------------------------------------------- /test/data/empty.hyp.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/data/empty.ref.txt: -------------------------------------------------------------------------------- 1 | not really empty 2 | -------------------------------------------------------------------------------- /test/data/fstalign-50.hyp.txt: -------------------------------------------------------------------------------- 1 | su capital es la ciudad de pau 2 | -------------------------------------------------------------------------------- /test/data/fstalign-50.new.sbs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/fstalign-50.new.sbs.txt -------------------------------------------------------------------------------- /test/data/fstalign-50.ref.txt: -------------------------------------------------------------------------------- 1 | su capital es la ciudad de palu 2 | -------------------------------------------------------------------------------- /test/data/noise.hyp1.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/noise.hyp1.ctm -------------------------------------------------------------------------------- /test/data/noise.hyp2.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/noise.hyp2.ctm -------------------------------------------------------------------------------- /test/data/noise_1.hyp1.aligned: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/noise_1.hyp1.aligned -------------------------------------------------------------------------------- /test/data/noise_1.hyp2.aligned: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/noise_1.hyp2.aligned -------------------------------------------------------------------------------- /test/data/noise_1.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/noise_1.ref.nlp -------------------------------------------------------------------------------- /test/data/oracle_1.hyp.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/oracle_1.hyp.fst -------------------------------------------------------------------------------- /test/data/oracle_1.ref.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/oracle_1.ref.txt -------------------------------------------------------------------------------- /test/data/oracle_1.symbols.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/oracle_1.symbols.txt -------------------------------------------------------------------------------- /test/data/short.aligned.case.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short.aligned.case.nlp -------------------------------------------------------------------------------- /test/data/short.aligned.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short.aligned.nlp -------------------------------------------------------------------------------- /test/data/short.aligned.punc.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short.aligned.punc.nlp -------------------------------------------------------------------------------- /test/data/short.aligned.punc_case.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short.aligned.punc_case.nlp -------------------------------------------------------------------------------- /test/data/short.aligned.strict.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short.aligned.strict.nlp -------------------------------------------------------------------------------- /test/data/short.hyp.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short.hyp.nlp -------------------------------------------------------------------------------- /test/data/short.hyp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short.hyp.txt -------------------------------------------------------------------------------- /test/data/short.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short.ref.nlp -------------------------------------------------------------------------------- /test/data/short.sbs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short.sbs.txt -------------------------------------------------------------------------------- /test/data/short_punc.hyp.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short_punc.hyp.nlp -------------------------------------------------------------------------------- /test/data/short_punc.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short_punc.ref.nlp -------------------------------------------------------------------------------- /test/data/short_punc.wer_tag.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/short_punc.wer_tag.json -------------------------------------------------------------------------------- /test/data/speaker_1.hyp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/speaker_1.hyp.txt -------------------------------------------------------------------------------- /test/data/speaker_1.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/speaker_1.ref.nlp -------------------------------------------------------------------------------- /test/data/speaker_2.hyp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/speaker_2.hyp.txt -------------------------------------------------------------------------------- /test/data/speaker_2.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/speaker_2.ref.nlp -------------------------------------------------------------------------------- /test/data/syn_1.hyp.adapted.sbs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_1.hyp.adapted.sbs -------------------------------------------------------------------------------- /test/data/syn_1.hyp.sbs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_1.hyp.sbs -------------------------------------------------------------------------------- /test/data/syn_1.hyp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_1.hyp.txt -------------------------------------------------------------------------------- /test/data/syn_1.ref.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_1.ref.txt -------------------------------------------------------------------------------- /test/data/syn_10.hyp.txt: -------------------------------------------------------------------------------- 1 | she will have a great evening 2 | -------------------------------------------------------------------------------- /test/data/syn_10.ref.txt: -------------------------------------------------------------------------------- 1 | she- will have a great evening 2 | -------------------------------------------------------------------------------- /test/data/syn_2.hyp.txt: -------------------------------------------------------------------------------- 1 | we'll 2 | -------------------------------------------------------------------------------- /test/data/syn_2.ref.txt: -------------------------------------------------------------------------------- 1 | we will 2 | -------------------------------------------------------------------------------- /test/data/syn_3.hyp.txt: -------------------------------------------------------------------------------- 1 | we will 2 | -------------------------------------------------------------------------------- /test/data/syn_3.ref.txt: -------------------------------------------------------------------------------- 1 | we'll 2 | -------------------------------------------------------------------------------- /test/data/syn_4.hyp.txt: -------------------------------------------------------------------------------- 1 | no 2 | -------------------------------------------------------------------------------- /test/data/syn_4.ref.txt: -------------------------------------------------------------------------------- 1 | we will 2 | -------------------------------------------------------------------------------- /test/data/syn_5.hyp.txt: -------------------------------------------------------------------------------- 1 | will 2 | -------------------------------------------------------------------------------- /test/data/syn_5.ref.txt: -------------------------------------------------------------------------------- 1 | we'll 2 | -------------------------------------------------------------------------------- /test/data/syn_6.hyp.txt: -------------------------------------------------------------------------------- 1 | this is what saying 2 | -------------------------------------------------------------------------------- /test/data/syn_6.ref.txt: -------------------------------------------------------------------------------- 1 | this- is what she's saying 2 | -------------------------------------------------------------------------------- /test/data/syn_7.hyp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_7.hyp.txt -------------------------------------------------------------------------------- /test/data/syn_7.hyp2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_7.hyp2.txt -------------------------------------------------------------------------------- /test/data/syn_7.hyp3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_7.hyp3.txt -------------------------------------------------------------------------------- /test/data/syn_7.hyp4.txt: -------------------------------------------------------------------------------- 1 | it costs -------------------------------------------------------------------------------- /test/data/syn_7.norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_7.norm.json -------------------------------------------------------------------------------- /test/data/syn_7.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_7.ref.nlp -------------------------------------------------------------------------------- /test/data/syn_7.synonym.rules.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_7.synonym.rules.txt -------------------------------------------------------------------------------- /test/data/syn_7_ref4.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_7_ref4.nlp -------------------------------------------------------------------------------- /test/data/syn_8.hyp.ctm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_8.hyp.ctm -------------------------------------------------------------------------------- /test/data/syn_8.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_8.ref.nlp -------------------------------------------------------------------------------- /test/data/syn_9.hyp.txt: -------------------------------------------------------------------------------- 1 | he will have a great evening -------------------------------------------------------------------------------- /test/data/syn_9.ref.txt: -------------------------------------------------------------------------------- 1 | she- will have a great evening -------------------------------------------------------------------------------- /test/data/syn_9.synonym.rules.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_9.synonym.rules.txt -------------------------------------------------------------------------------- /test/data/syn_compound_1.hyp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_compound_1.hyp.txt -------------------------------------------------------------------------------- /test/data/syn_compound_1.ref.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/syn_compound_1.ref.txt -------------------------------------------------------------------------------- /test/data/syn_compound_2.hyp.txt: -------------------------------------------------------------------------------- 1 | that is are long-term view on politics -------------------------------------------------------------------------------- /test/data/syn_compound_2.ref.txt: -------------------------------------------------------------------------------- 1 | what is our long term view on politics -------------------------------------------------------------------------------- /test/data/test1.hyp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/test1.hyp.txt -------------------------------------------------------------------------------- /test/data/test1.ref.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/test1.ref.txt -------------------------------------------------------------------------------- /test/data/twenty.aligned.punc_case.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/twenty.aligned.punc_case.nlp -------------------------------------------------------------------------------- /test/data/twenty.hyp-a2.sbs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/twenty.hyp-a2.sbs -------------------------------------------------------------------------------- /test/data/twenty.hyp.punc_case.txt: -------------------------------------------------------------------------------- 1 | in Twenty tHiRtY , is one TWENTY two three 2 | 3 | -------------------------------------------------------------------------------- /test/data/twenty.hyp.sbs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/twenty.hyp.sbs -------------------------------------------------------------------------------- /test/data/twenty.hyp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/twenty.hyp.txt -------------------------------------------------------------------------------- /test/data/twenty.norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/twenty.norm.json -------------------------------------------------------------------------------- /test/data/twenty.ref.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/twenty.ref.nlp -------------------------------------------------------------------------------- /test/data/twenty.ref.testing.nlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/twenty.ref.testing.nlp -------------------------------------------------------------------------------- /test/data/twenty.ref.testing.norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/twenty.ref.testing.norm.json -------------------------------------------------------------------------------- /test/data/wer_utf.hyp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/data/wer_utf.hyp.txt -------------------------------------------------------------------------------- /test/data/wer_utf.ref.txt: -------------------------------------------------------------------------------- 1 | Ça va bien aujourd'hui étÉ inutile êtes -------------------------------------------------------------------------------- /test/fast-d-tests.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/fast-d-tests.cc -------------------------------------------------------------------------------- /test/fstalign_Test.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/fstalign_Test.cc -------------------------------------------------------------------------------- /test/test-utilties.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/test/test-utilties.h -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/tools/README.md -------------------------------------------------------------------------------- /tools/gather_runtime_metrics.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/tools/gather_runtime_metrics.sh -------------------------------------------------------------------------------- /tools/generate_wer_test_data.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/tools/generate_wer_test_data.pl -------------------------------------------------------------------------------- /tools/images/120_short_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/tools/images/120_short_files.png -------------------------------------------------------------------------------- /tools/images/120_vs_130_ram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/tools/images/120_vs_130_ram.png -------------------------------------------------------------------------------- /tools/images/120_vs_130_runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/tools/images/120_vs_130_runtime.png -------------------------------------------------------------------------------- /tools/images/130_short_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/tools/images/130_short_files.png -------------------------------------------------------------------------------- /tools/sbs2fst.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revdotcom/fstalign/HEAD/tools/sbs2fst.py --------------------------------------------------------------------------------