├── LICENSE ├── README.md ├── _config.yml ├── mono ├── 0-do-it-all.sh ├── 1-unique.sh ├── 2-identify-language.sh ├── 3-moses-scripts-subword-nmt.sh ├── README.md ├── filter-parallel.php ├── filter.php ├── find-bad.php └── non-alpha.php ├── parallel ├── 0-do-it-all.sh ├── 1-find-equal-lines.sh ├── 2-unique-parallel.sh ├── 3-identify-language.sh ├── 4-moses-scripts-subword-nmt.sh ├── README.md ├── chars-numbers.php ├── concat-parallel.php ├── empty-numbers.php ├── filter-parallel.php ├── filter-repeating.php ├── filter-text.php ├── filter.php ├── find-bad.php ├── non-alpha.php ├── non-matching-non-alpha.php ├── repeating-tokens.php ├── split-parallel.php └── uniqe-numbers.php └── regular-expressions.php /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/README.md -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/_config.yml -------------------------------------------------------------------------------- /mono/0-do-it-all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/mono/0-do-it-all.sh -------------------------------------------------------------------------------- /mono/1-unique.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/mono/1-unique.sh -------------------------------------------------------------------------------- /mono/2-identify-language.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/mono/2-identify-language.sh -------------------------------------------------------------------------------- /mono/3-moses-scripts-subword-nmt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/mono/3-moses-scripts-subword-nmt.sh -------------------------------------------------------------------------------- /mono/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/mono/README.md -------------------------------------------------------------------------------- /mono/filter-parallel.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/mono/filter-parallel.php -------------------------------------------------------------------------------- /mono/filter.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/mono/filter.php -------------------------------------------------------------------------------- /mono/find-bad.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/mono/find-bad.php -------------------------------------------------------------------------------- /mono/non-alpha.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/mono/non-alpha.php -------------------------------------------------------------------------------- /parallel/0-do-it-all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/0-do-it-all.sh -------------------------------------------------------------------------------- /parallel/1-find-equal-lines.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/1-find-equal-lines.sh -------------------------------------------------------------------------------- /parallel/2-unique-parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/2-unique-parallel.sh -------------------------------------------------------------------------------- /parallel/3-identify-language.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/3-identify-language.sh -------------------------------------------------------------------------------- /parallel/4-moses-scripts-subword-nmt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/4-moses-scripts-subword-nmt.sh -------------------------------------------------------------------------------- /parallel/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/README.md -------------------------------------------------------------------------------- /parallel/chars-numbers.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/chars-numbers.php -------------------------------------------------------------------------------- /parallel/concat-parallel.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/concat-parallel.php -------------------------------------------------------------------------------- /parallel/empty-numbers.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/empty-numbers.php -------------------------------------------------------------------------------- /parallel/filter-parallel.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/filter-parallel.php -------------------------------------------------------------------------------- /parallel/filter-repeating.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/filter-repeating.php -------------------------------------------------------------------------------- /parallel/filter-text.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/filter-text.php -------------------------------------------------------------------------------- /parallel/filter.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/filter.php -------------------------------------------------------------------------------- /parallel/find-bad.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/find-bad.php -------------------------------------------------------------------------------- /parallel/non-alpha.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/non-alpha.php -------------------------------------------------------------------------------- /parallel/non-matching-non-alpha.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/non-matching-non-alpha.php -------------------------------------------------------------------------------- /parallel/repeating-tokens.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/repeating-tokens.php -------------------------------------------------------------------------------- /parallel/split-parallel.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/split-parallel.php -------------------------------------------------------------------------------- /parallel/uniqe-numbers.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/parallel/uniqe-numbers.php -------------------------------------------------------------------------------- /regular-expressions.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/M4t1ss/parallel-corpora-tools/HEAD/regular-expressions.php --------------------------------------------------------------------------------