├── .gitignore ├── LICENSE.txt ├── README.md ├── how-to-release.md ├── lib ├── JSON.pm ├── JSON │ ├── backportPP.pm │ └── backportPP │ │ ├── Boolean.pm │ │ ├── Compat5005.pm │ │ └── Compat5006.pm └── NLP │ ├── Chinese.pm │ ├── English.pm │ ├── Romanizer.pm │ ├── UTF8.pm │ ├── stringDistance.pm │ └── utilities.pm ├── pyproject.toml ├── requirements.txt ├── test ├── multi-script.txt ├── multi-script.uroman-ref-perl.txt ├── multi-script.uroman-ref.txt ├── string-similarity-test-input.txt └── string-similarity-test-output-ref.txt ├── text ├── amh.txt ├── ara.txt ├── ben.txt ├── bod.txt ├── egy.txt ├── ell.txt ├── fas.txt ├── heb.txt ├── hin.txt ├── jpn.txt ├── kor.txt ├── mar.txt ├── multiple.txt ├── mya.txt ├── nep.txt ├── rus.txt ├── tam.txt ├── tha.txt ├── tlh.txt ├── tur.txt ├── tzm.txt ├── uig.txt └── zho.txt ├── uroman.ipynb └── uroman ├── __init__.py ├── __main__.py ├── data-aux ├── NumPropsRejects.jsonl ├── UnicodeData-v15.1.0.txt └── string-distance-cost-rules.txt ├── data ├── Chinese_to_Pinyin.txt ├── ISO-639-3-list.txt ├── NumProps.jsonl ├── Scripts.txt ├── UnicodeData.txt ├── UnicodeDataOverwrite.txt ├── UnicodeDataProps.txt ├── UnicodeDataPropsCJK.txt ├── UnicodeDataPropsHangul.txt ├── romanization-auto-table.txt ├── romanization-table-arabic-block.txt └── romanization-table.txt ├── de-accent.pl ├── mini-test ├── fas.txt ├── hin.txt ├── line_with_non_utf8.txt ├── multi-script.txt └── multi-script.uroman-ref.txt ├── string-distance.pl ├── uroman-quick.pl ├── uroman-tsv.sh ├── uroman.pl └── uroman.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/README.md -------------------------------------------------------------------------------- /how-to-release.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/how-to-release.md -------------------------------------------------------------------------------- /lib/JSON.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/JSON.pm -------------------------------------------------------------------------------- /lib/JSON/backportPP.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/JSON/backportPP.pm -------------------------------------------------------------------------------- /lib/JSON/backportPP/Boolean.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/JSON/backportPP/Boolean.pm -------------------------------------------------------------------------------- /lib/JSON/backportPP/Compat5005.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/JSON/backportPP/Compat5005.pm -------------------------------------------------------------------------------- /lib/JSON/backportPP/Compat5006.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/JSON/backportPP/Compat5006.pm -------------------------------------------------------------------------------- /lib/NLP/Chinese.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/NLP/Chinese.pm -------------------------------------------------------------------------------- /lib/NLP/English.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/NLP/English.pm -------------------------------------------------------------------------------- /lib/NLP/Romanizer.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/NLP/Romanizer.pm -------------------------------------------------------------------------------- /lib/NLP/UTF8.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/NLP/UTF8.pm -------------------------------------------------------------------------------- /lib/NLP/stringDistance.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/NLP/stringDistance.pm -------------------------------------------------------------------------------- /lib/NLP/utilities.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/lib/NLP/utilities.pm -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | regex>=2024.5.15 2 | -------------------------------------------------------------------------------- /test/multi-script.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/test/multi-script.txt -------------------------------------------------------------------------------- /test/multi-script.uroman-ref-perl.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/test/multi-script.uroman-ref-perl.txt -------------------------------------------------------------------------------- /test/multi-script.uroman-ref.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/test/multi-script.uroman-ref.txt -------------------------------------------------------------------------------- /test/string-similarity-test-input.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/test/string-similarity-test-input.txt -------------------------------------------------------------------------------- /test/string-similarity-test-output-ref.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/test/string-similarity-test-output-ref.txt -------------------------------------------------------------------------------- /text/amh.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/amh.txt -------------------------------------------------------------------------------- /text/ara.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/ara.txt -------------------------------------------------------------------------------- /text/ben.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/ben.txt -------------------------------------------------------------------------------- /text/bod.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/bod.txt -------------------------------------------------------------------------------- /text/egy.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/egy.txt -------------------------------------------------------------------------------- /text/ell.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/ell.txt -------------------------------------------------------------------------------- /text/fas.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/fas.txt -------------------------------------------------------------------------------- /text/heb.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/heb.txt -------------------------------------------------------------------------------- /text/hin.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/hin.txt -------------------------------------------------------------------------------- /text/jpn.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/jpn.txt -------------------------------------------------------------------------------- /text/kor.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/kor.txt -------------------------------------------------------------------------------- /text/mar.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/mar.txt -------------------------------------------------------------------------------- /text/multiple.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/multiple.txt -------------------------------------------------------------------------------- /text/mya.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/mya.txt -------------------------------------------------------------------------------- /text/nep.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/nep.txt -------------------------------------------------------------------------------- /text/rus.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/rus.txt -------------------------------------------------------------------------------- /text/tam.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/tam.txt -------------------------------------------------------------------------------- /text/tha.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/tha.txt -------------------------------------------------------------------------------- /text/tlh.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/tlh.txt -------------------------------------------------------------------------------- /text/tur.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/tur.txt -------------------------------------------------------------------------------- /text/tzm.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/tzm.txt -------------------------------------------------------------------------------- /text/uig.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/uig.txt -------------------------------------------------------------------------------- /text/zho.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/text/zho.txt -------------------------------------------------------------------------------- /uroman.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman.ipynb -------------------------------------------------------------------------------- /uroman/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/__init__.py -------------------------------------------------------------------------------- /uroman/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/__main__.py -------------------------------------------------------------------------------- /uroman/data-aux/NumPropsRejects.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data-aux/NumPropsRejects.jsonl -------------------------------------------------------------------------------- /uroman/data-aux/UnicodeData-v15.1.0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data-aux/UnicodeData-v15.1.0.txt -------------------------------------------------------------------------------- /uroman/data-aux/string-distance-cost-rules.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data-aux/string-distance-cost-rules.txt -------------------------------------------------------------------------------- /uroman/data/Chinese_to_Pinyin.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/Chinese_to_Pinyin.txt -------------------------------------------------------------------------------- /uroman/data/ISO-639-3-list.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/ISO-639-3-list.txt -------------------------------------------------------------------------------- /uroman/data/NumProps.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/NumProps.jsonl -------------------------------------------------------------------------------- /uroman/data/Scripts.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/Scripts.txt -------------------------------------------------------------------------------- /uroman/data/UnicodeData.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/UnicodeData.txt -------------------------------------------------------------------------------- /uroman/data/UnicodeDataOverwrite.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/UnicodeDataOverwrite.txt -------------------------------------------------------------------------------- /uroman/data/UnicodeDataProps.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/UnicodeDataProps.txt -------------------------------------------------------------------------------- /uroman/data/UnicodeDataPropsCJK.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/UnicodeDataPropsCJK.txt -------------------------------------------------------------------------------- /uroman/data/UnicodeDataPropsHangul.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/UnicodeDataPropsHangul.txt -------------------------------------------------------------------------------- /uroman/data/romanization-auto-table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/romanization-auto-table.txt -------------------------------------------------------------------------------- /uroman/data/romanization-table-arabic-block.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/romanization-table-arabic-block.txt -------------------------------------------------------------------------------- /uroman/data/romanization-table.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/data/romanization-table.txt -------------------------------------------------------------------------------- /uroman/de-accent.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/de-accent.pl -------------------------------------------------------------------------------- /uroman/mini-test/fas.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/mini-test/fas.txt -------------------------------------------------------------------------------- /uroman/mini-test/hin.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/mini-test/hin.txt -------------------------------------------------------------------------------- /uroman/mini-test/line_with_non_utf8.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/mini-test/line_with_non_utf8.txt -------------------------------------------------------------------------------- /uroman/mini-test/multi-script.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/mini-test/multi-script.txt -------------------------------------------------------------------------------- /uroman/mini-test/multi-script.uroman-ref.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/mini-test/multi-script.uroman-ref.txt -------------------------------------------------------------------------------- /uroman/string-distance.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/string-distance.pl -------------------------------------------------------------------------------- /uroman/uroman-quick.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/uroman-quick.pl -------------------------------------------------------------------------------- /uroman/uroman-tsv.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/uroman-tsv.sh -------------------------------------------------------------------------------- /uroman/uroman.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/uroman.pl -------------------------------------------------------------------------------- /uroman/uroman.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isi-nlp/uroman/HEAD/uroman/uroman.py --------------------------------------------------------------------------------