├── .circleci ├── check_head.py └── config.yml ├── .github └── workflows │ ├── shellcheck.yml │ └── typos.yml ├── .gitignore ├── .isort.cfg ├── .markdownlint.json ├── LICENSE.txt ├── Makefile ├── README.md ├── config.example.json ├── docs └── mC4_wiki40b.md ├── mypy.ini ├── poetry.lock ├── pyproject.toml ├── scripts ├── dist.py ├── gen.py ├── jalan │ ├── __init__.py │ ├── checksums.tsv │ ├── dummy_data │ │ └── TODO-add_fake_data_in_this_directory.txt │ ├── jalan.py │ └── jalan_test.py ├── mywiki40b │ ├── __init__.py │ ├── checksums.tsv │ ├── dummy_data │ │ └── TODO-add_fake_data_in_this_directory.txt │ ├── mywiki40b.py │ └── mywiki40b_test.py ├── setup_wikipedia.py └── task.py ├── setup.cfg └── tpu.sh /.circleci/check_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/.circleci/check_head.py -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/.circleci/config.yml -------------------------------------------------------------------------------- /.github/workflows/shellcheck.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/.github/workflows/shellcheck.yml -------------------------------------------------------------------------------- /.github/workflows/typos.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/.github/workflows/typos.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/.gitignore -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | known_third_party= 3 | -------------------------------------------------------------------------------- /.markdownlint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/.markdownlint.json -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/README.md -------------------------------------------------------------------------------- /config.example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/config.example.json -------------------------------------------------------------------------------- /docs/mC4_wiki40b.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/docs/mC4_wiki40b.md -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/mypy.ini -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/dist.py -------------------------------------------------------------------------------- /scripts/gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/gen.py -------------------------------------------------------------------------------- /scripts/jalan/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/jalan/__init__.py -------------------------------------------------------------------------------- /scripts/jalan/checksums.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/jalan/checksums.tsv -------------------------------------------------------------------------------- /scripts/jalan/dummy_data/TODO-add_fake_data_in_this_directory.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/jalan/jalan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/jalan/jalan.py -------------------------------------------------------------------------------- /scripts/jalan/jalan_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/jalan/jalan_test.py -------------------------------------------------------------------------------- /scripts/mywiki40b/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/mywiki40b/__init__.py -------------------------------------------------------------------------------- /scripts/mywiki40b/checksums.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/mywiki40b/checksums.tsv -------------------------------------------------------------------------------- /scripts/mywiki40b/dummy_data/TODO-add_fake_data_in_this_directory.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/mywiki40b/mywiki40b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/mywiki40b/mywiki40b.py -------------------------------------------------------------------------------- /scripts/mywiki40b/mywiki40b_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/mywiki40b/mywiki40b_test.py -------------------------------------------------------------------------------- /scripts/setup_wikipedia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/setup_wikipedia.py -------------------------------------------------------------------------------- /scripts/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/scripts/task.py -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length=119 3 | -------------------------------------------------------------------------------- /tpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megagonlabs/t5-japanese/HEAD/tpu.sh --------------------------------------------------------------------------------