├── README.md
├── arguments.py
├── change_mp.py
├── chinese_sentencepiece
    ├── cog-pretrain.model
    └── cog-pretrain.vocab
├── configure_data.py
├── data_utils
    ├── __init__.py
    ├── corpora.py
    ├── datasets.py
    ├── extraction.py
    ├── file_utils.py
    ├── lazy_loader.py
    ├── samplers.py
    ├── sp_tokenizer.py
    ├── tf_dl.py
    ├── tokenization.py
    ├── tokenization_gpt2.py
    └── wordpiece.py
├── detokenizer.py
├── docker
    ├── Dockerfile
    ├── Dockerfile-cuda101
    ├── prepare.sh
    └── ssh-env-config.sh
├── evaluate_perplexity.py
├── fp16
    ├── __init__.py
    ├── fp16.py
    ├── fp16util.py
    └── loss_scaler.py
├── generate_samples.py
├── generation_utils.py
├── gpt2_data_loader.py
├── learning_rates.py
├── model
    ├── __init__.py
    ├── distributed.py
    ├── gpt2_modeling.py
    ├── model.py
    └── modeling.py
├── mpu
    ├── __init__.py
    ├── cross_entropy.py
    ├── data.py
    ├── grads.py
    ├── initialize.py
    ├── layers.py
    ├── mappings.py
    ├── random.py
    ├── tests
    │   ├── __init__.py
    │   ├── commons.py
    │   ├── test_cross_entropy.py
    │   ├── test_data.py
    │   ├── test_initialize.py
    │   ├── test_layers.py
    │   └── test_random.py
    ├── transformer.py
    └── utils.py
├── openwebtext
    ├── README.md
    ├── blacklist_urls.py
    ├── cleanup_dataset.py
    ├── find_duplicates.py
    ├── group_duplicates_url.py
    ├── make_gpt2_dataset.py
    ├── make_gpt2_sizes.py
    ├── merge_jsons.py
    ├── remove_group_duplicates.py
    ├── run_make_gpt2_dataset.sh
    └── tokenizer.py
├── pretrain_bert.py
├── pretrain_gpt2.py
├── requirements.txt
├── scripts
    ├── ds_config_2.9B.json
    ├── ds_config_2.9B_finetune.json
    ├── ds_finetune_gpt_2.9B.sh
    ├── ds_pretrain_gpt_2.9B.sh
    ├── generate_text.sh
    ├── generate_text_small.sh
    ├── presplit_sentences_json.py
    └── split_json.py
└── utils.py


/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/README.md


--------------------------------------------------------------------------------
/arguments.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/arguments.py


--------------------------------------------------------------------------------
/change_mp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/change_mp.py


--------------------------------------------------------------------------------
/chinese_sentencepiece/cog-pretrain.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/chinese_sentencepiece/cog-pretrain.model


--------------------------------------------------------------------------------
/chinese_sentencepiece/cog-pretrain.vocab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/chinese_sentencepiece/cog-pretrain.vocab


--------------------------------------------------------------------------------
/configure_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/configure_data.py


--------------------------------------------------------------------------------
/data_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/__init__.py


--------------------------------------------------------------------------------
/data_utils/corpora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/corpora.py


--------------------------------------------------------------------------------
/data_utils/datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/datasets.py


--------------------------------------------------------------------------------
/data_utils/extraction.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/extraction.py


--------------------------------------------------------------------------------
/data_utils/file_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/file_utils.py


--------------------------------------------------------------------------------
/data_utils/lazy_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/lazy_loader.py


--------------------------------------------------------------------------------
/data_utils/samplers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/samplers.py


--------------------------------------------------------------------------------
/data_utils/sp_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/sp_tokenizer.py


--------------------------------------------------------------------------------
/data_utils/tf_dl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/tf_dl.py


--------------------------------------------------------------------------------
/data_utils/tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/tokenization.py


--------------------------------------------------------------------------------
/data_utils/tokenization_gpt2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/tokenization_gpt2.py


--------------------------------------------------------------------------------
/data_utils/wordpiece.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/data_utils/wordpiece.py


--------------------------------------------------------------------------------
/detokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/detokenizer.py


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/docker/Dockerfile


--------------------------------------------------------------------------------
/docker/Dockerfile-cuda101:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/docker/Dockerfile-cuda101


--------------------------------------------------------------------------------
/docker/prepare.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/docker/prepare.sh


--------------------------------------------------------------------------------
/docker/ssh-env-config.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/docker/ssh-env-config.sh


--------------------------------------------------------------------------------
/evaluate_perplexity.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/evaluate_perplexity.py


--------------------------------------------------------------------------------
/fp16/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/fp16/__init__.py


--------------------------------------------------------------------------------
/fp16/fp16.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/fp16/fp16.py


--------------------------------------------------------------------------------
/fp16/fp16util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/fp16/fp16util.py


--------------------------------------------------------------------------------
/fp16/loss_scaler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/fp16/loss_scaler.py


--------------------------------------------------------------------------------
/generate_samples.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/generate_samples.py


--------------------------------------------------------------------------------
/generation_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/generation_utils.py


--------------------------------------------------------------------------------
/gpt2_data_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/gpt2_data_loader.py


--------------------------------------------------------------------------------
/learning_rates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/learning_rates.py


--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/model/__init__.py


--------------------------------------------------------------------------------
/model/distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/model/distributed.py


--------------------------------------------------------------------------------
/model/gpt2_modeling.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/model/gpt2_modeling.py


--------------------------------------------------------------------------------
/model/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/model/model.py


--------------------------------------------------------------------------------
/model/modeling.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/model/modeling.py


--------------------------------------------------------------------------------
/mpu/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/__init__.py


--------------------------------------------------------------------------------
/mpu/cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/cross_entropy.py


--------------------------------------------------------------------------------
/mpu/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/data.py


--------------------------------------------------------------------------------
/mpu/grads.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/grads.py


--------------------------------------------------------------------------------
/mpu/initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/initialize.py


--------------------------------------------------------------------------------
/mpu/layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/layers.py


--------------------------------------------------------------------------------
/mpu/mappings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/mappings.py


--------------------------------------------------------------------------------
/mpu/random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/random.py


--------------------------------------------------------------------------------
/mpu/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/mpu/tests/commons.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/tests/commons.py


--------------------------------------------------------------------------------
/mpu/tests/test_cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/tests/test_cross_entropy.py


--------------------------------------------------------------------------------
/mpu/tests/test_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/tests/test_data.py


--------------------------------------------------------------------------------
/mpu/tests/test_initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/tests/test_initialize.py


--------------------------------------------------------------------------------
/mpu/tests/test_layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/tests/test_layers.py


--------------------------------------------------------------------------------
/mpu/tests/test_random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/tests/test_random.py


--------------------------------------------------------------------------------
/mpu/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/transformer.py


--------------------------------------------------------------------------------
/mpu/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/mpu/utils.py


--------------------------------------------------------------------------------
/openwebtext/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/README.md


--------------------------------------------------------------------------------
/openwebtext/blacklist_urls.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/blacklist_urls.py


--------------------------------------------------------------------------------
/openwebtext/cleanup_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/cleanup_dataset.py


--------------------------------------------------------------------------------
/openwebtext/find_duplicates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/find_duplicates.py


--------------------------------------------------------------------------------
/openwebtext/group_duplicates_url.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/group_duplicates_url.py


--------------------------------------------------------------------------------
/openwebtext/make_gpt2_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/make_gpt2_dataset.py


--------------------------------------------------------------------------------
/openwebtext/make_gpt2_sizes.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/make_gpt2_sizes.py


--------------------------------------------------------------------------------
/openwebtext/merge_jsons.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/merge_jsons.py


--------------------------------------------------------------------------------
/openwebtext/remove_group_duplicates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/remove_group_duplicates.py


--------------------------------------------------------------------------------
/openwebtext/run_make_gpt2_dataset.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/run_make_gpt2_dataset.sh


--------------------------------------------------------------------------------
/openwebtext/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/openwebtext/tokenizer.py


--------------------------------------------------------------------------------
/pretrain_bert.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/pretrain_bert.py


--------------------------------------------------------------------------------
/pretrain_gpt2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/pretrain_gpt2.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/requirements.txt


--------------------------------------------------------------------------------
/scripts/ds_config_2.9B.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/scripts/ds_config_2.9B.json


--------------------------------------------------------------------------------
/scripts/ds_config_2.9B_finetune.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/scripts/ds_config_2.9B_finetune.json


--------------------------------------------------------------------------------
/scripts/ds_finetune_gpt_2.9B.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/scripts/ds_finetune_gpt_2.9B.sh


--------------------------------------------------------------------------------
/scripts/ds_pretrain_gpt_2.9B.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/scripts/ds_pretrain_gpt_2.9B.sh


--------------------------------------------------------------------------------
/scripts/generate_text.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/scripts/generate_text.sh


--------------------------------------------------------------------------------
/scripts/generate_text_small.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/scripts/generate_text_small.sh


--------------------------------------------------------------------------------
/scripts/presplit_sentences_json.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/scripts/presplit_sentences_json.py


--------------------------------------------------------------------------------
/scripts/split_json.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/scripts/split_json.py


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUDM/Chinese-Transformer-XL/HEAD/utils.py


--------------------------------------------------------------------------------