├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── distillation ├── .gitignore ├── 1_layer_train_multi_gpu.sh ├── 3_layer_train_multi_gpu.sh ├── 6_layer_train_multi_gpu.sh ├── README.md ├── binarize.sh ├── distiller.py ├── grouped_batch_sampler.py ├── lm_seqs_dataset.py ├── requirements.txt ├── scripts │ ├── binarized_data.py │ ├── extract.py │ ├── extract_distilbert.py │ ├── token_counts.py │ └── tokenization_kobert.py ├── token_count.sh ├── train.py ├── train_single_gpu_3_layer.sh ├── training_configs │ ├── distilbert-base-uncased.json │ ├── distilgpt2.json │ ├── distilkobert_1_layer.json │ ├── distilkobert_3_layer.json │ └── distilkobert_6_layer.json └── utils.py ├── requirements.txt └── tokenization_kobert.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/README.md -------------------------------------------------------------------------------- /distillation/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/.gitignore -------------------------------------------------------------------------------- /distillation/1_layer_train_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/1_layer_train_multi_gpu.sh -------------------------------------------------------------------------------- /distillation/3_layer_train_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/3_layer_train_multi_gpu.sh -------------------------------------------------------------------------------- /distillation/6_layer_train_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/6_layer_train_multi_gpu.sh -------------------------------------------------------------------------------- /distillation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/README.md -------------------------------------------------------------------------------- /distillation/binarize.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/binarize.sh -------------------------------------------------------------------------------- /distillation/distiller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/distiller.py -------------------------------------------------------------------------------- /distillation/grouped_batch_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/grouped_batch_sampler.py -------------------------------------------------------------------------------- /distillation/lm_seqs_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/lm_seqs_dataset.py -------------------------------------------------------------------------------- /distillation/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/requirements.txt -------------------------------------------------------------------------------- /distillation/scripts/binarized_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/scripts/binarized_data.py -------------------------------------------------------------------------------- /distillation/scripts/extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/scripts/extract.py -------------------------------------------------------------------------------- /distillation/scripts/extract_distilbert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/scripts/extract_distilbert.py -------------------------------------------------------------------------------- /distillation/scripts/token_counts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/scripts/token_counts.py -------------------------------------------------------------------------------- /distillation/scripts/tokenization_kobert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/scripts/tokenization_kobert.py -------------------------------------------------------------------------------- /distillation/token_count.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/token_count.sh -------------------------------------------------------------------------------- /distillation/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/train.py -------------------------------------------------------------------------------- /distillation/train_single_gpu_3_layer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/train_single_gpu_3_layer.sh -------------------------------------------------------------------------------- /distillation/training_configs/distilbert-base-uncased.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/training_configs/distilbert-base-uncased.json -------------------------------------------------------------------------------- /distillation/training_configs/distilgpt2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/training_configs/distilgpt2.json -------------------------------------------------------------------------------- /distillation/training_configs/distilkobert_1_layer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/training_configs/distilkobert_1_layer.json -------------------------------------------------------------------------------- /distillation/training_configs/distilkobert_3_layer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/training_configs/distilkobert_3_layer.json -------------------------------------------------------------------------------- /distillation/training_configs/distilkobert_6_layer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/training_configs/distilkobert_6_layer.json -------------------------------------------------------------------------------- /distillation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/distillation/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.1.0 2 | transformers==2.9.1 3 | -------------------------------------------------------------------------------- /tokenization_kobert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monologg/DistilKoBERT/HEAD/tokenization_kobert.py --------------------------------------------------------------------------------