├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── LICENSE ├── README.md ├── env.yml ├── images ├── ELECTRA Model.png ├── Electra RC2020 - Learning - Original.png └── Electra RC2020 - Learning.png ├── models └── ByteLevelBPETokenizer-vocab_size=30522-min_frequency=2 ├── requirements.txt ├── run_downstream.py ├── run_glue.py ├── run_pretraining.py ├── src ├── data │ ├── __init__.py │ ├── make_dataset_document_electra.py │ └── utils.py ├── features │ ├── __init__.py │ └── features_document_electra.py ├── models │ ├── __init__.py │ ├── metrics.py │ ├── modeling_document_electra.py │ ├── optimizers.py │ ├── train_model_downstream.py │ ├── train_model_pretraining.py │ ├── train_model_tokenizer.py │ └── utils.py └── visualization │ ├── __init__.py │ ├── tensorboard_utils.py │ └── wandb_callbacks.py └── train_tokenizer.py /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/.github/workflows/python-package.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/README.md -------------------------------------------------------------------------------- /env.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/env.yml -------------------------------------------------------------------------------- /images/ELECTRA Model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/images/ELECTRA Model.png -------------------------------------------------------------------------------- /images/Electra RC2020 - Learning - Original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/images/Electra RC2020 - Learning - Original.png -------------------------------------------------------------------------------- /images/Electra RC2020 - Learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/images/Electra RC2020 - Learning.png -------------------------------------------------------------------------------- /models/ByteLevelBPETokenizer-vocab_size=30522-min_frequency=2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/models/ByteLevelBPETokenizer-vocab_size=30522-min_frequency=2 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/requirements.txt -------------------------------------------------------------------------------- /run_downstream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/run_downstream.py -------------------------------------------------------------------------------- /run_glue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/run_glue.py -------------------------------------------------------------------------------- /run_pretraining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/run_pretraining.py -------------------------------------------------------------------------------- /src/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/data/__init__.py -------------------------------------------------------------------------------- /src/data/make_dataset_document_electra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/data/make_dataset_document_electra.py -------------------------------------------------------------------------------- /src/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/data/utils.py -------------------------------------------------------------------------------- /src/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/features/__init__.py -------------------------------------------------------------------------------- /src/features/features_document_electra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/features/features_document_electra.py -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/models/metrics.py -------------------------------------------------------------------------------- /src/models/modeling_document_electra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/models/modeling_document_electra.py -------------------------------------------------------------------------------- /src/models/optimizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/models/optimizers.py -------------------------------------------------------------------------------- /src/models/train_model_downstream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/models/train_model_downstream.py -------------------------------------------------------------------------------- /src/models/train_model_pretraining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/models/train_model_pretraining.py -------------------------------------------------------------------------------- /src/models/train_model_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/models/train_model_tokenizer.py -------------------------------------------------------------------------------- /src/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/models/utils.py -------------------------------------------------------------------------------- /src/visualization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/visualization/__init__.py -------------------------------------------------------------------------------- /src/visualization/tensorboard_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/visualization/tensorboard_utils.py -------------------------------------------------------------------------------- /src/visualization/wandb_callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/src/visualization/wandb_callbacks.py -------------------------------------------------------------------------------- /train_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cccwam/rc2020_electra/HEAD/train_tokenizer.py --------------------------------------------------------------------------------