├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── images └── wd_summary_slide.png ├── large_language_models ├── config │ ├── eval_gpt2.py │ ├── eval_gpt2_large.py │ ├── eval_gpt2_medium.py │ ├── eval_gpt2_xl.py │ ├── finetune_shakespeare.py │ ├── train_gpt2_micro.py │ ├── train_gpt2_small.py │ ├── train_gpt2_small_block256.py │ └── train_shakespeare_char.py ├── configurator.py ├── data │ ├── openwebtext │ │ ├── prepare.py │ │ └── readme.md │ ├── shakespeare │ │ ├── prepare.py │ │ └── readme.md │ └── shakespeare_char │ │ ├── prepare.py │ │ └── readme.md ├── model.py └── train.py └── overparameterized_nets ├── .gitignore ├── configs ├── __init__.py └── config.py ├── data └── get_dataset.py ├── environment.yml ├── exp_utils ├── setup_exp.py └── utils.py ├── models ├── get_models.py └── get_models_scale_inv.py ├── notebooks └── plots.ipynb ├── traceh.py └── train.py /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/README.md -------------------------------------------------------------------------------- /images/wd_summary_slide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/images/wd_summary_slide.png -------------------------------------------------------------------------------- /large_language_models/config/eval_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/config/eval_gpt2.py -------------------------------------------------------------------------------- /large_language_models/config/eval_gpt2_large.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/config/eval_gpt2_large.py -------------------------------------------------------------------------------- /large_language_models/config/eval_gpt2_medium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/config/eval_gpt2_medium.py -------------------------------------------------------------------------------- /large_language_models/config/eval_gpt2_xl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/config/eval_gpt2_xl.py -------------------------------------------------------------------------------- /large_language_models/config/finetune_shakespeare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/config/finetune_shakespeare.py -------------------------------------------------------------------------------- /large_language_models/config/train_gpt2_micro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/config/train_gpt2_micro.py -------------------------------------------------------------------------------- /large_language_models/config/train_gpt2_small.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/config/train_gpt2_small.py -------------------------------------------------------------------------------- /large_language_models/config/train_gpt2_small_block256.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/config/train_gpt2_small_block256.py -------------------------------------------------------------------------------- /large_language_models/config/train_shakespeare_char.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/config/train_shakespeare_char.py -------------------------------------------------------------------------------- /large_language_models/configurator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/configurator.py -------------------------------------------------------------------------------- /large_language_models/data/openwebtext/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/data/openwebtext/prepare.py -------------------------------------------------------------------------------- /large_language_models/data/openwebtext/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/data/openwebtext/readme.md -------------------------------------------------------------------------------- /large_language_models/data/shakespeare/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/data/shakespeare/prepare.py -------------------------------------------------------------------------------- /large_language_models/data/shakespeare/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/data/shakespeare/readme.md -------------------------------------------------------------------------------- /large_language_models/data/shakespeare_char/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/data/shakespeare_char/prepare.py -------------------------------------------------------------------------------- /large_language_models/data/shakespeare_char/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/data/shakespeare_char/readme.md -------------------------------------------------------------------------------- /large_language_models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/model.py -------------------------------------------------------------------------------- /large_language_models/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/large_language_models/train.py -------------------------------------------------------------------------------- /overparameterized_nets/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/.gitignore -------------------------------------------------------------------------------- /overparameterized_nets/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /overparameterized_nets/configs/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/configs/config.py -------------------------------------------------------------------------------- /overparameterized_nets/data/get_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/data/get_dataset.py -------------------------------------------------------------------------------- /overparameterized_nets/environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/environment.yml -------------------------------------------------------------------------------- /overparameterized_nets/exp_utils/setup_exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/exp_utils/setup_exp.py -------------------------------------------------------------------------------- /overparameterized_nets/exp_utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/exp_utils/utils.py -------------------------------------------------------------------------------- /overparameterized_nets/models/get_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/models/get_models.py -------------------------------------------------------------------------------- /overparameterized_nets/models/get_models_scale_inv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/models/get_models_scale_inv.py -------------------------------------------------------------------------------- /overparameterized_nets/notebooks/plots.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/notebooks/plots.ipynb -------------------------------------------------------------------------------- /overparameterized_nets/traceh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/traceh.py -------------------------------------------------------------------------------- /overparameterized_nets/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tml-epfl/why-weight-decay/HEAD/overparameterized_nets/train.py --------------------------------------------------------------------------------