├── .gitignore ├── LICENSE ├── README.md ├── docs └── flash-stu.webp ├── flash_stu ├── __init__.py ├── config.py ├── layers │ ├── __init__.py │ ├── attention_layer.py │ └── stu_layer.py ├── model.py ├── modules │ ├── __init__.py │ ├── attention.py │ ├── stu.py │ └── swiglu.py └── utils │ ├── __init__.py │ ├── numerics.py │ ├── stu_utils.py │ └── training.py ├── pyproject.toml └── training ├── config.json ├── data.py ├── dataloader.py ├── distributed.py ├── example.py └── job.slurm /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/README.md -------------------------------------------------------------------------------- /docs/flash-stu.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/docs/flash-stu.webp -------------------------------------------------------------------------------- /flash_stu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/__init__.py -------------------------------------------------------------------------------- /flash_stu/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/config.py -------------------------------------------------------------------------------- /flash_stu/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_stu/layers/attention_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/layers/attention_layer.py -------------------------------------------------------------------------------- /flash_stu/layers/stu_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/layers/stu_layer.py -------------------------------------------------------------------------------- /flash_stu/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/model.py -------------------------------------------------------------------------------- /flash_stu/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_stu/modules/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/modules/attention.py -------------------------------------------------------------------------------- /flash_stu/modules/stu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/modules/stu.py -------------------------------------------------------------------------------- /flash_stu/modules/swiglu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/modules/swiglu.py -------------------------------------------------------------------------------- /flash_stu/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_stu/utils/numerics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/utils/numerics.py -------------------------------------------------------------------------------- /flash_stu/utils/stu_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/utils/stu_utils.py -------------------------------------------------------------------------------- /flash_stu/utils/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/flash_stu/utils/training.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/pyproject.toml -------------------------------------------------------------------------------- /training/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/training/config.json -------------------------------------------------------------------------------- /training/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/training/data.py -------------------------------------------------------------------------------- /training/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/training/dataloader.py -------------------------------------------------------------------------------- /training/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/training/distributed.py -------------------------------------------------------------------------------- /training/example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/training/example.py -------------------------------------------------------------------------------- /training/job.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hazan-lab/flash-stu/HEAD/training/job.slurm --------------------------------------------------------------------------------