├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── cs336_basics ├── __init__.py └── pretokenization_example.py ├── cs336_spring2025_assignment1_basics.pdf ├── make_submission.sh ├── pyproject.toml ├── tests ├── __init__.py ├── _snapshots │ ├── test_4d_scaled_dot_product_attention.npz │ ├── test_adamw.npz │ ├── test_embedding.npz │ ├── test_linear.npz │ ├── test_multihead_self_attention.npz │ ├── test_multihead_self_attention_with_rope.npz │ ├── test_positionwise_feedforward.npz │ ├── test_rmsnorm.npz │ ├── test_rope.npz │ ├── test_scaled_dot_product_attention.npz │ ├── test_swiglu.npz │ ├── test_train_bpe_special_tokens.pkl │ ├── test_transformer_block.npz │ ├── test_transformer_lm.npz │ └── test_transformer_lm_truncated_input.npz ├── adapters.py ├── common.py ├── conftest.py ├── fixtures │ ├── address.txt │ ├── corpus.en │ ├── german.txt │ ├── gpt2_merges.txt │ ├── gpt2_vocab.json │ ├── special_token_double_newlines_non_whitespace.txt │ ├── special_token_trailing_newlines.txt │ ├── tinystories_sample.txt │ ├── tinystories_sample_5M.txt │ ├── train-bpe-reference-merges.txt │ ├── train-bpe-reference-vocab.json │ └── ts_tests │ │ ├── model.pt │ │ └── model_config.json ├── test_data.py ├── test_model.py ├── test_nn_utils.py ├── test_optimizer.py ├── test_serialization.py ├── test_tokenizer.py └── test_train_bpe.py └── uv.lock /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/.gitignore -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/README.md -------------------------------------------------------------------------------- /cs336_basics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/cs336_basics/__init__.py -------------------------------------------------------------------------------- /cs336_basics/pretokenization_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/cs336_basics/pretokenization_example.py -------------------------------------------------------------------------------- /cs336_spring2025_assignment1_basics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/cs336_spring2025_assignment1_basics.pdf -------------------------------------------------------------------------------- /make_submission.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/make_submission.sh -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/pyproject.toml -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/_snapshots/test_4d_scaled_dot_product_attention.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_4d_scaled_dot_product_attention.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_adamw.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_adamw.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_embedding.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_embedding.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_linear.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_linear.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_multihead_self_attention.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_multihead_self_attention.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_multihead_self_attention_with_rope.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_multihead_self_attention_with_rope.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_positionwise_feedforward.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_positionwise_feedforward.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_rmsnorm.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_rmsnorm.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_rope.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_rope.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_scaled_dot_product_attention.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_scaled_dot_product_attention.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_swiglu.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_swiglu.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_train_bpe_special_tokens.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_train_bpe_special_tokens.pkl -------------------------------------------------------------------------------- /tests/_snapshots/test_transformer_block.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_transformer_block.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_transformer_lm.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_transformer_lm.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_transformer_lm_truncated_input.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/_snapshots/test_transformer_lm_truncated_input.npz -------------------------------------------------------------------------------- /tests/adapters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/adapters.py -------------------------------------------------------------------------------- /tests/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/common.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/fixtures/address.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/address.txt -------------------------------------------------------------------------------- /tests/fixtures/corpus.en: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/corpus.en -------------------------------------------------------------------------------- /tests/fixtures/german.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/german.txt -------------------------------------------------------------------------------- /tests/fixtures/gpt2_merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/gpt2_merges.txt -------------------------------------------------------------------------------- /tests/fixtures/gpt2_vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/gpt2_vocab.json -------------------------------------------------------------------------------- /tests/fixtures/special_token_double_newlines_non_whitespace.txt: -------------------------------------------------------------------------------- 1 | <|endoftext|> 2 | 3 | testing! -------------------------------------------------------------------------------- /tests/fixtures/special_token_trailing_newlines.txt: -------------------------------------------------------------------------------- 1 | <|endoftext|> 2 | 3 | -------------------------------------------------------------------------------- /tests/fixtures/tinystories_sample.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/tinystories_sample.txt -------------------------------------------------------------------------------- /tests/fixtures/tinystories_sample_5M.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/tinystories_sample_5M.txt -------------------------------------------------------------------------------- /tests/fixtures/train-bpe-reference-merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/train-bpe-reference-merges.txt -------------------------------------------------------------------------------- /tests/fixtures/train-bpe-reference-vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/train-bpe-reference-vocab.json -------------------------------------------------------------------------------- /tests/fixtures/ts_tests/model.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/ts_tests/model.pt -------------------------------------------------------------------------------- /tests/fixtures/ts_tests/model_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/fixtures/ts_tests/model_config.json -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/test_data.py -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/test_model.py -------------------------------------------------------------------------------- /tests/test_nn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/test_nn_utils.py -------------------------------------------------------------------------------- /tests/test_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/test_optimizer.py -------------------------------------------------------------------------------- /tests/test_serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/test_serialization.py -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/test_tokenizer.py -------------------------------------------------------------------------------- /tests/test_train_bpe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/tests/test_train_bpe.py -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs336/assignment1-basics/HEAD/uv.lock --------------------------------------------------------------------------------