├── .gitignore ├── BENCHMARK.md ├── LICENSE ├── README.md ├── README_cn.md ├── alignment ├── __init__.py ├── args.py ├── dataset.py ├── drgrpo_grader.py ├── evaluate.py ├── grpo.py ├── prompts │ ├── alpaca_sft.prompt │ ├── question_only.prompt │ ├── r1_zero.prompt │ └── zero_shot_system_prompt.prompt ├── r1_prompt.py ├── sft.py ├── sft_tokenizer_saver.py ├── train_rl.py └── util.py ├── bench_mark ├── __init__.py ├── bench_mark_atten.py ├── bench_mark_atten_jit.py ├── bench_mark_ddp.py ├── bench_mark_flash_attention.py └── bench_mark_model.py ├── data_processing ├── __init__.py ├── deduplicate.py ├── harmful_detect.py ├── html_process.py ├── language_identification.py ├── mask_pii.py ├── quality_classfier.py └── quality_filter.py ├── docs ├── 1.md ├── 2.md ├── 3.md ├── 4.md ├── 5-sft.md ├── qwen25-math-gsm8k-rl-finetune.md └── technical_article3.md ├── img ├── loss.png └── lr.png ├── kernel ├── __init__.py ├── flash_attention_mock.py └── flash_attention_triton.py ├── llm ├── __init__.py ├── args.py ├── bpe_tokenizer.py ├── checkpoint.py ├── generating.py ├── training.py └── transformer.py ├── parallel ├── __init__.py ├── ddp.py └── sharded_optimizer.py ├── pyproject.toml ├── tests ├── __init__.py ├── _snapshots │ ├── test_4d_scaled_dot_product_attention.npz │ ├── test_adamw.npz │ ├── test_compute_entropy.npz │ ├── test_compute_group_normalized_rewards_no_normalize_by_std.npz │ ├── test_compute_group_normalized_rewards_normalize_by_std.npz │ ├── test_compute_grpo_clip_loss_large_cliprange.npz │ ├── test_compute_grpo_clip_loss_small_cliprange.npz │ ├── test_compute_naive_policy_gradient_loss.npz │ ├── test_compute_policy_gradient_loss_grpo_clip.npz │ ├── test_compute_policy_gradient_loss_no_baseline.npz │ ├── test_compute_policy_gradient_loss_reinforce_with_baseline.npz │ ├── test_embedding.npz │ ├── test_get_response_log_probs.npz │ ├── test_grpo_microbatch_train_step_grpo_clip.npz │ ├── test_grpo_microbatch_train_step_grpo_clip_10_steps.npz │ ├── test_linear.npz │ ├── test_masked_mean_dim0.npz │ ├── test_masked_mean_dim1.npz │ ├── test_masked_mean_dimNone.npz │ ├── test_masked_mean_dimlast.npz │ ├── test_masked_normalize_dim0.npz │ ├── test_masked_normalize_dim1.npz │ ├── test_masked_normalize_dimNone.npz │ ├── test_masked_normalize_dimlast.npz │ ├── test_multihead_self_attention.npz │ ├── test_multihead_self_attention_with_rope.npz │ ├── test_positionwise_feedforward.npz │ ├── test_rmsnorm.npz │ ├── test_rope.npz │ ├── test_scaled_dot_product_attention.npz │ ├── test_sft_microbatch_train_step.npz │ ├── test_sft_microbatch_train_step_10_steps.npz │ ├── test_sft_microbatch_train_step_normalize.npz │ ├── test_swiglu.npz │ ├── test_tokenize_prompt_and_output.npz │ ├── test_train_bpe_special_tokens.pkl │ ├── test_transformer_block.npz │ ├── test_transformer_lm.npz │ └── test_transformer_lm_truncated_input.npz ├── adapters.py ├── common.py ├── conftest.py ├── fixtures │ ├── Meta-Llama-3-8B │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ └── tokenizer_config.json │ ├── address.txt │ ├── corpus.en │ ├── ddp_test_data.pt │ ├── ddp_test_labels.pt │ ├── documents_line_deduplicated │ │ ├── doc1.txt │ │ ├── doc2.txt │ │ ├── doc3.txt │ │ ├── doc4.txt │ │ └── doc5.txt │ ├── documents_with_fuzzy_duplicates │ │ ├── pytorch_license.txt │ │ ├── rails_mit_license.txt │ │ └── react_mit_license.txt │ ├── documents_with_line_duplicates │ │ ├── doc1.txt │ │ ├── doc2.txt │ │ ├── doc3.txt │ │ ├── doc4.txt │ │ └── doc5.txt │ ├── german.txt │ ├── gpt2_merges.txt │ ├── gpt2_vocab.json │ ├── high_quality_wiki_reference.txt │ ├── low_quality_cc.txt │ ├── moby.html │ ├── moby_extracted.txt │ ├── sft_sample.jsonl │ ├── special_token_double_newlines_non_whitespace.txt │ ├── special_token_trailing_newlines.txt │ ├── tiny-gpt2-ref │ │ ├── config.json │ │ ├── generation_config.json │ │ └── model.safetensors │ ├── tiny-gpt2 │ │ ├── config.json │ │ ├── generation_config.json │ │ └── model.safetensors │ ├── tinystories_sample.txt │ ├── tinystories_sample_5M.txt │ ├── tokenized_sft_sample.json │ ├── train-bpe-reference-merges.txt │ ├── train-bpe-reference-vocab.json │ └── ts_tests │ │ ├── model.pt │ │ └── model_config.json ├── test_attention.py ├── test_data.py ├── test_ddp.py ├── test_ddp_individual_parameters.py ├── test_deduplication.py ├── test_dpo.py ├── test_extract.py ├── test_generating.py ├── test_grpo.py ├── test_langid.py ├── test_metrics.py ├── test_model.py ├── test_nn_utils.py ├── test_optimizer.py ├── test_pii.py ├── test_quality.py ├── test_serialization.py ├── test_sft.py ├── test_sharded_optimizer.py ├── test_tokenizer.py ├── test_toxicity.py └── test_train_bpe.py └── uv.lock /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/.gitignore -------------------------------------------------------------------------------- /BENCHMARK.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/BENCHMARK.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/README.md -------------------------------------------------------------------------------- /README_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/README_cn.md -------------------------------------------------------------------------------- /alignment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/__init__.py -------------------------------------------------------------------------------- /alignment/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/args.py -------------------------------------------------------------------------------- /alignment/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/dataset.py -------------------------------------------------------------------------------- /alignment/drgrpo_grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/drgrpo_grader.py -------------------------------------------------------------------------------- /alignment/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/evaluate.py -------------------------------------------------------------------------------- /alignment/grpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/grpo.py -------------------------------------------------------------------------------- /alignment/prompts/alpaca_sft.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/prompts/alpaca_sft.prompt -------------------------------------------------------------------------------- /alignment/prompts/question_only.prompt: -------------------------------------------------------------------------------- 1 | {question} -------------------------------------------------------------------------------- /alignment/prompts/r1_zero.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/prompts/r1_zero.prompt -------------------------------------------------------------------------------- /alignment/prompts/zero_shot_system_prompt.prompt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/prompts/zero_shot_system_prompt.prompt -------------------------------------------------------------------------------- /alignment/r1_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/r1_prompt.py -------------------------------------------------------------------------------- /alignment/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/sft.py -------------------------------------------------------------------------------- /alignment/sft_tokenizer_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/sft_tokenizer_saver.py -------------------------------------------------------------------------------- /alignment/train_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/train_rl.py -------------------------------------------------------------------------------- /alignment/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/alignment/util.py -------------------------------------------------------------------------------- /bench_mark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bench_mark/bench_mark_atten.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/bench_mark/bench_mark_atten.py -------------------------------------------------------------------------------- /bench_mark/bench_mark_atten_jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/bench_mark/bench_mark_atten_jit.py -------------------------------------------------------------------------------- /bench_mark/bench_mark_ddp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/bench_mark/bench_mark_ddp.py -------------------------------------------------------------------------------- /bench_mark/bench_mark_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/bench_mark/bench_mark_flash_attention.py -------------------------------------------------------------------------------- /bench_mark/bench_mark_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/bench_mark/bench_mark_model.py -------------------------------------------------------------------------------- /data_processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/data_processing/__init__.py -------------------------------------------------------------------------------- /data_processing/deduplicate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/data_processing/deduplicate.py -------------------------------------------------------------------------------- /data_processing/harmful_detect.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/data_processing/harmful_detect.py -------------------------------------------------------------------------------- /data_processing/html_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/data_processing/html_process.py -------------------------------------------------------------------------------- /data_processing/language_identification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/data_processing/language_identification.py -------------------------------------------------------------------------------- /data_processing/mask_pii.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/data_processing/mask_pii.py -------------------------------------------------------------------------------- /data_processing/quality_classfier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/data_processing/quality_classfier.py -------------------------------------------------------------------------------- /data_processing/quality_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/data_processing/quality_filter.py -------------------------------------------------------------------------------- /docs/1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/docs/1.md -------------------------------------------------------------------------------- /docs/2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/docs/2.md -------------------------------------------------------------------------------- /docs/3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/docs/3.md -------------------------------------------------------------------------------- /docs/4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/docs/4.md -------------------------------------------------------------------------------- /docs/5-sft.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/docs/5-sft.md -------------------------------------------------------------------------------- /docs/qwen25-math-gsm8k-rl-finetune.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/docs/qwen25-math-gsm8k-rl-finetune.md -------------------------------------------------------------------------------- /docs/technical_article3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/docs/technical_article3.md -------------------------------------------------------------------------------- /img/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/img/loss.png -------------------------------------------------------------------------------- /img/lr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/img/lr.png -------------------------------------------------------------------------------- /kernel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/kernel/__init__.py -------------------------------------------------------------------------------- /kernel/flash_attention_mock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/kernel/flash_attention_mock.py -------------------------------------------------------------------------------- /kernel/flash_attention_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/kernel/flash_attention_triton.py -------------------------------------------------------------------------------- /llm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/llm/__init__.py -------------------------------------------------------------------------------- /llm/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/llm/args.py -------------------------------------------------------------------------------- /llm/bpe_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/llm/bpe_tokenizer.py -------------------------------------------------------------------------------- /llm/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/llm/checkpoint.py -------------------------------------------------------------------------------- /llm/generating.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/llm/generating.py -------------------------------------------------------------------------------- /llm/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/llm/training.py -------------------------------------------------------------------------------- /llm/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/llm/transformer.py -------------------------------------------------------------------------------- /parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/parallel/__init__.py -------------------------------------------------------------------------------- /parallel/ddp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/parallel/ddp.py -------------------------------------------------------------------------------- /parallel/sharded_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/parallel/sharded_optimizer.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/pyproject.toml -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/_snapshots/test_4d_scaled_dot_product_attention.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_4d_scaled_dot_product_attention.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_adamw.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_adamw.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_compute_entropy.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_compute_entropy.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_compute_group_normalized_rewards_no_normalize_by_std.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_compute_group_normalized_rewards_no_normalize_by_std.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_compute_group_normalized_rewards_normalize_by_std.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_compute_group_normalized_rewards_normalize_by_std.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_compute_grpo_clip_loss_large_cliprange.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_compute_grpo_clip_loss_large_cliprange.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_compute_grpo_clip_loss_small_cliprange.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_compute_grpo_clip_loss_small_cliprange.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_compute_naive_policy_gradient_loss.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_compute_naive_policy_gradient_loss.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_compute_policy_gradient_loss_grpo_clip.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_compute_policy_gradient_loss_grpo_clip.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_compute_policy_gradient_loss_no_baseline.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_compute_policy_gradient_loss_no_baseline.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_compute_policy_gradient_loss_reinforce_with_baseline.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_compute_policy_gradient_loss_reinforce_with_baseline.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_embedding.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_embedding.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_get_response_log_probs.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_get_response_log_probs.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_grpo_microbatch_train_step_grpo_clip.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_grpo_microbatch_train_step_grpo_clip.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_grpo_microbatch_train_step_grpo_clip_10_steps.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_grpo_microbatch_train_step_grpo_clip_10_steps.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_linear.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_linear.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_masked_mean_dim0.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_masked_mean_dim0.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_masked_mean_dim1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_masked_mean_dim1.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_masked_mean_dimNone.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_masked_mean_dimNone.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_masked_mean_dimlast.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_masked_mean_dimlast.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_masked_normalize_dim0.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_masked_normalize_dim0.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_masked_normalize_dim1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_masked_normalize_dim1.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_masked_normalize_dimNone.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_masked_normalize_dimNone.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_masked_normalize_dimlast.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_masked_normalize_dimlast.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_multihead_self_attention.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_multihead_self_attention.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_multihead_self_attention_with_rope.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_multihead_self_attention_with_rope.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_positionwise_feedforward.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_positionwise_feedforward.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_rmsnorm.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_rmsnorm.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_rope.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_rope.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_scaled_dot_product_attention.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_scaled_dot_product_attention.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_sft_microbatch_train_step.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_sft_microbatch_train_step.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_sft_microbatch_train_step_10_steps.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_sft_microbatch_train_step_10_steps.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_sft_microbatch_train_step_normalize.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_sft_microbatch_train_step_normalize.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_swiglu.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_swiglu.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_tokenize_prompt_and_output.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_tokenize_prompt_and_output.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_train_bpe_special_tokens.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_train_bpe_special_tokens.pkl -------------------------------------------------------------------------------- /tests/_snapshots/test_transformer_block.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_transformer_block.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_transformer_lm.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_transformer_lm.npz -------------------------------------------------------------------------------- /tests/_snapshots/test_transformer_lm_truncated_input.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/_snapshots/test_transformer_lm_truncated_input.npz -------------------------------------------------------------------------------- /tests/adapters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/adapters.py -------------------------------------------------------------------------------- /tests/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/common.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/fixtures/Meta-Llama-3-8B/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/Meta-Llama-3-8B/special_tokens_map.json -------------------------------------------------------------------------------- /tests/fixtures/Meta-Llama-3-8B/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/Meta-Llama-3-8B/tokenizer.json -------------------------------------------------------------------------------- /tests/fixtures/Meta-Llama-3-8B/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/Meta-Llama-3-8B/tokenizer_config.json -------------------------------------------------------------------------------- /tests/fixtures/address.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/address.txt -------------------------------------------------------------------------------- /tests/fixtures/corpus.en: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/corpus.en -------------------------------------------------------------------------------- /tests/fixtures/ddp_test_data.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/ddp_test_data.pt -------------------------------------------------------------------------------- /tests/fixtures/ddp_test_labels.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/ddp_test_labels.pt -------------------------------------------------------------------------------- /tests/fixtures/documents_line_deduplicated/doc1.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/documents_line_deduplicated/doc2.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/documents_line_deduplicated/doc3.txt: -------------------------------------------------------------------------------- 1 | - back 2 | 3 | -------------------------------------------------------------------------------- /tests/fixtures/documents_line_deduplicated/doc4.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/fixtures/documents_line_deduplicated/doc5.txt: -------------------------------------------------------------------------------- 1 | Document 5 doesn't have any line-level duplicates. 2 | -------------------------------------------------------------------------------- /tests/fixtures/documents_with_fuzzy_duplicates/pytorch_license.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/documents_with_fuzzy_duplicates/pytorch_license.txt -------------------------------------------------------------------------------- /tests/fixtures/documents_with_fuzzy_duplicates/rails_mit_license.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/documents_with_fuzzy_duplicates/rails_mit_license.txt -------------------------------------------------------------------------------- /tests/fixtures/documents_with_fuzzy_duplicates/react_mit_license.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/documents_with_fuzzy_duplicates/react_mit_license.txt -------------------------------------------------------------------------------- /tests/fixtures/documents_with_line_duplicates/doc1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/documents_with_line_duplicates/doc1.txt -------------------------------------------------------------------------------- /tests/fixtures/documents_with_line_duplicates/doc2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/documents_with_line_duplicates/doc2.txt -------------------------------------------------------------------------------- /tests/fixtures/documents_with_line_duplicates/doc3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/documents_with_line_duplicates/doc3.txt -------------------------------------------------------------------------------- /tests/fixtures/documents_with_line_duplicates/doc4.txt: -------------------------------------------------------------------------------- 1 | - home 2 | - menu 3 | 4 | -------------------------------------------------------------------------------- /tests/fixtures/documents_with_line_duplicates/doc5.txt: -------------------------------------------------------------------------------- 1 | Document 5 doesn't have any line-level duplicates. 2 | -------------------------------------------------------------------------------- /tests/fixtures/german.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/german.txt -------------------------------------------------------------------------------- /tests/fixtures/gpt2_merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/gpt2_merges.txt -------------------------------------------------------------------------------- /tests/fixtures/gpt2_vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/gpt2_vocab.json -------------------------------------------------------------------------------- /tests/fixtures/high_quality_wiki_reference.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/high_quality_wiki_reference.txt -------------------------------------------------------------------------------- /tests/fixtures/low_quality_cc.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/low_quality_cc.txt -------------------------------------------------------------------------------- /tests/fixtures/moby.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/moby.html -------------------------------------------------------------------------------- /tests/fixtures/moby_extracted.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/moby_extracted.txt -------------------------------------------------------------------------------- /tests/fixtures/sft_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/sft_sample.jsonl -------------------------------------------------------------------------------- /tests/fixtures/special_token_double_newlines_non_whitespace.txt: -------------------------------------------------------------------------------- 1 | <|endoftext|> 2 | 3 | testing! -------------------------------------------------------------------------------- /tests/fixtures/special_token_trailing_newlines.txt: -------------------------------------------------------------------------------- 1 | <|endoftext|> 2 | 3 | -------------------------------------------------------------------------------- /tests/fixtures/tiny-gpt2-ref/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/tiny-gpt2-ref/config.json -------------------------------------------------------------------------------- /tests/fixtures/tiny-gpt2-ref/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/tiny-gpt2-ref/generation_config.json -------------------------------------------------------------------------------- /tests/fixtures/tiny-gpt2-ref/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/tiny-gpt2-ref/model.safetensors -------------------------------------------------------------------------------- /tests/fixtures/tiny-gpt2/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/tiny-gpt2/config.json -------------------------------------------------------------------------------- /tests/fixtures/tiny-gpt2/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/tiny-gpt2/generation_config.json -------------------------------------------------------------------------------- /tests/fixtures/tiny-gpt2/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/tiny-gpt2/model.safetensors -------------------------------------------------------------------------------- /tests/fixtures/tinystories_sample.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/tinystories_sample.txt -------------------------------------------------------------------------------- /tests/fixtures/tinystories_sample_5M.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/tinystories_sample_5M.txt -------------------------------------------------------------------------------- /tests/fixtures/tokenized_sft_sample.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/tokenized_sft_sample.json -------------------------------------------------------------------------------- /tests/fixtures/train-bpe-reference-merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/train-bpe-reference-merges.txt -------------------------------------------------------------------------------- /tests/fixtures/train-bpe-reference-vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/train-bpe-reference-vocab.json -------------------------------------------------------------------------------- /tests/fixtures/ts_tests/model.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/ts_tests/model.pt -------------------------------------------------------------------------------- /tests/fixtures/ts_tests/model_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/fixtures/ts_tests/model_config.json -------------------------------------------------------------------------------- /tests/test_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_attention.py -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_data.py -------------------------------------------------------------------------------- /tests/test_ddp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_ddp.py -------------------------------------------------------------------------------- /tests/test_ddp_individual_parameters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_ddp_individual_parameters.py -------------------------------------------------------------------------------- /tests/test_deduplication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_deduplication.py -------------------------------------------------------------------------------- /tests/test_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_dpo.py -------------------------------------------------------------------------------- /tests/test_extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_extract.py -------------------------------------------------------------------------------- /tests/test_generating.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_generating.py -------------------------------------------------------------------------------- /tests/test_grpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_grpo.py -------------------------------------------------------------------------------- /tests/test_langid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_langid.py -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_metrics.py -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_model.py -------------------------------------------------------------------------------- /tests/test_nn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_nn_utils.py -------------------------------------------------------------------------------- /tests/test_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_optimizer.py -------------------------------------------------------------------------------- /tests/test_pii.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_pii.py -------------------------------------------------------------------------------- /tests/test_quality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_quality.py -------------------------------------------------------------------------------- /tests/test_serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_serialization.py -------------------------------------------------------------------------------- /tests/test_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_sft.py -------------------------------------------------------------------------------- /tests/test_sharded_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_sharded_optimizer.py -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_tokenizer.py -------------------------------------------------------------------------------- /tests/test_toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_toxicity.py -------------------------------------------------------------------------------- /tests/test_train_bpe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/tests/test_train_bpe.py -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangpin/llm-from-scratch/HEAD/uv.lock --------------------------------------------------------------------------------