├── .gitignore ├── Deepdive-llama3-from-scratch-en.ipynb ├── Deepdive-llama3-from-scratch-zh.ipynb ├── LICENSE ├── README.md ├── README_zh.md ├── images ├── 42.png ├── a10.png ├── afterattention.png ├── archi.png ├── attention.png ├── embeddings.png ├── finallayer.png ├── freq_cis.png ├── god.png ├── heads.png ├── implllama3_30_0.png ├── implllama3_39_0.png ├── implllama3_41_0.png ├── implllama3_42_0.png ├── implllama3_50_0.png ├── implllama3_52_0.png ├── implllama3_54_0.png ├── karpathyminbpe.png ├── keys.png ├── keys0.png ├── last_norm.png ├── logo.png ├── mask.png ├── model.png ├── norm.png ├── norm_after.png ├── output_47_1.png ├── output_65_0.png ├── output_67_0.png ├── output_69_0.png ├── pmatrix.png ├── q_per_token.png ├── qkmatmul.png ├── qkv.png ├── qsplit.png ├── rms.png ├── rope.png ├── ropesplit.png ├── softmax.png ├── stacked.png ├── swiglu.png ├── tokens.png ├── v0.png ├── value.png └── weightmatrix.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | 3 | !/.gitignore 4 | 5 | /Meta-Llama-3-8B/ -------------------------------------------------------------------------------- /Deepdive-llama3-from-scratch-en.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/Deepdive-llama3-from-scratch-en.ipynb -------------------------------------------------------------------------------- /Deepdive-llama3-from-scratch-zh.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/Deepdive-llama3-from-scratch-zh.ipynb -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/README.md -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/README_zh.md -------------------------------------------------------------------------------- /images/42.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/42.png -------------------------------------------------------------------------------- /images/a10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/a10.png -------------------------------------------------------------------------------- /images/afterattention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/afterattention.png -------------------------------------------------------------------------------- /images/archi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/archi.png -------------------------------------------------------------------------------- /images/attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/attention.png -------------------------------------------------------------------------------- /images/embeddings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/embeddings.png -------------------------------------------------------------------------------- /images/finallayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/finallayer.png -------------------------------------------------------------------------------- /images/freq_cis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/freq_cis.png -------------------------------------------------------------------------------- /images/god.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/god.png -------------------------------------------------------------------------------- /images/heads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/heads.png -------------------------------------------------------------------------------- /images/implllama3_30_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/implllama3_30_0.png -------------------------------------------------------------------------------- /images/implllama3_39_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/implllama3_39_0.png -------------------------------------------------------------------------------- /images/implllama3_41_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/implllama3_41_0.png -------------------------------------------------------------------------------- /images/implllama3_42_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/implllama3_42_0.png -------------------------------------------------------------------------------- /images/implllama3_50_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/implllama3_50_0.png -------------------------------------------------------------------------------- /images/implllama3_52_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/implllama3_52_0.png -------------------------------------------------------------------------------- /images/implllama3_54_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/implllama3_54_0.png -------------------------------------------------------------------------------- /images/karpathyminbpe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/karpathyminbpe.png -------------------------------------------------------------------------------- /images/keys.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/keys.png -------------------------------------------------------------------------------- /images/keys0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/keys0.png -------------------------------------------------------------------------------- /images/last_norm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/last_norm.png -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/logo.png -------------------------------------------------------------------------------- /images/mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/mask.png -------------------------------------------------------------------------------- /images/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/model.png -------------------------------------------------------------------------------- /images/norm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/norm.png -------------------------------------------------------------------------------- /images/norm_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/norm_after.png -------------------------------------------------------------------------------- /images/output_47_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/output_47_1.png -------------------------------------------------------------------------------- /images/output_65_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/output_65_0.png -------------------------------------------------------------------------------- /images/output_67_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/output_67_0.png -------------------------------------------------------------------------------- /images/output_69_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/output_69_0.png -------------------------------------------------------------------------------- /images/pmatrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/pmatrix.png -------------------------------------------------------------------------------- /images/q_per_token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/q_per_token.png -------------------------------------------------------------------------------- /images/qkmatmul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/qkmatmul.png -------------------------------------------------------------------------------- /images/qkv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/qkv.png -------------------------------------------------------------------------------- /images/qsplit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/qsplit.png -------------------------------------------------------------------------------- /images/rms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/rms.png -------------------------------------------------------------------------------- /images/rope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/rope.png -------------------------------------------------------------------------------- /images/ropesplit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/ropesplit.png -------------------------------------------------------------------------------- /images/softmax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/softmax.png -------------------------------------------------------------------------------- /images/stacked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/stacked.png -------------------------------------------------------------------------------- /images/swiglu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/swiglu.png -------------------------------------------------------------------------------- /images/tokens.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/tokens.png -------------------------------------------------------------------------------- /images/v0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/v0.png -------------------------------------------------------------------------------- /images/value.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/value.png -------------------------------------------------------------------------------- /images/weightmatrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/images/weightmatrix.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealoliver/Deepdive-llama3-from-scratch/HEAD/requirements.txt --------------------------------------------------------------------------------