├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── condensed └── condensed.js ├── globals.js ├── index.html ├── instructions.js ├── model.js ├── other ├── conversion_scripts │ ├── README.md │ ├── ckpt.pt │ ├── convert_checkpoint_pytorch.py │ ├── convert_pretrained_pytorch.py │ └── sample_shakespeare_ckpt.pt ├── int8-gemm.js ├── misc │ ├── files.png │ └── header.png ├── scratchpad.js ├── test.js └── validation │ ├── README.md │ ├── test │ ├── gpt2medium_validation.json │ └── shakepeare_validation.json │ └── validation.js ├── tokenizer.js ├── visuals.js └── weights ├── better_shakespeare ├── lm_head.weight_gpt.bin ├── params_gpt.json ├── transformer.h.0.attn.c_attn.bias_gpt.bin ├── transformer.h.0.attn.c_attn.weight_gpt.bin ├── transformer.h.0.attn.c_proj.bias_gpt.bin ├── transformer.h.0.attn.c_proj.weight_gpt.bin ├── transformer.h.0.ln_1.bias_gpt.bin ├── transformer.h.0.ln_1.weight_gpt.bin ├── transformer.h.0.ln_2.bias_gpt.bin ├── transformer.h.0.ln_2.weight_gpt.bin ├── transformer.h.0.mlp.c_fc.bias_gpt.bin ├── transformer.h.0.mlp.c_fc.weight_gpt.bin ├── transformer.h.0.mlp.c_proj.bias_gpt.bin ├── transformer.h.0.mlp.c_proj.weight_gpt.bin ├── transformer.h.1.attn.c_attn.bias_gpt.bin ├── transformer.h.1.attn.c_attn.weight_gpt.bin ├── transformer.h.1.attn.c_proj.bias_gpt.bin ├── transformer.h.1.attn.c_proj.weight_gpt.bin ├── transformer.h.1.ln_1.bias_gpt.bin ├── transformer.h.1.ln_1.weight_gpt.bin ├── transformer.h.1.ln_2.bias_gpt.bin ├── transformer.h.1.ln_2.weight_gpt.bin ├── transformer.h.1.mlp.c_fc.bias_gpt.bin ├── transformer.h.1.mlp.c_fc.weight_gpt.bin ├── transformer.h.1.mlp.c_proj.bias_gpt.bin ├── transformer.h.1.mlp.c_proj.weight_gpt.bin ├── transformer.h.2.attn.c_attn.bias_gpt.bin ├── transformer.h.2.attn.c_attn.weight_gpt.bin ├── transformer.h.2.attn.c_proj.bias_gpt.bin ├── transformer.h.2.attn.c_proj.weight_gpt.bin ├── transformer.h.2.ln_1.bias_gpt.bin ├── transformer.h.2.ln_1.weight_gpt.bin ├── transformer.h.2.ln_2.bias_gpt.bin ├── 
transformer.h.2.ln_2.weight_gpt.bin ├── transformer.h.2.mlp.c_fc.bias_gpt.bin ├── transformer.h.2.mlp.c_fc.weight_gpt.bin ├── transformer.h.2.mlp.c_proj.bias_gpt.bin ├── transformer.h.2.mlp.c_proj.weight_gpt.bin ├── transformer.h.3.attn.c_attn.bias_gpt.bin ├── transformer.h.3.attn.c_attn.weight_gpt.bin ├── transformer.h.3.attn.c_proj.bias_gpt.bin ├── transformer.h.3.attn.c_proj.weight_gpt.bin ├── transformer.h.3.ln_1.bias_gpt.bin ├── transformer.h.3.ln_1.weight_gpt.bin ├── transformer.h.3.ln_2.bias_gpt.bin ├── transformer.h.3.ln_2.weight_gpt.bin ├── transformer.h.3.mlp.c_fc.bias_gpt.bin ├── transformer.h.3.mlp.c_fc.weight_gpt.bin ├── transformer.h.3.mlp.c_proj.bias_gpt.bin ├── transformer.h.3.mlp.c_proj.weight_gpt.bin ├── transformer.ln_f.bias_gpt.bin ├── transformer.ln_f.weight_gpt.bin ├── transformer.wpe.weight_gpt.bin └── transformer.wte.weight_gpt.bin ├── gpt2 ├── lm_head.weight_gpt.bin ├── params_gpt.json ├── transformer.h.0.attn.bias_gpt.bin ├── transformer.h.0.attn.c_attn.bias_gpt.bin ├── transformer.h.0.attn.c_attn.weight_gpt.bin ├── transformer.h.0.attn.c_proj.bias_gpt.bin ├── transformer.h.0.attn.c_proj.weight_gpt.bin ├── transformer.h.0.attn.masked_bias_gpt.bin ├── transformer.h.0.ln_1.bias_gpt.bin ├── transformer.h.0.ln_1.weight_gpt.bin ├── transformer.h.0.ln_2.bias_gpt.bin ├── transformer.h.0.ln_2.weight_gpt.bin ├── transformer.h.0.mlp.c_fc.bias_gpt.bin ├── transformer.h.0.mlp.c_fc.weight_gpt.bin ├── transformer.h.0.mlp.c_proj.bias_gpt.bin ├── transformer.h.0.mlp.c_proj.weight_gpt.bin ├── transformer.h.1.attn.bias_gpt.bin ├── transformer.h.1.attn.c_attn.bias_gpt.bin ├── transformer.h.1.attn.c_attn.weight_gpt.bin ├── transformer.h.1.attn.c_proj.bias_gpt.bin ├── transformer.h.1.attn.c_proj.weight_gpt.bin ├── transformer.h.1.attn.masked_bias_gpt.bin ├── transformer.h.1.ln_1.bias_gpt.bin ├── transformer.h.1.ln_1.weight_gpt.bin ├── transformer.h.1.ln_2.bias_gpt.bin ├── transformer.h.1.ln_2.weight_gpt.bin ├── transformer.h.1.mlp.c_fc.bias_gpt.bin ├── 
transformer.h.1.mlp.c_fc.weight_gpt.bin ├── transformer.h.1.mlp.c_proj.bias_gpt.bin ├── transformer.h.1.mlp.c_proj.weight_gpt.bin ├── transformer.h.10.attn.bias_gpt.bin ├── transformer.h.10.attn.c_attn.bias_gpt.bin ├── transformer.h.10.attn.c_attn.weight_gpt.bin ├── transformer.h.10.attn.c_proj.bias_gpt.bin ├── transformer.h.10.attn.c_proj.weight_gpt.bin ├── transformer.h.10.attn.masked_bias_gpt.bin ├── transformer.h.10.ln_1.bias_gpt.bin ├── transformer.h.10.ln_1.weight_gpt.bin ├── transformer.h.10.ln_2.bias_gpt.bin ├── transformer.h.10.ln_2.weight_gpt.bin ├── transformer.h.10.mlp.c_fc.bias_gpt.bin ├── transformer.h.10.mlp.c_fc.weight_gpt.bin ├── transformer.h.10.mlp.c_proj.bias_gpt.bin ├── transformer.h.10.mlp.c_proj.weight_gpt.bin ├── transformer.h.11.attn.bias_gpt.bin ├── transformer.h.11.attn.c_attn.bias_gpt.bin ├── transformer.h.11.attn.c_attn.weight_gpt.bin ├── transformer.h.11.attn.c_proj.bias_gpt.bin ├── transformer.h.11.attn.c_proj.weight_gpt.bin ├── transformer.h.11.attn.masked_bias_gpt.bin ├── transformer.h.11.ln_1.bias_gpt.bin ├── transformer.h.11.ln_1.weight_gpt.bin ├── transformer.h.11.ln_2.bias_gpt.bin ├── transformer.h.11.ln_2.weight_gpt.bin ├── transformer.h.11.mlp.c_fc.bias_gpt.bin ├── transformer.h.11.mlp.c_fc.weight_gpt.bin ├── transformer.h.11.mlp.c_proj.bias_gpt.bin ├── transformer.h.11.mlp.c_proj.weight_gpt.bin ├── transformer.h.2.attn.bias_gpt.bin ├── transformer.h.2.attn.c_attn.bias_gpt.bin ├── transformer.h.2.attn.c_attn.weight_gpt.bin ├── transformer.h.2.attn.c_proj.bias_gpt.bin ├── transformer.h.2.attn.c_proj.weight_gpt.bin ├── transformer.h.2.attn.masked_bias_gpt.bin ├── transformer.h.2.ln_1.bias_gpt.bin ├── transformer.h.2.ln_1.weight_gpt.bin ├── transformer.h.2.ln_2.bias_gpt.bin ├── transformer.h.2.ln_2.weight_gpt.bin ├── transformer.h.2.mlp.c_fc.bias_gpt.bin ├── transformer.h.2.mlp.c_fc.weight_gpt.bin ├── transformer.h.2.mlp.c_proj.bias_gpt.bin ├── transformer.h.2.mlp.c_proj.weight_gpt.bin ├── transformer.h.3.attn.bias_gpt.bin ├── 
transformer.h.3.attn.c_attn.bias_gpt.bin ├── transformer.h.3.attn.c_attn.weight_gpt.bin ├── transformer.h.3.attn.c_proj.bias_gpt.bin ├── transformer.h.3.attn.c_proj.weight_gpt.bin ├── transformer.h.3.attn.masked_bias_gpt.bin ├── transformer.h.3.ln_1.bias_gpt.bin ├── transformer.h.3.ln_1.weight_gpt.bin ├── transformer.h.3.ln_2.bias_gpt.bin ├── transformer.h.3.ln_2.weight_gpt.bin ├── transformer.h.3.mlp.c_fc.bias_gpt.bin ├── transformer.h.3.mlp.c_fc.weight_gpt.bin ├── transformer.h.3.mlp.c_proj.bias_gpt.bin ├── transformer.h.3.mlp.c_proj.weight_gpt.bin ├── transformer.h.4.attn.bias_gpt.bin ├── transformer.h.4.attn.c_attn.bias_gpt.bin ├── transformer.h.4.attn.c_attn.weight_gpt.bin ├── transformer.h.4.attn.c_proj.bias_gpt.bin ├── transformer.h.4.attn.c_proj.weight_gpt.bin ├── transformer.h.4.attn.masked_bias_gpt.bin ├── transformer.h.4.ln_1.bias_gpt.bin ├── transformer.h.4.ln_1.weight_gpt.bin ├── transformer.h.4.ln_2.bias_gpt.bin ├── transformer.h.4.ln_2.weight_gpt.bin ├── transformer.h.4.mlp.c_fc.bias_gpt.bin ├── transformer.h.4.mlp.c_fc.weight_gpt.bin ├── transformer.h.4.mlp.c_proj.bias_gpt.bin ├── transformer.h.4.mlp.c_proj.weight_gpt.bin ├── transformer.h.5.attn.bias_gpt.bin ├── transformer.h.5.attn.c_attn.bias_gpt.bin ├── transformer.h.5.attn.c_attn.weight_gpt.bin ├── transformer.h.5.attn.c_proj.bias_gpt.bin ├── transformer.h.5.attn.c_proj.weight_gpt.bin ├── transformer.h.5.attn.masked_bias_gpt.bin ├── transformer.h.5.ln_1.bias_gpt.bin ├── transformer.h.5.ln_1.weight_gpt.bin ├── transformer.h.5.ln_2.bias_gpt.bin ├── transformer.h.5.ln_2.weight_gpt.bin ├── transformer.h.5.mlp.c_fc.bias_gpt.bin ├── transformer.h.5.mlp.c_fc.weight_gpt.bin ├── transformer.h.5.mlp.c_proj.bias_gpt.bin ├── transformer.h.5.mlp.c_proj.weight_gpt.bin ├── transformer.h.6.attn.bias_gpt.bin ├── transformer.h.6.attn.c_attn.bias_gpt.bin ├── transformer.h.6.attn.c_attn.weight_gpt.bin ├── transformer.h.6.attn.c_proj.bias_gpt.bin ├── transformer.h.6.attn.c_proj.weight_gpt.bin ├── 
transformer.h.6.attn.masked_bias_gpt.bin ├── transformer.h.6.ln_1.bias_gpt.bin ├── transformer.h.6.ln_1.weight_gpt.bin ├── transformer.h.6.ln_2.bias_gpt.bin ├── transformer.h.6.ln_2.weight_gpt.bin ├── transformer.h.6.mlp.c_fc.bias_gpt.bin ├── transformer.h.6.mlp.c_fc.weight_gpt.bin ├── transformer.h.6.mlp.c_proj.bias_gpt.bin ├── transformer.h.6.mlp.c_proj.weight_gpt.bin ├── transformer.h.7.attn.bias_gpt.bin ├── transformer.h.7.attn.c_attn.bias_gpt.bin ├── transformer.h.7.attn.c_attn.weight_gpt.bin ├── transformer.h.7.attn.c_proj.bias_gpt.bin ├── transformer.h.7.attn.c_proj.weight_gpt.bin ├── transformer.h.7.attn.masked_bias_gpt.bin ├── transformer.h.7.ln_1.bias_gpt.bin ├── transformer.h.7.ln_1.weight_gpt.bin ├── transformer.h.7.ln_2.bias_gpt.bin ├── transformer.h.7.ln_2.weight_gpt.bin ├── transformer.h.7.mlp.c_fc.bias_gpt.bin ├── transformer.h.7.mlp.c_fc.weight_gpt.bin ├── transformer.h.7.mlp.c_proj.bias_gpt.bin ├── transformer.h.7.mlp.c_proj.weight_gpt.bin ├── transformer.h.8.attn.bias_gpt.bin ├── transformer.h.8.attn.c_attn.bias_gpt.bin ├── transformer.h.8.attn.c_attn.weight_gpt.bin ├── transformer.h.8.attn.c_proj.bias_gpt.bin ├── transformer.h.8.attn.c_proj.weight_gpt.bin ├── transformer.h.8.attn.masked_bias_gpt.bin ├── transformer.h.8.ln_1.bias_gpt.bin ├── transformer.h.8.ln_1.weight_gpt.bin ├── transformer.h.8.ln_2.bias_gpt.bin ├── transformer.h.8.ln_2.weight_gpt.bin ├── transformer.h.8.mlp.c_fc.bias_gpt.bin ├── transformer.h.8.mlp.c_fc.weight_gpt.bin ├── transformer.h.8.mlp.c_proj.bias_gpt.bin ├── transformer.h.8.mlp.c_proj.weight_gpt.bin ├── transformer.h.9.attn.bias_gpt.bin ├── transformer.h.9.attn.c_attn.bias_gpt.bin ├── transformer.h.9.attn.c_attn.weight_gpt.bin ├── transformer.h.9.attn.c_proj.bias_gpt.bin ├── transformer.h.9.attn.c_proj.weight_gpt.bin ├── transformer.h.9.attn.masked_bias_gpt.bin ├── transformer.h.9.ln_1.bias_gpt.bin ├── transformer.h.9.ln_1.weight_gpt.bin ├── transformer.h.9.ln_2.bias_gpt.bin ├── transformer.h.9.ln_2.weight_gpt.bin ├── 
transformer.h.9.mlp.c_fc.bias_gpt.bin ├── transformer.h.9.mlp.c_fc.weight_gpt.bin ├── transformer.h.9.mlp.c_proj.bias_gpt.bin ├── transformer.h.9.mlp.c_proj.weight_gpt.bin ├── transformer.ln_f.bias_gpt.bin ├── transformer.ln_f.weight_gpt.bin ├── transformer.wpe.weight_gpt.bin └── transformer.wte.weight_gpt.bin └── tokenization ├── gpt_tokens.json ├── simple_tokens.json └── vocab.bpe /.gitattributes: -------------------------------------------------------------------------------- 1 | *.bin filter=lfs diff=lfs merge=lfs -text 2 | *.json filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | weights/large-models -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | IFCOOLTELLME License 2 | 3 | Copyright (c) 2023 Will DePue 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | If this software is used for any purpose that is substantially epic, awesome, or 16 | incredible, notice is required to the Author, reachable at will@depue.net. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WebGPT 2 | 3 | ![webGPT](other/misc/header.png) 4 | 5 | After six years of development, WebGPU is about to launch across most major web browsers. This is massive: web applications now have near-native access to the GPU, with the added capacity of compute shaders. 6 | 7 | WebGPT is a vanilla JS and HTML implementation of a transformer model, intended as a proof-of-concept as well as educational resource. WebGPT has been tested to be working with models up to 500 M parameters, though could likely support far more with further testing/optimization. 8 | 9 | ### Current Stats 10 | 2020 M1 Mac: 3ms/token at 5M parameters with f32 precision. 11 | 2020 M1 Mac: 30ms/token at 117M parameters with f32 precision. 12 | 2020 M1 Mac: 70ms/token at 377M parameters with f32 precision. 13 | 2020 M1 Mac: 120ms/token at 775M parameters with f32 precision. 14 | 1.5B is working but unstable, sitting around 1000ms/token due to inefficiencies. 15 | 16 | ## Running WebGPT 17 | 18 | Running WebGPT is remarkably simple, as it's just a set of HTML + JS files. Since WebGPU is still in the process of being released, you'll need to open with a compatible browser. 
WebGPU is currently available on Chrome v113 but the most straightforward way to ensure proper functionality is to install [Chrome Canary](https://www.google.com/chrome/canary/) or Edge Canary. 19 | 20 | I've included two different models: a toy GPT-Shakespeare model (which is severely undertrained haha) and GPT-2 117M. See main.js for more information on how to run these models. If you want to import custom models, take a look at misc/conversion_scripts. 21 | 22 | If you want to try out WebGPT, visit the demo website here [KMeans.org](https://www.kmeans.org). I'd generally recommend cloning the repo and running locally, just because loading the weights remotely is significantly slower. 23 | Note: **You'll need to use Git LFS** to download the model files, after cloning the repository. 24 | 25 | ![file sizes](other/misc/files.png) 26 | 27 | ## Roadmap / Fixing Stupid Decisions 28 | 29 | - [x] Embeddings / de-embeddings on GPU. 30 | - [x] Initializing pipelines on every step is incredibly inefficient. 31 | - [x] Key-value caching. 32 | - [x] Reuse buffers. 33 | - [x] Kernel shared memory for matmul! 34 | - [x] Destroy buffers after use! 35 | - [x] Create kernel instruction classes + optimize pipeline creation. 36 | - [X] Fuse all kernels. 37 | - [X] Optimize all other kernels. 38 | - [X] Compute pass splitting for larger models _(maxStorageBufferBindingSize)_ 39 | - [ ] Run selection ops on GPU (topk, selection softmax) 40 | - [ ] Attention kernel is optimized for small models, not for large models where each head having its own matmul is more efficient. 41 | - [ ] Investigate why attention cache isn't giving proper speed-ups. 42 | - [ ] Make simple instructional version without special stuff. 43 | - [ ] Optimize workgroup sizes, specifically for single row/col operations. 44 | - [ ] Convert into a package. 45 | - [ ] Write better comments + make YouTube explainer. 
46 | 47 | ## Acknowledgements 48 | 49 | When I started this project I had no idea how transformers worked or how to implement them (or GPUs or matmul kernels or WebGPU or tokenization for that matter), so Andrej Karpathy's series on neural networks and building GPT from scratch were invaluable: [Andrej's Youtube](https://www.youtube.com/@AndrejKarpathy). I've also used some code as well from the nanoGPT repository: [nanoGPT](https://github.com/karpathy/nanoGPT). 50 | 51 | I copied from LatitudeGames' implementation of OpenAI's GPT-3 tokenizer in Javascript: [GPT-3-Encoder](https://github.com/latitudegames/GPT-3-Encoder). 52 | -------------------------------------------------------------------------------- /globals.js: -------------------------------------------------------------------------------- 1 | const FastMatMulBlock = new FastMatMulBlockClass(); 2 | const AttentionBlock = new AttentionBlockClass(); 3 | const ResidualBlock = new ResidualBlockClass(); 4 | const EmbedBlock = new EmbedBlockClass(); 5 | const DeEmbedBlock = new DeEmbedBlockClass(); 6 | const GeluBlock = new GeluBlockClass(); 7 | const LayerNormBlock = new LayerNormBlockClass(); 8 | const SoftmaxBlock = new SoftmaxBlockClass(); 9 | 10 | // Needed for deletion. 
11 | let operations = [FastMatMulBlock, AttentionBlock, ResidualBlock, EmbedBlock, DeEmbedBlock, GeluBlock, LayerNormBlock, SoftmaxBlock]; 12 | 13 | function initializeOperations(device) { 14 | for (const operation of operations) operation.initialize(device); 15 | } 16 | 17 | function destroyOperationBuffers() { 18 | for (const operation of operations) operation.destroyBuffers(); 19 | } 20 | 21 | function clearOperationCache() { 22 | for (const operation of operations) operation.clearBufferCache(); 23 | } 24 | 25 | function destroyOperations() { 26 | for (const operation of operations) operation.destroy(); 27 | } 28 | 29 | const bufferUsageDict = { 30 | copy_from: GPUBufferUsage.COPY_SRC, 31 | copy_to: GPUBufferUsage.COPY_DST, 32 | storage: GPUBufferUsage.STORAGE, 33 | uniform: GPUBufferUsage.UNIFORM, 34 | map_read: GPUBufferUsage.MAP_READ, 35 | }; 36 | 37 | // ---------------- Helper Functions ---------------- 38 | 39 | async function fetchBin(url) { 40 | const response = await fetch(url); 41 | const buffer = await response.arrayBuffer(); 42 | return new Float32Array(buffer); 43 | } 44 | 45 | const wgSize = (dim, size) => Math.min(Math.ceil(dim / size), Infinity); 46 | 47 | function sampleFromDistribution(probs) { 48 | const rand = Math.random(); 49 | let cumulativeProb = 0; 50 | for (let i = 0; i < probs.length; i++) { 51 | cumulativeProb += probs[i]; 52 | if (rand < cumulativeProb) { 53 | return i; 54 | } 55 | } 56 | return probs.length - 1; 57 | } 58 | 59 | function cpuSoftmax(logits, temperature = 1.0) { 60 | const maxLogit = Math.max(...logits); 61 | const expLogits = logits.map((logit) => Math.exp((logit - maxLogit) / temperature)); 62 | const sumExpLogits = expLogits.reduce((a, b) => a + b, 0); 63 | return expLogits.map((expLogit) => expLogit / sumExpLogits); 64 | } 65 | 66 | function selectTopK(probs, top_k) { 67 | const sortedIndices = Array.from(probs) 68 | .map((value, index) => ({ value, index })) 69 | .sort((a, b) => b.value - a.value) 70 | .map(({ 
index }) => index); 71 | const topKIndices = sortedIndices.slice(0, top_k); 72 | const topKProbs = topKIndices.map((index) => probs[index]); 73 | return { topKIndices, topKProbs }; 74 | } 75 | 76 | // ----------------------- Matrix Operations ----------------------- 77 | 78 | const zeros = (dim) => new Float32Array(dim).fill(0); 79 | 80 | function transpose(array, input_rows, input_cols) { 81 | if (array.length !== input_rows * input_cols) { 82 | console.log(array.length, input_rows, input_cols); 83 | throw new Error("Transpose dims failed"); 84 | } 85 | 86 | const transpose = []; 87 | for (let col = 0; col < input_cols; col++) { 88 | for (let row = 0; row < input_rows; row++) { 89 | transpose.push(array[row * input_cols + col]); 90 | } 91 | } 92 | 93 | return new Float32Array(transpose); 94 | } 95 | 96 | function leastPrimeFactor(n, start = 2) { 97 | for (let i = start; i <= Math.sqrt(n); i++) { 98 | if (n % i === 0) return i; 99 | } 100 | return n; 101 | } 102 | 103 | function formatAsMatrix(floatArray, dimA, dimB) { 104 | const resultMatrix = []; 105 | for (let i = 0; i < dimA; i++) { 106 | resultMatrix.push(floatArray.slice(i * dimB, (i + 1) * dimB)); 107 | } 108 | return resultMatrix; 109 | } 110 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | WebGPU GPT Model Demo 5 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |

WebGPU GPT Model Demo

17 |

Checking WebGPU support...

18 |

19 | PS: Loading models is 5x slower on the web rather than running locally. Just clone the repo and open! 20 |

21 | 22 | 23 | 24 | 25 | 26 |

27 | Special models (download required): 28 |

29 | 30 | 31 | 32 | 33 |
34 |
35 | 36 | 37 |

38 | 39 | 40 |

41 | 42 | 43 |

44 | 45 |

46 | 51 |

52 | 53 |

54 |
55 | 175 | 176 | 177 | -------------------------------------------------------------------------------- /model.js: -------------------------------------------------------------------------------- 1 | class GPT { 2 | constructor(folder, type) { 3 | this.folder = folder; 4 | this.tokenizerType = type; 5 | this.initialized = false; 6 | 7 | this.device; 8 | this.model; 9 | this.tokenizer; 10 | this.params; 11 | this.minBufferOffset = 1; 12 | 13 | this.defaultPrompt; 14 | this.defaultTopK; 15 | this.defaultTemperature; 16 | this.defaultTokens; 17 | 18 | this.externalBuffer; 19 | 20 | this.unloadDeletionStack = []; 21 | } 22 | 23 | async initialize() { 24 | if (this.initialized) return console.error("Model already initialized"); 25 | if (!navigator.gpu) throw new Error("WebGPU is not supported"); 26 | 27 | const adapter = await navigator.gpu.requestAdapter(); 28 | this.device = await adapter.requestDevice(); 29 | 30 | initializeOperations(this.device); 31 | 32 | [this.model, this.params] = await this.loadModel(this.folder); 33 | this.tokenizer = this.tokenizerType == "bpe" ? new GPT2Tokenizer() : new SimpleTokenizer(); 34 | await this.tokenizer.load(); 35 | 36 | if (this.tokenizerType == "bpe") { 37 | this.defaultPrompt = `What is the answer to life, the universe, and everything?\n`; 38 | this.defaultTopK = 3; 39 | this.defaultTemperature = 1; 40 | this.defaultTokens = 30; 41 | } else { 42 | this.defaultPrompt = `WILL:\nAh, how dare you challenge me?\nHave you forgotten I built WebGPT?\n`; 43 | this.defaultTopK = 2; 44 | this.defaultTemperature = 1; 45 | this.defaultTokens = 80; 46 | } 47 | 48 | this.initialized = true; 49 | 50 | console.log("Model initialized"); 51 | } 52 | 53 | async *generate(prompt, max_new_tokens, top_k, temperature) { 54 | if (!this.initialized) { 55 | console.error("Model not loaded yet"); 56 | return; 57 | } 58 | 59 | // Buffer size (321644800) exceeds the max buffer size limit (268435456). 
60 | // - While calling [Device].CreateBuffer([BufferDescriptor]). 61 | 62 | let history = this.tokenizer.encode(prompt); 63 | console.log(`Prompt (${history.length} tokens):\n${prompt}`); 64 | 65 | const warmupRuns = 3; 66 | let totalTime = 0; 67 | 68 | for (let i = 0; i < max_new_tokens; i++) { 69 | const idx_cond = history.slice(-this.params.n_ctx); 70 | const useAttCache = i !== 0 && history.length <= this.params.n_ctx; 71 | 72 | const startTime = performance.now(); 73 | const logits = await this.run(idx_cond, useAttCache); 74 | const endTime = performance.now(); 75 | 76 | // console.log(`\nIteration ${i + 1} of ${max_new_tokens}`); 77 | const lapsedTime = endTime - startTime; 78 | console.log(`Kernel execution time: ${lapsedTime} ms`); 79 | i >= warmupRuns && (totalTime += lapsedTime); 80 | 81 | const { topKIndices, topKProbs } = selectTopK(logits, top_k); 82 | const probs = cpuSoftmax(topKProbs, temperature); 83 | const idx_next = topKIndices[sampleFromDistribution(probs)]; 84 | 85 | history = history.concat(idx_next); 86 | 87 | // console.log(`Output:\n${this.tokenizer.decode(history)}`); 88 | 89 | // const totalProbs = cpuSoftmax(logits, temperature); 90 | // const tokenProbsString = Array.from(totalProbs) 91 | // .map((value, index) => ({ value, index })) 92 | // .sort((a, b) => b.value - a.value) 93 | // .slice(0, 8) 94 | // .map((prob) => `{ ${this.tokenizer.decode([prob.index]).replace(/(\r\n|\n|\r)/gm, "newline")} } : ${prob.value.toPrecision(3)}`) 95 | // .join(" | "); 96 | // console.log("Top 8 token probs:", tokenProbsString); 97 | 98 | yield this.tokenizer.decode([idx_next]); 99 | } 100 | 101 | console.log(`Average kernel execution time: ${totalTime / (max_new_tokens - warmupRuns)} ms`); 102 | } 103 | 104 | async run(idx) { 105 | const { posEmbdBuffer, layer_buffers, normGammaBuffer, normBetaBuffer, embeddingsBuffers, deEmbeddingsBuffers } = this.model; 106 | const { attention_scale, n_embd, n_head, head_size, n_layer, vocab_size, hidden_size, 
vocab_chunk_size, vocab_chunk_instances } = this.params; 107 | const seq_length = idx.length; 108 | 109 | // ---------------- Create Passes ---------------- // 110 | // Note: These are re-initialized because everytime seq_length changes buffers are different sizes. 111 | 112 | // Pipeline creation is major bottleneck to spin up speed! Also buffer re-use. 113 | 114 | this.computePasses = []; 115 | let intermediateBuffer; 116 | let residualBuffer; 117 | { 118 | const { passes, resultBuffer } = EmbedBlock.newInstance(idx, seq_length, n_embd, vocab_chunk_size, embeddingsBuffers, posEmbdBuffer, ResidualBlock); 119 | intermediateBuffer = resultBuffer; 120 | residualBuffer = resultBuffer; 121 | this.computePasses.push(...passes); 122 | } 123 | for (let i = 0; i < n_layer; i++) { 124 | const buffers = layer_buffers[i]; 125 | { 126 | const { passes, resultBuffer } = LayerNormBlock.newInstance( 127 | seq_length, 128 | n_embd, 129 | intermediateBuffer, 130 | buffers.normAttentionGammaBuffer, 131 | buffers.normAttentionBetaBuffer 132 | ); 133 | intermediateBuffer = resultBuffer; 134 | this.computePasses.push(...passes); 135 | } 136 | { 137 | const { passes, resultBuffer } = AttentionBlock.newFusedInstance( 138 | seq_length, 139 | n_embd, 140 | attention_scale, 141 | n_head, 142 | head_size, 143 | intermediateBuffer, 144 | buffers.qkvWeightArray[0], 145 | buffers.qkvBiasArray[0], 146 | buffers.qkvWeightArray[1], 147 | buffers.qkvBiasArray[1], 148 | buffers.qkvWeightArray[2], 149 | buffers.qkvBiasArray[2], 150 | buffers.linearWeightsBuffer, 151 | buffers.linearBiasBuffer, 152 | FastMatMulBlock, 153 | SoftmaxBlock 154 | ); 155 | intermediateBuffer = resultBuffer; 156 | this.computePasses.push(...passes); 157 | } 158 | { 159 | const { passes, resultBuffer } = ResidualBlock.newInstance(seq_length, n_embd, intermediateBuffer, residualBuffer); 160 | intermediateBuffer = resultBuffer; 161 | residualBuffer = resultBuffer; 162 | this.computePasses.push(...passes); 163 | } 164 | { 165 | 
const { passes, resultBuffer } = LayerNormBlock.newInstance( 166 | seq_length, 167 | n_embd, 168 | intermediateBuffer, 169 | buffers.normLinearGammaBuffer, 170 | buffers.normLinearBetaBuffer 171 | ); 172 | intermediateBuffer = resultBuffer; 173 | this.computePasses.push(...passes); 174 | } 175 | { 176 | const { resultBuffer, passes } = FastMatMulBlock.newInstance( 177 | seq_length, 178 | hidden_size, 179 | n_embd, 180 | intermediateBuffer, 181 | buffers.firstLayerWeightsBuffer, 182 | buffers.firstLayerBiasBuffer 183 | ); 184 | intermediateBuffer = resultBuffer; 185 | this.computePasses.push(...passes); 186 | } 187 | { 188 | const { resultBuffer, passes } = GeluBlock.newInstance(seq_length, hidden_size, intermediateBuffer); 189 | intermediateBuffer = resultBuffer; 190 | this.computePasses.push(...passes); 191 | } 192 | { 193 | const { resultBuffer, passes } = FastMatMulBlock.newInstance( 194 | seq_length, 195 | n_embd, 196 | hidden_size, 197 | intermediateBuffer, 198 | buffers.secondLayerWeightsBuffer, 199 | buffers.secondLayerBiasBuffer 200 | ); 201 | intermediateBuffer = resultBuffer; 202 | this.computePasses.push(...passes); 203 | } 204 | { 205 | const { passes, resultBuffer } = ResidualBlock.newInstance(seq_length, n_embd, intermediateBuffer, residualBuffer); 206 | intermediateBuffer = resultBuffer; 207 | residualBuffer = resultBuffer; 208 | this.computePasses.push(...passes); 209 | } 210 | } 211 | { 212 | if (this.externalBuffer) { 213 | this.computePasses.push({ 214 | flag: "copy", 215 | src: intermediateBuffer, 216 | srcOffset: 0, 217 | dst: this.externalBuffer, 218 | dstOffset: 0, 219 | size: this.bufferSize(seq_length, n_embd), 220 | }); 221 | } 222 | } 223 | { 224 | const { passes, resultBuffer } = LayerNormBlock.newInstance(seq_length, n_embd, intermediateBuffer, normGammaBuffer, normBetaBuffer); 225 | intermediateBuffer = resultBuffer; 226 | this.computePasses.push(...passes); 227 | } 228 | { 229 | const { passes, resultBuffer } = 
DeEmbedBlock.newInstance( 230 | n_embd, 231 | vocab_size, 232 | vocab_chunk_size * vocab_chunk_instances, 233 | seq_length, 234 | vocab_chunk_size, 235 | intermediateBuffer, 236 | deEmbeddingsBuffers 237 | ); 238 | intermediateBuffer = resultBuffer; 239 | this.computePasses.push(...passes); 240 | } 241 | const resultBuffer = intermediateBuffer; 242 | 243 | // ---------------- Compute Passes ---------------- 244 | 245 | const commandEncoder = this.device.createCommandEncoder(); 246 | for (const pass of this.computePasses) { 247 | if (pass.flag === "compute") { 248 | const passEncoder = commandEncoder.beginComputePass(); 249 | passEncoder.setPipeline(pass.pipeline); 250 | for (let i = 0; i < pass.groups.length; i++) passEncoder.setBindGroup(i, pass.groups[i]); 251 | passEncoder.dispatchWorkgroups(pass.workgroups.x, pass.workgroups.y); 252 | passEncoder.end(); 253 | } else if (pass.flag === "copy") { 254 | commandEncoder.copyBufferToBuffer(pass.src, pass.srcOffset, pass.dst, pass.dstOffset, pass.size); 255 | } 256 | } 257 | this.device.queue.submit([commandEncoder.finish()]); 258 | 259 | // ---------------- Read Results ---------------- 260 | 261 | await resultBuffer.mapAsync(GPUMapMode.READ); 262 | const output = resultBuffer.getMappedRange(); 263 | const outputArray = new Float32Array(output).slice(0); // Copy the array, otherwise it'll be destroyed. 
264 | 265 | clearOperationCache(); 266 | 267 | return outputArray; 268 | } 269 | 270 | async loadModel(folder) { 271 | if (this.initialized) return console.error("Model already loaded"); 272 | 273 | console.log("Loading model from folder:", folder); 274 | const weightsFolder = `weights/${folder}/`; 275 | 276 | const params = await this.loadParameters(weightsFolder); 277 | const { embeddingsBuffers, deEmbeddingsBuffers } = await this.loadEmbeddings(params, weightsFolder); 278 | const { posEmbdBuffer } = await this.loadPositionalEmbeddings(params, weightsFolder); 279 | const layer_buffers = await this.loadLayers(params, weightsFolder); 280 | 281 | console.log("Loading final layer norm..."); 282 | const { normGammaBuffer, normBetaBuffer } = await this.loadFinalLayerNorm(params, weightsFolder); 283 | 284 | const output = { layer_buffers, embeddingsBuffers, deEmbeddingsBuffers, posEmbdBuffer, normGammaBuffer, normBetaBuffer }; 285 | console.log("Finished loading model.", output, params); 286 | return [output, params]; 287 | } 288 | 289 | async loadParameters(weightsFolder) { 290 | console.log("Loading params..."); 291 | const params = await (await fetch(`${weightsFolder}/params_gpt.json`)).json(); 292 | 293 | // Did you enable GitHub LFS? Won't work without it. 294 | if (params.n_embd % 4 !== 0) throw new Error("Model load failed: n_embd must be divisible by 4."); 295 | if (params.n_embd % params.n_head !== 0) throw new Error("Model load failed: n_embd must be divisible by n_head."); 296 | // I'm unsure if this is a reasonable requirement here. At worst, I can figure out some padding method. 
297 | if ((params.n_embd / params.n_head) % 4 !== 0) throw new Error("Model load failed: n_embd / n_head must be divisible by 4."); 298 | const tokenParam = this.bufferSize(params.vocab_size, params.n_embd); 299 | let minSplits = Math.ceil(tokenParam / this.device.limits.maxStorageBufferBindingSize); 300 | function vocabChunkSizeCalc(vocab_size, n_embd, splits, maxStorageBufferBindingSize) { 301 | // Possibly could be better? Needs actual benchmarking to know what approach is best. 302 | const optimisticSize = Math.ceil(vocab_size / splits / 4) * 4 * n_embd; 303 | const pessimiticSize = Math.floor(vocab_size / splits / 4) * 4 * n_embd; 304 | let vocab_chunk_size = optimisticSize; 305 | if (optimisticSize > maxStorageBufferBindingSize) { 306 | vocab_chunk_size = pessimiticSize; 307 | if (pessimiticSize * splits < tokenParam) { 308 | return vocabChunkSizeCalc(vocab_size, n_embd, splits + 1, maxStorageBufferBindingSize); 309 | } 310 | } 311 | return { vocab_chunk_size: vocab_chunk_size / n_embd, splits }; 312 | } 313 | const { vocab_chunk_size, splits } = vocabChunkSizeCalc(params.vocab_size, params.n_embd, minSplits, this.device.limits.maxStorageBufferBindingSize); 314 | if (splits > minSplits) console.warn(`Non-optimal number of vocab splits. Optimal: ${minSplits}, Selected: ${splits}`); 315 | 316 | // Set derived parameters 317 | params.vocab_chunk_size = vocab_chunk_size; 318 | params.vocab_chunk_instances = splits; 319 | params.head_size = params.n_embd / params.n_head; 320 | params.hidden_size = params.n_embd * 4; 321 | params.attention_scale = 1 / Math.sqrt(params.n_embd / params.n_head); 322 | params.bias = params.bias == undefined ? 
true : params.bias; 323 | 324 | // Check for overflow in buffers larger than maxStorageBufferBindingSize 325 | const maxBufferSize = this.device.limits.maxStorageBufferBindingSize / 4; 326 | if (params.n_embd * params.n_ctx > maxBufferSize) console.warn("Model load failed: n_embd * n_ctx must be less than maxStorageBufferBindingSize."); 327 | if (params.n_embd * params.hidden_size > maxBufferSize) 328 | console.warn("Model load failed: n_embd * hidden_size must be less than maxStorageBufferBindingSize."); 329 | if (params.n_ctx * params.n_ctx * params.n_head > maxBufferSize) 330 | console.warn("Model load failed: n_ctx * n_ctx must be less than maxStorageBufferBindingSize."); 331 | if (params.n_embd * params.n_embd * 3 > maxBufferSize) 332 | console.warn("Model load failed: n_embd * n_embd * 3 must be less than maxStorageBufferBindingSize."); 333 | 334 | console.log("Params:", params); 335 | 336 | return params; 337 | } 338 | 339 | async loadEmbeddings(params, weightsFolder) { 340 | console.log("Loading token embeddings..."); 341 | const embeddingWeights = await fetchBin(`${weightsFolder}/transformer.wte.weight_gpt.bin`); 342 | 343 | // Chunks are stored in row-major order and are of dimensions n_embd x vocab_chunk_size. 344 | // Embedding weights are imported in column-major order and are of dimensions vocab_size x n_embd. 345 | // We pre-transpose the chunk for the deEmbedding process for the matmul. Could do this on GPU later. 
346 | const embeddingsBuffers = []; 347 | const deEmbeddingsBuffers = []; 348 | for (let i = 0; i < params.vocab_chunk_instances; i++) { 349 | console.log(`Loading deEmbedding chunk ${i + 1}/${params.vocab_chunk_instances}...`); 350 | const offset = i * params.vocab_chunk_size; 351 | let size = params.vocab_chunk_size; 352 | 353 | const paddedArray = new Float32Array(params.vocab_chunk_size * params.n_embd); 354 | if (i === params.vocab_chunk_instances - 1) { 355 | size = params.vocab_size - offset; 356 | paddedArray.set(size * params.n_embd, zeros((params.vocab_chunk_size * params.vocab_chunk_instances - params.vocab_size) * params.n_embd)); 357 | } 358 | paddedArray.set(embeddingWeights.subarray(offset * params.n_embd, offset * params.n_embd + size * params.n_embd)); 359 | 360 | embeddingsBuffers.push(this.initTensor(paddedArray, [params.vocab_chunk_size, params.n_embd], ["copy_from"])); 361 | 362 | const chunk = transpose(paddedArray, params.vocab_chunk_size, params.n_embd); // Use GPU perhaps? 
363 | deEmbeddingsBuffers.push(this.initTensor(chunk, [params.n_embd, params.vocab_chunk_size], ["storage"])); 364 | } 365 | 366 | return { embeddingsBuffers, deEmbeddingsBuffers }; 367 | } 368 | 369 | async loadPositionalEmbeddings(params, weightsFolder) { 370 | console.log("Loading positional embeddings..."); 371 | const posEmbeddings = await fetchBin(`${weightsFolder}/transformer.wpe.weight_gpt.bin`); 372 | const posEmbdBuffer = this.initTensor(posEmbeddings, [params.n_ctx, params.n_embd], ["copy_from"]); 373 | 374 | return { posEmbdBuffer }; 375 | } 376 | 377 | async loadFinalLayerNorm(params, weightsFolder) { 378 | console.log("Loading final norm..."); 379 | const prefix = `${weightsFolder}/transformer.ln_f.`; 380 | 381 | const tensorPromises = [ 382 | this.fetchAndInitTensor(`${prefix}weight_gpt.bin`, [params.n_embd], ["storage"]), 383 | this.fetchAndInitTensor(`${prefix}bias_gpt.bin`, [params.n_embd], ["storage"]), 384 | ]; 385 | 386 | const [normGammaBuffer, normBetaBuffer] = await Promise.all(tensorPromises); 387 | 388 | return { normGammaBuffer, normBetaBuffer }; 389 | } 390 | 391 | async loadLayers(params, weightsFolder) { 392 | console.log("Loading layers..."); 393 | const layerPromises = []; 394 | 395 | for (let i = 0; i < params.n_layer; i++) { 396 | layerPromises.push(this.loadLayer(params, weightsFolder, i)); 397 | } 398 | 399 | const layer_buffers = await Promise.all(layerPromises); 400 | return layer_buffers; 401 | } 402 | 403 | async loadLayer(params, weightsFolder, layerIndex) { 404 | console.log("Starting to load layer...", layerIndex); 405 | const prefix = `${weightsFolder}transformer.h.${layerIndex}.`; 406 | 407 | // Create an array of promises for fetching and initializing the tensors 408 | const tensorPromises = [ 409 | this.fetchAndInitTensor(`${prefix}ln_1.weight_gpt.bin`, [params.n_embd], ["storage"]), 410 | this.fetchAndInitTensor(`${prefix}ln_1.bias_gpt.bin`, [params.n_embd], ["storage"]), 411 | 
  // Loads every tensor of one transformer block (layer `layerIndex`).
  // All fetches run in parallel; the promise order here MUST match the
  // destructuring order below.
  async loadLayer(params, weightsFolder, layerIndex) {
    console.log("Starting to load layer...", layerIndex);
    const prefix = `${weightsFolder}transformer.h.${layerIndex}.`;

    // Create an array of promises for fetching and initializing the tensors
    const tensorPromises = [
      // Pre-attention LayerNorm (gamma, beta).
      this.fetchAndInitTensor(`${prefix}ln_1.weight_gpt.bin`, [params.n_embd], ["storage"]),
      this.fetchAndInitTensor(`${prefix}ln_1.bias_gpt.bin`, [params.n_embd], ["storage"]),
      // Fused QKV projection [n_embd, 3*n_embd], split into three Q/K/V buffers.
      this.fetchAndSplitQKVWeightTensors(`${prefix}attn.c_attn.weight_gpt.bin`, [params.n_embd, 3 * params.n_embd], ["storage"]),
      this.fetchAndSplitQKVBiasTensors(`${prefix}attn.c_attn.bias_gpt.bin`, [params.n_embd], ["storage"]),
      // Attention output projection.
      this.fetchAndInitTensor(`${prefix}attn.c_proj.weight_gpt.bin`, [params.n_embd, params.n_embd], ["storage"]),
      this.fetchAndInitTensor(`${prefix}attn.c_proj.bias_gpt.bin`, [params.n_embd], ["storage"]),
      // Pre-MLP LayerNorm (gamma, beta).
      this.fetchAndInitTensor(`${prefix}ln_2.weight_gpt.bin`, [params.n_embd], ["storage"]),
      this.fetchAndInitTensor(`${prefix}ln_2.bias_gpt.bin`, [params.n_embd], ["storage"]),
      // MLP expansion to hidden_size (4*n_embd) and projection back down.
      this.fetchAndInitTensor(`${prefix}mlp.c_fc.weight_gpt.bin`, [params.n_embd, params.hidden_size], ["storage"]),
      this.fetchAndInitTensor(`${prefix}mlp.c_fc.bias_gpt.bin`, [params.hidden_size], ["storage"]),
      this.fetchAndInitTensor(`${prefix}mlp.c_proj.weight_gpt.bin`, [params.hidden_size, params.n_embd], ["storage"]),
      this.fetchAndInitTensor(`${prefix}mlp.c_proj.bias_gpt.bin`, [params.n_embd], ["storage"]),
    ];

    // Wait for all tensors to be fetched and initialized
    const [
      normAttentionGammaBuffer,
      normAttentionBetaBuffer,
      qkvWeightArray,
      qkvBiasArray,
      linearWeightsBuffer,
      linearBiasBuffer,
      normLinearGammaBuffer,
      normLinearBetaBuffer,
      firstLayerWeightsBuffer,
      firstLayerBiasBuffer,
      secondLayerWeightsBuffer,
      secondLayerBiasBuffer,
    ] = await Promise.all(tensorPromises);

    // Process the fetched data and return the layer buffers
    return {
      normAttentionGammaBuffer,
      normAttentionBetaBuffer,
      qkvWeightArray,
      qkvBiasArray,
      linearWeightsBuffer,
      linearBiasBuffer,
      normLinearGammaBuffer,
      normLinearBetaBuffer,
      firstLayerWeightsBuffer,
      firstLayerBiasBuffer,
      secondLayerWeightsBuffer,
      secondLayerBiasBuffer,
    };
  }
fetchAndSplitQKVWeightTensors(url, dims, ops) { 457 | const data = transpose(await fetchBin(url), dims[0], dims[1]); 458 | 459 | const qWeights = transpose(data.subarray(0, dims[0] * dims[0]), dims[0], dims[0]); 460 | const kWeights = transpose(data.subarray(dims[0] * dims[0], dims[0] * dims[0] * 2), dims[0], dims[0]); 461 | const vWeights = transpose(data.subarray(dims[0] * dims[0] * 2, dims[0] * dims[0] * 3), dims[0], dims[0]); 462 | 463 | const qWeightsBuffer = this.initTensor(qWeights, [dims[0], dims[0]], ops); 464 | const kWeightsBuffer = this.initTensor(kWeights, [dims[0], dims[0]], ops); 465 | const vWeightsBuffer = this.initTensor(vWeights, [dims[0], dims[0]], ops); 466 | 467 | return [qWeightsBuffer, kWeightsBuffer, vWeightsBuffer]; 468 | } 469 | 470 | async fetchAndSplitQKVBiasTensors(url, dims, ops) { 471 | const data = await fetchBin(url); 472 | 473 | const qBias = data.subarray(0, dims[0]); 474 | const kBias = data.subarray(dims[0], dims[0] * 2); 475 | const vBias = data.subarray(dims[0] * 2, dims[0] * 3); 476 | 477 | const qBiasBuffer = this.initTensor(qBias, [dims[0]], ops); 478 | const kBiasBuffer = this.initTensor(kBias, [dims[0]], ops); 479 | const vBiasBuffer = this.initTensor(vBias, [dims[0]], ops); 480 | 481 | return [qBiasBuffer, kBiasBuffer, vBiasBuffer]; 482 | } 483 | 484 | async fetchAndInitTensor(url, dims, ops) { 485 | console.log("Fetching and initializing tensor...", url); 486 | const data = await fetchBin(url); 487 | return this.initTensor(data, dims, ops); 488 | } 489 | 490 | initTensor(data, dims, ops) { 491 | const buffer = this.device.createBuffer({ 492 | size: this.bufferSize(dims[0], dims[1] || 1, dims[2] || 1), 493 | usage: ops.map((u) => bufferUsageDict[u]).reduce((a, b) => a | b), 494 | mappedAtCreation: true, 495 | }); 496 | new Float32Array(buffer.getMappedRange()).set(data); 497 | buffer.unmap(); 498 | this.unloadDeletionStack.push(buffer); 499 | return buffer; 500 | } 501 | 502 | unloadBuffers() { 503 | 
this.unloadDeletionStack.map((buffer) => buffer.destroy()); 504 | this.unloadDeletionStack = []; 505 | } 506 | 507 | bufferSize(dimX, dimY = 1, dimZ = 1) { 508 | const size = Math.ceil((dimX * dimY * dimZ * Float32Array.BYTES_PER_ELEMENT) / this.minBufferOffset) * this.minBufferOffset; 509 | if (size > this.device.limits.maxStorageBufferBindingSize) 510 | console.warn("Warning: Buffer size calc result exceeds GPU limit, are you using this value for a tensor size?", dimX, dimY, dimZ, size); 511 | return size; 512 | } 513 | } 514 | -------------------------------------------------------------------------------- /other/conversion_scripts/README.md: -------------------------------------------------------------------------------- 1 | # Running custom models on WebGPU 2 | 3 | It's fairly easy to run custom models on WebGPU. At the moment, I only support PyTorch models via the scripts below but it should be fairly simple to export other model weights to work here. 4 | 5 | Importing weights requires you to export transformer weights as a series of individual .bin files. Pardon the somewhat inconvenient process as loading such significant file sizes into Javascript requires some clever engineering. 6 | 7 | An example structure with only two layers. Each matrix is collapes into a row-major 1-dimensional array. 
```
transformer.wte.weight.bin: [65, 128]
transformer.wpe.weight.bin: [64, 128]
transformer.h.0.ln_1.weight.bin: [128]
transformer.h.0.ln_1.bias.bin: [128]
transformer.h.0.attn.c_attn.weight.bin: [384, 128]
transformer.h.0.attn.c_attn.bias.bin: [384]
transformer.h.0.attn.c_proj.weight.bin: [128, 128]
transformer.h.0.attn.c_proj.bias.bin: [128]
transformer.h.0.ln_2.weight.bin: [128]
transformer.h.0.ln_2.bias.bin: [128]
transformer.h.0.mlp.c_fc.weight.bin: [512, 128]
transformer.h.0.mlp.c_fc.bias.bin: [512]
transformer.h.0.mlp.c_proj.weight.bin: [128, 512]
transformer.h.0.mlp.c_proj.bias.bin: [128]
transformer.h.1.ln_1.weight.bin: [128]
transformer.h.1.ln_1.bias.bin: [128]
transformer.h.1.attn.c_attn.weight.bin: [384, 128]
transformer.h.1.attn.c_attn.bias.bin: [384]
transformer.h.1.attn.c_proj.weight.bin: [128, 128]
transformer.h.1.attn.c_proj.bias.bin: [128]
transformer.h.1.ln_2.weight.bin: [128]
transformer.h.1.ln_2.bias.bin: [128]
transformer.h.1.mlp.c_fc.weight.bin: [512, 128]
transformer.h.1.mlp.c_fc.bias.bin: [512]
transformer.h.1.mlp.c_proj.weight.bin: [128, 512]
transformer.h.1.mlp.c_proj.bias.bin: [128]
transformer.ln_f.weight.bin: [128]
transformer.ln_f.bias.bin: [128]
lm_head.weight.bin: [65, 128]
```

I've included an export script for PyTorch models. Quite simply, you must export each tensor in `model.state_dict()` into an individual file. If you want to export pre-trained GPT models, you'll need to slightly reformat the parameters so they load correctly.
42 | -------------------------------------------------------------------------------- /other/conversion_scripts/ckpt.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0hq/WebGPT/a83cdc8d46e8d140b55d87089482999580b64a3d/other/conversion_scripts/ckpt.pt -------------------------------------------------------------------------------- /other/conversion_scripts/convert_checkpoint_pytorch.py: -------------------------------------------------------------------------------- 1 | import json 2 | import struct 3 | import torch 4 | import os 5 | 6 | transposed = ['attn.c_attn.weight', 'attn.c_proj.weight', 7 | 'mlp.c_fc.weight', 'mlp.c_proj.weight'] 8 | 9 | 10 | def save_weights_to_bin_files(checkpoint, folder_name): 11 | for key, value in checkpoint['model'].items(): 12 | print(f"{key}: {value.shape}") 13 | if key.startswith('_orig_mod.'): 14 | continue 15 | with open(os.path.join(folder_name, f"{key}_gpt.bin"), 'wb') as file: 16 | values = value.cpu().numpy() 17 | # Only use this if using old minGPT model. 
18 | # if any(key.endswith(w) for w in transposed): 19 | # values = values.T 20 | 21 | for single_value in values.flatten(): 22 | file.write(struct.pack('> 24) / 127.0) * absmax; 56 | matrix[i + 1] = (((packedValue << 16) >> 24) / 127.0) * absmax; 57 | matrix[i + 2] = (((packedValue << 8) >> 24) / 127.0) * absmax; 58 | matrix[i + 3] = ((packedValue >> 24) / 127.0) * absmax; 59 | } 60 | 61 | return matrix; 62 | } 63 | 64 | const qa = quantizeMatrix(A, M, K); 65 | const qb = quantizeMatrix(B, K, N); 66 | 67 | const quantizedA = qa.quantizedMatrix; 68 | const quantizedB = qb.quantizedMatrix; 69 | 70 | const dqB = dequantizeMatrix(quantizedB, qb.absmax, K, N); 71 | 72 | // for (let i = 0; i < 10; i++) { 73 | // console.log(B[i], dqB[i]); 74 | // } 75 | 76 | const absmax = Math.max(qa.absmax, qb.absmax); 77 | 78 | // Naive CPU implementation of matrix multiplication 79 | function multiplyMatrices(A, B, C, M, N, K) { 80 | for (let i = 0; i < M; i++) { 81 | for (let j = 0; j < N; j++) { 82 | let sum = 0; 83 | for (let k = 0; k < K; k++) { 84 | sum += A[i * K + k] * B[k * N + j]; 85 | } 86 | C[i * N + j] = sum; 87 | } 88 | } 89 | } 90 | 91 | async function run() { 92 | // Create WebGPU device and queue 93 | const adapter = await navigator.gpu.requestAdapter(); 94 | const device = await adapter.requestDevice(); 95 | const queue = device.queue; 96 | 97 | // Create buffers for matrices A, B, and C 98 | const aBuffer = device.createBuffer({ 99 | size: A.byteLength, 100 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, 101 | }); 102 | const bBuffer = device.createBuffer({ 103 | size: quantizedB.byteLength, 104 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, 105 | }); 106 | const cBuffer = device.createBuffer({ 107 | size: C.byteLength, 108 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, 109 | }); 110 | 111 | // Copy matrices A and B to their respective buffers 112 | queue.writeBuffer(aBuffer, 0, A); 113 | queue.writeBuffer(bBuffer, 0, quantizedB); 
114 | 115 | // Create bind group layout and bind group 116 | 117 | const shaderCode = ` 118 | 119 | @group(0) @binding(0) var array_a: array>; 120 | @group(0) @binding(1) var array_b: array; 121 | 122 | @group(0) @binding(2) var array_c: array>; 123 | 124 | const absmax = ${absmax}; 125 | 126 | fn unpackInt8x4(value: i32) -> vec4 { 127 | let x = f32((value << 24) >> 24) / 127.0 * absmax; 128 | let y = f32(((value << 16) >> 24)) / 127.0 * absmax; 129 | let z = f32(((value << 8) >> 24)) / 127.0 * absmax; 130 | let w = f32(((value >> 24))) / 127.0 * absmax; 131 | return vec4(x, y, z, w); 132 | } 133 | 134 | @compute @workgroup_size(${workgroupSizeX}, ${workgroupSizeY}) 135 | fn main(@builtin(global_invocation_id) global_id: vec3) { 136 | var M: u32 = ${M}; 137 | var N: u32 = ${N}; 138 | var ND4: u32 = ${Math.ceil(N / 4)}; 139 | var KD4: u32 = ${Math.ceil(K / 4)}; 140 | var x: u32 = global_id.x; 141 | var y: u32 = global_id.y; 142 | 143 | if (x * 8 >= N || y * 4 >= M) { 144 | return; 145 | } 146 | 147 | var sum00: vec4 = vec4(); 148 | var sum01: vec4 = vec4(); 149 | var sum02: vec4 = vec4(); 150 | var sum03: vec4 = vec4(); 151 | var sum10: vec4 = vec4(); 152 | var sum11: vec4 = vec4(); 153 | var sum12: vec4 = vec4(); 154 | var sum13: vec4 = vec4(); 155 | 156 | for(var k: u32 = 0u; k < KD4; k = k + 1u) { 157 | var arow0: vec4 = array_a[(y * 4u + 0u) * KD4 + k]; 158 | var arow1: vec4 = array_a[(y * 4u + 1u) * KD4 + k]; 159 | var arow2: vec4 = array_a[(y * 4u + 2u) * KD4 + k]; 160 | var arow3: vec4 = array_a[(y * 4u + 3u) * KD4 + k]; 161 | var brow: vec4; 162 | 163 | brow = unpackInt8x4(array_b[(k * 4u + 0u) * ND4 + x * 2u + 0u]); 164 | sum00 = vec4(arow0.x) * brow + sum00; 165 | sum01 = vec4(arow1.x) * brow + sum01; 166 | sum02 = vec4(arow2.x) * brow + sum02; 167 | sum03 = vec4(arow3.x) * brow + sum03; 168 | 169 | brow = unpackInt8x4(array_b[(k * 4u + 0u) * ND4 + x * 2u + 1u]); 170 | sum10 = vec4(arow0.x) * brow + sum10; 171 | sum11 = vec4(arow1.x) * brow + sum11; 172 | 
sum12 = vec4(arow2.x) * brow + sum12; 173 | sum13 = vec4(arow3.x) * brow + sum13; 174 | 175 | brow = unpackInt8x4(array_b[(k * 4u + 1u) * ND4 + x * 2u + 0u]); 176 | sum00 = vec4(arow0.y) * brow + sum00; 177 | sum01 = vec4(arow1.y) * brow + sum01; 178 | sum02 = vec4(arow2.y) * brow + sum02; 179 | sum03 = vec4(arow3.y) * brow + sum03; 180 | 181 | brow = unpackInt8x4(array_b[(k * 4u + 1u) * ND4 + x * 2u + 1u]); 182 | sum10 = vec4(arow0.y) * brow + sum10; 183 | sum11 = vec4(arow1.y) * brow + sum11; 184 | sum12 = vec4(arow2.y) * brow + sum12; 185 | sum13 = vec4(arow3.y) * brow + sum13; 186 | 187 | brow = unpackInt8x4(array_b[(k * 4u + 2u) * ND4 + x * 2u + 0u]); 188 | sum00 = vec4(arow0.z) * brow + sum00; 189 | sum01 = vec4(arow1.z) * brow + sum01; 190 | sum02 = vec4(arow2.z) * brow + sum02; 191 | sum03 = vec4(arow3.z) * brow + sum03; 192 | 193 | brow = unpackInt8x4(array_b[(k * 4u + 2u) * ND4 + x * 2u + 1u]); 194 | sum10 = vec4(arow0.z) * brow + sum10; 195 | sum11 = vec4(arow1.z) * brow + sum11; 196 | sum12 = vec4(arow2.z) * brow + sum12; 197 | sum13 = vec4(arow3.z) * brow + sum13; 198 | 199 | brow = unpackInt8x4(array_b[(k * 4u + 3u) * ND4 + x * 2u + 0u]); 200 | sum00 = vec4(arow0.w) * brow + sum00; 201 | sum01 = vec4(arow1.w) * brow + sum01; 202 | sum02 = vec4(arow2.w) * brow + sum02; 203 | sum03 = vec4(arow3.w) * brow + sum03; 204 | 205 | brow = unpackInt8x4(array_b[(k * 4u + 3u) * ND4 + x * 2u + 1u]); 206 | sum10 = vec4(arow0.w) * brow + sum10; 207 | sum11 = vec4(arow1.w) * brow + sum11; 208 | sum12 = vec4(arow2.w) * brow + sum12; 209 | sum13 = vec4(arow3.w) * brow + sum13; 210 | } 211 | 212 | if (y * 4u + 0u < M) { 213 | array_c[x * 2u + 0u + (y * 4u + 0u) * ND4] = sum00; 214 | array_c[x * 2u + 1u + (y * 4u + 0u) * ND4] = sum10; 215 | } 216 | if (y * 4u + 1u < M) { 217 | array_c[x * 2u + 0u + (y * 4u + 1u) * ND4] = sum01; 218 | array_c[x * 2u + 1u + (y * 4u + 1u) * ND4] = sum11; 219 | } 220 | if (y * 4u + 2u < M) { 221 | array_c[x * 2u + 0u + (y * 4u + 2u) * ND4] = 
sum02; 222 | array_c[x * 2u + 1u + (y * 4u + 2u) * ND4] = sum12; 223 | } 224 | if (y * 4u + 3u < M) { 225 | array_c[x * 2u + 0u + (y * 4u + 3u) * ND4] = sum03; 226 | array_c[x * 2u + 1u + (y * 4u + 3u) * ND4] = sum13; 227 | } 228 | } 229 | `; 230 | 231 | const shaderModule = device.createShaderModule({ 232 | code: shaderCode, 233 | }); 234 | 235 | const bindGroupLayout = device.createBindGroupLayout({ 236 | entries: [ 237 | { 238 | binding: 0, 239 | visibility: GPUShaderStage.COMPUTE, 240 | buffer: { 241 | type: "read-only-storage", 242 | }, 243 | }, 244 | { 245 | binding: 1, 246 | visibility: GPUShaderStage.COMPUTE, 247 | buffer: { 248 | type: "read-only-storage", 249 | }, 250 | }, 251 | { 252 | binding: 2, 253 | visibility: GPUShaderStage.COMPUTE, 254 | buffer: { 255 | type: "storage", 256 | }, 257 | }, 258 | ], 259 | }); 260 | 261 | const bindGroup = device.createBindGroup({ 262 | layout: bindGroupLayout, 263 | entries: [ 264 | { 265 | binding: 0, 266 | resource: { 267 | buffer: aBuffer, 268 | }, 269 | }, 270 | { 271 | binding: 1, 272 | resource: { 273 | buffer: bBuffer, 274 | }, 275 | }, 276 | { 277 | binding: 2, 278 | resource: { 279 | buffer: cBuffer, 280 | }, 281 | }, 282 | ], 283 | }); 284 | 285 | const pipelineLayout = device.createPipelineLayout({ 286 | bindGroupLayouts: [bindGroupLayout], 287 | }); 288 | 289 | const pipeline = device.createComputePipeline({ 290 | layout: pipelineLayout, 291 | compute: { 292 | module: shaderModule, 293 | entryPoint: "main", 294 | }, 295 | }); 296 | const encoder = device.createCommandEncoder(); 297 | const passEncoder = encoder.beginComputePass(); 298 | 299 | // Dispatch the compute kernel 300 | passEncoder.setPipeline(pipeline); 301 | passEncoder.setBindGroup(0, bindGroup); 302 | passEncoder.dispatchWorkgroups(workgroupSizeX, workgroupSizeY, 1); 303 | passEncoder.end(); 304 | 305 | const readBuffer = device.createBuffer({ 306 | size: C.byteLength, 307 | usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, 308 | }); 
309 | 310 | // Copy matrix C from the GPU to the CPU 311 | encoder.copyBufferToBuffer(cBuffer, 0, readBuffer, 0, C.byteLength); 312 | 313 | device.queue.submit([encoder.finish()]); 314 | 315 | await readBuffer.mapAsync(GPUMapMode.READ); 316 | const readBufferData = new Float32Array(readBuffer.getMappedRange()); 317 | 318 | const C_cpu = new Float32Array(M * N); 319 | multiplyMatrices(A, B, C_cpu, M, N, K); 320 | 321 | for (let i = 0; i < M * N; i++) { 322 | if (Math.abs(C_cpu[i] - readBufferData[i]) > 0.1) { 323 | console.error("CPU and GPU results differ at index", i); 324 | console.error("CPU:", C_cpu[i], "GPU:", readBufferData[i]); 325 | break; 326 | } 327 | // } else { 328 | // console.log("CPU and GPU results are the same at index", i); 329 | // console.log("CPU:", C_cpu[i], "GPU:", readBufferData[i]); 330 | // } 331 | } 332 | 333 | let mae = 0; 334 | for (let i = 0; i < M * N; i++) { 335 | mae += Math.abs(C_cpu[i] - readBufferData[i]); 336 | } 337 | mae /= M * N; 338 | console.log("Mean Absolute Error:", mae); 339 | 340 | const NUM_RUNS = 100; 341 | 342 | //warmup 343 | 344 | for (let i = 0; i < NUM_RUNS; i++) { 345 | // Dispatch the compute kernel 346 | const encoder = device.createCommandEncoder(); 347 | const passEncoder = encoder.beginComputePass(); 348 | 349 | // Dispatch the compute kernel 350 | passEncoder.setPipeline(pipeline); 351 | passEncoder.setBindGroup(0, bindGroup); 352 | passEncoder.dispatchWorkgroups(workgroupSizeX, workgroupSizeY, 1); 353 | 354 | passEncoder.end(); 355 | 356 | const readBuffer = device.createBuffer({ 357 | size: C.byteLength, 358 | usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, 359 | }); 360 | 361 | // Copy matrix C from the GPU to the CPU 362 | encoder.copyBufferToBuffer(cBuffer, 0, readBuffer, 0, C.byteLength); 363 | } 364 | 365 | // Run GPU kernel NUM_RUNS times and measure time 366 | let totalTime = 0; 367 | for (let i = 0; i < NUM_RUNS; i++) { 368 | const start = performance.now(); 369 | 370 | // Dispatch the 
compute kernel 371 | const encoder = device.createCommandEncoder(); 372 | const passEncoder = encoder.beginComputePass(); 373 | 374 | // Dispatch the compute kernel 375 | passEncoder.setPipeline(pipeline); 376 | passEncoder.setBindGroup(0, bindGroup); 377 | passEncoder.dispatchWorkgroups(M / workgroupSizeX, N / workgroupSizeY, 1); 378 | 379 | passEncoder.end(); 380 | 381 | const readBuffer = device.createBuffer({ 382 | size: C.byteLength, 383 | usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, 384 | }); 385 | 386 | // Copy matrix C from the GPU to the CPU 387 | encoder.copyBufferToBuffer(cBuffer, 0, readBuffer, 0, C.byteLength); 388 | 389 | const end = performance.now(); 390 | totalTime += end - start; 391 | } 392 | const averageTime = totalTime / NUM_RUNS; 393 | console.log(`Average time per run: ${averageTime.toFixed(2)} ms`); 394 | // print flops 395 | 396 | const flops = (2 * M * N * K) / averageTime; 397 | console.log(`GFLOPS: ${flops / 1e9}`); 398 | } 399 | 400 | run(); 401 | -------------------------------------------------------------------------------- /other/misc/files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0hq/WebGPT/a83cdc8d46e8d140b55d87089482999580b64a3d/other/misc/files.png -------------------------------------------------------------------------------- /other/misc/header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0hq/WebGPT/a83cdc8d46e8d140b55d87089482999580b64a3d/other/misc/header.png -------------------------------------------------------------------------------- /other/scratchpad.js: -------------------------------------------------------------------------------- 1 | class Instruction { 2 | constructor(device) { 3 | this.device = device; 4 | this.bufferDeletionStack = []; 5 | this.unloadDeletionStack = []; 6 | 7 | this.initBindGroups(); 8 | } 9 | 10 | initBindGroup(layout, buffers, 
label = "") { 11 | return this.device.createBindGroup({ 12 | layout, 13 | entries: buffers.map((buffer, i) => ({ 14 | binding: i, 15 | resource: { buffer }, 16 | })), 17 | label, 18 | }); 19 | } 20 | 21 | initBuffer(ops, row, col = 1, noDelete = false) { 22 | const buffer = this.device.createBuffer({ 23 | size: this.bufferSize(row, col), 24 | usage: ops.map((u) => bufferUsageDict[u]).reduce((a, b) => a | b), 25 | }); 26 | if (!noDelete) this.bufferDeletionStack.push(buffer); 27 | else this.unloadDeletionStack.push(buffer); 28 | return buffer; 29 | } 30 | 31 | bufferSize(dimA, dimB = 1) { 32 | return Math.ceil((dimA * dimB * Float32Array.BYTES_PER_ELEMENT) / 1) * 1; 33 | } 34 | 35 | initBindGroups() { 36 | const bg = (types) => 37 | this.device.createBindGroupLayout({ 38 | entries: types.map((entry, i) => ({ 39 | binding: i, 40 | visibility: GPUShaderStage.COMPUTE, 41 | buffer: { type: entry }, 42 | })), 43 | }); 44 | 45 | this.r_r_r_Layout = bg(["read-only-storage", "read-only-storage", "read-only-storage"]); 46 | this.r_r_Layout = bg(["read-only-storage", "read-only-storage"]); 47 | this.r_Layout = bg(["read-only-storage"]); 48 | this.u_s_Layout = bg(["uniform", "storage"]); 49 | this.u_s_s_s_Layout = bg(["uniform", "storage", "storage", "storage"]); 50 | } 51 | 52 | initPipeline(code, bindGroupLayouts, label = "", constants = {}) { 53 | return this.device.createComputePipeline({ 54 | layout: this.device.createPipelineLayout({ bindGroupLayouts }), 55 | compute: { 56 | module: this.device.createShaderModule({ code }), 57 | entryPoint: "main", 58 | constants, 59 | }, 60 | label, 61 | }); 62 | } 63 | 64 | unloadBuffers() { 65 | this.unloadDeletionStack.map((buffer) => buffer.destroy()); 66 | this.unloadDeletionStack = []; 67 | } 68 | 69 | destroyBuffers() { 70 | this.bufferDeletionStack.map((buffer) => buffer.destroy()); 71 | this.bufferDeletionStack = []; 72 | } 73 | } 74 | 75 | class FastMatMul extends Instruction { 76 | constructor(device) { 77 | super(device); 78 
| this.name = "fastMatMul"; 79 | this.pipelineCache = new Map(); 80 | } 81 | 82 | getPipeline(rows) { 83 | const div4 = rows % 4 === 0; 84 | const pipelineCacheKey = div4 ? "fastMatMulNoCheck" : "fastMatMul"; 85 | if (this.pipelineCache.has(pipelineCacheKey)) { 86 | return this.pipelineCache.get(pipelineCacheKey); 87 | } 88 | const kernel = div4 ? this.fastMatMulNoCheck : this.fastMatMul; 89 | const pipeline = this.initPipeline(kernel, [this.u_s_Layout, this.r_r_Layout], pipelineCacheKey); 90 | this.pipelineCache.set(pipelineCacheKey, pipeline); 91 | return pipeline; 92 | } 93 | 94 | newInstance(rows, cols, shared, bufA, bufB) { 95 | const pipeline = this.getPipeline(rows); 96 | const uniformBuffer = this.initBuffer(["uniform", "copy_to"], 4); 97 | const resultBuf = this.initBuffer(["storage", "copy_from"], rows, cols); 98 | const opBindGroup = this.initBindGroup(this.u_s_Layout, [uniformBuffer, resultBuf], "opBindGroup"); 99 | const inputBindGroup = this.initBindGroup(this.r_r_Layout, [bufA, bufB], "inputBindGroup"); 100 | const workgroups = { x: wgSize(cols, 64), y: wgSize(rows, 32) }; 101 | this.device.queue.writeBuffer(uniformBuffer, 0, new Uint32Array([rows, cols, Math.ceil(cols / 4), Math.ceil(shared / 4)])); 102 | 103 | return { 104 | resultBuf, 105 | pass: { 106 | pipeline, 107 | groups: [opBindGroup, inputBindGroup], 108 | workgroups, 109 | }, 110 | }; 111 | } 112 | 113 | fastMatMul = ` 114 | struct CMeta { 115 | M: u32, 116 | N: u32, 117 | ND4: u32, 118 | KD4: u32, 119 | } 120 | 121 | @group(1) @binding(0) var array_a: array>; 122 | @group(1) @binding(1) var array_b: array>; 123 | 124 | @group(0) @binding(0) var cmeta: CMeta; 125 | @group(0) @binding(1) var array_c: array>; 126 | 127 | @compute @workgroup_size(8, 8) 128 | fn main(@builtin(global_invocation_id) global_id: vec3) { 129 | var M: u32 = cmeta.M; 130 | var N: u32 = cmeta.N; 131 | var ND4: u32 = cmeta.ND4; 132 | var KD4: u32 = cmeta.KD4; 133 | var x: u32 = global_id.x; 134 | var y: u32 = 
global_id.y; 135 | 136 | if (x * 8 >= N || y * 4 >= M) { 137 | return; 138 | } 139 | 140 | var sum00: vec4 = vec4(); 141 | var sum01: vec4 = vec4(); 142 | var sum02: vec4 = vec4(); 143 | var sum03: vec4 = vec4(); 144 | var sum10: vec4 = vec4(); 145 | var sum11: vec4 = vec4(); 146 | var sum12: vec4 = vec4(); 147 | var sum13: vec4 = vec4(); 148 | 149 | for(var k: u32 = 0u; k < KD4; k = k + 1u) { 150 | var arow0: vec4 = array_a[(y * 4u + 0u) * KD4 + k]; 151 | var arow1: vec4 = array_a[(y * 4u + 1u) * KD4 + k]; 152 | var arow2: vec4 = array_a[(y * 4u + 2u) * KD4 + k]; 153 | var arow3: vec4 = array_a[(y * 4u + 3u) * KD4 + k]; 154 | var brow: vec4; 155 | 156 | brow = array_b[(k * 4u + 0u) * ND4 + x * 2u + 0u]; 157 | sum00 = vec4(arow0.x) * brow + sum00; 158 | sum01 = vec4(arow1.x) * brow + sum01; 159 | sum02 = vec4(arow2.x) * brow + sum02; 160 | sum03 = vec4(arow3.x) * brow + sum03; 161 | 162 | brow = array_b[(k * 4u + 0u) * ND4 + x * 2u + 1u]; 163 | sum10 = vec4(arow0.x) * brow + sum10; 164 | sum11 = vec4(arow1.x) * brow + sum11; 165 | sum12 = vec4(arow2.x) * brow + sum12; 166 | sum13 = vec4(arow3.x) * brow + sum13; 167 | 168 | brow = array_b[(k * 4u + 1u) * ND4 + x * 2u + 0u]; 169 | sum00 = vec4(arow0.y) * brow + sum00; 170 | sum01 = vec4(arow1.y) * brow + sum01; 171 | sum02 = vec4(arow2.y) * brow + sum02; 172 | sum03 = vec4(arow3.y) * brow + sum03; 173 | 174 | brow = array_b[(k * 4u + 1u) * ND4 + x * 2u + 1u]; 175 | sum10 = vec4(arow0.y) * brow + sum10; 176 | sum11 = vec4(arow1.y) * brow + sum11; 177 | sum12 = vec4(arow2.y) * brow + sum12; 178 | sum13 = vec4(arow3.y) * brow + sum13; 179 | 180 | brow = array_b[(k * 4u + 2u) * ND4 + x * 2u + 0u]; 181 | sum00 = vec4(arow0.z) * brow + sum00; 182 | sum01 = vec4(arow1.z) * brow + sum01; 183 | sum02 = vec4(arow2.z) * brow + sum02; 184 | sum03 = vec4(arow3.z) * brow + sum03; 185 | 186 | brow = array_b[(k * 4u + 2u) * ND4 + x * 2u + 1u]; 187 | sum10 = vec4(arow0.z) * brow + sum10; 188 | sum11 = vec4(arow1.z) * brow + sum11; 
189 | sum12 = vec4(arow2.z) * brow + sum12; 190 | sum13 = vec4(arow3.z) * brow + sum13; 191 | 192 | brow = array_b[(k * 4u + 3u) * ND4 + x * 2u + 0u]; 193 | sum00 = vec4(arow0.w) * brow + sum00; 194 | sum01 = vec4(arow1.w) * brow + sum01; 195 | sum02 = vec4(arow2.w) * brow + sum02; 196 | sum03 = vec4(arow3.w) * brow + sum03; 197 | 198 | brow = array_b[(k * 4u + 3u) * ND4 + x * 2u + 1u]; 199 | sum10 = vec4(arow0.w) * brow + sum10; 200 | sum11 = vec4(arow1.w) * brow + sum11; 201 | sum12 = vec4(arow2.w) * brow + sum12; 202 | sum13 = vec4(arow3.w) * brow + sum13; 203 | } 204 | 205 | if (y * 4u + 0u < M) { 206 | array_c[x * 2u + 0u + (y * 4u + 0u) * ND4] = sum00; 207 | array_c[x * 2u + 1u + (y * 4u + 0u) * ND4] = sum10; 208 | } 209 | if (y * 4u + 1u < M) { 210 | array_c[x * 2u + 0u + (y * 4u + 1u) * ND4] = sum01; 211 | array_c[x * 2u + 1u + (y * 4u + 1u) * ND4] = sum11; 212 | } 213 | if (y * 4u + 2u < M) { 214 | array_c[x * 2u + 0u + (y * 4u + 2u) * ND4] = sum02; 215 | array_c[x * 2u + 1u + (y * 4u + 2u) * ND4] = sum12; 216 | } 217 | if (y * 4u + 3u < M) { 218 | array_c[x * 2u + 0u + (y * 4u + 3u) * ND4] = sum03; 219 | array_c[x * 2u + 1u + (y * 4u + 3u) * ND4] = sum13; 220 | } 221 | } 222 | `; 223 | 224 | fastMatMulNoCheck = ` 225 | struct CMeta { 226 | M: u32, 227 | N: u32, 228 | ND4: u32, 229 | KD4: u32, 230 | } 231 | 232 | @group(1) @binding(0) var array_a: array>; 233 | @group(1) @binding(1) var array_b: array>; 234 | 235 | @group(0) @binding(0) var cmeta: CMeta; 236 | @group(0) @binding(1) var array_c: array>; 237 | 238 | @compute @workgroup_size(8, 8) 239 | fn main(@builtin(global_invocation_id) global_id: vec3) { 240 | var M: u32 = cmeta.M; 241 | var N: u32 = cmeta.N; 242 | var ND4: u32 = cmeta.ND4; 243 | var KD4: u32 = cmeta.KD4; 244 | var x: u32 = global_id.x; 245 | var y: u32 = global_id.y; 246 | 247 | if (x * 8 >= N || y * 4 >= M) { 248 | return; 249 | } 250 | 251 | var sum00: vec4 = vec4(); 252 | var sum01: vec4 = vec4(); 253 | var sum02: vec4 = vec4(); 254 
| var sum03: vec4 = vec4(); 255 | var sum10: vec4 = vec4(); 256 | var sum11: vec4 = vec4(); 257 | var sum12: vec4 = vec4(); 258 | var sum13: vec4 = vec4(); 259 | 260 | for(var k: u32 = 0u; k < KD4; k = k + 1u) { 261 | var arow0: vec4 = array_a[(y * 4u + 0u) * KD4 + k]; 262 | var arow1: vec4 = array_a[(y * 4u + 1u) * KD4 + k]; 263 | var arow2: vec4 = array_a[(y * 4u + 2u) * KD4 + k]; 264 | var arow3: vec4 = array_a[(y * 4u + 3u) * KD4 + k]; 265 | var brow: vec4; 266 | 267 | brow = array_b[(k * 4u + 0u) * ND4 + x * 2u + 0u]; 268 | sum00 = vec4(arow0.x) * brow + sum00; 269 | sum01 = vec4(arow1.x) * brow + sum01; 270 | sum02 = vec4(arow2.x) * brow + sum02; 271 | sum03 = vec4(arow3.x) * brow + sum03; 272 | 273 | brow = array_b[(k * 4u + 0u) * ND4 + x * 2u + 1u]; 274 | sum10 = vec4(arow0.x) * brow + sum10; 275 | sum11 = vec4(arow1.x) * brow + sum11; 276 | sum12 = vec4(arow2.x) * brow + sum12; 277 | sum13 = vec4(arow3.x) * brow + sum13; 278 | 279 | brow = array_b[(k * 4u + 1u) * ND4 + x * 2u + 0u]; 280 | sum00 = vec4(arow0.y) * brow + sum00; 281 | sum01 = vec4(arow1.y) * brow + sum01; 282 | sum02 = vec4(arow2.y) * brow + sum02; 283 | sum03 = vec4(arow3.y) * brow + sum03; 284 | 285 | brow = array_b[(k * 4u + 1u) * ND4 + x * 2u + 1u]; 286 | sum10 = vec4(arow0.y) * brow + sum10; 287 | sum11 = vec4(arow1.y) * brow + sum11; 288 | sum12 = vec4(arow2.y) * brow + sum12; 289 | sum13 = vec4(arow3.y) * brow + sum13; 290 | 291 | brow = array_b[(k * 4u + 2u) * ND4 + x * 2u + 0u]; 292 | sum00 = vec4(arow0.z) * brow + sum00; 293 | sum01 = vec4(arow1.z) * brow + sum01; 294 | sum02 = vec4(arow2.z) * brow + sum02; 295 | sum03 = vec4(arow3.z) * brow + sum03; 296 | 297 | brow = array_b[(k * 4u + 2u) * ND4 + x * 2u + 1u]; 298 | sum10 = vec4(arow0.z) * brow + sum10; 299 | sum11 = vec4(arow1.z) * brow + sum11; 300 | sum12 = vec4(arow2.z) * brow + sum12; 301 | sum13 = vec4(arow3.z) * brow + sum13; 302 | 303 | brow = array_b[(k * 4u + 3u) * ND4 + x * 2u + 0u]; 304 | sum00 = vec4(arow0.w) * brow + 
sum00; 305 | sum01 = vec4(arow1.w) * brow + sum01; 306 | sum02 = vec4(arow2.w) * brow + sum02; 307 | sum03 = vec4(arow3.w) * brow + sum03; 308 | 309 | brow = array_b[(k * 4u + 3u) * ND4 + x * 2u + 1u]; 310 | sum10 = vec4(arow0.w) * brow + sum10; 311 | sum11 = vec4(arow1.w) * brow + sum11; 312 | sum12 = vec4(arow2.w) * brow + sum12; 313 | sum13 = vec4(arow3.w) * brow + sum13; 314 | } 315 | 316 | array_c[x * 2u + 0u + (y * 4u + 0u) * ND4] = sum00; 317 | array_c[x * 2u + 1u + (y * 4u + 0u) * ND4] = sum10; 318 | array_c[x * 2u + 0u + (y * 4u + 1u) * ND4] = sum01; 319 | array_c[x * 2u + 1u + (y * 4u + 1u) * ND4] = sum11; 320 | array_c[x * 2u + 0u + (y * 4u + 2u) * ND4] = sum02; 321 | array_c[x * 2u + 1u + (y * 4u + 2u) * ND4] = sum12; 322 | array_c[x * 2u + 0u + (y * 4u + 3u) * ND4] = sum03; 323 | array_c[x * 2u + 1u + (y * 4u + 3u) * ND4] = sum13; 324 | } 325 | `; 326 | } 327 | 328 | class TestGPT { 329 | constructor(folder, type, doAttentionCache = false) { 330 | this.folder = folder; 331 | this.tokenizerType = type; 332 | this.initialized = false; 333 | 334 | this.device; 335 | this.model; 336 | this.tokenizer; 337 | this.params; 338 | this.minBufferOffset = 1; 339 | this.doAttentionCache = doAttentionCache; 340 | 341 | this.defaultPrompt; 342 | this.defaultTopK; 343 | this.defaultTemperature; 344 | this.defaultTokens; 345 | 346 | this.bufferDeletionStack = []; 347 | this.unloadDeletionStack = []; 348 | } 349 | 350 | async initialize() { 351 | if (this.initialized) return console.error("Model already initialized"); 352 | if (!navigator.gpu) throw new Error("WebGPU is not supported"); 353 | 354 | const adapter = await navigator.gpu.requestAdapter(); 355 | this.device = await adapter.requestDevice(); 356 | 357 | this.matMulOperation = new FastMatMul(this.device); 358 | 359 | const dimM = 10; 360 | const dimN = 10; 361 | const demo = new Float32Array(dimM * dimN); 362 | for (let i = 0; i < dimM * dimN; i++) demo[i] = 1; 363 | const weights1 = this.initTensor(demo, 
[dimM, dimN], ["storage", "copy_from"]); 364 | // const weights2 = this.initTensor(demo, [dimM, dimN], ["storage", "copy_from"]); 365 | this.inputBuffer = this.initBuffer(["storage", "copy_from", "copy_to"], dimM, dimN); 366 | 367 | this.computePasses = []; 368 | let intermediateBuffer = this.inputBuffer; 369 | for (let i = 0; i < 10; i++) { 370 | let { pass, resultBuf } = this.matMulOperation.newInstance(10, 10, 10, intermediateBuffer, weights1); 371 | intermediateBuffer = resultBuf; 372 | this.computePasses.push(pass); 373 | } 374 | this.resultBuffer = intermediateBuffer; 375 | this.outputBuffer = this.initBuffer(["map_read", "copy_to"], dimM, dimN); 376 | 377 | this.initialized = true; 378 | } 379 | 380 | async test() { 381 | const dimM = 10; 382 | const dimN = 10; 383 | const matrixA = new Float32Array(dimM * dimN); 384 | for (let i = 0; i < dimM * dimN; i++) matrixA[i] = i * 0.1; 385 | 386 | this.device.queue.writeBuffer(this.inputBuffer, 0, matrixA); 387 | 388 | const commandEncoder = this.device.createCommandEncoder(); 389 | for (const pass of this.computePasses) { 390 | const passEncoder = commandEncoder.beginComputePass(); 391 | passEncoder.setPipeline(pass.pipeline); 392 | for (let i = 0; i < pass.groups.length; i++) passEncoder.setBindGroup(i, pass.groups[i]); 393 | passEncoder.dispatchWorkgroups(pass.workgroups.x, pass.workgroups.y); 394 | passEncoder.end(); 395 | } 396 | commandEncoder.copyBufferToBuffer(this.resultBuffer, 0, this.outputBuffer, 0, this.bufferSize(dimM, dimN)); 397 | this.device.queue.submit([commandEncoder.finish()]); 398 | 399 | await this.outputBuffer.mapAsync(GPUMapMode.READ); 400 | const output = this.outputBuffer.getMappedRange(); 401 | const outputArray = new Float32Array(output).slice(0); // Prevent destruction. 
402 | console.log(outputArray, formatAsMatrix(outputArray, dimM, dimN)); 403 | 404 | this.destroyBuffers(); 405 | } 406 | 407 | initBindGroup(layout, buffers) { 408 | return this.device.createBindGroup({ 409 | layout, 410 | entries: buffers.map((buffer, i) => ({ 411 | binding: i, 412 | resource: { buffer }, 413 | })), 414 | }); 415 | } 416 | 417 | initOutputBuffer(commandEncoder, buffer, row, col) { 418 | const outputBuffer = this.initBuffer(["map_read", "copy_to"], row, col); 419 | commandEncoder.copyBufferToBuffer(buffer, 0, outputBuffer, 0, this.bufferSize(row, col)); 420 | return outputBuffer; 421 | } 422 | 423 | initBuffer(ops, row, col = 1, noDelete = false) { 424 | const buffer = this.device.createBuffer({ 425 | size: this.bufferSize(row, col), 426 | usage: ops.map((u) => bufferUsageDict[u]).reduce((a, b) => a | b), 427 | }); 428 | if (!noDelete) this.bufferDeletionStack.push(buffer); 429 | else this.unloadDeletionStack.push(buffer); 430 | return buffer; 431 | } 432 | 433 | initTensor(data, dims, ops) { 434 | const buffer = this.device.createBuffer({ 435 | size: this.bufferSize(dims[0], dims[1], dims[2] || 1), 436 | usage: ops.map((u) => bufferUsageDict[u]).reduce((a, b) => a | b), 437 | mappedAtCreation: true, 438 | }); 439 | const array = new Float32Array(buffer.getMappedRange()); 440 | array.set(data); 441 | buffer.unmap(); 442 | this.unloadDeletionStack.push(buffer); 443 | return buffer; 444 | } 445 | 446 | bufferSize(dimX, dimY = 1, dimZ = 1) { 447 | return Math.ceil((dimX * dimY * dimZ * Float32Array.BYTES_PER_ELEMENT) / this.minBufferOffset) * this.minBufferOffset; 448 | } 449 | 450 | unloadBuffers() { 451 | this.unloadDeletionStack.map((buffer) => buffer.destroy()); 452 | this.unloadDeletionStack = []; 453 | } 454 | 455 | destroyBuffers() { 456 | this.bufferDeletionStack.map((buffer) => buffer.destroy()); 457 | this.bufferDeletionStack = []; 458 | } 459 | 460 | initBindGroups() { 461 | const bg = (types) => 462 | this.device.createBindGroupLayout({ 
463 | entries: types.map((entry, i) => ({ 464 | binding: i, 465 | visibility: GPUShaderStage.COMPUTE, 466 | buffer: { type: entry }, 467 | })), 468 | }); 469 | 470 | this.r_r_r_Layout = bg(["read-only-storage", "read-only-storage", "read-only-storage"]); 471 | this.r_r_Layout = bg(["read-only-storage", "read-only-storage"]); 472 | this.r_Layout = bg(["read-only-storage"]); 473 | this.u_s_Layout = bg(["uniform", "storage"]); 474 | this.u_s_s_s_Layout = bg(["uniform", "storage", "storage", "storage"]); 475 | } 476 | 477 | async initPipelines() { 478 | const p = (code, bindGroupLayouts) => { 479 | return this.device.createComputePipelineAsync({ 480 | layout: this.device.createPipelineLayout({ bindGroupLayouts }), 481 | compute: { 482 | module: this.device.createShaderModule({ code }), 483 | entryPoint: "main", 484 | }, 485 | }); 486 | }; 487 | } 488 | } 489 | 490 | async function test() { 491 | const GPU = new TestGPT(); 492 | await GPU.initialize(); 493 | await GPU.test(); 494 | } 495 | 496 | /* 497 | 498 | 499 | fast row add shader for reference 500 | struct BMeta { 501 | M: u32, 502 | N: u32, 503 | ND4: u32, 504 | } 505 | 506 | @group(1) @binding(0) var array_matrix: array>; 507 | @group(1) @binding(1) var array_bias: array>; 508 | @group(0) @binding(0) var bmeta: BMeta; 509 | @group(0) @binding(1) var array_output: array>; 510 | 511 | @compute @workgroup_size(8,8) 512 | fn main(@builtin(global_invocation_id) global_id: vec3) { 513 | var col: u32 = global_id.x; 514 | var row: u32 = global_id.y; 515 | var ND4: u32 = bmeta.ND4; 516 | var M: u32 = bmeta.M; 517 | 518 | if (row >= M || col >= ND4) { 519 | return; 520 | } 521 | 522 | array_output[row * ND4 + col] = array_matrix[row * ND4 + col] + array_bias[col]; 523 | } 524 | 525 | class FastMatMulBlockClass extends Block { 526 | constructor() { 527 | super(); 528 | this.name = "fastMatMul"; 529 | this.pipelineCache = new Map(); 530 | } 531 | 532 | getPipeline(rows) { 533 | const div4 = rows % 4 === 0; 534 | const 
pipelineCacheKey = div4 ? "fastMatMulNoCheck" : "fastMatMul"; 535 | if (this.pipelineCache.has(pipelineCacheKey)) return this.pipelineCache.get(pipelineCacheKey); 536 | const kernel = div4 ? this.fastMatMulNoCheck : this.fastMatMul; 537 | const pipeline = this.initPipeline(kernel, [this.u_s_Layout, this.r_r_Layout], `${this.name}_Pipeline_${pipelineCacheKey}`); 538 | this.pipelineCache.set(pipelineCacheKey, pipeline); 539 | return pipeline; 540 | } 541 | 542 | newInstance(rows, cols, shared, bufA, bufB) { 543 | const pipeline = this.getPipeline(rows); 544 | const uniformBuffer = this.initBuffer(["uniform", "copy_to"], [4]); 545 | const resultBuffer = this.initBuffer(["storage", "copy_from"], [rows, cols]); 546 | const opBindGroup = this.initBindGroup(this.u_s_Layout, [uniformBuffer, resultBuffer], `${this.name}_OpG`); 547 | const inputBindGroup = this.initBindGroup(this.r_r_Layout, [bufA, bufB], `${this.name}_InputG`); 548 | const workgroups = { x: wgSize(cols, 64), y: wgSize(rows, 32) }; 549 | this.device.queue.writeBuffer(uniformBuffer, 0, new Uint32Array([rows, cols, Math.ceil(cols / 4), Math.ceil(shared / 4)])); 550 | 551 | return { 552 | resultBuffer, 553 | passes: [ 554 | { 555 | flag: "compute", 556 | pipeline, 557 | groups: [opBindGroup, inputBindGroup], 558 | workgroups, 559 | }, 560 | ], 561 | }; 562 | } 563 | 564 | fastMatMul = ` 565 | struct CMeta { 566 | M: u32, 567 | N: u32, 568 | ND4: u32, 569 | KD4: u32, 570 | } 571 | 572 | @group(1) @binding(0) var array_a: array>; 573 | @group(1) @binding(1) var array_b: array>; 574 | 575 | @group(0) @binding(0) var cmeta: CMeta; 576 | @group(0) @binding(1) var array_c: array>; 577 | 578 | @compute @workgroup_size(8, 8) 579 | fn main(@builtin(global_invocation_id) global_id: vec3) { 580 | var M: u32 = cmeta.M; 581 | var N: u32 = cmeta.N; 582 | var ND4: u32 = cmeta.ND4; 583 | var KD4: u32 = cmeta.KD4; 584 | var x: u32 = global_id.x; 585 | var y: u32 = global_id.y; 586 | 587 | if (x * 8 >= N || y * 4 >= M) { 588 | 
return; 589 | } 590 | 591 | var sum00: vec4 = vec4(); 592 | var sum01: vec4 = vec4(); 593 | var sum02: vec4 = vec4(); 594 | var sum03: vec4 = vec4(); 595 | var sum10: vec4 = vec4(); 596 | var sum11: vec4 = vec4(); 597 | var sum12: vec4 = vec4(); 598 | var sum13: vec4 = vec4(); 599 | 600 | for(var k: u32 = 0u; k < KD4; k = k + 1u) { 601 | var arow0: vec4 = array_a[(y * 4u + 0u) * KD4 + k]; 602 | var arow1: vec4 = array_a[(y * 4u + 1u) * KD4 + k]; 603 | var arow2: vec4 = array_a[(y * 4u + 2u) * KD4 + k]; 604 | var arow3: vec4 = array_a[(y * 4u + 3u) * KD4 + k]; 605 | var brow: vec4; 606 | 607 | brow = array_b[(k * 4u + 0u) * ND4 + x * 2u + 0u]; 608 | sum00 = vec4(arow0.x) * brow + sum00; 609 | sum01 = vec4(arow1.x) * brow + sum01; 610 | sum02 = vec4(arow2.x) * brow + sum02; 611 | sum03 = vec4(arow3.x) * brow + sum03; 612 | 613 | brow = array_b[(k * 4u + 0u) * ND4 + x * 2u + 1u]; 614 | sum10 = vec4(arow0.x) * brow + sum10; 615 | sum11 = vec4(arow1.x) * brow + sum11; 616 | sum12 = vec4(arow2.x) * brow + sum12; 617 | sum13 = vec4(arow3.x) * brow + sum13; 618 | 619 | brow = array_b[(k * 4u + 1u) * ND4 + x * 2u + 0u]; 620 | sum00 = vec4(arow0.y) * brow + sum00; 621 | sum01 = vec4(arow1.y) * brow + sum01; 622 | sum02 = vec4(arow2.y) * brow + sum02; 623 | sum03 = vec4(arow3.y) * brow + sum03; 624 | 625 | brow = array_b[(k * 4u + 1u) * ND4 + x * 2u + 1u]; 626 | sum10 = vec4(arow0.y) * brow + sum10; 627 | sum11 = vec4(arow1.y) * brow + sum11; 628 | sum12 = vec4(arow2.y) * brow + sum12; 629 | sum13 = vec4(arow3.y) * brow + sum13; 630 | 631 | brow = array_b[(k * 4u + 2u) * ND4 + x * 2u + 0u]; 632 | sum00 = vec4(arow0.z) * brow + sum00; 633 | sum01 = vec4(arow1.z) * brow + sum01; 634 | sum02 = vec4(arow2.z) * brow + sum02; 635 | sum03 = vec4(arow3.z) * brow + sum03; 636 | 637 | brow = array_b[(k * 4u + 2u) * ND4 + x * 2u + 1u]; 638 | sum10 = vec4(arow0.z) * brow + sum10; 639 | sum11 = vec4(arow1.z) * brow + sum11; 640 | sum12 = vec4(arow2.z) * brow + sum12; 641 | sum13 = 
vec4(arow3.z) * brow + sum13; 642 | 643 | brow = array_b[(k * 4u + 3u) * ND4 + x * 2u + 0u]; 644 | sum00 = vec4(arow0.w) * brow + sum00; 645 | sum01 = vec4(arow1.w) * brow + sum01; 646 | sum02 = vec4(arow2.w) * brow + sum02; 647 | sum03 = vec4(arow3.w) * brow + sum03; 648 | 649 | brow = array_b[(k * 4u + 3u) * ND4 + x * 2u + 1u]; 650 | sum10 = vec4(arow0.w) * brow + sum10; 651 | sum11 = vec4(arow1.w) * brow + sum11; 652 | sum12 = vec4(arow2.w) * brow + sum12; 653 | sum13 = vec4(arow3.w) * brow + sum13; 654 | } 655 | 656 | if (y * 4u + 0u < M) { 657 | array_c[x * 2u + 0u + (y * 4u + 0u) * ND4] = sum00; 658 | array_c[x * 2u + 1u + (y * 4u + 0u) * ND4] = sum10; 659 | } 660 | if (y * 4u + 1u < M) { 661 | array_c[x * 2u + 0u + (y * 4u + 1u) * ND4] = sum01; 662 | array_c[x * 2u + 1u + (y * 4u + 1u) * ND4] = sum11; 663 | } 664 | if (y * 4u + 2u < M) { 665 | array_c[x * 2u + 0u + (y * 4u + 2u) * ND4] = sum02; 666 | array_c[x * 2u + 1u + (y * 4u + 2u) * ND4] = sum12; 667 | } 668 | if (y * 4u + 3u < M) { 669 | array_c[x * 2u + 0u + (y * 4u + 3u) * ND4] = sum03; 670 | array_c[x * 2u + 1u + (y * 4u + 3u) * ND4] = sum13; 671 | } 672 | } 673 | `; 674 | 675 | fastMatMulNoCheck = ` 676 | struct CMeta { 677 | M: u32, 678 | N: u32, 679 | ND4: u32, 680 | KD4: u32, 681 | } 682 | 683 | @group(1) @binding(0) var array_a: array>; 684 | @group(1) @binding(1) var array_b: array>; 685 | 686 | @group(0) @binding(0) var cmeta: CMeta; 687 | @group(0) @binding(1) var array_c: array>; 688 | 689 | @compute @workgroup_size(8, 8) 690 | fn main(@builtin(global_invocation_id) global_id: vec3) { 691 | var M: u32 = cmeta.M; 692 | var N: u32 = cmeta.N; 693 | var ND4: u32 = cmeta.ND4; 694 | var KD4: u32 = cmeta.KD4; 695 | var x: u32 = global_id.x; 696 | var y: u32 = global_id.y; 697 | 698 | if (x * 8 >= N || y * 4 >= M) { 699 | return; 700 | } 701 | 702 | var sum00: vec4 = vec4(); 703 | var sum01: vec4 = vec4(); 704 | var sum02: vec4 = vec4(); 705 | var sum03: vec4 = vec4(); 706 | var sum10: vec4 = 
vec4(); 707 | var sum11: vec4 = vec4(); 708 | var sum12: vec4 = vec4(); 709 | var sum13: vec4 = vec4(); 710 | 711 | for(var k: u32 = 0u; k < KD4; k = k + 1u) { 712 | var arow0: vec4 = array_a[(y * 4u + 0u) * KD4 + k]; 713 | var arow1: vec4 = array_a[(y * 4u + 1u) * KD4 + k]; 714 | var arow2: vec4 = array_a[(y * 4u + 2u) * KD4 + k]; 715 | var arow3: vec4 = array_a[(y * 4u + 3u) * KD4 + k]; 716 | var brow: vec4; 717 | 718 | brow = array_b[(k * 4u + 0u) * ND4 + x * 2u + 0u]; 719 | sum00 = vec4(arow0.x) * brow + sum00; 720 | sum01 = vec4(arow1.x) * brow + sum01; 721 | sum02 = vec4(arow2.x) * brow + sum02; 722 | sum03 = vec4(arow3.x) * brow + sum03; 723 | 724 | brow = array_b[(k * 4u + 0u) * ND4 + x * 2u + 1u]; 725 | sum10 = vec4(arow0.x) * brow + sum10; 726 | sum11 = vec4(arow1.x) * brow + sum11; 727 | sum12 = vec4(arow2.x) * brow + sum12; 728 | sum13 = vec4(arow3.x) * brow + sum13; 729 | 730 | brow = array_b[(k * 4u + 1u) * ND4 + x * 2u + 0u]; 731 | sum00 = vec4(arow0.y) * brow + sum00; 732 | sum01 = vec4(arow1.y) * brow + sum01; 733 | sum02 = vec4(arow2.y) * brow + sum02; 734 | sum03 = vec4(arow3.y) * brow + sum03; 735 | 736 | brow = array_b[(k * 4u + 1u) * ND4 + x * 2u + 1u]; 737 | sum10 = vec4(arow0.y) * brow + sum10; 738 | sum11 = vec4(arow1.y) * brow + sum11; 739 | sum12 = vec4(arow2.y) * brow + sum12; 740 | sum13 = vec4(arow3.y) * brow + sum13; 741 | 742 | brow = array_b[(k * 4u + 2u) * ND4 + x * 2u + 0u]; 743 | sum00 = vec4(arow0.z) * brow + sum00; 744 | sum01 = vec4(arow1.z) * brow + sum01; 745 | sum02 = vec4(arow2.z) * brow + sum02; 746 | sum03 = vec4(arow3.z) * brow + sum03; 747 | 748 | brow = array_b[(k * 4u + 2u) * ND4 + x * 2u + 1u]; 749 | sum10 = vec4(arow0.z) * brow + sum10; 750 | sum11 = vec4(arow1.z) * brow + sum11; 751 | sum12 = vec4(arow2.z) * brow + sum12; 752 | sum13 = vec4(arow3.z) * brow + sum13; 753 | 754 | brow = array_b[(k * 4u + 3u) * ND4 + x * 2u + 0u]; 755 | sum00 = vec4(arow0.w) * brow + sum00; 756 | sum01 = vec4(arow1.w) * brow + sum01; 
757 | sum02 = vec4(arow2.w) * brow + sum02; 758 | sum03 = vec4(arow3.w) * brow + sum03; 759 | 760 | brow = array_b[(k * 4u + 3u) * ND4 + x * 2u + 1u]; 761 | sum10 = vec4(arow0.w) * brow + sum10; 762 | sum11 = vec4(arow1.w) * brow + sum11; 763 | sum12 = vec4(arow2.w) * brow + sum12; 764 | sum13 = vec4(arow3.w) * brow + sum13; 765 | } 766 | 767 | array_c[x * 2u + 0u + (y * 4u + 0u) * ND4] = sum00; 768 | array_c[x * 2u + 1u + (y * 4u + 0u) * ND4] = sum10; 769 | array_c[x * 2u + 0u + (y * 4u + 1u) * ND4] = sum01; 770 | array_c[x * 2u + 1u + (y * 4u + 1u) * ND4] = sum11; 771 | array_c[x * 2u + 0u + (y * 4u + 2u) * ND4] = sum02; 772 | array_c[x * 2u + 1u + (y * 4u + 2u) * ND4] = sum12; 773 | array_c[x * 2u + 0u + (y * 4u + 3u) * ND4] = sum03; 774 | array_c[x * 2u + 1u + (y * 4u + 3u) * ND4] = sum13; 775 | } 776 | `; 777 | } 778 | 779 | 780 | fusedAttentionShaderNew = ` 781 | struct Meta { 782 | M: u32, 783 | N: u32, 784 | ND4: u32, 785 | KD4: u32, 786 | attentionScale: f32, 787 | } 788 | 789 | @group(1) @binding(0) var query_array: array>; 790 | @group(1) @binding(1) var key_array: array>; 791 | 792 | @group(0) @binding(0) var uniforms: Meta; 793 | @group(0) @binding(1) var array_c: array>; 794 | 795 | @compute @workgroup_size(8, 8) 796 | fn main(@builtin(global_invocation_id) global_id: vec3) { 797 | var M: u32 = uniforms.M; 798 | var N: u32 = uniforms.N; 799 | var ND4: u32 = uniforms.ND4; 800 | var KD4: u32 = uniforms.KD4; 801 | var x: u32 = global_id.x; 802 | var y: u32 = global_id.y; 803 | 804 | if (x * 8 >= N || y * 4 >= M) { 805 | return; 806 | } 807 | 808 | var sum00: vec4 = vec4(); 809 | var sum01: vec4 = vec4(); 810 | var sum02: vec4 = vec4(); 811 | var sum03: vec4 = vec4(); 812 | var sum10: vec4 = vec4(); 813 | var sum11: vec4 = vec4(); 814 | var sum12: vec4 = vec4(); 815 | var sum13: vec4 = vec4(); 816 | 817 | for(var k: u32 = 0u; k < KD4; k = k + 1u) { 818 | var arow0: vec4 = query_array[(y * 4u + 0u) * KD4 + k]; 819 | var arow1: vec4 = query_array[(y * 4u + 
1u) * KD4 + k]; 820 | var arow2: vec4 = query_array[(y * 4u + 2u) * KD4 + k]; 821 | var arow3: vec4 = query_array[(y * 4u + 3u) * KD4 + k]; 822 | var brow: vec4; 823 | 824 | brow = key_array[(k * 4u + 0u) * ND4 + x * 2u + 0u]; 825 | sum00 = vec4(arow0.x) * brow + sum00; 826 | sum01 = vec4(arow1.x) * brow + sum01; 827 | sum02 = vec4(arow2.x) * brow + sum02; 828 | sum03 = vec4(arow3.x) * brow + sum03; 829 | 830 | brow = key_array[(k * 4u + 0u) * ND4 + x * 2u + 1u]; 831 | sum10 = vec4(arow0.x) * brow + sum10; 832 | sum11 = vec4(arow1.x) * brow + sum11; 833 | sum12 = vec4(arow2.x) * brow + sum12; 834 | sum13 = vec4(arow3.x) * brow + sum13; 835 | 836 | brow = key_array[(k * 4u + 1u) * ND4 + x * 2u + 0u]; 837 | sum00 = vec4(arow0.y) * brow + sum00; 838 | sum01 = vec4(arow1.y) * brow + sum01; 839 | sum02 = vec4(arow2.y) * brow + sum02; 840 | sum03 = vec4(arow3.y) * brow + sum03; 841 | 842 | brow = key_array[(k * 4u + 1u) * ND4 + x * 2u + 1u]; 843 | sum10 = vec4(arow0.y) * brow + sum10; 844 | sum11 = vec4(arow1.y) * brow + sum11; 845 | sum12 = vec4(arow2.y) * brow + sum12; 846 | sum13 = vec4(arow3.y) * brow + sum13; 847 | 848 | brow = key_array[(k * 4u + 2u) * ND4 + x * 2u + 0u]; 849 | sum00 = vec4(arow0.z) * brow + sum00; 850 | sum01 = vec4(arow1.z) * brow + sum01; 851 | sum02 = vec4(arow2.z) * brow + sum02; 852 | sum03 = vec4(arow3.z) * brow + sum03; 853 | 854 | brow = key_array[(k * 4u + 2u) * ND4 + x * 2u + 1u]; 855 | sum10 = vec4(arow0.z) * brow + sum10; 856 | sum11 = vec4(arow1.z) * brow + sum11; 857 | sum12 = vec4(arow2.z) * brow + sum12; 858 | sum13 = vec4(arow3.z) * brow + sum13; 859 | 860 | brow = key_array[(k * 4u + 3u) * ND4 + x * 2u + 0u]; 861 | sum00 = vec4(arow0.w) * brow + sum00; 862 | sum01 = vec4(arow1.w) * brow + sum01; 863 | sum02 = vec4(arow2.w) * brow + sum02; 864 | sum03 = vec4(arow3.w) * brow + sum03; 865 | 866 | brow = key_array[(k * 4u + 3u) * ND4 + x * 2u + 1u]; 867 | sum10 = vec4(arow0.w) * brow + sum10; 868 | sum11 = vec4(arow1.w) * brow + 
sum11; 869 | sum12 = vec4(arow2.w) * brow + sum12; 870 | sum13 = vec4(arow3.w) * brow + sum13; 871 | } 872 | 873 | if (y * 4u + 0u < M) { 874 | array_c[x * 2u + 0u + (y * 4u + 0u) * ND4] = sum00; 875 | array_c[x * 2u + 1u + (y * 4u + 0u) * ND4] = sum10; 876 | } 877 | if (y * 4u + 1u < M) { 878 | array_c[x * 2u + 0u + (y * 4u + 1u) * ND4] = sum01; 879 | array_c[x * 2u + 1u + (y * 4u + 1u) * ND4] = sum11; 880 | } 881 | if (y * 4u + 2u < M) { 882 | array_c[x * 2u + 0u + (y * 4u + 2u) * ND4] = sum02; 883 | array_c[x * 2u + 1u + (y * 4u + 2u) * ND4] = sum12; 884 | } 885 | if (y * 4u + 3u < M) { 886 | array_c[x * 2u + 0u + (y * 4u + 3u) * ND4] = sum03; 887 | array_c[x * 2u + 1u + (y * 4u + 3u) * ND4] = sum13; 888 | } 889 | `; 890 | 891 | 892 | // In progress. 893 | // withCheckOffset: ` 894 | // var x1Offset: u32 = ((x * 2u + 0u) / uniforms.TOffset) * uniforms.TOffset * M; 895 | // var x2Offset: u32 = ((x * 2u + 1u) / uniforms.TOffset) * uniforms.TOffset * M; 896 | // 897 | 898 | // if (y * 4u + 0u < M) { 899 | // array_c[xMod * 2u + 0u + x1Offset + (y * 4u + 0u) * uniforms.TOffset] = vec4(1.0); 900 | // array_c[xMod * 2u + 1u + x2Offset + (y * 4u + 0u) * uniforms.TOffset] = vec4(f32(x1Offset)); 901 | // } 902 | // if (y * 4u + 1u < M) { 903 | // array_c[xMod * 2u + 0u + x1Offset + (y * 4u + 1u) * uniforms.TOffset] = vec4(2.0); 904 | // array_c[xMod * 2u + 1u + x2Offset + (y * 4u + 1u) * uniforms.TOffset] = vec4(f32(x2Offset)); 905 | // } 906 | // if (y * 4u + 2u < M) { 907 | // array_c[xMod * 2u + 0u + x1Offset + (y * 4u + 2u) * uniforms.TOffset] = vec4(3.0); 908 | // array_c[xMod * 2u + 1u + x2Offset + (y * 4u + 2u) * uniforms.TOffset] = vec4(3.0); 909 | // } 910 | // if (y * 4u + 3u < M) { 911 | // array_c[xMod * 2u + 0u + x1Offset + (y * 4u + 3u) * uniforms.TOffset] = vec4(4.0); 912 | // array_c[xMod * 2u + 1u + x2Offset + (y * 4u + 3u) * uniforms.TOffset] = vec4(4.0); 913 | // } 914 | // `, 915 | 916 | transposeShader = ` 917 | struct Meta { 918 | M: u32, 919 | N: 
u32, 920 | } 921 | 922 | @group(1) @binding(0) var input_array: array; 923 | 924 | @group(0) @binding(0) var uniforms: Meta; 925 | @group(0) @binding(1) var result_array: array; 926 | 927 | // Bank conflicts? 928 | var tile: array, 8>; 929 | 930 | @compute @workgroup_size(8, 8) 931 | fn main (@builtin(workgroup_id) wg_id: vec3, @builtin(local_invocation_id) local_id: vec3) { 932 | let col: u32 = wg_id.x; 933 | let row: u32 = wg_id.y; 934 | let N: u32 = uniforms.N; 935 | let M: u32 = uniforms.M; 936 | 937 | let tile_col = col * 8u + local_id.x; 938 | let tile_row = row * 8u + local_id.y; 939 | 940 | // Load a tile from input_array to shared memory tile 941 | if (tile_row < M && tile_col < N) { 942 | tile[local_id.y][local_id.x] = input_array[tile_row * N + tile_col]; 943 | } 944 | 945 | workgroupBarrier(); // Ensure all threads have finished writing to the shared memory before proceeding 946 | 947 | // Write the transposed tile to result_array. Flips dims. 948 | if (tile_row < M && tile_col < N) { 949 | result_array[tile_col * M + tile_row] = tile[local_id.x][local_id.y]; 950 | } 951 | } 952 | `; 953 | 954 | */ 955 | -------------------------------------------------------------------------------- /other/test.js: -------------------------------------------------------------------------------- 1 | class OutputBlockClass extends Block { 2 | constructor() { 3 | super(); 4 | this.name = "output"; 5 | } 6 | 7 | newInstance(row, col, inputBuffer) { 8 | const outputBuffer = this.initBuffer(["map_read", "copy_to"], [row, col]); 9 | 10 | const copyCommand = { 11 | flag: "copy", 12 | src: inputBuffer, 13 | srcOffset: 0, 14 | dst: outputBuffer, 15 | dstOffset: 0, 16 | size: this.bufferSize(row, col), 17 | }; 18 | 19 | return { 20 | resultBuffer: outputBuffer, 21 | passes: [copyCommand], 22 | }; 23 | } 24 | } 25 | 26 | class CausalMaskBlockClass extends Block { 27 | constructor() { 28 | super(); 29 | this.name = "causal_mask"; 30 | this.pipelineCache = new Map(); 31 | } 32 | 33 
| getSimpleCausalMaskPipeline() { 34 | const pipelineCacheKey = `${this.name}_simplecausalmask`; // No param optimization. 35 | if (this.pipelineCache.has(pipelineCacheKey)) return this.pipelineCache.get(pipelineCacheKey); 36 | const pipeline = this.initPipeline(this.origCausalMaskShader, [this.u_s_Layout, this.r_Layout], `${this.name}_Pipeline_CausalMask`); 37 | this.pipelineCache.set(pipelineCacheKey, pipeline); 38 | return pipeline; 39 | } 40 | 41 | getCausalMaskPipeline() { 42 | const pipelineCacheKey = `${this.name}_causalmask`; // No param optimization. 43 | if (this.pipelineCache.has(pipelineCacheKey)) return this.pipelineCache.get(pipelineCacheKey); 44 | const pipeline = this.initPipeline(this.causalMaskShader, [this.u_s_Layout, this.r_Layout], `${this.name}_Pipeline_CausalMask`); 45 | this.pipelineCache.set(pipelineCacheKey, pipeline); 46 | return pipeline; 47 | } 48 | 49 | newInstance(rows, cols, inputBuffer) { 50 | const causalMaskPipeline = this.getCausalMaskPipeline(); 51 | const causalMaskUniformBuffer = this.initBuffer(["uniform", "copy_to"], [4]); 52 | const causalMaskResultBuffer = this.initBuffer(["storage", "copy_from"], [rows, cols]); 53 | const causalMaskBindGroup = this.initBindGroup(this.u_s_Layout, [causalMaskUniformBuffer, causalMaskResultBuffer], `${this.name}_CausalMaskG`); 54 | const causalMaskInputBindGroup = this.initBindGroup(this.r_Layout, [inputBuffer], `${this.name}_CausalMaskInputG`); 55 | this.device.queue.writeBuffer(causalMaskUniformBuffer, 0, new Uint32Array([cols, rows])); // Transposes! This is needed for softmax. 
56 | const causalMaskWorkgroups = { x: wgSize(rows, 16), y: wgSize(cols, 16), z: 1 }; 57 | 58 | return { 59 | resultBuffer: causalMaskResultBuffer, 60 | passes: [ 61 | { 62 | flag: "compute", 63 | pipeline: causalMaskPipeline, 64 | groups: [causalMaskBindGroup, causalMaskInputBindGroup], 65 | workgroups: causalMaskWorkgroups, 66 | }, 67 | ], 68 | }; 69 | } 70 | 71 | simpleCausalMaskShader = ` 72 | struct Matrix { 73 | data: array, 74 | } 75 | 76 | struct Dimensions { 77 | dimY: u32, // row dimension of input matrix 78 | dimX: u32, // col dimension of input matrix 79 | }; 80 | 81 | @group(0) @binding(0) var DimBuffer: Dimensions; 82 | @group(0) @binding(1) var Result: Matrix; 83 | 84 | @group(1) @binding(0) var Input: Matrix; 85 | 86 | @compute @workgroup_size(16, 16) 87 | fn main (@builtin(global_invocation_id) global_id: vec3) { 88 | let col: u32 = global_id.x; 89 | let row: u32 = global_id.y; 90 | let dimX: u32 = DimBuffer.dimX; 91 | let dimY: u32 = DimBuffer.dimY; 92 | 93 | let rowMask: u32 = row % dimX; 94 | if (row >= dimY || col >= dimX) { 95 | return; 96 | } 97 | 98 | if (col > rowMask) { 99 | Result.data[row * dimX + col] = 0.0; 100 | } else { 101 | let rowNum: u32 = row / dimX; 102 | Result.data[row * dimX + col] = Input.data[rowMask * dimY + col + rowNum * dimX]; 103 | } 104 | } 105 | `; 106 | 107 | origCausalMaskShader = ` 108 | struct Matrix { 109 | data: array, 110 | } 111 | 112 | struct Dimensions { 113 | dimY: u32, // row dimension of input matrix 114 | dimX: u32, // col dimension of input matrix 115 | }; 116 | 117 | @group(0) @binding(0) var DimBuffer: Dimensions; 118 | @group(0) @binding(1) var Result: Matrix; 119 | 120 | @group(1) @binding(0) var Input: Matrix; 121 | 122 | @compute @workgroup_size(16, 16) 123 | fn main (@builtin(global_invocation_id) global_id: vec3) { 124 | let row: u32 = global_id.x; 125 | let col: u32 = global_id.y; 126 | let dimX: u32 = DimBuffer.dimX; 127 | let dimY: u32 = DimBuffer.dimY; 128 | 129 | let rowMask: u32 = row % 
dimX; 130 | if (row >= dimY || col > rowMask) { 131 | return; 132 | } 133 | 134 | let rowNum: u32 = row / dimX; 135 | Result.data[row * dimX + col] = Input.data[rowMask * dimY + col + rowNum * dimX]; 136 | } 137 | `; 138 | 139 | causalMaskShader = ` 140 | struct Matrix { 141 | data: array, 142 | } 143 | 144 | struct Dimensions { 145 | dimY: u32, // row dimension of input matrix 146 | dimX: u32, // col dimension of input matrix 147 | }; 148 | 149 | @group(0) @binding(0) var DimBuffer: Dimensions; 150 | @group(0) @binding(1) var Result: Matrix; 151 | 152 | @group(1) @binding(0) var Input: Matrix; 153 | 154 | @compute @workgroup_size(16, 16) 155 | fn main (@builtin(global_invocation_id) global_id: vec3) { 156 | let col: u32 = global_id.x; 157 | let row: u32 = global_id.y; 158 | let dimX: u32 = DimBuffer.dimX; 159 | let dimY: u32 = DimBuffer.dimY; 160 | 161 | if (row >= dimY || col >= dimX) { 162 | return; 163 | } 164 | 165 | let rowMask: u32 = row % dimX; 166 | let rowNum: u32 = row / dimX; 167 | let index = row * dimX + col; 168 | let causalMask: bool = (col <= rowMask); 169 | Result.data[index] = select(-1e9, Input.data[rowMask * dimY + col + rowNum * dimX], causalMask); 170 | } 171 | `; 172 | } 173 | 174 | class TransposeBlockClass extends Block { 175 | constructor() { 176 | super(); 177 | this.name = "transpose"; 178 | this.pipelineCache = new Map(); 179 | } 180 | 181 | getPipeline() { 182 | const pipelineCacheKey = this.name; // No param optimization. 
183 | if (this.pipelineCache.has(pipelineCacheKey)) return this.pipelineCache.get(pipelineCacheKey); 184 | const pipeline = this.initPipeline(this.transposeNewShader, [this.u_s_Layout, this.r_Layout], `${this.name}_Pipeline`); 185 | this.pipelineCache.set(pipelineCacheKey, pipeline); 186 | return pipeline; 187 | } 188 | 189 | newInstance(rows, cols, inputBuf) { 190 | const pipeline = this.getPipeline(); 191 | const uniformBuffer = this.initBuffer(["uniform", "copy_to"], [4]); 192 | const resultBuffer = this.initBuffer(["storage", "copy_from"], [rows, cols]); 193 | const opBindGroup = this.initBindGroup(this.u_s_Layout, [uniformBuffer, resultBuffer], `${this.name}_OpG`); 194 | const inputBindGroup = this.initBindGroup(this.r_Layout, [inputBuf], `${this.name}_InputG`); 195 | const workgroups = { x: 100, y: 100, z: 1 }; 196 | this.device.queue.writeBuffer(uniformBuffer, 0, new Uint32Array([rows, cols])); 197 | 198 | return { 199 | resultBuffer, 200 | passes: [ 201 | { 202 | flag: "compute", 203 | pipeline, 204 | groups: [opBindGroup, inputBindGroup], 205 | workgroups, 206 | }, 207 | ], 208 | }; 209 | } 210 | 211 | transposeNewShader = ` 212 | struct Meta { 213 | M: u32, 214 | N: u32, 215 | } 216 | 217 | @group(1) @binding(0) var input_array: array; 218 | 219 | @group(0) @binding(0) var uniforms: Meta; 220 | @group(0) @binding(1) var result_array: array; 221 | 222 | // Bank conflicts? 
223 | var tile: array, 8>; 224 | 225 | @compute @workgroup_size(8, 8) 226 | fn main (@builtin(workgroup_id) wg_id: vec3, @builtin(local_invocation_id) local_id: vec3) { 227 | let col: u32 = wg_id.x; 228 | let row: u32 = wg_id.y; 229 | let N: u32 = uniforms.N; 230 | let M: u32 = uniforms.M; 231 | 232 | let tile_col = col * 8u + local_id.x; 233 | let tile_row = row * 8u + local_id.y; 234 | 235 | // Load a tile from input_array to shared memory tile 236 | if (tile_row < M && tile_col < N) { 237 | tile[local_id.y][local_id.x] = input_array[tile_row * N + tile_col]; 238 | } 239 | 240 | workgroupBarrier(); // Ensure all threads have finished writing to the shared memory before proceeding 241 | 242 | // Compute transposed coordinates 243 | let transposed_col: u32 = row * 8u + local_id.x; 244 | let transposed_row: u32 = col * 8u + local_id.y; 245 | 246 | // Write the transposed tile to result_array 247 | if (transposed_col < M && transposed_row < N) { 248 | result_array[transposed_row * M + transposed_col] = tile[local_id.x][local_id.y]; // This line was incorrect 249 | } 250 | } 251 | `; 252 | } 253 | 254 | class SplitQBlockClass extends Block { 255 | constructor() { 256 | super(); 257 | this.name = "splitq"; 258 | this.pipelineCache = new Map(); 259 | } 260 | 261 | getPipeline() { 262 | const pipelineCacheKey = this.name; // No param optimization. 
263 | if (this.pipelineCache.has(pipelineCacheKey)) return this.pipelineCache.get(pipelineCacheKey); 264 | const pipeline = this.initPipeline(this.splitQShader, [this.u_s_Layout, this.r_Layout], `${this.name}_Pipeline`); 265 | this.pipelineCache.set(pipelineCacheKey, pipeline); 266 | return pipeline; 267 | } 268 | 269 | newInstance(rows, cols, numHeads, inputBuf) { 270 | if (cols % numHeads !== 0) throw new Error(`cols ${cols} must be divisible by numHeads ${numHeads}`); 271 | const pipeline = this.getPipeline(); 272 | const uniformBuffer = this.initBuffer(["uniform", "copy_to"], [4]); 273 | const resultBuffer = this.initBuffer(["storage", "copy_from"], [rows, cols]); 274 | const opBindGroup = this.initBindGroup(this.u_s_Layout, [uniformBuffer, resultBuffer], `${this.name}_OpG`); 275 | const inputBindGroup = this.initBindGroup(this.r_Layout, [inputBuf], `${this.name}_InputG`); 276 | const workgroups = { x: 100, y: 100, z: 1 }; 277 | this.device.queue.writeBuffer(uniformBuffer, 0, new Uint32Array([rows, cols, cols / numHeads])); 278 | 279 | return { 280 | resultBuffer, 281 | passes: [ 282 | { 283 | flag: "compute", 284 | pipeline, 285 | groups: [opBindGroup, inputBindGroup], 286 | workgroups, 287 | }, 288 | ], 289 | }; 290 | } 291 | 292 | splitQShader = ` 293 | struct Meta { 294 | M: u32, 295 | N: u32, 296 | HSize: u32, 297 | } 298 | 299 | @group(1) @binding(0) var input_array: array; 300 | 301 | @group(0) @binding(0) var uniforms: Meta; 302 | @group(0) @binding(1) var result_array: array; 303 | 304 | var tile: array, 8>; 305 | 306 | @compute @workgroup_size(8, 8) 307 | fn main (@builtin(local_invocation_id) local_id: vec3, @builtin(workgroup_id) workgroup_id: vec3) { 308 | let col: u32 = workgroup_id.x * 8 + local_id.x; 309 | let row: u32 = workgroup_id.y * 8 + local_id.y; 310 | let N: u32 = uniforms.N; 311 | let M: u32 = uniforms.M; 312 | 313 | // Load a tile from input_array to shared memory tile 314 | if (row < M && col < N) { 315 | tile[local_id.y][local_id.x] 
= input_array[row * N + col]; 316 | } 317 | 318 | workgroupBarrier(); // Ensure all threads have finished writing to the shared memory before proceeding 319 | 320 | let HSize: u32 = uniforms.HSize; 321 | let xOffset: u32 = col % HSize; 322 | let yOffset: u32 = row * HSize + (col / HSize) * HSize * M; 323 | 324 | // Write the tile to result_array 325 | if (row < M && col < N) { 326 | result_array[yOffset + xOffset] = tile[local_id.y][local_id.x]; 327 | } 328 | } 329 | `; 330 | } 331 | 332 | const CausalMaskBlock = new CausalMaskBlockClass(); 333 | const OutputBlock = new OutputBlockClass(); 334 | const TransposeBlock = new TransposeBlockClass(); 335 | const SplitQBlock = new SplitQBlockClass(); 336 | 337 | operations.push(CausalMaskBlock, OutputBlock, TransposeBlock, SplitQBlock); 338 | 339 | class TestShader { 340 | constructor(folder, type) { 341 | this.folder = folder; 342 | this.tokenizerType = type; 343 | this.initialized = false; 344 | 345 | this.device; 346 | this.model; 347 | this.tokenizer; 348 | this.params; 349 | this.minBufferOffset = 1; 350 | 351 | this.unloadDeletionStack = []; 352 | } 353 | 354 | async initialize() { 355 | if (this.initialized) return console.error("Model already initialized"); 356 | if (!navigator.gpu) throw new Error("WebGPU is not supported"); 357 | 358 | const adapter = await navigator.gpu.requestAdapter(); 359 | this.device = await adapter.requestDevice(); 360 | 361 | initializeOperations(this.device); 362 | 363 | this.initialized = true; 364 | 365 | console.log("Model initialized"); 366 | } 367 | 368 | async test() { 369 | // ---------------- Create Passes ---------------- // 370 | const seq_length = 15; 371 | const n_embd = 128; 372 | const n_head = 4; 373 | const head_size = n_embd / n_head; 374 | const { M, N } = { M: seq_length * n_head, N: seq_length }; 375 | const input_array = new Float32Array(M * N); // Softmax 376 | const weight_array = new Float32Array(seq_length * n_embd); 377 | for (let y = 0; y < M; y++) { 378 | for 
(let x = 0; x < N; x++) { 379 | input_array[y * N + x] = Math.floor(y / N) + 1; 380 | // causal mask 381 | if (x > y % N) input_array[y * N + x] = 0; 382 | } 383 | } 384 | for (let y = 0; y < seq_length; y++) { 385 | for (let x = 0; x < n_embd; x++) { 386 | weight_array[y * n_embd + x] = Math.floor(x / head_size) + 1; 387 | } 388 | } 389 | 390 | console.log(formatAsMatrix(input_array, M, N)); 391 | console.log(formatAsMatrix(weight_array, seq_length, n_embd)); 392 | 393 | const inputBuffer = this.initTensor(input_array, [M, N], ["storage"]); 394 | const weightBuffer = this.initTensor(weight_array, [seq_length, n_embd], ["storage", "copy_from"]); 395 | 396 | this.computePasses = []; 397 | const push = ({ passes, resultBuffer }) => { 398 | this.computePasses.push(...passes); 399 | return resultBuffer; 400 | }; 401 | 402 | let intermediateBuffer = inputBuffer; 403 | intermediateBuffer = push(AttentionBlock.newTestInstance(seq_length, n_embd, head_size, intermediateBuffer, weightBuffer)); 404 | // intermediateBuffer = push(AttentionBlock.newTestOldInstance(seq_length, n_embd, head_size, n_head, intermediateBuffer, weightBuffer)); 405 | intermediateBuffer = push(OutputBlock.newInstance(seq_length, n_embd, intermediateBuffer)); 406 | let resultBuffer = intermediateBuffer; 407 | 408 | // ---------------- Compute Passes ---------------- 409 | 410 | const commandEncoder = this.device.createCommandEncoder(); 411 | for (const pass of this.computePasses) { 412 | if (pass.flag === "compute") { 413 | const passEncoder = commandEncoder.beginComputePass(); 414 | passEncoder.setPipeline(pass.pipeline); 415 | for (let i = 0; i < pass.groups.length; i++) passEncoder.setBindGroup(i, pass.groups[i]); 416 | passEncoder.dispatchWorkgroups(pass.workgroups.x, pass.workgroups.y); 417 | passEncoder.end(); 418 | } else if (pass.flag === "copy") { 419 | commandEncoder.copyBufferToBuffer(pass.src, pass.srcOffset, pass.dst, pass.dstOffset, pass.size); 420 | } 421 | } 422 | 
this.device.queue.submit([commandEncoder.finish()]); 423 | 424 | // ---------------- Read Results ---------------- 425 | 426 | await resultBuffer.mapAsync(GPUMapMode.READ); 427 | const output = resultBuffer.getMappedRange(); 428 | const outputArray = new Float32Array(output).slice(0); // Copy the array, otherwise it'll be destroyed. 429 | console.log(formatAsMatrix(outputArray, seq_length, n_embd)); 430 | 431 | // ---------------- Create Passes ---------------- // 432 | 433 | // this.computePasses = []; 434 | 435 | // intermediateBuffer = inputBuffer; 436 | // intermediateBuffer = push(CausalMaskBlock.newInstance(M, N, intermediateBuffer)); // Transposes! 437 | // intermediateBuffer = push(SoftmaxBlock.newInstance(N, M, intermediateBuffer)); 438 | // intermediateBuffer = push(OutputBlock.newInstance(N, M, intermediateBuffer)); 439 | // resultBuffer = intermediateBuffer; 440 | 441 | // // ---------------- Compute Passes ---------------- 442 | 443 | // const commandEncoder2 = this.device.createCommandEncoder(); 444 | // for (const pass of this.computePasses) { 445 | // if (pass.flag === "compute") { 446 | // const passEncoder = commandEncoder2.beginComputePass(); 447 | // passEncoder.setPipeline(pass.pipeline); 448 | // for (let i = 0; i < pass.groups.length; i++) passEncoder.setBindGroup(i, pass.groups[i]); 449 | // passEncoder.dispatchWorkgroups(pass.workgroups.x, pass.workgroups.y); 450 | // passEncoder.end(); 451 | // } else if (pass.flag === "copy") { 452 | // commandEncoder2.copyBufferToBuffer(pass.src, pass.srcOffset, pass.dst, pass.dstOffset, pass.size); 453 | // } 454 | // } 455 | // this.device.queue.submit([commandEncoder2.finish()]); 456 | 457 | // // ---------------- Read Results ---------------- 458 | 459 | // await resultBuffer.mapAsync(GPUMapMode.READ); 460 | // const output2 = resultBuffer.getMappedRange(); 461 | // const outputArray2 = new Float32Array(output2).slice(0); // Copy the array, otherwise it'll be destroyed. 
462 | // console.log(formatAsMatrix(outputArray2, N, M)); 463 | 464 | // // ---------------- Compare Results ---------------- 465 | 466 | // let error = 0; 467 | // for (let i = 0; i < outputArray.length; i++) { 468 | // error += Math.abs(outputArray[i] - outputArray2[i]); 469 | // } 470 | // console.log("Error: ", error); 471 | 472 | // ---------------- Cleanup ---------------- 473 | 474 | destroyOperationBuffers(); 475 | this.unloadBuffers(); 476 | 477 | return outputArray; 478 | } 479 | 480 | initTensor(data, dims, ops) { 481 | const buffer = this.device.createBuffer({ 482 | size: this.bufferSize(dims[0], dims[1] || 1, dims[2] || 1), 483 | usage: ops.map((u) => bufferUsageDict[u]).reduce((a, b) => a | b), 484 | mappedAtCreation: true, 485 | }); 486 | new Float32Array(buffer.getMappedRange()).set(data); 487 | buffer.unmap(); 488 | this.unloadDeletionStack.push(buffer); 489 | return buffer; 490 | } 491 | 492 | unloadBuffers() { 493 | this.unloadDeletionStack.map((buffer) => buffer.destroy()); 494 | this.unloadDeletionStack = []; 495 | } 496 | 497 | bufferSize(dimX, dimY = 1, dimZ = 1) { 498 | return Math.ceil((dimX * dimY * dimZ * Float32Array.BYTES_PER_ELEMENT) / this.minBufferOffset) * this.minBufferOffset; 499 | } 500 | } 501 | 502 | async function testInstruction() { 503 | const testShader = new TestShader(); 504 | await testShader.initialize(); 505 | await testShader.test(); 506 | } 507 | -------------------------------------------------------------------------------- /other/validation/README.md: -------------------------------------------------------------------------------- 1 | # Validating Results 2 | 3 | PSA: This is old code and not meant to be super maintained. More general guideline. 4 | 5 | This is an extremely helpful validation tool for checking the results of your WebGPU model versus the original when writing kernels or otherwise. 

The format is an array of model states at each point in a generation sequence: first generate from the reference model, saving the state of the model as each token is produced, then compare against the browser model. You must greedily select tokens, of course, to maintain determinism. This can be done simply by setting top_k = 1.

I haven't included a script for exporting this generation, as my code was quite sloppy and the export will likely look quite different depending on your implementation. Here's an example of how you might save the states from Andrej Karpathy's NanoGPT code:

```

def generate(self, idx, max_new_tokens, temperature=1.0, top_k=1):
    for i in range(max_new_tokens):

        # I sloppily made a global that tracks the generation index.
        index = i

        # Another global variable.
        tensors.append({})

        idx_cond = idx if idx.size(
            1) <= self.config.block_size else idx[:, -self.config.block_size:]

        # Save inputs.
        logits, _ = self(idx_cond)

        # Save the logits.
        tensors[index]['logits'] = logits

        logits = logits[:, -1, :] / temperature
        tensors[index]['logits_t'] = logits

        if top_k is not None:
            v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
            logits[logits < v[:, [-1]]] = -float('Inf')

        probs = F.softmax(logits, dim=-1)

        # Save the probs.
        tensors[index]['probs'] = probs

        idx_next = torch.multinomial(probs, num_samples=1)
        idx = torch.cat((idx, idx_next), dim=1)

# Save tensors to JSON + format them correctly.
# See conversion scripts for correct formatting.
```

# Included validation files

I've included 2 validation files (`gpt2medium_validation.json` and `shakepeare_validation.json` — note the shipped file name is missing the second "s") for convenience.

Both are sampled with the prompt "What is the answer to life, the universe, and everything?".
55 | -------------------------------------------------------------------------------- /other/validation/test/gpt2medium_validation.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:49670416a4d85c6e1b1a3b651217f8ece6a6b764625de36fb7222dde79cef798 3 | size 456324435 4 | -------------------------------------------------------------------------------- /other/validation/test/shakepeare_validation.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bed2aa71484d9be3e0eb99d498bd08967ed649551416e359c7dd36b5d4d12d67 3 | size 10122136 4 | -------------------------------------------------------------------------------- /other/validation/validation.js: -------------------------------------------------------------------------------- 1 | async function runGPTValidation( 2 | device, 3 | queue, 4 | seq_length, 5 | vocab_size, 6 | n_embd, 7 | n_heads, 8 | n_layers, 9 | attentionDotProductScale, 10 | embdOutputBuffer, 11 | posEmbdBuffer, 12 | layer_buffers, 13 | normGammaBuffer, 14 | normBetaBuffer, 15 | validateIndex 16 | ) { 17 | console.log("Running GPT validation..."); 18 | 19 | const commandEncoder = device.createCommandEncoder(); 20 | 21 | console.log("Mixing embeddings..."); 22 | // Crop the position embeddings to the correct size. 
23 | const posEmbdOutputBuffer = createBuffer( 24 | device, 25 | bufferSizeCalc(seq_length, n_embd), 26 | GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC 27 | ); 28 | commandEncoder.copyBufferToBuffer( 29 | posEmbdBuffer, // Source buffer (original position embeddings) 30 | 0, // Source offset (starting from the beginning of the buffer) 31 | posEmbdOutputBuffer, // Destination buffer (cropped buffer) 32 | 0, // Destination offset (starting from the beginning of the cropped buffer) 33 | bufferSizeCalc(seq_length, n_embd) // Number of bytes to copy 34 | ); 35 | // Residual connection is just elementwise addition, can be used for combining embedding and position embedding. 36 | const embeddedInputBuffer = inlineResidual(device, queue, commandEncoder, seq_length, n_embd, embdOutputBuffer, posEmbdOutputBuffer); 37 | let layerBuffer = embeddedInputBuffer; 38 | 39 | // Used for validation. 40 | const buffers = []; 41 | 42 | for (let i = 0; i < n_layers; i++) { 43 | console.log(`Processing block ${i}...`); 44 | const layer_params = layer_buffers[i]; 45 | const { 46 | layerNormAttentionOutputBuffer, 47 | attentionOutputBuffer, 48 | residualAttentionOutputBuffer, 49 | layerNormLinearOutputBuffer, 50 | linearOutputBuffer, 51 | residualLinearOutputBuffer, 52 | } = transformerBlock(device, queue, commandEncoder, seq_length, n_embd, n_heads, attentionDotProductScale, layerBuffer, ...layer_params); 53 | buffers.push({ 54 | layerNormAttentionOutputBuffer, 55 | attentionOutputBuffer, 56 | residualAttentionOutputBuffer, 57 | layerNormLinearOutputBuffer, 58 | linearOutputBuffer, 59 | residualLinearOutputBuffer, 60 | }); 61 | layerBuffer = residualLinearOutputBuffer; 62 | } 63 | 64 | console.log("Normalizing output..."); 65 | 66 | const layerNormOutputBuffer = inlineLayerNorm(device, queue, commandEncoder, seq_length, n_embd, layerBuffer, normGammaBuffer, normBetaBuffer); 67 | 68 | // OUTPUT and VALIDATION 69 | 70 | const outputEmbedBuffer = 
createOutputBuffer(device, commandEncoder, embeddedInputBuffer, seq_length, n_embd); 71 | 72 | const outputBlockBuffers = []; 73 | for (let i = 0; i < n_layers; i++) { 74 | const block = buffers[i]; 75 | const outputLayerNormAttentionBuffer = createOutputBuffer(device, commandEncoder, block.layerNormAttentionOutputBuffer, seq_length, n_embd); 76 | const outputAttentionBuffer = createOutputBuffer(device, commandEncoder, block.attentionOutputBuffer, seq_length, n_embd); 77 | const outputResidualAttentionBuffer = createOutputBuffer(device, commandEncoder, block.residualAttentionOutputBuffer, seq_length, n_embd); 78 | const outputLayerNormLinearBuffer = createOutputBuffer(device, commandEncoder, block.layerNormLinearOutputBuffer, seq_length, n_embd); 79 | const outputLinearBuffer = createOutputBuffer(device, commandEncoder, block.linearOutputBuffer, seq_length, n_embd); 80 | const outputResidualLinearBuffer = createOutputBuffer(device, commandEncoder, block.residualLinearOutputBuffer, seq_length, n_embd); 81 | outputBlockBuffers.push([ 82 | outputLayerNormAttentionBuffer, 83 | outputAttentionBuffer, 84 | outputResidualAttentionBuffer, 85 | outputLayerNormLinearBuffer, 86 | outputLinearBuffer, 87 | outputResidualLinearBuffer, 88 | ]); 89 | } 90 | const outputLayerBuffer = createOutputBuffer(device, commandEncoder, layerBuffer, seq_length, n_embd); 91 | const outputLayerNormBuffer = createOutputBuffer(device, commandEncoder, layerNormOutputBuffer, seq_length, n_embd); 92 | 93 | queue.submit([commandEncoder.finish()]); 94 | 95 | await outputEmbedBuffer.mapAsync(GPUMapMode.READ); 96 | 97 | for (let i = 0; i < n_layers; i++) { 98 | const block = outputBlockBuffers[i]; 99 | for (let j = 0; j < block.length; j++) { 100 | await block[j].mapAsync(GPUMapMode.READ); 101 | } 102 | } 103 | await outputLayerBuffer.mapAsync(GPUMapMode.READ); 104 | await outputLayerNormBuffer.mapAsync(GPUMapMode.READ); 105 | 106 | // You can't read twice from mapped range. 
107 | const layerNormOutput = outputLayerNormBuffer.getMappedRange(); 108 | const output = deEmbedCPU(layerNormOutput, seq_length, n_embd, vocab_size); 109 | 110 | console.log("Validating output..."); 111 | console.log("Expected output block:", validateModel[validateIndex]); 112 | console.log("Validating embedding..."); 113 | validateResult(new Float32Array(outputEmbedBuffer.getMappedRange()), validateModel[validateIndex].tok_pos_emb); 114 | console.log("Validating blocks..."); 115 | for (let i = 0; i < n_layers; i++) { 116 | console.log(`\tValidating block ${i}...`); 117 | const block = outputBlockBuffers[i]; 118 | console.log("\t\tValidating first layer norm..."); 119 | validateResult(new Float32Array(outputBlockBuffers[i][0].getMappedRange()), validateModel[validateIndex][`block${i}_ln1`]); 120 | console.log("\t\tValidating attention..."); 121 | validateResult(new Float32Array(outputBlockBuffers[i][1].getMappedRange()), validateModel[validateIndex][`block${i}_attn`]); 122 | console.log("\t\tValidating residual attention..."); 123 | validateResult(new Float32Array(outputBlockBuffers[i][2].getMappedRange()), validateModel[validateIndex][`block${i}_r1`]); 124 | console.log("\t\tValidating second layer norm..."); 125 | validateResult(new Float32Array(outputBlockBuffers[i][3].getMappedRange()), validateModel[validateIndex][`block${i}_ln2`]); 126 | console.log("\t\tValidating mlp..."); 127 | validateResult(new Float32Array(outputBlockBuffers[i][4].getMappedRange()), validateModel[validateIndex][`block${i}_mlp`]); 128 | console.log("\t\tValidating residual mlp..."); 129 | validateResult(new Float32Array(outputBlockBuffers[i][5].getMappedRange()), validateModel[validateIndex][`block${i}_r2`]); 130 | } 131 | console.log("Validating layer norm..."); 132 | validateResult(new Float32Array(layerNormOutput), validateModel[validateIndex].ln_f); 133 | console.log("Validating logits..."); 134 | validateResult(new Float32Array(output), validateModel[validateIndex].logits); 135 | 
136 | return output; 137 | } 138 | 139 | function validateResult(result, validate, verbose = false) { 140 | const resultArray = formatAsMatrix(result, validate.shape[1], validate.shape[2]); 141 | const validateArray = validate.data[0]; // Unpack from batch of 1 142 | 143 | const equal = checkAlmostEqualMatrices(resultArray, validateArray); 144 | 145 | if (!equal) { 146 | // console.log("Result:", result); 147 | // console.log("Validate:", validate); 148 | console.log("Result mat:", resultArray); 149 | console.log("Validate mat:", validateArray); 150 | 151 | // Calculate the difference 152 | const diff = subtractMatrices(resultArray, validateArray); 153 | console.log("Diff mat:", diff); 154 | 155 | // Sum the absolute values of the difference 156 | const sum = sumMatrix(diff); 157 | console.log("Sum:", sum); 158 | 159 | throw new Error("Test failed"); 160 | } else { 161 | // console.log("Test passed!"); 162 | if (verbose) { 163 | console.log("Result mat:", resultArray, validateArray); 164 | // console.log("Validate mat:", validateArray); 165 | } 166 | } 167 | } 168 | 169 | function reshapeRecursively(flatArray, shape) { 170 | if (shape.length === 1) { 171 | return flatArray.slice(0, shape[0]); 172 | } 173 | 174 | let result = []; 175 | let elementsPerSection = shape.slice(1).reduce((a, b) => a * b); 176 | for (let i = 0; i < flatArray.length; i += elementsPerSection) { 177 | result.push(reshapeRecursively(flatArray.slice(i, i + elementsPerSection), shape.slice(1))); 178 | } 179 | 180 | return result; 181 | } 182 | 183 | async function loadValidateModel(validateFile) { 184 | console.log("Loading validation model..."); 185 | 186 | const validateData = await (await fetch(`test/${validateFile}`)).json(); 187 | 188 | const steps = []; 189 | for (let i = 0; i < validateData.length; i++) { 190 | const loadedData = {}; 191 | for (const key in validateData[i]) { 192 | const shape = validateData[i][key].shape; 193 | const data = 
validateData[i][key].data.flat(Infinity).map((value) => parseFloat(value)); 194 | const typedArray = new Float32Array(data); 195 | 196 | loadedData[key] = { 197 | shape, 198 | data: reshapeRecursively(typedArray, shape), 199 | }; 200 | } 201 | steps.push(loadedData); 202 | } 203 | 204 | return steps; 205 | } 206 | 207 | function checkAlmostEqualMatrices(a, b) { 208 | if (a.length !== b.length) { 209 | return false; 210 | } 211 | for (let i = 0; i < a.length; i++) { 212 | if (a[i].length !== b[i].length) { 213 | return false; 214 | } 215 | for (let j = 0; j < a[i].length; j++) { 216 | if (a[i][j] - b[i][j] > 0.001) { 217 | return false; 218 | } 219 | } 220 | } 221 | return true; 222 | } 223 | 224 | function formatAsMatrix(floatArray, dimA, dimB) { 225 | const resultMatrix = []; 226 | for (let i = 0; i < dimA; i++) { 227 | resultMatrix.push(floatArray.slice(i * dimB, (i + 1) * dimB)); 228 | } 229 | return resultMatrix; 230 | } 231 | 232 | async function runValidation(idx, validationIndex) { 233 | if (!modelParams || !embeddingWeights) { 234 | console.log("Model not loaded yet"); 235 | return; 236 | } 237 | 238 | console.log("\nRunning model inference."); 239 | console.log("Starting with", idx.length, "tokens."); 240 | 241 | const { device, queue, params, posEmbdBuffer, layer_buffers, normGammaBuffer, normBetaBuffer } = modelParams; 242 | const { attentionDotProductScale, n_embd, n_heads, n_layers, vocab_size } = params; 243 | const seq_length = idx.length; 244 | 245 | console.log("Embedding inputs..."); 246 | 247 | const embeddings = idx.map((token) => embeddingWeights.slice(token * n_embd, (token + 1) * n_embd)); 248 | const flattened = flattenEmbeddings(embeddings); 249 | const embdOutputBuffer = createBuffer(device, bufferSizeCalc(seq_length, n_embd), GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST); 250 | queue.writeBuffer(embdOutputBuffer, 0, flattened); 251 | 252 | const startTime = performance.now(); 253 | const result = await runGPTValidation( 254 | device, 
255 | queue, 256 | seq_length, 257 | vocab_size, 258 | n_embd, 259 | n_heads, 260 | n_layers, 261 | attentionDotProductScale, 262 | embdOutputBuffer, 263 | posEmbdBuffer, 264 | layer_buffers, 265 | normGammaBuffer, 266 | normBetaBuffer, 267 | validationIndex 268 | ); 269 | 270 | const endTime = performance.now(); 271 | console.log(`Time: ${endTime - startTime} ms`); 272 | 273 | return new Float32Array(result); 274 | } 275 | 276 | async function validateAgainstModel() { 277 | if (!modelParams || !validateModel) { 278 | console.log("Model not loaded yet"); 279 | return; 280 | } 281 | 282 | const context_size = modelParams.params.context_size; 283 | 284 | console.log(`Starting validation.`); 285 | console.log("Validate model loaded", validateModel); 286 | console.log("Model params", modelParams); 287 | console.log("Context size", context_size); 288 | 289 | for (let i = 0; i < validateModel.length; i++) { 290 | const step = validateModel[i]; 291 | 292 | const idx_cond = Array.from(step.idx.data[0].slice(-context_size)); 293 | const logits = await runInference(idx_cond, i); 294 | const probs = cpuSoftmax(logits, 1.0); 295 | 296 | const idx_next = sampleFromDistribution(probs, 1); 297 | 298 | console.log("Next token", idx_next); 299 | console.log("Expected token", sampleFromDistribution(step.probs.data[0], 1)); 300 | 301 | if (idx_next !== sampleFromDistribution(step.probs.data[0], 1)) { 302 | throw new Error("Validation failed"); 303 | } 304 | } 305 | } 306 | -------------------------------------------------------------------------------- /tokenizer.js: -------------------------------------------------------------------------------- 1 | class Tokenizer { 2 | constructor() { 3 | this.encoder = undefined; 4 | this.decoder = undefined; 5 | this.vocab_size = undefined; 6 | } 7 | 8 | async load() { 9 | throw new Error("Not implemented."); 10 | } 11 | 12 | getVocabSize() { 13 | return this.vocab_size; 14 | } 15 | 16 | encode(str) { 17 | throw new Error("Not implemented."); 18 
| } 19 | 20 | decode(arr) { 21 | throw new Error("Not implemented."); 22 | } 23 | } 24 | 25 | class SimpleTokenizer extends Tokenizer { 26 | constructor() { 27 | super(); 28 | } 29 | 30 | async load() { 31 | console.log("Loading simple tokenizer..."); 32 | this.encoder = await (await fetch("weights/tokenization/simple_tokens.json")).json(); 33 | this.decoder = Object.keys(this.encoder).reduce((acc, x) => ({ ...acc, [this.encoder[x]]: x }), {}); 34 | this.vocab_size = Object.keys(this.encoder).length; 35 | } 36 | 37 | encode(str) { 38 | return str.split("").map((x) => this.encoder[x]); 39 | } 40 | 41 | decode(arr) { 42 | return arr.map((x) => this.decoder[x]).join(""); 43 | } 44 | } 45 | 46 | // ------------------ GPT Tokenizer ------------------ 47 | // Credit to https://github.com/latitudegames/GPT-3-Encoder 48 | 49 | class GPT2Tokenizer extends Tokenizer { 50 | constructor() { 51 | super(); 52 | this.pat = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu; 53 | this.textEncoder = new TextEncoder(); // always utf-8 by spec 54 | this.textDecoder = new TextDecoder("utf-8"); 55 | } 56 | 57 | async load() { 58 | console.log("Loading GPT2 tokenizer..."); 59 | 60 | const bpe_file = await (await fetch("weights/tokenization/vocab.bpe")).text(); 61 | const encoder = await (await fetch("weights/tokenization/gpt_tokens.json")).json(); 62 | this.encoder = encoder; 63 | 64 | console.log("Building decoder..."); 65 | const decoder = {}; 66 | Object.keys(encoder).map((x) => { 67 | decoder[encoder[x]] = x; 68 | }); 69 | this.decoder = decoder; 70 | 71 | const lines = bpe_file.split("\n"); 72 | const bpe_merges = lines.slice(1, lines.length - 1).map((x) => { 73 | return x.split(/(\s+)/).filter(function (e) { 74 | return e.trim().length > 0; 75 | }); 76 | }); 77 | 78 | const byte_encoder = bytes_to_unicode(); 79 | const byte_decoder = {}; 80 | Object.keys(byte_encoder).map((x) => { 81 | byte_decoder[byte_encoder[x]] = x; 82 | }); 83 | this.byte_encoder = 
byte_encoder; 84 | this.byte_decoder = byte_decoder; 85 | 86 | this.bpe_ranks = dictZip(bpe_merges, range(0, bpe_merges.length)); 87 | this.cache = new Map(); 88 | this.vocab_size = Object.keys(encoder).length; 89 | } 90 | 91 | encode(text) { 92 | if (!this.byte_encoder) throw new Error("Tokenizer not loaded."); 93 | let bpe_tokens = []; 94 | const matches = Array.from(text.matchAll(this.pat)).map((x) => x[0]); 95 | for (let token of matches) { 96 | const encoded_bytes = this.textEncoder.encode(token); 97 | let bytes = []; 98 | for (let i = 0; i < encoded_bytes.length; i++) { 99 | bytes.push(this.byte_encoder[encoded_bytes[i].toString()]); 100 | } 101 | token = bytes.join(""); 102 | 103 | const new_tokens = this.bpe(token) 104 | .split(" ") 105 | .map((x) => this.encoder[x]); 106 | bpe_tokens = bpe_tokens.concat(new_tokens); 107 | } 108 | return bpe_tokens; 109 | } 110 | 111 | decode(tokens) { 112 | if (!this.byte_decoder) throw new Error("Tokenizer not loaded."); 113 | let text = tokens.map((x) => this.decoder[x]).join(""); 114 | text = this.textDecoder.decode(new Uint8Array(text.split("").map((x) => this.byte_decoder[x]))); 115 | return text; 116 | } 117 | 118 | bpe(token) { 119 | if (this.cache.has(token)) return this.cache.get(token); 120 | let word = token.split(""); 121 | let pairs = get_pairs(word); 122 | if (!pairs) return token; 123 | while (true) { 124 | const minPairs = {}; 125 | pairs.forEach(pair => { 126 | const rank = this.bpe_ranks[pair]; 127 | minPairs[isNaN(rank) ? 
10e10 : rank] = pair; 128 | }); 129 | const keys = Object.keys(minPairs).map((x) => parseInt(x)); 130 | const bigram = minPairs[Math.min(...keys)]; 131 | if (!Object.hasOwn(this.bpe_ranks, bigram)) break; 132 | const first = bigram[0]; 133 | const second = bigram[1]; 134 | let new_word = []; 135 | let i = 0; 136 | while (i < word.length) { 137 | const j = word.indexOf(first, i); 138 | if (j === -1) { 139 | new_word = new_word.concat(word.slice(i)); 140 | break; 141 | } 142 | new_word = new_word.concat(word.slice(i, j)); 143 | i = j; 144 | if (word[i] === first && i < word.length - 1 && word[i + 1] === second) { 145 | new_word.push(first + second); 146 | i = i + 2; 147 | } else { 148 | new_word.push(word[i]); 149 | i = i + 1; 150 | } 151 | } 152 | word = new_word; 153 | if (word.length === 1) break; 154 | else pairs = get_pairs(word); 155 | } 156 | word = word.join(" "); 157 | this.cache.set(token, word); 158 | return word; 159 | } 160 | } 161 | 162 | const range = (x, y) => { 163 | const res = []; 164 | for (let i = x; i < y; i++) { res.push(i) } 165 | return res; 166 | }; 167 | 168 | const ord = (x) => { 169 | return x.charCodeAt(0); 170 | }; 171 | 172 | const dictZip = (x, y) => { 173 | const result = {}; 174 | x.map((_, i) => { 175 | result[x[i]] = y[i]; 176 | }); 177 | return result; 178 | }; 179 | 180 | const bytes_to_unicode = () => { 181 | const bs = range(ord("!"), ord("~") + 1).concat(range(ord("¡"), ord("¬") + 1), range(ord("®"), ord("ÿ") + 1)); 182 | let cs = bs.slice(); 183 | let n = 0; 184 | for (let b = 0; b < 2 ** 8; b++) { 185 | if (!bs.includes(b)) { 186 | bs.push(b); 187 | cs.push(2 ** 8 + n); 188 | n = n + 1; 189 | } 190 | } 191 | cs = cs.map((x) => String.fromCharCode(x)); 192 | const result = {}; 193 | bs.map((_, i) => { 194 | result[bs[i]] = cs[i]; 195 | }); 196 | return result; 197 | }; 198 | 199 | const get_pairs = (word) => { 200 | const pairs = new Set(); 201 | let prev_char = word[0]; 202 | for (let i = 1; i < word.length; i++) { 203 | 
const char = word[i]; 204 | pairs.add([prev_char, char]); 205 | prev_char = char; 206 | } 207 | return pairs; 208 | }; 209 | -------------------------------------------------------------------------------- /visuals.js: -------------------------------------------------------------------------------- 1 | class Visuals { 2 | 3 | initialized = false; 4 | 5 | constructor(model) { 6 | this.model = model; 7 | this.device = model.device; 8 | this.params = model.params; 9 | } 10 | 11 | init() { 12 | this.initFoundation(); 13 | this.initUniforms(); 14 | this.initLayoutAndPipeline(); 15 | this.initBuffersAndBindGroup(); 16 | this.updateModelBuffer(); 17 | 18 | this.initialized = true; 19 | } 20 | 21 | initFoundation() { 22 | const containerEl = document.getElementById("visualsContainer"); 23 | const gpuCanvasEl = document.createElement("canvas"); 24 | 25 | containerEl.style.width = this.params.n_embd + "px"; 26 | containerEl.style.height = this.params.n_ctx + "px"; 27 | gpuCanvasEl.style.width = "100%"; 28 | gpuCanvasEl.style.height = "100%"; 29 | gpuCanvasEl.width = this.params.n_embd; 30 | gpuCanvasEl.height = this.params.n_ctx; 31 | 32 | const gpuContext = gpuCanvasEl.getContext("webgpu"); 33 | const gpuCanvasFormat = navigator.gpu.getPreferredCanvasFormat(); 34 | 35 | gpuContext.configure({ 36 | device: this.device, 37 | format: gpuCanvasFormat, 38 | }); 39 | 40 | containerEl.appendChild(gpuCanvasEl); 41 | 42 | this.containerEl = containerEl; 43 | this.gpuCanvasFormat = gpuCanvasFormat; 44 | this.gpuCanvasEl = gpuCanvasEl; 45 | this.gpuContext = gpuContext; 46 | } 47 | 48 | updateModelBuffer() { 49 | this.model.externalBuffer = this.embeddingsBuffer; 50 | } 51 | 52 | initUniforms() { 53 | this.uniforms = { 54 | width: this.model.params.n_embd, 55 | height: this.model.params.n_ctx, 56 | }; 57 | } 58 | 59 | initLayoutAndPipeline() { 60 | this.bindGroupLayout = this.device.createBindGroupLayout({ 61 | entries: [ 62 | { 63 | binding: 0, 64 | visibility: 
GPUShaderStage.FRAGMENT, 65 | buffer: { 66 | type: "uniform", 67 | } 68 | }, 69 | { 70 | binding: 1, 71 | visibility: GPUShaderStage.FRAGMENT, 72 | buffer: { 73 | type: "read-only-storage", 74 | } 75 | }, 76 | ] 77 | }); 78 | 79 | this.renderShaderModule = this.device.createShaderModule({ 80 | label: 'visuals', 81 | code: ` 82 | struct UniformData { 83 | width: f32, 84 | height: f32, 85 | } 86 | 87 | @vertex 88 | fn vsMain(@builtin(vertex_index) vertexIndex: u32) -> @builtin(position) vec4 { 89 | var positions = array, 6>( 90 | vec2(-1.0, -1.0), // bottom left 91 | vec2( 1.0, -1.0), // bottom right 92 | vec2(-1.0, 1.0), // top left 93 | vec2(-1.0, 1.0), // top left 94 | vec2( 1.0, -1.0), // bottom right 95 | vec2( 1.0, 1.0) // top right 96 | ); 97 | return vec4(positions[vertexIndex], 0.0, 1.0); 98 | } 99 | 100 | @group(0) @binding(0) var uniformData: UniformData; 101 | @group(0) @binding(1) var embeddingsBuffer: array; 102 | 103 | @fragment 104 | fn fsMain(@builtin(position) fragCoord: vec4) -> @location(0) vec4 { 105 | let xNormalized = fragCoord.x / uniformData.width; 106 | let yNormalized = fragCoord.y / uniformData.height; 107 | 108 | let xIndex = xNormalized * ${this.params.n_embd}; 109 | let yIndex = yNormalized * ${this.params.n_ctx}; 110 | let index = u32(yIndex) * ${this.params.n_embd} + u32(xIndex); 111 | 112 | let vectorValue = embeddingsBuffer[index]; 113 | 114 | var outColor = vec4(0.0); 115 | outColor = hdrColorMapping(outColor, 1.0, vectorValue * 0.1); 116 | 117 | return outColor; 118 | } 119 | 120 | fn hdrColorMapping(colorRef: vec4, hdrThreshold: f32, vectorValue: f32) -> vec4 { 121 | var color = colorRef; 122 | 123 | if (vectorValue < 0.0) { 124 | color.b = -vectorValue; 125 | if (vectorValue < -hdrThreshold) { 126 | color.g = -vectorValue - hdrThreshold; 127 | } 128 | } else { 129 | color.r = vectorValue; 130 | if (vectorValue > hdrThreshold) { 131 | color.g = vectorValue - hdrThreshold; 132 | } 133 | } 134 | color.g = min(color.g, 0.7); 135 | 
return color; 136 | } 137 | ` 138 | }); 139 | 140 | this.renderPipeline = this.device.createRenderPipeline({ 141 | layout: this.device.createPipelineLayout({ 142 | bindGroupLayouts: [this.bindGroupLayout], 143 | }), 144 | vertex: { 145 | module: this.renderShaderModule, 146 | entryPoint: 'vsMain', 147 | buffers: [] 148 | }, 149 | fragment: { 150 | module: this.renderShaderModule, 151 | entryPoint: 'fsMain', 152 | targets: [ 153 | { 154 | format: this.gpuCanvasFormat, 155 | }, 156 | ], 157 | }, 158 | }); 159 | } 160 | 161 | initBuffersAndBindGroup() { 162 | const uniformCount = Object.values(this.uniforms).length; 163 | 164 | this.uniformBuffer = this.device.createBuffer({ 165 | size: uniformCount * Float32Array.BYTES_PER_ELEMENT, 166 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, 167 | }); 168 | 169 | this.embeddingsBuffer = this.device.createBuffer({ 170 | size: this.model.bufferSize(this.params.n_ctx, this.params.n_embd), 171 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, 172 | }); 173 | 174 | this.bindGroup = this.device.createBindGroup({ 175 | layout: this.bindGroupLayout, 176 | entries: [ 177 | { 178 | binding: 0, 179 | resource: { 180 | buffer: this.uniformBuffer, 181 | } 182 | }, 183 | { 184 | binding: 1, 185 | resource: { 186 | buffer: this.embeddingsBuffer, 187 | } 188 | }, 189 | ] 190 | }); 191 | 192 | this.updateUniforms(); 193 | } 194 | 195 | updateUniforms() { 196 | this.uniforms.width = this.gpuCanvasEl.width; 197 | this.uniforms.height = this.gpuCanvasEl.height; 198 | 199 | const uniformArray = new Float32Array([ 200 | this.uniforms.width, 201 | this.uniforms.height, 202 | ]); 203 | 204 | this.device.queue.writeBuffer( 205 | this.uniformBuffer, 206 | 0, 207 | uniformArray.buffer, 208 | uniformArray.byteOffset, 209 | uniformArray.byteLength, 210 | ); 211 | } 212 | 213 | render(existingCommandEncoder) { 214 | const commandEncoder = existingCommandEncoder ?? 
this.device.createCommandEncoder(); 215 | 216 | const textureView = this.gpuContext.getCurrentTexture().createView(); 217 | 218 | const renderPassDescriptor = { 219 | colorAttachments: [ 220 | { 221 | view: textureView, 222 | loadOp: 'clear', 223 | loadValue: { r: 0.0, g: 0.0, b: 0.0, a: 1.0 }, 224 | storeOp: 'store', 225 | } 226 | ] 227 | }; 228 | 229 | const passEncoder = commandEncoder.beginRenderPass(renderPassDescriptor); 230 | passEncoder.setPipeline(this.renderPipeline); 231 | passEncoder.setBindGroup(0, this.bindGroup); 232 | passEncoder.draw(6, 1, 0, 0); 233 | passEncoder.end(); 234 | 235 | this.device.queue.submit([commandEncoder.finish()]); 236 | } 237 | 238 | destroy() { 239 | this.gpuCanvasEl.remove(); 240 | this.uniformBuffer.destroy(); 241 | this.embeddingsBuffer.destroy(); 242 | } 243 | } -------------------------------------------------------------------------------- /weights/better_shakespeare/lm_head.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0ab6e98e9867735d96b6fbf28d7343c0505145d97adc4ead3b6d11cee9084746 3 | size 33280 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/params_gpt.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:68125bf8d6d60d4f19cf4cd5b69b9fe6ec2f158acf0f39588226d0ecde6f9ba9 3 | size 120 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:72f84a760f8f97776cb5556538a6a38cfa9e228ab309630942c548c96162199e 3 | size 1536 4 | -------------------------------------------------------------------------------- 
/weights/better_shakespeare/transformer.h.0.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8580563fe350ed41e165b80189067b4af5104896bbc71a255cfd1a2dda59dace 3 | size 196608 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bf23b1142e8eb119456ebd1724bcb11e0ff7cc819e85ddc642bd3be7b42d0188 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c9f2eed35a856a866e5a1f7cf4304441d0111bf052d9448efdde72354efd5a3b 3 | size 65536 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:99257b1bccd1d6573a23aa1524175061116818c03c155b9ffa037c1db43ed072 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9295026c2ddd94b0dca4b9f938981076e791e239f67bab3dbcef0fe946293916 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.ln_2.bias_gpt.bin: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:cc3856e2d74abdf0b59c5f2e513515c688de43d4a78029d4c1ac423a80d489c3 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0b823c79c5f465a125265a1ed1605bcf6d0fdee9e0a61b1ec29d6d01b99cd99b 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3ef0dab8b13160f91a11552f3021601e72ff0b39bf8345ac5e4ac4bdb3c93007 3 | size 2048 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f064ec06e3e7b0bd08e5a24dd6974e9b60ef98b58a68accb436957d4e747b5d7 3 | size 262144 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ff124fdd060c6393e93eb411973d5a80da1619338071366d29e97905b1fed92b 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.0.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:98aa8ec3a193694183f83093412c9be071ffec682d1e4da9de8582dfbe02f38c 3 | size 262144 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:06f8994bed3de9d75b578ceab225c5b0d018c6f3dc87055b6abc6b867011404d 3 | size 1536 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:763c6c84cb177cd40c30590ffde950090a2bcbf53fef1ab2407200e3f5d35b7a 3 | size 196608 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c083b7f18d953d8708ef66a1cb7a009a6b2cb28aee483ffdc426eada87f5e5bc 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:61e7c84491124abe4f05572850f598814a3d91d55a378dea5d51c297a8b698df 3 | size 65536 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:d17f07aff6ebc1d8b181af8f7913b7990cfed6978f2912c9f19c6c1f8875a2d8 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8712a34288538e1865b73420fd0cb058f06806633d1e46dc02d494b4c145ffb6 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8dc5e2789371a24cc0122f7c80855762974e6c2d935251cfe1012e76f483b884 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3d91cd410e1f1b2f9ef9e0326edd3649a2a62df5d297e5d855f526316565abd9 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0a217a9ad20381aa6bf35adb0b4a837672568c519ca7778871233b48f3b9b226 3 | size 2048 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b1d761486220d7e14f9890eae8e7222773b41603b69c6b9e9d597b24fb772383 3 | size 262144 4 | 
-------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:221e43b3f354bf9b6e9f7fd7188ed287f26b1fd14b1a33c3cd60707d0f843436 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.1.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bead9ba9a96d2ac38d2200189d82662c4d37d4858d017494e6d7e8665d164fe3 3 | size 262144 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:893b4c782f43910e43265a3e1bba9fc75401fbe505ee0e3124371b442f56f3ce 3 | size 1536 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:979cc99c4a3fc016331d71133d833802db0eaf4b6e2b750978b9157320edd994 3 | size 196608 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a93153e6c295e60ae9891ff9f4818b36b5062da7f3cafea5278d66583b10420b 3 | size 512 4 | -------------------------------------------------------------------------------- 
/weights/better_shakespeare/transformer.h.2.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4b007d8ab36a09440893b2e174e41af91a23fc1d8e980c954d022244808fce4f 3 | size 65536 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:48322aae8e9bf86fb9cc36ea8f8c020690d2f59c20cb9c964bdfdf7fc8f5ffd0 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2c499c13600198ee052898a737493f17ceb329ab33e2a7daec5e0cebdc9625e7 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:7341ebef6c3b8de112bce50478e4de7cf3f42eb48275a0c1e8272def4f782527 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:aa00a84b155a682111ec8eb5b157c87fae88d56f0700f88c5bbdeef38c928c2c 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.mlp.c_fc.bias_gpt.bin: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f41aab925b92ed74eeffbd6bbc873e6f87e58e58f8c95b0ee185e7076c5cd7ad 3 | size 2048 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:25446ada6261b3c9c661d419a49a32f6b71a0c61aba8c9efa2195c845420604b 3 | size 262144 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1e54397368cc314fc4207ebc2bf934d89467efceca23d522b458f5a1e2fe81df 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.2.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:20fff27d1d97120732f6602494e1c62948ee5be66025701a93c7028bed17c3ab 3 | size 262144 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:79a4fc56a2174859aca9ac89b9b56342660e229aa20103e98dc6c9efb0c354a9 3 | size 1536 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:f67f2b9e211ec51eb8c2769c8205990de9ff7443d7cba743ed9099a9cfdd579c 3 | size 196608 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:916c10888a36ad9378d6acc0950bd48f6ef37a13cec3250aebd8b072ec4c942d 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a9c47bfc087a1e040d50147d7be178e7f34fd0d2371b53c14a70c323ad0f7ec1 3 | size 65536 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4dd31a6816ff915f15ad563e4783b978adcf23372fe69207229946572a94aef0 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ba212f3661f16cf701fedc42c8cde2552d5ca59d70d22e9b0cdc084af2bd9e50 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8c6b81ca4cd8f9fc1dfc96cc55c5d9a5d16c4920f529ffad30938e918b011eaa 3 | size 512 4 | 
-------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f808c9220f7b765a32345ae16eb9c0ac51ec4a23fb33dd7dd19567e0896e7416 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f93bb7c4a4035893d0dcf84d1cec2abdc7804443f4e81a752a90817157ac8eb4 3 | size 2048 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:af50ec178134ae89654b88700038ab334b09fc82bcb472c4657b93ce2df60623 3 | size 262144 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1409028c19e4206378b5a032ac2c8c2aa051b5d74e83d467d8ac270524988a46 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.h.3.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:85c861ecf42c294d20de0a7f13f9cbb1c464f63896afad0b9d47084d1cbf537d 3 | size 262144 4 | -------------------------------------------------------------------------------- 
/weights/better_shakespeare/transformer.ln_f.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9d68bef558eec5196825f8434389b67d6a4dad2b174b1d54135745a139ef1988 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.ln_f.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:899b3347e3794f65c4e5b84c23a1f386afc6a0767362f067ff2228f55c690f4f 3 | size 512 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.wpe.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:526c6e8a3e416a0094a0e007e3f9984535d00a007a2b8d579d48d7ef186cd7d3 3 | size 32768 4 | -------------------------------------------------------------------------------- /weights/better_shakespeare/transformer.wte.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0ab6e98e9867735d96b6fbf28d7343c0505145d97adc4ead3b6d11cee9084746 3 | size 33280 4 | -------------------------------------------------------------------------------- /weights/gpt2/lm_head.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e182e433b37dbdb47448e0413c840edf6965113c1fc8048c63b69795d8cf875a 3 | size 154389504 4 | -------------------------------------------------------------------------------- /weights/gpt2/params_gpt.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:d7c996104a4971dcc7208714675c1e95787dd89ab46f107ba37cfc7c3100bf8d 3 | size 127 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ef36c6f4fcc7f15fdb5882316834dc7e304ae5f9e8ae32673108c6efa441f787 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:df332e9d9ae908db358a00423f14af1c0036bc9a2332aca395adf68e3eb08b48 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:52c55de1fa9685a50e5bd91c5741fc9ad80c03671c1eec71d92010194140e9a7 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e13178678e8c1e591d293bc5a96f081251d734d8a43b7038cc98329742e79952 3 | size 2359296 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.0.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:5a4a949fb3e7a463e3f1aca6f5285979f5fd330d9432ef9f272adb8d55bfbb2d 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:87c92a5f0409ab2a8f2b92a9ebbb62ab9215590402d38929402c84357d53e4ae 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:52af0f6c03edf6298c4819da6d63b831cae48de804a0a23b19aca674efadf3b2 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b9e28ffbdfcac5b56ba9a9e8ccd9fe0efcc603f4f0d00685615e1c9cc9994960 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:d71a74a230d32edd0a3e2562ba65a4cbc8ca1d1ff918a5f53468b0d096ad0086 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:534d05221a3d2e0b68a64c7b26e58fb39afcfd20adf1c6bef55bd100adb834a6 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:10a0e86f0d29a95ba16fa1a2cf5cca3ccc9b8c5cc21e82ed3169f40101e459be 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.0.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f4c17b36a51b9ae77a93ee6dd767512626076da8295342d8a760cf4aa0880993 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:467fba1ba8e52f8646c93bb101234c37b07fd51ba3cb5772c010e6aa68d1ed34 3 | size 9216 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.1.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:17cadf5370083f5d58dc8be9ceda794154d29c0f11cf8a38f347e84492093f32 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2637585a0b7a7cddc1cb97129712dcc3f1ee75cc4455cfb2429c956867ea24c5 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a6a27ec95bbc643c83a5206dca39af86d02a9c17c12b24d5640b7eccc48f9b21 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c9a75b7ca299432f7c4547e856d5914595d064548e9b9d375b4e3e920219f580 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | 
oid sha256:9614528015a8b5dc32de46003e25e9bca7715ee69f4a838836c789509f51acd7 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fccc1781905485f36ab7d62ed9d2d265cebda07d27e9d267923ac3cbbd43a82b 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b01fbb6df507a3ebe395e92e88c1df066ad477141130dff987e030caf6b5ceba 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:65347695d5a622d1f3a42b6a95b9010a1fce980dfd707abcefcb36088f9076d8 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:604b9974694fc19b1a68c83104ae4c8f6fa1dc2ead7be7e6d6ffa7a2558a3a25 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.1.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:39965842da4b639219358ba6757393ade2d2d0b0b6eda6c5e51cad2ac4aec205 3 | size 3072 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.1.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4b18010d6564311c1dad46ce345f2b912188ea8c6162321420031f3c74234c6d 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f9377e6334fdc42c0cabba230f355daf07ce76f6af20edfdc8ae8e1dd5eaa074 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b43d111a3f0cee2c55a141f6724f11516d91c0a6e9c5fdf18cd85ef8ba36fc93 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:5488c5c27460f555fd4b983213d51e8f57a91b10e9ce63aecaf1713008ba5c1b 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:7a6473a5c23cf4efe0b37d89f183e151d480e31c6b6dc06f56a5c41cc5dbfefa 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d3090fe7c30e68fbc6029cccaca150eb39b853d7982b81dffe7374981f34d2f9 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9bb50ac0614875e3e0ad6b9ba9acb2db50ed1ccfa15e34ae54a9c867eebff6e5 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d8f6b44d46bf330757f31931761bea5d06176d96930871a216893662c0b54531 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3605b6e73b6764e8d8329e6cbe1561203d57f69a4745e87a6b82e16b7906a2a5 3 | size 3072 4 | 
-------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2f0c88abc4d0c1871371cf5ee6ab71617ba71e0d7d428fb00d7a8f40dd859041 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3dd499b76016e3c7dd6b9a14df2a96e1807b9354addca69e6f7ffdd03a231cf8 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9f76592c0f287e9d07f754e437e5eb382b52fb0f8f9fc98d83421b319e32404b 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.10.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0b8d7258c7c4fc018aa4bd896355f5049eb3a68a373a7019235071c872f37901 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.attn.c_attn.bias_gpt.bin: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c6986457b215979836e0c8c4def716590f8d1db9ba372fc99007715cd1adf4b6 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b4fc281ff5bf255dfccd63ea1c13281481134c84cc565b5a648cfabcbecd1630 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9ff98d1c914971eb382ec53ddbf885c75db8c0d970626512d565922845d30213 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:be2b17561815008138e23302883718760ac336ee06ae3c008db0fb3c8d5fd83f 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:8012007aadcbf1ae8a6cb5e33495dfd565dcc819e6e38cbd3879c64d7538a67e 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:829592908c2c2d4fce699c300c01aac3b185777ef231a29d91f880c21997f5e6 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8d347b0fede57daad897ce7fdd096ac7e0a34061634a1f7a71eb1ef178fccfb9 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2da7a8debd3bf751ae402e3a99cef5f8b400ff4cb6f4cf81e48b49ec09a04280 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9af0c1f873cd929c9ddfd672368e0292a46df76103b93fb53d1d76cf890f82f5 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:78a86284471ef4b965d6cff04239040d1614cab1422bcde1ff73acab34453dc8 3 | size 9437184 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.11.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1d87ba328b87dd9de0de3e323d52b375d9e4b9296734462462c3a1c6b21ae2bc 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.11.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:67ea7df0336063f954695d4e8d747feae4a4c1f2d1ed34f8c7beab2b3686cb0c 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9cd34935245ac07685284ea2eac419f2a68bcbe8f045eecf262544d457678bc7 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c5702a21258404c3f3393891d8ac943944732e58269955c3506ff164190eb285 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:22dc4d1b92f2696b72362acd3238dc8491fe45d6ffa07f72747f35561fd1ff8e 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:04f8d1bf3b60fc1d444aefe697cf8f136eff217d176e581ebd0384a15ee8e99a 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9af0d11c8bf13ca5e4610db3dd59fd85d81b164601caadc12d574f7d34ad8fd8 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0191be1336ee8c22e8d43e1863fe199c09474cdb4840d1cadfe01b2572ff7cf9 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c9fca51b9f331e29c8b67d4395b09854f6498d4edf7335393fd38f9d53424b23 3 | size 3072 4 | 
-------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b1d71d7ca62a19a32b557736894a0fd2c508b70ce0331142b61e061f732346b1 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2765ac2cd7430cf0c1bb511e0ce3ff8ec36f13c3c7523e714f40358fec128ffd 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:999c7c5ace2c54f8a129722d41d3c07443e357ece47d50b66c5e3c2d2f8231a0 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bc1394c11c732512984180b39a2b81d7d93b92654d19b59356f76b829d8653c1 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.2.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:73a8e497fd4aaf61e4adef335f8359579ced6ad4a5e03ae140c8b8c5bba57c44 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.attn.bias_gpt.bin: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ccba1e26e98da0553ebde3f439ebfd477288a7019da77e7c4458d8e8ef9ba03e 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a85a482eb972a89371cb918a8600e2bd0d676ad79bea30b12d266eae670440ab 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:517b2e2187c251971aa455c516ab10cdc1ffb329b10e10668847e9d73a45d161 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d93f68892a300c6e59ee32a7d6101db8f934bb5cdfac1dcad2164f85d0316b09 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e0df07d4104990fc925e8323a8234e99fe51e661f7b3fd0be537c8a85dd95af8 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:5c645e3ef0c240592a5f01e1f88a069eb882d459f8d27fe59a304b4206547fc5 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:618d4065b4b02ecf8205797ba1462750879a273b35fbb6fd43b866b791af9f94 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b75ae3bd11256b16bb8dd112d94d7bb5290b5d7188795098f1ad661d5131078b 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1c8f1f1bae9d7a47c5c4dff6d3fbb59b6b04f03a2306f76f590c32eda1ce586e 3 | size 12288 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.3.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:52031bf35212fb8c7406f213999a9cc3540d28a48b859cb8a14a203e8f7dab5b 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:203983d8a4b322ff0d0914a96537a9e42c27016b2589473c5eb733d546c28370 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.3.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:15c01b278388decef3a736d85f60e16246c52da9bae2dafb3e3e7f6319989abb 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:13caa8e5d0cde4068374b220a6d61978f9eedeac7018a914d0c7d4494505109d 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:e999b277552708af1fb43929f2d7798594627a95e26dc99c7d31d97dec32cb2f 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:12ce61485860901638dbb967bfcfb3ae30939d345fbc2f5fc1fde99022c70e8e 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fd4832c3b53e2e5006ec032a1ae062f6f43ce320d7c7674ff266f770c795106c 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6c6aeb4b7ef9b742962d7bcde232d39ba749d8a2de62cbb4a4f458737fef7084 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bf129fef6b6628f08deeff4708af62fd5d2f7e1bb7eacc17561bc82b60c5c50d 3 | size 3072 4 | 
-------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0555ff00c21dded930a8b52f384649e5d4aa07f586f19c7380311319a48d5553 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:7a9f0e04c79ad95e00051ee8ef52a7f1417a7a345d7f1d9f426aeb56b0ac4537 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2d18dab3491739f503390f1fb0d50925a296ecae2acbee224224c08456041bd5 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0d79d14d5527683450f592bc2b77aebc84ad63df0e15f3470c8b065746d4685c 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8357e8ef29c291564e7b24f12bba0419e67ebe68f9ec5b6e0434bec11f420af2 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.4.mlp.c_proj.weight_gpt.bin: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:98ae7a2aad8b848f54fea75acc95e927b07b5bd0d22f032aadcc8499ee4362f8 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:88f7a9fed056edb8150cecb3aa04108ca555d54f04fc8371693b643571d7ecd1 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0d734b6b250378058a27e933f923f9a9857c6947c35c7668180b1287f2a98d0e 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:257e6070c41a5ac1c1977f407249c38453370a7e2b7b7083b801178e4b07392b 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:f70cda4ea74169e206ca457720e9e34db4d8cb0e11761e3179c7aa4519cb03d8 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0eb9eee4d2369d36fc2a04839e36a010278858bae789f5895aeed662f16d7792 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d5ed450453b2a76cf52f3d7fabce3493965551311f977e3c518ec4621106a4de 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:49431716411b3ab43cdf929234ba34a5bc78008510652fc2da16f4b1c28775f8 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bc0e51c254f443b53a207baef543b0c77e6dac08b41341322dc5174310dbdee8 3 | size 3072 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.5.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f9d1f6351e138e2a48744a782e398796ef21d667ea1c813646b7994ba137fbf3 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e9d3b4787ad395d015a6743621a82162ec7d6355afa2847c37ccfa4497bca6ab 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0389e2ca03a50993765309402691f0312c269bb6d0194f3f80001ce7881086f2 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.5.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ee5f66696d79e7e78f05e6faf3f13ac4f6a2fa31e89ccc36aae668560de4b3e7 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 
| oid sha256:60a1bbc9ad182f5bfeb2dbb60614a06c00551676ac7627ca044aff55b0247ccf 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2e2aa09354477f74f0e137bd54f457f7614cba06e1c70acbab0ae16ea7d33c22 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4798e6f3ed4910c11cc55ed530966fd4f747c25743739ad8de152ffef0996718 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:708fe340f34bf3e5ce8c2b46d13367a05c6333748dad09811983131cf79328a9 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c0874e46eab1a6426599ec9784d2ec04c786d8df5c17fc8984ec42930d5679ac 3 | size 3072 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.6.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8b2236a1d2958360879d5094d62d4728e56d06c06e7a22536473be4fba6d2f8e 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:36e9db9b57b6a886a1c10b351f645ceb19be1ec4c90a0af1b57f80184a02752a 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b6a44d99d843df8fd68463389a4efc449f1441014e874aec0ed8467b3cb35c68 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ee10d3e344c4f45d9c6f4e293ef30fc45513c1407139f5f2efbfeb62edf4571c 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:87d249a6fbf548455aa966dbab64d76b18126878ef137b38d5e3ddec420cbdd5 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:27a09ed1c198859314c993301fdd25b101107c42a8ea8dad0194a61e9e9fea61 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.6.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8b4552dc39a05023ba72df2f73067c75e5108aeedfa52d65054c05b14bf330b6 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3c44f02bfaf0d3c7e299bd47f0e59421326e7fcdb8128a9dd38002e1245de5c4 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c101117c479f6d522161e177a1360429b6c43760cd08fc6b05df275d3bde019e 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4b7a5e213d9ee9872981592c873ef6f338c682ab5654c817e4014618502f5fb4 3 | size 3072 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.7.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ba453997dadc87090ad63c5c329d04630088f4e7f38067cbcdf29725e43ad6c8 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d68aeb30a5e9e4c47dce864de23565fe0e5bfebcda078bb75d85e16626e5eb70 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dee0061336b027e6306a7b2b80f909edab923a444f492caf03ed530e1e4e8ab0 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ac227d13adec753e107febc6dd90f5f495e79c4869582314af1d4c222a564951 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:5963fa68c304222f4b1dc7e2a4a7439407cbb69d6852fd1ad92ff018a74e4de2 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6688980419ea053fccede5261b04f7d86ffefd57a457b10c15e8ecc7961ffe02 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:48380f00d00f3c743c2b35c6fc3be49a431fd2aea7648d778c6af0169ea5d682 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:5d64a5ba0130a1ac1894a47f027322881d8bff3b3b38ea7f4d74ec72463c6dae 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.7.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4943d294ea8507543562bb00ced6a9b04bfca4029f8855f7d042ae97d6c5a10e 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.8.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:026218fac39588b78bfa747e12b81ccf9b558fbb15205fb55c9cec29509ce79a 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:460fa88fd47022f0269b6ffad8712360005535a96b86ae8ed5755785efff9a60 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:207a1897b77a033bc3c883232c86151bf2646f811c3c1dbe0a4da4f1f1e98527 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:214802a449991cc6b5b1858ba2a44740c5754ad555d7895f20f09c5634494da7 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 
2 | oid sha256:28362a797737604473418736cb360b0fc43aecc7e8b42af3abaf9402bfb95320 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:86c20d0ff9b523c865a7f39bdd8b1ec1d6c7e308169bf977a42443d42beea98c 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:55ca7b08f00a2966e8d3d458073e5dc8d9ee417cf2cf53dc1d8d6f1b6f6b59df 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2df2d5283d9e81dcc6e2c9d33acfeef16c9cf797f70469c30087cbba2b8c893b 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dad4ea7c1036088c2c3bf3525048a5e413705159c03a480157bd316fbffc8aed 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b49cb75b0e746bf24be929b47d346a460ea98b60b66ceeeafea27145d69b333a 3 | size 9437184 4 | -------------------------------------------------------------------------------- 
/weights/gpt2/transformer.h.8.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8055c6f339e4a3c4628a35ef92a2f503d30481022fc378380851a6edf9e96dad 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.8.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:7329ae0d79cd1372abb03b74772b9e95e0a4be69c219a8e2e905d916ae02d475 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e16420835229addd98c66a061dd8a9bea9b405eff3738f4bb00ff603f98400d1 3 | size 4194304 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.attn.c_attn.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:45efa4ee6ac320f7eb5449955b7ac0154c814d9874c375abd47b9001e2855fd9 3 | size 9216 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.attn.c_attn.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1c4d50c6b3e7c62591e7287b5940ffa58ced69aec815e585b94e872e5e79bed4 3 | size 7077888 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.attn.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:f5251700af6d9a6b3885cf9c7815d70c6ff17a0dc939c005465e4756c2ed55dc 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.attn.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b6716a7af12ccf45e27e92aa7b033038175fbc82fc22750291c0f759cf7b5c6b 3 | size 2359296 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.attn.masked_bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8fc93e15d41731c6b43acf5c11babd0465d3e869ba00ac58364110274eecef16 3 | size 4 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.ln_1.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:43dcf271d476d0a084d70934e04db7bfd23bc421006f8524e0cb6c4bca41631b 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.ln_1.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a388be1c285b378cee43f48ecce1a56e9f39c537603ccbdf43314179b70d2964 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.ln_2.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ebe7f54adc51cbd1c0a90ffe54b5b082f612f21fff7590b31c91b9a45d6dbae4 3 | size 3072 4 | 
-------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.ln_2.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:21187e2fb37e9d5bd9bfb8c7917b6e4651c0bba0edd8e602fe33b532a863216f 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.mlp.c_fc.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f325dd48d6535236fc82f15c296c0ba72305107373084e7d83a2af7b61a01945 3 | size 12288 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.mlp.c_fc.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b81e29daa03b021cc0297b38eaabc4bf278628207f3e582267c444c3cdfccae4 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.mlp.c_proj.bias_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ff91b8d9d462396042d5ec07c14c7be91b701bcbe7aab999597ed78b6c7b59aa 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.h.9.mlp.c_proj.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d34e766640c9dc500daad86ddfc55e9739ff1a002c34767cfbc71a44ebddc4fc 3 | size 9437184 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.ln_f.bias_gpt.bin: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3101f7c89d3985741e5e7a720572bb3b4967763b2b6f9c8d7be5f27bf9b6f225 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.ln_f.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0179316b9de281fc74b7a4edcec4370af4e5831242a2200daa7ce7c0c6546e8c 3 | size 3072 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.wpe.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:29c69587e2af826b7c159c38620a32e549a925e6d9a0dc37cb563f377f5be772 3 | size 3145728 4 | -------------------------------------------------------------------------------- /weights/gpt2/transformer.wte.weight_gpt.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e182e433b37dbdb47448e0413c840edf6965113c1fc8048c63b69795d8cf875a 3 | size 154389504 4 | -------------------------------------------------------------------------------- /weights/tokenization/gpt_tokens.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:196139668be63f3b5d6574427317ae82f612a97c5d1cdaf36ed2256dbf636783 3 | size 1042301 4 | -------------------------------------------------------------------------------- /weights/tokenization/simple_tokens.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:48456ee48f8479821ec7d5b87ed0d707bfc80a1f1e3a97618206164482783270 3 | size 709 4 | 
--------------------------------------------------------------------------------