├── .gitignore ├── benchmark ├── transformer.py └── README.md ├── Makefile ├── LICENSE ├── weight_module.f90 ├── sentence_ex.txt ├── savemodel.py ├── README.md ├── transformer.f90 └── read_ggml.f90 /.gitignore: -------------------------------------------------------------------------------- 1 | /*/ 2 | tx 3 | *.mod 4 | *.bin 5 | *.o 6 | *.swp 7 | !benchmark 8 | *.o 9 | 10 | -------------------------------------------------------------------------------- /benchmark/transformer.py: -------------------------------------------------------------------------------- 1 | from sentence_transformers import SentenceTransformer 2 | from time import time 3 | import torch 4 | 5 | #change as desired 6 | #torch.set_num_threads(1) 7 | 8 | if __name__ == "__main__": 9 | 10 | start = time() 11 | st_model = SentenceTransformer("msmarco-distilbert-base-dot-prod-v3") 12 | end = time() 13 | 14 | print("load time:", end-start) 15 | 16 | with open("../sentence_ex.txt") as f: 17 | prompts = f.readlines() 18 | 19 | p = [p.strip() for p in prompts] 20 | print(len(p)) 21 | #print(p) 22 | 23 | start = time() 24 | for p in prompts: 25 | #print(p) 26 | st_model.encode(p) 27 | 28 | end = time() 29 | print("inferenece time:", end-start) 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | FORTRAN = gfortran-10 2 | GCC = gcc-10 3 | 4 | .DEFAULT_GOAL := all 5 | 6 | weight_module.o: weight_module.f90 7 | $(FORTRAN) -c -O3 -march=native -mtune=native -ffast-math -funroll-loops -flto -fPIC weight_module.f90 8 | 9 | transformer.o: transformer.f90 10 | $(FORTRAN) -c -O3 -march=native -mtune=native -ffast-math -funroll-loops -flto -fPIC transformer.f90 11 | read_ggml.o: read_ggml.f90 12 | $(FORTRAN) -c -O3 -march=native -mtune=native -ffast-math -funroll-loops -flto -fPIC read_ggml.f90 13 | 14 | tx: weight_module.o read_ggml.o transformer.o 15 | $(FORTRAN) -O3 -march=native -mtune=native -ffast-math -funroll-loops -flto -fPIC weight_module.o read_ggml.o transformer.o -o tx 16 | 17 | 18 | 19 | all: tx 20 | 21 | clean: 22 | rm *.o 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Andrew Marble 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /weight_module.f90: -------------------------------------------------------------------------------- 1 | module precision_module 2 | implicit none 3 | integer, parameter :: wp = kind(1.0) 4 | end module precision_module 5 | 6 | ! structs for reading weights, config information and state 7 | module weight_module 8 | use precision_module 9 | implicit none 10 | private wp 11 | 12 | type TransformerWeights 13 | real(kind=wp), allocatable :: word_embeddings(:,:) 14 | real(kind=wp), allocatable :: position_embeddings(:,:) 15 | real(kind=wp), allocatable :: emb_layer_norm_w(:) 16 | real(kind=wp), allocatable :: emb_layer_norm_b(:) 17 | real(kind=wp), allocatable :: wq(:,:,:) 18 | real(kind=wp), allocatable :: bq(:,:) 19 | real(kind=wp), allocatable :: wk(:,:,:) 20 | real(kind=wp), allocatable :: bk(:,:) 21 | real(kind=wp), allocatable :: wv(:,:,:) 22 | real(kind=wp), allocatable :: bv(:,:) 23 | real(kind=wp), allocatable :: wo(:,:,:) 24 | real(kind=wp), allocatable :: bo(:,:) 25 | real(kind=wp), allocatable :: sa_layer_norm_w(:,:) 26 | real(kind=wp), allocatable :: sa_layer_norm_b(:,:) 27 | real(kind=wp), allocatable :: w1(:,:,:) 28 | real(kind=wp), allocatable :: b1(:,:) 29 | real(kind=wp), allocatable :: w2(:,:,:) 30 | real(kind=wp), allocatable :: b2(:,:) 31 | real(kind=wp), allocatable :: out_layer_norm_w(:,:) 32 | real(kind=wp), allocatable :: out_layer_norm_b(:,:) 33 | real(kind=wp), allocatable :: linear(:,:) 34 | 35 | end type TransformerWeights 36 | 37 | type Config 38 | INTEGER :: emb_dim, hidden_dim, n_layers, n_heads, n_kv_heads, vocab_size, seq_len 39 | end type Config 40 | 41 | type RunState 42 | 43 | real(kind=wp), allocatable :: att(:,:) 44 | real(kind=wp), allocatable :: key_cache(:,:,:) 45 | real(kind=wp), allocatable :: value_cache(:,:,:) 46 | real(kind=wp) :: times(5) 47 | 48 | end type RunState 49 | 50 | end module weight_module 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | ## Python "transformers" code for comparison 2 | 3 | Summary: I compared total calculation time for embeddings on the 21 strings in `../sentence_ex.txt` between HF transformers and transformers.f90 on an (old) MacBook and a slightly newer Intel/Linux laptop. This code is heavily dependent on `matmul`, so we expect the linear algebra backend to be important. On the Intel machine, inference runs 4-5 times faster in Python than in the current Fortran implementation with OpenBLAS. On the Mac, the Accelerate framework + Fortran is nearly 2x faster than Python. Loading the weights initially is generally much slower in Python than in Fortran. 4 | 5 | 6 | Comparison on Linux, Intel Core i7 7 | 8 | ```bash 9 | $ time python transformer.py 10 | load time: 3.760972738265991 11 | 21 12 | inferenece time: 0.37131738662719727 13 | 14 | real 0m6.031s 15 | user 0m5.267s 16 | sys 0m2.265s 17 | ``` 18 | 19 | Compiled with `-O3` etc.
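Before the timed runs, a note on threading: the external-BLAS build spawns many threads (note the user/sys times in the OpenBLAS run below), and the Python script contains a commented-out `torch.set_num_threads(1)`. For a single-threaded comparison the thread count can be pinned on both sides; a minimal sketch, assuming `tx` was linked against OpenBLAS (which honors the `OPENBLAS_NUM_THREADS` environment variable):

```bash
# pin OpenBLAS to a single thread for the Fortran binary
OPENBLAS_NUM_THREADS=1 ./tx -m msmarco-distilbert-base-dot-prod-v3.bin -q -f sentence_ex.txt --time
# and uncomment torch.set_num_threads(1) in transformer.py for the Python side
```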
20 | 21 | ```bash 22 | $ gfortran-10 -O3 -march=native -ffast-math -funroll-loops transformer.f90 -o tx 23 | $ time ./tx -m msmarco-distilbert-base-dot-prod-v3.bin -q -f sentence_ex.txt --time 24 | Load time in seconds: 9.60000008E-02 25 | Total inference time in seconds: 4.36800003 26 | 27 | real 0m4.476s 28 | user 0m4.376s 29 | sys 0m0.100s 30 | ``` 31 | 32 | With external blas library 33 | 34 | ```bash 35 | $ gfortran-10 -O3 -march=native -ffast-math -funroll-loops transformer.f90 -fexternal-blas -lopenblas -o tx 36 | $ time ./tx -m msmarco-distilbert-base-dot-prod-v3.bin -q -f sentence_ex.txt --time 37 | Load time in seconds: 0.128000006 38 | Total inference time in seconds: 1.28000009 39 | 40 | real 0m1.416s 41 | user 0m9.457s 42 | sys 0m7.029s 43 | ``` 44 | 45 | MacOS (intel, 2017 MBP) 46 | 47 | ```bash 48 | % python transformer.py 49 | load time: 0.9851226806640625 50 | 21 51 | inferenece time: 2.890425205230713 52 | ``` 53 | 54 | ```bash 55 | % gfortran -O3 -march=native -ffast-math -funroll-loops transformer.f90 -o tx 56 | % time ./tx -m msmarco-distilbert-base-dot-prod-v3.bin -q -f sentence_ex.txt --time 57 | Load time in seconds: 0.224000007 58 | Total inference time in seconds: 7.64800024 59 | ./tx -m msmarco-distilbert-base-dot-prod-v3.bin -q -f sentence_ex.txt --time 7.27s user 0.25s system 94% cpu 7.976 total 60 | ``` 61 | 62 | ```bash 63 | % gfortran -O3 -march=native -ffast-math -funroll-loops transformer.f90 -fexternal-blas -framework Accelerate -o tx 64 | % time ./tx -m msmarco-distilbert-base-dot-prod-v3.bin -q -f sentence_ex.txt --time 65 | Load time in seconds: 0.352000028 66 | Total inference time in seconds: 1.50400007 67 | ./tx -m msmarco-distilbert-base-dot-prod-v3.bin -q -f sentence_ex.txt --time 1.47s user 0.19s system 76% cpu 2.178 total 68 | ``` 69 | -------------------------------------------------------------------------------- /sentence_ex.txt: -------------------------------------------------------------------------------- 1 | Europe is a continent located entirely in the Northern Hemisphere and mostly in the Eastern Hemisphere. 2 | It comprises the westernmost part of Eurasia and is bordered by the Arctic Ocean to the north, the Atlantic Ocean to the west, the Mediterranean Sea to the south, and Asia to the east. 3 | Europe is commonly considered to be separated from Asia by the watershed of the Ural Mountains, the Ural River, the Caspian Sea, the Greater Caucasus, the Black Sea, and the waterways of the Turkish Straits. 4 | Although some of this border is over land, Europe is generally accorded the status of a full continent because of its great physical size and the weight of history and tradition. 5 | Europe covers about 10,180,000 square kilometres (3,930,000 sq mi), or 2% of the Earth's surface (6.8% of land area), making it the second smallest continent. 6 | Politically, Europe is divided into about fifty sovereign states, of which Russia is the largest and most populous, spanning 39% of the continent and comprising 15% of its population. 7 | Europe had a total population of about 741 million (about 11% of the world population) as of 2018. 8 | The European climate is largely affected by warm Atlantic currents that temper winters and summers on much of the continent, even at latitudes along which the climate in Asia and North America is severe. 9 | Further from the sea, seasonal differences are more noticeable than close to the coast. 
10 | European culture is the root of Western civilization, which traces its lineage back to ancient Greece and ancient Rome. 11 | The fall of the Western Roman Empire in 476 AD and the subsequent Migration Period marked the end of Europe's ancient history and the beginning of the Middle Ages. 12 | A saxophone is a type of musical instrument in the woodwind family. 13 | The saxophone uses a piece of wood, called a reed, to make sound. 14 | The player blows air into the mouthpiece, which vibrates the reed. 15 | The saxophone also uses keys to change pitch, and the player closes or opens holes to choose the note. 16 | Commonly, saxophones have about 22 keys. 17 | The saxophone is most commonly found in four voices: soprano, alto, tenor, and baritone saxophones. 18 | However, uncommon saxophones include the bass and contrabass saxophones (lower than a baritone saxophone), the C-melody saxophone (between the tenor and alto saxophones), and the sopranino saxophone (higher than a soprano saxophone). 19 | It was invented in 1840 by Adolphe Sax and is used in classical, jazz, and occasionally in rock, pop, and other styles. 20 | The saxophone was originally created for military bands, but was commonly used in jazz big bands in the 1940s and 1950s. 21 | Famous saxophone players include Marcel Mule (classical music), John Coltrane (jazz music), and Charlie Parker (jazz music). 22 | -------------------------------------------------------------------------------- /savemodel.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import struct 3 | import json 4 | import torch 5 | import numpy as np 6 | 7 | #from transformers import AutoModel, AutoTokenizer 8 | from sentence_transformers import SentenceTransformer 9 | import re 10 | 11 | if len(sys.argv) > 1: 12 | dir_model = sys.argv[1] 13 | else: 14 | dir_model = "msmarco-distilbert-base-dot-prod-v3" 15 | 16 | with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f: 17 | encoder = json.load(f) 18 | 19 | with open(dir_model + "/config.json", "r", encoding="utf-8") as f: 20 | hparams = json.load(f) 21 | 22 | with open(dir_model + "/modules.json", "r", encoding="utf-8") as f: 23 | modules = json.load(f) 24 | 25 | st_model = SentenceTransformer(dir_model) 26 | 27 | list_vars = st_model[0].state_dict() # transformer 28 | 29 | def strip(x: str): 30 | x = "auto_model." 
+ x 31 | print(x) 32 | y = list_vars[x] 33 | assert y.view(-1)[0].dtype == torch.float32 34 | return y.numpy() 35 | 36 | if len(sys.argv) > 2: 37 | outfile = sys.argv[2] 38 | else: 39 | outfile = "msmarco-distilbert-base-dot-prod-v3_converted_full.bin" 40 | 41 | with open(outfile,mode='wb') as of: 42 | #write up front stuff 43 | header = struct.pack( 44 | 'iiiiiii', 45 | hparams['dim'], hparams['hidden_dim'], hparams['n_layers'], 46 | hparams['n_heads'], 0, len(encoder['model']['vocab']), 47 | hparams['max_position_embeddings'], 48 | ) 49 | of.write(header) 50 | 51 | w = strip('embeddings.word_embeddings.weight') 52 | of.write(memoryview(w)) 53 | 54 | w = strip('embeddings.position_embeddings.weight') 55 | of.write(memoryview(w)) 56 | 57 | w = strip('embeddings.LayerNorm.weight') 58 | of.write(memoryview(w)) 59 | 60 | w = strip('embeddings.LayerNorm.bias') 61 | of.write(memoryview(w)) 62 | 63 | layers = hparams['n_layers'] 64 | 65 | for l in range(layers): 66 | w = strip(f'transformer.layer.{l}.attention.q_lin.weight') 67 | of.write(memoryview(w)) 68 | 69 | for l in range(layers): 70 | w = strip(f'transformer.layer.{l}.attention.q_lin.bias') 71 | of.write(memoryview(w)) 72 | 73 | for l in range(layers): 74 | w = strip(f'transformer.layer.{l}.attention.k_lin.weight') 75 | of.write(memoryview(w)) 76 | 77 | for l in range(layers): 78 | w = strip(f'transformer.layer.{l}.attention.k_lin.bias') 79 | of.write(memoryview(w)) 80 | 81 | for l in range(layers): 82 | w = strip(f'transformer.layer.{l}.attention.v_lin.weight') 83 | of.write(memoryview(w)) 84 | 85 | for l in range(layers): 86 | w = strip(f'transformer.layer.{l}.attention.v_lin.bias') 87 | of.write(memoryview(w)) 88 | 89 | for l in range(layers): 90 | w = strip(f'transformer.layer.{l}.attention.out_lin.weight') 91 | of.write(memoryview(w)) 92 | 93 | for l in range(layers): 94 | w = strip(f'transformer.layer.{l}.attention.out_lin.bias') 95 | of.write(memoryview(w)) 96 | 97 | for l in range(layers): 98 | w = strip(f'transformer.layer.{l}.sa_layer_norm.weight') 99 | of.write(memoryview(w)) 100 | 101 | for l in range(layers): 102 | w = strip(f'transformer.layer.{l}.sa_layer_norm.bias') 103 | of.write(memoryview(w)) 104 | 105 | for l in range(layers): 106 | w = strip(f'transformer.layer.{l}.ffn.lin1.weight') 107 | of.write(memoryview(w)) 108 | 109 | for l in range(layers): 110 | w = strip(f'transformer.layer.{l}.ffn.lin1.bias') 111 | of.write(memoryview(w)) 112 | 113 | for l in range(layers): 114 | w = strip(f'transformer.layer.{l}.ffn.lin2.weight') 115 | of.write(memoryview(w)) 116 | 117 | for l in range(layers): 118 | w = strip(f'transformer.layer.{l}.ffn.lin2.bias') 119 | of.write(memoryview(w)) 120 | 121 | for l in range(layers): 122 | w = strip(f'transformer.layer.{l}.output_layer_norm.weight') 123 | of.write(memoryview(w)) 124 | 125 | for l in range(layers): 126 | w = strip(f'transformer.layer.{l}.output_layer_norm.bias') 127 | of.write(memoryview(w)) 128 | 129 | # just stick the linear weights at the end 130 | print("linear.weight") 131 | y = st_model[2].state_dict()['linear.weight'] 132 | assert y.view(-1)[0].dtype == torch.float32 133 | of.write(memoryview(y.numpy())) 134 | 135 | if len(sys.argv) > 3: 136 | vname = sys.argv[3] 137 | else: 138 | vname = "tokenizer.bin" 139 | 140 | vocab = encoder["model"]["vocab"] 141 | # write out vocab 142 | max_len = max([len(bytes(v,"utf-8")) for v in vocab]) 143 | print("Maximum word size: ", max_len) 144 | with open(vname, "wb") as f: 145 | f.write(struct.pack("i", max_len)) 146 | 147 | for v in 
vocab: 148 | vb = bytes(v,"utf-8") 149 | f.write(struct.pack("ii", 0, len(vb))) 150 | f.write(struct.pack(f"{len(vb)}s",vb)) 151 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Ferrite - Simple, lightweight transformers in Fortran 2 | 3 | Modern ML frameworks like HF transformers are easy to use but extremely abstract. There are times the abstraction makes sense, particularly for model training. For inference using transformers, the "real" inference code is less complex than the abstraction, and it can be faster and more transparent to just write the code. That way you can plainly see what your model is doing under the hood and adapt it to your use case, rather than picking through layer after layer of abstraction over what is effectively a for-loop with some matrix multiplications inside. 4 | 5 | To that end, as a complement to the [llama.f90](https://github.com/rbitr/llama.f90) Fortran LLM, this project demonstrates a [Sentence Transformer](https://www.sbert.net/index.html) in "pure" Fortran with no dependencies (you still need Python to convert PyTorch models if you want to use them). 6 | 7 | I plan to evolve this to make sure it can work with general transformer models, and add performance optimization as required. That said, I don't want to add any abstraction, so I only want to add generalizations that don't obscure what is going on. The code can easily be adapted for architectural variations. 8 | 9 | ## Setup and running 10 | 11 | ```bash 12 | # clone the repo 13 | git clone https://github.com/rbitr/ferrite 14 | cd ferrite 15 | # download a model 16 | wget https://huggingface.co/SDFASDGA/llm/resolve/main/msmarco-distilbert-base-dot-prod-v3-f32.gguf 17 | # compile 18 | make 19 | #run 20 | ./tx -m msmarco-distilbert-base-dot-prod-v3-f32.gguf -v -p "I alwas feel like somebody's watching me" # (sic) 21 | 22 | GGUF Header Info 23 | Magic number: 1179993927 24 | Version: 3 25 | Tensor Count: 101 26 | Key-Value Pairs: 15 27 | general.architecture 28 | distilbert 29 | general.name 30 | DistilBert 31 | distilbert.context_length 32 | 512 33 | distilbert.embedding_length 34 | 768 35 | distilbert.feed_forward_length 36 | 3072 37 | distilbert.block_count 38 | 6 39 | distilbert.attention.head_count 40 | 12 41 | distilbert.attention.head_count_kv 42 | 1 43 | general.file_type 44 | 0 45 | tokenizer.ggml.model 46 | gpt2 47 | tokenizer.ggml.tokens 48 | 30522 49 | tokenizer.ggml.token_type 50 | 30522 51 | tokenizer.ggml.unknown_token_id 52 | 100 53 | tokenizer.ggml.seperator_token_id 54 | 102 55 | tokenizer.ggml.padding_token_id 56 | 0 57 | Position 573471 58 | Deficit 30 59 | data offset 573473 60 | Embedding dimension: 768 61 | Hidden dimension: 3072 62 | Layers: 6 63 | Heads: 12 64 | kv Heads: 1 65 | Vocabulary Size: 30522 66 | Sequence Length: 512 67 | head size 64 68 | kv head Size 64 69 | loaded word embedding weights: 23440896 70 | loaded position embedding weights: 393216 71 | loaded embedding layernorm weights: 768 72 | loaded embedding layernorm bias: 768 73 | loaded wq weights: 3538944 74 | loaded wq bias: 4608 75 | loaded wk weights: 3538944 76 | loaded wk bias: 4608 77 | loaded wv weights: 3538944 78 | loaded wv bias: 4608 79 | loaded wo weights: 3538944 80 | loaded wo bias: 4608 81 | loaded sa layernorm weights: 4608 82 | loaded sa layernorm bias: 4608 83 | loaded w1 weights: 14155776 84 | loaded w1 bias: 18432 85 | loaded w2 (down) weights: 14155776 86 | loaded w2 (down) bias:
4608 87 | loaded output norm weights: 4608 88 | loaded output norm bias: 4608 89 | loaded classifier weights: 589824 90 | loading tokens 91 | found 30522 tokens 92 | maximum token length 18 93 | Token 4081 is andrew 94 | simple token: i 95 | wordpiece tokens: i 96 | simple token: alwas 97 | wordpiece tokens: al ##was 98 | simple token: feel 99 | wordpiece tokens: feel 100 | simple token: like 101 | wordpiece tokens: like 102 | simple token: somebody 103 | wordpiece tokens: somebody 104 | simple token: ' 105 | wordpiece tokens: ' 106 | simple token: s 107 | wordpiece tokens: s 108 | simple token: watching 109 | wordpiece tokens: watching 110 | simple token: me 111 | wordpiece tokens: me 112 | 102 1046 2633 17312 2515 2067 8308 1006 1056 3667 2034 103 113 | 0.117702775 0.268108070 -0.412374288 -0.684159577 -0.272519588 -0.633238137 ... 114 | ``` 115 | 116 | Right now I've only tested it with the `msmarco-distilbert-base-dot-prod-v3` model from sbert.net. This is a DistilBERT transformer with a pooling and linear layer used for generating embeddings for semantic search. See https://www.sbert.net/docs/pretrained-models/msmarco-v3.html for more information. 117 | 118 | Command line arguments are as follows: 119 | 120 | ```bash 121 | case ('-m', '--model') 122 | ! path to model file 123 | -- 124 | case ('-p', '--prompt') 125 | ! prompt string 126 | -- 127 | case ('-s', '--tokenizer') 128 | ! path to custom tokenizer 129 | -- 130 | case ('-t', '--temperature') 131 | ! temperature scaling (not used) 132 | -- 133 | case ('-n', '--num_tokens') 134 | ! number of tokens to generate, including prompt (not used) 135 | -- 136 | case ('-f', '--filename') 137 | ! text file with a prompt on each line 138 | -- 139 | case ('-v', '--verbose') 140 | ! print additional information 141 | -- 142 | case ('-1', '--single_line') 143 | ! print each element on single line 144 | -- 145 | case ('-q', '--quiet') 146 | ! don't print embedding 147 | -- 148 | case ('--time') 149 | ! display timings 150 | 151 | ``` 152 | 153 | ## Getting models 154 | 155 | Models are in GGUF format; see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md 156 | 157 | You can use the `convert-hf-to-gguf.py` file from https://github.com/rbitr/llama.cpp to convert HF model files, i.e. 158 | 159 | ```bash 160 | git clone https://github.com/rbitr/llama.cpp 161 | # get the model 162 | git clone https://huggingface.co/sentence-transformers/msmarco-distilbert-base-dot-prod-v3 163 | # convert 164 | python ./llama.cpp/convert-hf-to-gguf.py msmarco-distilbert-base-dot-prod-v3 --outtype f32 165 | ``` 166 | 167 | Note that only DistilBERT models are supported and this has not been extensively tested. Support is currently limited to the fork referenced above; it is not part of the original repo. 168 | 169 | 170 | ## Examples (currently using the old model file format, adjust accordingly) 171 | 172 | Included in the repo is a file `sentence_ex.txt` made up of some sentences from Wikipedia about Europe and the saxophone. We save temporary embeddings for each sentence with a bash one-liner: 173 | 174 | ```bash 175 | x=1; while read s; do echo $x $s; ./tx -m msmarco-distilbert-base-dot-prod-v3_converted_full.bin -1 -p "$s" > tmp/emb${x}.txt; x=$((x+1)); done < sentence_ex.txt 176 | 1 Europe is a continent located entirely in the Northern Hemisphere and mostly in the Eastern Hemisphere. 177 | 2 It comprises the westernmost part of Eurasia and is bordered by the Arctic Ocean to the north, the Atlantic Ocean to the west, the Mediterranean Sea to the south, and Asia to the east.
172 | 3 Europe is commonly considered to be separated from Asia by the watershed of the Ural Mountains, the Ural River, the Caspian Sea, the Greater Caucasus, the Black Sea, and the waterways of the Turkish Straits. 173 | 4 Although some of this border is over land, Europe is generally accorded the status of a full continent because of its great physical size and the weight of history and tradition. 174 | 5 Europe covers about 10,180,000 square kilometres (3,930,000 sq mi), or 2% of the Earth's surface (6.8% of land area), making it the second smallest continent. 175 | 6 Politically, Europe is divided into about fifty sovereign states, of which Russia is the largest and most populous, spanning 39% of the continent and comprising 15% of its population. 176 | 7 Europe had a total population of about 741 million (about 11% of the world population) as of 2018. 177 | 8 The European climate is largely affected by warm Atlantic currents that temper winters and summers on much of the continent, even at latitudes along which the climate in Asia and North America is severe. 178 | 9 Further from the sea, seasonal differences are more noticeable than close to the coast. 179 | 10 European culture is the root of Western civilization, which traces its lineage back to ancient Greece and ancient Rome. 180 | 11 The fall of the Western Roman Empire in 476 AD and the subsequent Migration Period marked the end of Europe's ancient history and the beginning of the Middle Ages. 181 | 12 A saxophone is a type of musical instrument in the woodwind family. 182 | 13 The saxophone uses a piece of wood, called a reed, to make sound. 183 | 14 The player blows air into the mouthpiece, which vibrates the reed. 184 | 15 The saxophone also uses keys to change pitch, and the player closes or opens holes to choose the note. 185 | 16 Commonly, saxophones have about 22 keys. 186 | 17 The saxophone is most commonly found in four voices: soprano, alto, tenor, and baritone saxophones. 187 | 18 However, uncommon saxophones include the bass and contrabass saxophones (lower than a baritone saxophone), the C-melody saxophone (between the tenor and alto saxophones), and the sopranino saxophone (higher than a soprano saxophone). 188 | 19 It was invented in 1840 by Adolphe Sax and is used in classical, jazz, and occasionally in rock, pop, and other styles. 189 | 20 The saxophone was originally created for military bands, but was commonly used in jazz big bands in the 1940s and 1950s. 190 | 21 Famous saxophone players include Marcel Mule (classical music), John Coltrane (jazz music), and Charlie Parker (jazz music). 191 | ``` 192 | 193 | Then we can lookup queries by making an embedding and finding the entry with the largest dot-product (computed here in awk) 194 | 195 | ```bash 196 | ./tx -m msmarco-distilbert-base-dot-prod-v3_converted_full.bin -1 -p "What bodies of water are in europe?" 
> tmp/embq.txt 197 | for x in {1..21}; do echo $x; paste tmp/emb${x}.txt tmp/embq.txt | awk '{dp+=$1*$2} END {print dp}'; done 198 | 1 199 | 35.505 200 | 2 201 | 33.9245 202 | 3 203 | 37.5551 204 | 4 205 | 29.1835 206 | 5 207 | 36.0957 208 | 6 209 | 31.6795 210 | 7 211 | 29.0034 212 | 8 213 | 31.7701 214 | 9 215 | 17.0193 216 | 10 217 | 26.859 218 | 11 219 | 20.4201 220 | 12 221 | 10.0551 222 | 13 223 | 9.95383 224 | 14 225 | 14.5428 226 | 15 227 | 8.84668 228 | 16 229 | 10.5251 230 | 17 231 | 10.2478 232 | 18 233 | 8.90325 234 | 19 235 | 12.1863 236 | 20 237 | 6.15891 238 | 21 239 | 7.62652 240 | ``` 241 | 242 | The question was about Europe so the scores are higher on the first 11 entries, and the maximum is #3 which talks about waterways. 243 | 244 | Below we ask who invented the saxophone and get the highest score at sentence 19 which contains the answer. (Note I misspelled saxophone in the query and it still works). 245 | 246 | ```bash 247 | ./tx -m msmarco-distilbert-base-dot-prod-v3_converted_full.bin -1 -p "Who invented the saxaphone?" > tmp/embq.txt 248 | for x in {1..21}; do echo $x; paste tmp/emb${x}.txt tmp/embq.txt | awk '{dp+=$1*$2} END {print dp}'; done 249 | 1 250 | 8.78761 251 | 2 252 | 11.0401 253 | 3 254 | 11.4972 255 | 4 256 | 5.93544 257 | 5 258 | 3.17357 259 | 6 260 | 6.38081 261 | 7 262 | 9.9643 263 | 8 264 | 16.3048 265 | 9 266 | 12.8389 267 | 10 268 | 22.387 269 | 11 270 | 22.8647 271 | 12 272 | 31.1579 273 | 13 274 | 32.949 275 | 14 276 | 24.6756 277 | 15 278 | 28.0059 279 | 16 280 | 24.3043 281 | 17 282 | 25.446 283 | 18 284 | 29.0274 285 | 19 286 | 42.3414 287 | 20 288 | 30.9246 289 | 21 290 | 33.6924 291 | ``` 292 | 293 | 294 | -------------------------------------------------------------------------------- /transformer.f90: -------------------------------------------------------------------------------- 1 | module arg_parse 2 | implicit none 3 | 4 | type args 5 | real :: temperature 6 | character(:), allocatable :: model_file 7 | character(:), allocatable :: prompt 8 | character(:), allocatable :: tokenizer 9 | character(:), allocatable :: filename 10 | logical :: verbose 11 | integer :: n 12 | logical :: single_line, quiet, time 13 | end type args 14 | 15 | contains 16 | 17 | subroutine parse_args(arg_values) 18 | type(args) :: arg_values 19 | integer :: i, num_args 20 | character(256) :: arg 21 | 22 | 23 | 24 | !defaults 25 | arg_values%temperature = 0 26 | arg_values%model_file = "" 27 | arg_values%prompt = "" 28 | arg_values%verbose = .false. 29 | arg_values%n = 256 30 | arg_values%tokenizer = "tokenizer.bin" 31 | arg_values%single_line = .false. 32 | arg_values%quiet = .false. 33 | arg_values%filename = "" 34 | arg_values%time = .false. 35 | 36 | num_args = command_argument_count() 37 | 38 | i = 1 39 | do while (i <= num_args) 40 | call get_command_argument(i, arg) 41 | select case (arg) 42 | case ('-m', '--model') 43 | ! path to model file 44 | call get_command_argument(i+1, arg) 45 | arg_values%model_file = trim(arg) 46 | i = i + 2 47 | case ('-p', '--prompt') 48 | ! prompt string 49 | call get_command_argument(i+1, arg) 50 | arg_values%prompt = trim(arg) 51 | i = i + 2 52 | case ('-s', '--tokenizer') 53 | ! path to custom tokenizer 54 | call get_command_argument(i+1, arg) 55 | arg_values%tokenizer = trim(arg) 56 | i = i + 2 57 | case ('-t', '--temperature') 58 | ! temperature scaling 59 | call get_command_argument(i+1, arg) 60 | read(arg,*) arg_values%temperature 61 | i = i + 2 62 | case ('-n', '--num_tokens') 63 | ! 
number of tokens to generate, including prompt 64 | call get_command_argument(i+1, arg) 65 | read(arg,*) arg_values%n 66 | i = i + 2 67 | case ('-f', '--filename') 68 | ! text file with a prompt on each line 69 | call get_command_argument(i+1, arg) 70 | arg_values%filename = trim(arg) 71 | 72 | i = i + 2 73 | case ('-v', '--verbose') 74 | ! print additional information 75 | arg_values%verbose = .true. 76 | i = i + 1 77 | case ('-1', '--single_line') 78 | ! print each element on single line 79 | arg_values%single_line = .true. 80 | i = i + 1 81 | case ('-q', '--quiet') 82 | ! don't print embedding 83 | arg_values%quiet = .true. 84 | i = i + 1 85 | case ('--time') 86 | ! display timings 87 | arg_values%time = .true. 88 | i = i + 1 89 | case default 90 | print *, 'Unrecognized option:', trim(arg) 91 | stop 92 | end select 93 | end do 94 | 95 | ! check for arguments 96 | 97 | 98 | end subroutine 99 | 100 | end module arg_parse 101 | 102 | program transformer 103 | 104 | use iso_c_binding 105 | use precision_module 106 | use weight_module 107 | use arg_parse 108 | use read_ggml, only: load_ggml 109 | implicit none 110 | 111 | type(TransformerWeights) :: weights 112 | type(Config) :: cfg 113 | 114 | integer(4) :: emb_dim, hidden_dim, n_layers, n_heads, n_kv_heads, vocab_size, seq_len 115 | integer(4) :: itmp, msize 116 | 117 | type (args) :: arg_values 118 | character(:), allocatable :: prompt 119 | logical :: verbose, time 120 | 121 | real(kind=wp) :: score 122 | integer :: tok_len, max_len, n, p, l 123 | !integer :: vocab_size = 32000 124 | character(:), allocatable :: tmpstr 125 | character(:), dimension(:), allocatable :: vocab 126 | real(kind=wp),allocatable :: y(:), scores(:) 127 | integer, allocatable :: prompt_tokens(:) 128 | integer, allocatable :: vocab_len(:) 129 | integer, parameter :: max_prompt_len = 1024 130 | character(:), dimension(:), allocatable :: prompts 131 | character(len=max_prompt_len) :: temp_prompt 132 | integer :: tfid, ierr, num_lines 133 | real(kind=wp) :: t_start, t_end 134 | 135 | character(:), dimension(:), allocatable :: simple_tokens 136 | !integer, allocatable :: prompt_tokens 137 | 138 | 139 | call parse_args(arg_values) 140 | 141 | if (arg_values%prompt == "" .and. arg_values%filename == "") then 142 | !print *, arg_values%filename 143 | print *, "prompt required" 144 | stop 145 | end if 146 | 147 | verbose = arg_values%verbose 148 | time = arg_values%time 149 | 150 | msize = 0 151 | 152 | t_start = time_ms() 153 | 154 | call load_ggml(arg_values%model_file, weights, cfg, vocab, vocab_len, verbose) 155 | emb_dim = cfg%emb_dim 156 | hidden_dim = cfg%hidden_dim 157 | n_layers = cfg%n_layers 158 | n_heads = cfg%n_heads 159 | vocab_size = cfg%vocab_size 160 | seq_len = cfg%seq_len 161 | max_len = maxval(vocab_len) 162 | 163 | 164 | if (verbose) then 165 | !write(*,"(A,I0,A)") "Read ", vocab_size, " tokens" 166 | write(*,"(A,A)") "Token 4081 is ", vocab(4081) 167 | end if 168 | 169 | 170 | ! if there is a prompt, read the prompt and make a length 1 list 171 | ! 
if there is a file, read the lines into a list 172 | 173 | if (arg_values%prompt /= "") then 174 | allocate(character(len=max_prompt_len) :: prompts(1)) 175 | prompts(1) = arg_values%prompt 176 | 177 | else if (arg_values%filename /= "") then 178 | 179 | tfid = 5 180 | open(unit=tfid,file=arg_values%filename) 181 | ierr = 0 182 | num_lines = -1 183 | do while (ierr == 0) 184 | num_lines = num_lines + 1 185 | read(tfid,*,iostat=ierr) temp_prompt 186 | end do 187 | 188 | if (verbose) then 189 | write(*,'(A,I0,A)') "Read ", num_lines, " lines" 190 | end if 191 | 192 | allocate(character(len=max_prompt_len) :: prompts(num_lines)) 193 | 194 | rewind(tfid) 195 | do p = 1,num_lines 196 | read(tfid, '(A)') prompts(p) 197 | end do 198 | 199 | end if 200 | 201 | t_end = time_ms() 202 | 203 | if (time) then 204 | print *, "Load time in seconds: ", (t_end-t_start)/1000 205 | end if 206 | 207 | ! tokenize prompt 208 | !simple_tokens = simple_tokenize(arg_values%prompt) 209 | 210 | t_start = time_ms() 211 | do p=1,size(prompts) 212 | prompt_tokens = sp_tokenize(trim(prompts(p))) 213 | 214 | 215 | if (verbose) then 216 | simple_tokens = simple_tokenize(trim(prompts(p))) 217 | do n=1,size(simple_tokens) 218 | print *, "simple token: ", simple_tokens(n) 219 | print *, "wordpiece tokens: ", encode_word(simple_tokens(n)) 220 | end do 221 | 222 | print *, prompt_tokens 223 | 224 | end if 225 | 226 | !run through transformer 227 | y = dbert(prompt_tokens,weights,cfg) 228 | 229 | if (arg_values%quiet) then 230 | cycle 231 | end if 232 | 233 | if (arg_values%single_line) then 234 | do n=1,emb_dim 235 | write (*,"(F10.5)") y(n) 236 | end do 237 | else 238 | print *, y 239 | end if 240 | 241 | end do 242 | t_end = time_ms() 243 | 244 | if (time) then 245 | print *, "Total inference time in seconds: ", (t_end-t_start)/1000 246 | end if 247 | 248 | 249 | contains 250 | 251 | 252 | function layer_norm(x,w,b) result(xr) 253 | real(kind=wp) :: x(:,:), w(:), b(:) 254 | real(kind=wp) :: xr(size(x,1), size(x,2)) 255 | real(kind=wp) :: xmean(size(x,1),size(x,2)), xvar(size(x,1),size(x,2)) 256 | real(kind=wp) :: xn 257 | !print *, "A" 258 | xmean = spread(sum(x,dim=1)/size(x,1),1,size(x,1)) 259 | xvar = spread(sum( (x-xmean)*(x-xmean),dim=1 ) / size(x,1), 1, size(x,1)) 260 | xr = (x - xmean) / sqrt(xvar + 1e-12) 261 | !print *, "B" 262 | xr = xr*spread(w,2,size(x,2)) + spread(b,2,size(x,2)) 263 | end function 264 | 265 | function softmax(x) result(y) 266 | real(kind=wp), intent(in) :: x(:,:) 267 | real(kind=wp) :: y(size(x,1),size(x,2)) 268 | 269 | y = exp(x - spread(maxval(x,dim=1),1,size(x,1))) 270 | y = y / spread(sum(y,dim=1),1,size(x,1) ) 271 | 272 | end function 273 | 274 | function attention(q,k,v) result(y) 275 | real(kind=wp), intent(in) :: q(:,:), k(:,:), v(:,:) 276 | real(kind=wp) :: y(size(q,1),size(q,2)) 277 | real(kind=wp), allocatable :: y_int(:,:) 278 | y = matmul(v,(softmax(matmul(transpose(k),q) / sqrt(1.0*size(q,1))))) 279 | end function 280 | 281 | function gelu(x) result(y) 282 | real(kind=wp), intent(in) :: x(:,:) 283 | real(kind=wp) :: y(size(x,1),size(x,2)) 284 | y = 0.5 * x * (1 + tanh(sqrt(2 / 3.1415926536) * (x + 0.044715 * x**3))) 285 | end function 286 | 287 | function dbert(toks,w,c) result(y) 288 | integer, intent(in) :: toks(:) 289 | type(TransformerWeights) :: w 290 | type(Config) :: c 291 | real(kind=wp), allocatable :: y(:) 292 | integer :: i,j,l,h,nt,hsize 293 | real(kind=wp), allocatable :: x(:,:) 294 | 295 | real(kind=wp), allocatable :: q(:,:), k(:,:), v(:,:) 296 | real(kind=wp), 
allocatable :: qs(:,:,:), ks(:,:,:), vs(:,:,:) 297 | real(kind=wp), allocatable :: xb(:,:), attn_out(:,:), xbup(:,:) 298 | nt = size(toks) 299 | allocate(x(c%emb_dim, nt)) 300 | allocate(y(c%emb_dim)) 301 | allocate(xb(c%emb_dim, nt)) 302 | allocate(attn_out(c%emb_dim,nt)) 303 | allocate(xbup(c%hidden_dim,nt)) 304 | 305 | hsize = c%emb_dim/c%n_heads 306 | 307 | do i=1,nt 308 | x(:,i) = w%word_embeddings(:,toks(i)) 309 | x(:,i) = x(:,i) + w%position_embeddings(:,i) 310 | end do 311 | 312 | x = layer_norm(x,w%emb_layer_norm_w, w%emb_layer_norm_b) 313 | 314 | 315 | do l=1,c%n_layers 316 | 317 | q = matmul(transpose(w%wq(:,:,l)),x) + spread(w%bq(:,l),2,nt) 318 | k = matmul(transpose(w%wk(:,:,l)),x) + spread(w%bk(:,l),2,nt) 319 | v = matmul(transpose(w%wv(:,:,l)),x) + spread(w%bv(:,l),2,nt) 320 | 321 | ! split along embedding dim 322 | do h = 1,c%n_heads 323 | 324 | xb(((h-1)*hsize+1):(h*hsize),:) = attention( q(((h-1)*hsize+1):(h*hsize),:),& 325 | &k(((h-1)*hsize+1):(h*hsize),:), v(((h-1)*hsize+1):(h*hsize),:)) 326 | 327 | end do 328 | 329 | xb = matmul(transpose(w%wo(:,:,l)),xb) + spread(w%bo(:,l),2,nt) 330 | xb = xb + x 331 | 332 | xb = layer_norm(xb,w%sa_layer_norm_w(:,l), w%sa_layer_norm_b(:,l)) 333 | 334 | attn_out = xb 335 | 336 | xbup = matmul(transpose(w%w1(:,:,l)),xb) + spread(w%b1(:,l),2,nt) 337 | 338 | xbup = gelu(xbup) 339 | 340 | xb = matmul(transpose(w%w2(:,:,l)),xbup) + spread(w%b2(:,l),2,nt) 341 | 342 | xb = xb + attn_out 343 | 344 | x = layer_norm(xb,w%out_layer_norm_w(:,l), w%out_layer_norm_b(:,l)) 345 | 346 | end do 347 | 348 | ! "pooling" average 349 | y = sum(x,dim=2) / size(x,2) 350 | 351 | ! linear 352 | y = matmul(transpose(w%linear), y) 353 | 354 | end function 355 | 356 | function sp_tokenize(text) result(inds) 357 | character(len=*) :: text 358 | integer, allocatable :: inds(:) 359 | character(:), dimension(:), allocatable :: tokens, wpe 360 | integer :: m, n 361 | 362 | allocate(inds(1)) 363 | 364 | inds(1) = 102 ! bos (1 added because 1 based indices) 365 | 366 | tokens = simple_tokenize(text) 367 | 368 | do m=1,size(tokens) 369 | wpe = encode_word(tokens(m)) 370 | do n = 1,size(wpe) 371 | inds = [inds, lookup(wpe(n),len_trim(wpe(n)))] 372 | end do 373 | end do 374 | 375 | inds = [inds, 103] 376 | 377 | end function 378 | 379 | function lookup(s,l) result(ind) 380 | character(len=*) :: s 381 | integer :: l 382 | integer :: i, ind 383 | 384 | do i = 1,size(vocab) 385 | if (vocab(i) == s .and. vocab_len(i)==l) then 386 | ind = i 387 | return 388 | end if 389 | end do 390 | ind = -1 391 | end function 392 | 393 | function encode_word(word) result(tokens) 394 | character(len=*) :: word 395 | character(:), dimension(:), allocatable :: tokens 396 | integer :: i 397 | 398 | allocate(character(len=max_len) :: tokens(0)) 399 | 400 | do while(len_trim(word) > 0) 401 | i = len_trim(word) 402 | do while ( (i > 0) .and. 
(lookup(word(:i),i) <= 0)) 403 | i = i - 1 404 | end do 405 | 406 | if ( i == 0) then 407 | deallocate(tokens) 408 | tokens = ["UNK"] 409 | return 410 | end if 411 | tokens = [tokens, word(:i)] 412 | !print *, tokens 413 | word = word((i+1):) 414 | if (len_trim(word) > 0) then 415 | word = "##" // word 416 | end if 417 | 418 | 419 | end do 420 | 421 | end function 422 | 423 | 424 | function simple_tokenize(text) result(tokens) 425 | character(len=*) :: text 426 | character(:), dimension(:), allocatable :: tokens 427 | character(:), allocatable :: ltext, allc 428 | character(len=max_len) :: next_token 429 | integer :: pos 430 | 431 | character(26), parameter :: alpha = 'abcdefghijklmnopqrstuvwxyz' 432 | character(35) :: punct = '[!"#$%&\()*+,-./:;<=>?@\\^_`{|}~])x' 433 | character(10) :: numbers = '0123456789' 434 | 435 | ! is there another way to add the single quote? 436 | punct(35:35) = "'" 437 | !print *, punct 438 | allc = alpha // punct // numbers 439 | 440 | allocate(character(len=max_len) :: tokens(0)) 441 | 442 | ltext = to_lower(text) 443 | 444 | do while (len_trim(ltext) > 0) 445 | pos = 1 446 | 447 | next_token = "" 448 | 449 | 450 | 451 | do while(index(allc,ltext(pos:pos)) <= 0) 452 | pos = pos + 1 453 | end do 454 | 455 | ltext = ltext(pos:) 456 | 457 | pos = 1 458 | 459 | if (index(punct,ltext(pos:pos)) > 0 .and. pos <= len_trim(ltext)) then 460 | !print *, index(punct,ltext(pos:pos)) 461 | next_token = ltext(pos:pos) 462 | !if (verbose) then 463 | ! print *, next_token 464 | !end if 465 | tokens = [tokens, next_token] 466 | ltext = ltext((pos+1):) 467 | cycle 468 | end if 469 | 470 | if (index(alpha,ltext(pos:pos)) > 0 .and. pos <= len_trim(ltext)) then !next char is alphabet 471 | 472 | do while(index(alpha,ltext(pos:pos)) > 0 .and. pos <= len_trim(ltext)) 473 | pos = pos + 1 474 | end do 475 | 476 | next_token = ltext(1:(pos-1)) 477 | ltext = ltext(pos:) 478 | 479 | !if (verbose) then 480 | !print *, "control" 481 | !print *, pos 482 | ! print *, next_token 483 | !print *, ltext 484 | !end if 485 | 486 | ! fortran 2003? 487 | tokens = [tokens, next_token] 488 | 489 | else if (index(numbers,ltext(pos:pos)) > 0 .and. pos <= len_trim(ltext)) then ! next char is number 490 | do while(index(numbers,ltext(pos:pos)) > 0 .and. pos <= len_trim(ltext)) 491 | pos = pos + 1 492 | end do 493 | 494 | next_token = ltext(1:(pos-1)) 495 | ltext = ltext(pos:) 496 | 497 | !if (verbose) then 498 | !print *, "control" 499 | !print *, pos 500 | ! print *, next_token 501 | !print *, ltext 502 | !end if 503 | 504 | ! fortran 2003? 
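! yes - appending with [tokens, next_token] relies on automatic reallocation of
! allocatable arrays on assignment, a Fortran 2003 feature (same pattern as the
! alphabetic branch above)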
505 | tokens = [tokens, next_token] 506 | 507 | 508 | end if 509 | 510 | end do 511 | 512 | 513 | end function 514 | 515 | !stackoverflow.com/questions/10759375/how-can-i-write-a-to-upper-or-to-lower-function-in-f90 516 | function to_lower (str) result (string) 517 | 518 | 519 | implicit None 520 | character(*), intent(in) :: str 521 | character(len(str)) :: string 522 | 523 | integer :: ic, i 524 | 525 | character(26), parameter :: cap = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 526 | character(26), parameter :: low = 'abcdefghijklmnopqrstuvwxyz' 527 | 528 | string = str 529 | do i = 1, len_trim(str) 530 | ic = index(cap, str(i:i)) 531 | if (ic > 0) string(i:i) = low(ic:ic) 532 | end do 533 | 534 | end function to_lower 535 | 536 | function time_ms() result(t_ms) 537 | real(kind=wp) :: t_ms 538 | integer(4) :: ms 539 | !call cpu_time(t_ms) 540 | call system_clock(ms) 541 | t_ms = real(ms) 542 | end function 543 | 544 | end program 545 | -------------------------------------------------------------------------------- /read_ggml.f90: -------------------------------------------------------------------------------- 1 | ! load.f90 2 | 3 | module mixed_type_module 4 | use precision_module 5 | implicit none 6 | type mixed_type 7 | class(*), allocatable :: item 8 | end type mixed_type 9 | 10 | type multi_type 11 | integer :: type_num 12 | integer(4) :: i32 13 | !integer(2) :: i16 14 | real(4) :: f32 15 | character(64) :: string 16 | type(multi_type), allocatable :: a(:) 17 | end type 18 | 19 | type ggml_tensor_info 20 | character(64) :: tname 21 | integer(4) :: ndim, ttype 22 | integer(8) :: offset 23 | integer(8), allocatable :: dims(:) 24 | end type 25 | 26 | type generic_tensor 27 | integer :: ndims 28 | integer :: ttype 29 | integer(2), allocatable :: f161d(:) 30 | integer(2), allocatable :: f162d(:,:) 31 | real(kind=wp), allocatable :: f321d(:) 32 | real(kind=wp), allocatable :: f322d(:,:) 33 | ! can add fp4 34 | end type 35 | 36 | 37 | end module 38 | 39 | 40 | module read_ggml 41 | 42 | use precision_module 43 | use mixed_type_module 44 | use weight_module 45 | implicit none 46 | 47 | type(ggml_tensor_info), allocatable :: tensors(:) 48 | logical :: verbose 49 | !integer :: file_pos 50 | integer(8) :: tensor_count 51 | logical, parameter :: verbose2 = .false. 
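! What load_ggml reads from the GGUF file, in order (see the gguf.md spec linked in the README):
!   1. header: magic (int32), version (int32), tensor count (int64), kv-pair count (int64)
!   2. key-value pairs: length-prefixed key string, value type id, then the value itself
!   3. tensor infos: name, number of dimensions, dims, data type, and offset into the data section
!   4. zero padding up to the next multiple of general.alignment (assumed 32 when not given)
!   5. tensor data, read by seeking to the data start plus each tensor's offset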
52 | contains 53 | subroutine load_ggml(filename, w, c, vocab, token_lengths, v) 54 | character(len=*), intent(in) :: filename 55 | type(TransformerWeights), intent(out) :: w 56 | type(Config), intent(out) :: c 57 | real(kind=wp), allocatable :: scores(:) 58 | character(:), dimension(:), allocatable, intent(out) :: vocab 59 | integer(4), allocatable, intent(out) :: token_lengths(:) 60 | logical, intent(in) :: v 61 | 62 | character(:), dimension(:), allocatable :: vocab_swp 63 | integer(4) :: magic, version 64 | integer(8) :: kv_pairs 65 | !class(*), allocatable :: demo 66 | integer :: max_len = 64 67 | integer :: i, j, val_type,file_pos, alignment, deficit 68 | integer(4) :: num_layers, emb_length, context_length, head_count, ffn_length, kv_heads, vocab_size 69 | type(multi_type), allocatable :: values(:) 70 | type(multi_type) :: multi_temp 71 | character(:), dimension(:), allocatable :: keys 72 | !type(multi_type), allocatable :: x(:) 73 | !type(ggml_tensor_info), allocatable :: tensors(:) 74 | type(ggml_tensor_info) :: t0 75 | !demo = 3 76 | integer(1) :: tbyte 77 | integer(1) :: tbytes(3) 78 | integer(2) :: f16 79 | integer(2), allocatable :: temp2f16(:,:) 80 | integer(2), allocatable :: tempf16(:) 81 | real(kind=wp), allocatable :: tempf32(:) 82 | real(kind=wp), allocatable :: temp2f32(:,:) 83 | character(:), allocatable :: tempstr 84 | type(generic_tensor) :: temp_gt 85 | !type (args) :: arg_values 86 | 87 | 88 | !real(kind=wp), allocatable :: scores(:) 89 | !character(:), dimension(:), allocatable :: vocab 90 | !integer(4), allocatable :: token_lengths(:) 91 | integer(8) :: tmp_vocab_size 92 | integer(4) :: temp_int, maxlen 93 | 94 | integer(8) :: strlen 95 | 96 | character(:), allocatable :: loaded_str 97 | 98 | integer :: head_size, kv_head_size 99 | 100 | allocate(character(len=max_len) :: tempstr) 101 | verbose = v 102 | 103 | ! assumed to be 32 if not specified 104 | alignment = 32 105 | num_layers = 0 106 | 107 | open(UNIT=5, FILE=filename, FORM="UNFORMATTED",& 108 | &ACCESS="STREAM", STATUS="OLD", POSITION="REWIND", ACTION="READ") 109 | 110 | ! config 111 | 112 | read(5) magic, version, tensor_count, kv_pairs 113 | 114 | if (verbose) then 115 | print *, "GGUF Header Info" 116 | print *, "Magic number: ", magic 117 | print *, "Version: ", version 118 | print *, "Tensor Count: ", tensor_count 119 | print *, "Key-Value Pairs: ", kv_pairs 120 | end if 121 | 122 | if (magic .ne. 1179993927) then 123 | print *, "Magic numbers do not match, exiting" 124 | stop 125 | end if 126 | 127 | allocate(character(len=max_len) :: keys(kv_pairs)) 128 | allocate(values(kv_pairs)) 129 | do i = 1,kv_pairs 130 | keys(i) = read_str(5) 131 | read(5) val_type 132 | values(i) = read_val(5,val_type) 133 | if (keys(i) .eq. "general.alignment") then 134 | alignment = values(i)%i32 135 | if (verbose) then 136 | print *, "alignment set to", alignment 137 | end if 138 | else if (keys(i) .eq. "distilbert.block_count") then 139 | num_layers = values(i)%i32 !assume it's int(4) 140 | else if (keys(i) .eq. "distilbert.embedding_length") then 141 | emb_length = values(i)%i32 142 | else if (keys(i) .eq. "distilbert.attention.head_count") then 143 | head_count = values(i)%i32 144 | else if (keys(i) .eq. "distilbert.context_length") then 145 | context_length = values(i)%i32 146 | else if (keys(i) .eq. "tokenizer.ggml.tokens") then 147 | vocab_size = (size(values(i)%a)) 148 | else if (keys(i) .eq. "distilbert.attention.head_count_kv") then 149 | kv_heads = values(i)%i32 150 | else if (keys(i) .eq. 
"distilbert.feed_forward_length") then 151 | ffn_length = values(i)%i32 152 | end if 153 | 154 | if (verbose) then 155 | print *, keys(i) 156 | call print_multi(values(i)) 157 | end if 158 | end do 159 | 160 | allocate(tensors(tensor_count)) 161 | do i = 1,tensor_count 162 | tensors(i) = read_tensor_info(5) 163 | end do 164 | 165 | ! "level 2 verbose" 166 | if (verbose2) then 167 | do i = 1, tensor_count 168 | write (*, fmt="(A20,I2)",advance="no") tensors(i)%tname, tensors(i)%ndim 169 | do j=1,tensors(i)%ndim 170 | write (*, fmt="(I6)", advance="no") tensors(i)%dims(j) 171 | end do 172 | write (*, fmt="(I2,I11)") tensors(i)%ttype, tensors(i)%offset 173 | end do 174 | end if 175 | 176 | inquire(unit=5,pos=file_pos) 177 | 178 | deficit = mod(file_pos-1,alignment) ! -1 179 | 180 | if (verbose) then 181 | print *, "Position", file_pos 182 | print *, "Deficit", deficit 183 | end if 184 | 185 | if (deficit > 0) then 186 | do i = 1,(alignment-deficit) 187 | read (5) tbyte 188 | if (tbyte /= 0) then 189 | print *, "padding error", tbyte 190 | end if 191 | end do 192 | end if 193 | 194 | inquire(unit=5,pos=file_pos) 195 | 196 | print *, "data offset", file_pos 197 | 198 | !read(5) f16 199 | 200 | !print *, "First value", half_to_float_c(f16) 201 | 202 | 203 | !if (outfile /= "") then 204 | !open(unit=8, file=outfile, form='unformatted', status='unknown', ACCESS="STREAM", action="write") 205 | ! write the header: 206 | if (verbose) then 207 | if (verbose) then 208 | print *, "Embedding dimension: ", emb_length 209 | print *, "Hidden dimension: ", ffn_length 210 | print *, "Layers: ", num_layers 211 | print *, "Heads: ", head_count 212 | print *, "kv Heads: ", kv_heads 213 | print *, "Vocabulary Size: ", vocab_size 214 | print *, "Sequence Length: ", context_length 215 | 216 | end if 217 | 218 | !print *, "Header:" 219 | !print *, emb_length, ffn_length, num_layers, head_count, kv_heads, vocab_size, context_length 220 | end if 221 | !write(8) emb_length, ffn_length, num_layers, head_count, kv_heads, vocab_size, context_length 222 | c%emb_dim = emb_length 223 | c%hidden_dim = ffn_length 224 | c%n_layers = num_layers 225 | c%n_heads = head_count 226 | c%n_kv_heads = kv_heads 227 | c%vocab_size = vocab_size 228 | c%seq_len = context_length 229 | 230 | head_size = emb_length / head_count 231 | kv_head_size = kv_heads * head_size 232 | 233 | if (verbose) then 234 | print *, "head size ", head_size 235 | print *, "kv head Size ", kv_head_size 236 | end if 237 | 238 | t0 = tensor_by_name("token_embd.weight") 239 | temp_gt = read_layer(5,t0,file_pos) 240 | 241 | !call write_tensor(8,temp_gt) 242 | w%word_embeddings = temp_gt%f322d 243 | 244 | if (verbose) then 245 | print *, "loaded word embedding weights:", size(w%word_embeddings) 246 | end if 247 | 248 | t0 = tensor_by_name("position_embd.weight") 249 | temp_gt = read_layer(5,t0,file_pos) 250 | 251 | !call write_tensor(8,temp_gt) 252 | w%position_embeddings = temp_gt%f322d 253 | 254 | if (verbose) then 255 | print *, "loaded position embedding weights:", size(w%position_embeddings) 256 | end if 257 | 258 | 259 | !print *, temp_gt%ttype 260 | !print *, temp_gt%ndims 261 | !print *, w%token_embedding_table(1:10,1) 262 | !print *, "embed sum: ", sum(w%token_embedding_table(1:10,1:10)) 263 | 264 | t0 = tensor_by_name("token_embd_norm.weight") 265 | temp_gt = read_layer(5,t0,file_pos) 266 | 267 | !call write_tensor(8,temp_gt) 268 | w%emb_layer_norm_w = temp_gt%f321d 269 | 270 | if (verbose) then 271 | print *, "loaded embedding layernorm weights:", 
size(w%emb_layer_norm_w)
272 | end if
273 | 
274 | t0 = tensor_by_name("token_embd_norm.bias")
275 | temp_gt = read_layer(5,t0,file_pos)
276 | 
277 | !call write_tensor(8,temp_gt)
278 | w%emb_layer_norm_b = temp_gt%f321d
279 | 
280 | if (verbose) then
281 |     print *, "loaded embedding layernorm bias:", size(w%emb_layer_norm_b)
282 | end if
283 | 
284 | 
285 | allocate(w%wq(emb_length,emb_length,num_layers))
286 | do i = 1,num_layers
287 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".attn_q.weight"
288 |     t0 = tensor_by_name(tempstr)
289 |     temp_gt = read_layer(5,t0,file_pos)
290 |     ! f16
291 |     !call write_tensor(8,temp_gt)
292 |     w%wq(:,:,i) = temp_gt%f322d
293 | end do
294 | 
295 | if (verbose) then
296 |     print *, "loaded wq weights:", size(w%wq)
297 | end if
298 | 
299 | 
300 | allocate(w%bq(emb_length,num_layers))
301 | do i = 1,num_layers
302 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".attn_q.bias"
303 |     t0 = tensor_by_name(tempstr)
304 |     temp_gt = read_layer(5,t0,file_pos)
305 |     ! f16
306 |     !call write_tensor(8,temp_gt)
307 |     w%bq(:,i) = temp_gt%f321d
308 | end do
309 | 
310 | 
311 | if (verbose) then
312 |     print *, "loaded wq bias:", size(w%bq)
313 | end if
314 | 
315 | allocate(w%wk(emb_length,emb_length,num_layers))
316 | do i = 1,num_layers
317 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".attn_k.weight"
318 |     t0 = tensor_by_name(tempstr)
319 |     temp_gt = read_layer(5,t0,file_pos)
320 |     ! f16
321 |     !call write_tensor(8,temp_gt)
322 |     w%wk(:,:,i) = temp_gt%f322d
323 | end do
324 | 
325 | if (verbose) then
326 |     print *, "loaded wk weights:", size(w%wk)
327 | end if
328 | 
329 | allocate(w%bk(emb_length,num_layers))
330 | do i = 1,num_layers
331 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".attn_k.bias"
332 |     t0 = tensor_by_name(tempstr)
333 |     temp_gt = read_layer(5,t0,file_pos)
334 |     ! f16
335 |     !call write_tensor(8,temp_gt)
336 |     w%bk(:,i) = temp_gt%f321d
337 | end do
338 | 
339 | 
340 | if (verbose) then
341 |     print *, "loaded wk bias:", size(w%bk)
342 | end if
343 | 
344 | 
345 | allocate(w%wv(emb_length,emb_length,num_layers))
346 | do i = 1,num_layers
347 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".attn_v.weight"
348 |     t0 = tensor_by_name(tempstr)
349 |     temp_gt = read_layer(5,t0,file_pos)
350 |     ! f16
351 |     !call write_tensor(8,temp_gt)
352 |     w%wv(:,:,i) = temp_gt%f322d
353 | end do
354 | 
355 | !print *, "qkv sum: ", sum(w%wqkv)
356 | if (verbose) then
357 |     print *, "loaded wv weights:", size(w%wv)
358 | end if
359 | 
360 | allocate(w%bv(emb_length,num_layers))
361 | do i = 1,num_layers
362 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".attn_v.bias"
363 |     t0 = tensor_by_name(tempstr)
364 |     temp_gt = read_layer(5,t0,file_pos)
365 |     ! f16
366 |     !call write_tensor(8,temp_gt)
367 |     w%bv(:,i) = temp_gt%f321d
368 | end do
369 | 
370 | 
371 | if (verbose) then
372 |     print *, "loaded wv bias:", size(w%bv)
373 | end if
374 | 
375 | 
376 | 
377 | 
378 | allocate(w%wo(emb_length,emb_length,num_layers))
379 | do i = 1,num_layers
380 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".attn_output.weight"
381 |     t0 = tensor_by_name(tempstr)
382 |     temp_gt = read_layer(5,t0,file_pos)
383 |     ! f16
384 |     !call write_tensor(8,temp_gt)
385 |     w%wo(:,:,i) = temp_gt%f322d
386 | end do
387 | 
388 | if (verbose) then
389 |     print *, "loaded wo weights:", size(w%wo)
390 | end if
391 | 
392 | allocate(w%bo(emb_length,num_layers))
393 | do i = 1,num_layers
394 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".attn_output.bias"
395 |     t0 = tensor_by_name(tempstr)
396 |     temp_gt = read_layer(5,t0,file_pos)
397 |     ! f16
398 |     !call write_tensor(8,temp_gt)
399 |     w%bo(:,i) = temp_gt%f321d
400 | end do
401 | 
402 | 
403 | if (verbose) then
404 |     print *, "loaded wo bias:", size(w%bo)
405 | end if
406 | 
407 | 
408 | 
409 | allocate(w%sa_layer_norm_w(emb_length,num_layers))
410 | do i = 1,num_layers
411 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".ffn_norm.weight"
412 |     t0 = tensor_by_name(tempstr)
413 |     temp_gt = read_layer(5,t0,file_pos)
414 |     ! should be f32
415 |     !call write_tensor(8,temp_gt)
416 |     w%sa_layer_norm_w(:,i) = temp_gt%f321d
417 | end do
418 | 
419 | if (verbose) then
420 |     print *, "loaded sa layernorm weights:", size(w%sa_layer_norm_w)
421 | end if
422 | 
423 | allocate(w%sa_layer_norm_b(emb_length,num_layers))
424 | do i = 1,num_layers
425 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".ffn_norm.bias"
426 |     t0 = tensor_by_name(tempstr)
427 |     temp_gt = read_layer(5,t0,file_pos)
428 |     ! should be f32
429 |     !call write_tensor(8,temp_gt)
430 |     w%sa_layer_norm_b(:,i) = temp_gt%f321d
431 | end do
432 | 
433 | if (verbose) then
434 |     print *, "loaded sa layernorm bias:", size(w%sa_layer_norm_b)
435 | end if
436 | 
437 | 
438 | 
439 | allocate(w%w1(emb_length,ffn_length,num_layers))
440 | do i = 1,num_layers
441 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".ffn_up.weight"
442 |     t0 = tensor_by_name(tempstr)
443 |     temp_gt = read_layer(5,t0,file_pos)
444 |     ! f16
445 |     !call write_tensor(8,temp_gt)
446 |     w%w1(:,:,i) = temp_gt%f322d
447 | end do
448 | 
449 | if (verbose) then
450 |     print *, "loaded w1 weights:", size(w%w1)
451 | end if
452 | 
453 | allocate(w%b1(ffn_length,num_layers))
454 | do i = 1,num_layers
455 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".ffn_up.bias"
456 |     t0 = tensor_by_name(tempstr)
457 |     temp_gt = read_layer(5,t0,file_pos)
458 |     ! f16
459 |     !call write_tensor(8,temp_gt)
460 |     w%b1(:,i) = temp_gt%f321d
461 | end do
462 | 
463 | if (verbose) then
464 |     print *, "loaded w1 bias:", size(w%b1)
465 | end if
466 | 
467 | 
468 | allocate(w%w2(ffn_length,emb_length,num_layers))
469 | do i = 1,num_layers
470 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".ffn_down.weight"
471 |     t0 = tensor_by_name(tempstr)
472 |     temp_gt = read_layer(5,t0,file_pos)
473 |     ! f16
474 |     !call write_tensor(8,temp_gt)
475 |     w%w2(:,:,i) = temp_gt%f322d
476 | end do
477 | 
478 | if (verbose) then
479 |     print *, "loaded w2 (down) weights:", size(w%w2)
480 | end if
481 | 
482 | allocate(w%b2(emb_length,num_layers))
483 | do i = 1,num_layers
484 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".ffn_down.bias"
485 |     t0 = tensor_by_name(tempstr)
486 |     temp_gt = read_layer(5,t0,file_pos)
487 |     ! f16
488 |     !call write_tensor(8,temp_gt)
489 |     w%b2(:,i) = temp_gt%f321d
490 | end do
491 | 
492 | if (verbose) then
493 |     print *, "loaded w2 (down) bias:", size(w%b2)
494 | end if
495 | 
496 | allocate(w%out_layer_norm_w(emb_length,num_layers))
497 | do i = 1,num_layers
498 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".output_norm.weight"
499 |     t0 = tensor_by_name(tempstr)
500 |     temp_gt = read_layer(5,t0,file_pos)
501 |     ! should be f32
502 |     !call write_tensor(8,temp_gt)
503 |     w%out_layer_norm_w(:,i) = temp_gt%f321d
504 | end do
505 | 
506 | if (verbose) then
507 |     print *, "loaded output norm weights:", size(w%out_layer_norm_w)
508 | end if
509 | 
510 | allocate(w%out_layer_norm_b(emb_length,num_layers))
511 | do i = 1,num_layers
512 |     write(tempstr,"(A,I0,A)") "blk.", i-1, ".output_norm.bias"
513 |     t0 = tensor_by_name(tempstr)
514 |     temp_gt = read_layer(5,t0,file_pos)
515 |     ! should be f32
516 |     !call write_tensor(8,temp_gt)
517 |     w%out_layer_norm_b(:,i) = temp_gt%f321d
518 | end do
519 | 
520 | if (verbose) then
521 |     print *, "loaded output norm bias:", size(w%out_layer_norm_b)
522 | end if
523 | 
524 | 
525 | 
526 | !temp2f32 = get_rope_freqs(emb_length/head_count,context_length,10000.0)
527 | !if (verbose) then
528 | !write(*,"(A)") "rope cos: writing float32"
529 | !end if
530 | !write(8) cos(temp2f32(:,:context_length))
531 | !if (verbose) then
532 | !write(*,"(A)") "rope sin: writing float32"
533 | !end if
534 | !write(8) sin(temp2f32(:,:context_length))
535 | ! cos and sin of the above are the cos/sin respectively (f32)
536 | 
537 | t0 = tensor_by_name("output")
538 | temp_gt = read_layer(5,t0,file_pos)
539 | ! f16
540 | !call write_tensor(8,temp_gt)
541 | w%linear = temp_gt%f322d
542 | 
543 | if (verbose) then
544 |     print *, "loaded classifier weights:", size(w%linear)
545 | end if
546 | 
547 | 
548 | !close(8)
549 | !end if ! writing outfile
550 | 
551 | if (.true.) then
552 |     ! re-read the header key/value pairs to extract the tokenizer vocab and scores
553 |     call fseek(5,0,0)
554 |     read(5) magic, version, tensor_count, kv_pairs
555 | 
556 |     if (magic .ne. 1179993927) then
557 |         print *, "Magic numbers do not match, exiting"
558 |         stop
559 |     end if
560 | 
561 |     do i = 1,kv_pairs
562 |         tempstr = read_str(5)
563 |         read(5) val_type
564 |         if (verbose2) then
565 |             print *, "scanning ", tempstr
566 |         end if
567 |         if (tempstr .eq. "tokenizer.ggml.tokens") then
568 |             if (verbose) then
569 |                 print *, "loading tokens"
570 |             end if
571 |             ! allocate
572 |             read(5) temp_int, tmp_vocab_size
573 |             !allocate(val%a(alen))
574 |             !do i = 1,alen
575 |             ! val%a(i) = read_val(handle, atype)
576 |             !end do
577 |             allocate(character(len=max_len) :: vocab(tmp_vocab_size))
578 |             allocate(token_lengths(tmp_vocab_size))
579 |             do j=1,int(tmp_vocab_size,4)
580 |                 read(5) strlen
581 |                 allocate(character(strlen) :: loaded_str)
582 |                 read(5) loaded_str
583 |                 token_lengths(j) = int(strlen,4)
584 |                 vocab(j) = loaded_str
585 |                 deallocate(loaded_str)
586 |             end do
587 |             if (verbose) then
588 |                 write (*,"(A,I0,A)") "found ", size(vocab), " tokens"
589 |             end if
590 | 
591 |         else if (tempstr .eq. "tokenizer.ggml.scores") then
592 |             multi_temp = read_val(5,val_type)
593 |             allocate(scores(size(multi_temp%a)))
594 |             do j = 1,size(multi_temp%a)
595 |                 scores(j) = multi_temp%a(j)%f32
596 |             end do
597 |             if (verbose) then
598 |                 write (*,"(A,I0,A)") "found ", size(multi_temp%a), " scores"
599 |             end if
600 |         else
601 |             multi_temp = read_val(5,val_type)
602 |         end if
603 |     end do
604 | 
605 |     !open(unit=8, file="", form='unformatted', status='unknown', ACCESS="STREAM", action="write")
606 |     maxlen = maxval(token_lengths)
607 | 
608 |     allocate(character(len=max_len) :: vocab_swp(tmp_vocab_size))
609 |     if (verbose) then
610 |         print *, "maximum token length ", maxlen
611 |     end if
612 |     !temp_int = 10
613 |     !write(8) maxlen
614 |     do i=1,size(vocab)
615 |         read(vocab(i)(1:1), "(A)") tbytes(1)
616 |         read(vocab(i)(2:2), "(A)") tbytes(2)
617 |         read(vocab(i)(3:3), "(A)") tbytes(3)
618 | 
619 |         ! bytes -30,-106,-127 (0xE2 0x96 0x81) are the sentencepiece "▁" word-boundary marker
620 |         if ( (tbytes(1) .eq. -30) .and.&
621 |             &(tbytes(2) .eq. -106) .and.&
622 |             &(tbytes(3) .eq. -127) ) then
623 |             allocate(character(token_lengths(i)-2) :: loaded_str)
624 |             loaded_str(1:1) = " "
625 |             loaded_str(2:) = vocab(i)(4:token_lengths(i))
626 |             !write(8) scores(i),token_lengths(i)-2,loaded_str
627 |             token_lengths(i) = token_lengths(i)-2
628 |             vocab_swp(i) = loaded_str
629 |             deallocate(loaded_str)
630 |         else
631 |             !write(8) scores(i),token_lengths(i),vocab(i)(1:token_lengths(i))
632 |             vocab_swp(i) = vocab(i)(1:token_lengths(i))
633 |         end if
634 |     end do
635 | 
636 | end if
637 | 
638 | !close(8)
639 | 
640 | close(5)
641 | vocab = vocab_swp
642 | end subroutine
643 | 
644 | 
645 | subroutine write_tensor(handle, t)
646 | integer :: handle
647 | type(generic_tensor) :: t
648 | 
649 | if (t%ttype .eq. 0) then
650 |     if (verbose) then
651 |         write(*,"(A)") "writing float32"
652 |     end if
653 |     if (t%ndims .eq. 1) then
654 |         write(handle) t%f321d
655 |     else if (t%ndims .eq. 2) then
656 |         write(handle) t%f322d
657 |     end if
658 | else if (t%ttype .eq. 1) then
659 |     if (verbose) then
660 |         write(*,"(A)") "writing fp16"
661 |     end if
662 |     if (t%ndims .eq. 1) then
663 |         write(handle) t%f161d
664 |     else if (t%ndims .eq. 2) then
665 |         write(handle) t%f162d
666 |     end if
667 | end if
668 | 
669 | 
670 | end subroutine
671 | 
672 | function get_rope_freqs(i_dim, i_end, theta) result(freq_array)
673 | integer :: i_dim, i_end
674 | real(kind=wp) :: theta
675 | !real(kind=wp) :: cis(i_end/2,2)
676 | real(kind=wp),allocatable :: freqs(:)
677 | real(kind=wp),allocatable :: freq_array(:,:)
678 | real(kind=wp) :: irange(i_dim/2)
679 | integer :: i
680 | do i = 1,i_dim/2
681 |     irange(i) = 2.0*(i-1) / i_dim
682 | end do
683 | freqs = 1.0 / (theta ** irange)
684 | 
685 | allocate(freq_array(size(freqs),i_end)) ! may need transposing
686 | do i = 0,(i_end-1)
687 |     freq_array(:,i+1) = i*freqs
688 | end do
689 | 
690 | end function
691 | 
692 | function tensor_by_name(s)
693 | character(len=*) :: s
694 | integer :: i
695 | type(ggml_tensor_info) :: tensor_by_name
696 | do i=1,tensor_count
697 |     if (tensors(i)%tname .eq. s) then
698 |         tensor_by_name = tensors(i)
699 |         return
700 |     end if
701 | end do
702 | print *, "key not found ", s
703 | stop
704 | end
705 | function prod(a)
706 | integer(8) :: a(:)
707 | integer :: i
708 | integer(8) :: prod
709 | prod = 1
710 | do i = 1,size(a)
711 |     prod = prod * a(i)
712 | end do
713 | end function
714 | 
715 | function read_layer_fp16(handle, layer) result(d)
716 | integer :: handle
717 | type(ggml_tensor_info) :: layer
718 | integer(2), allocatable :: d(:)
719 | if (verbose) then
720 |     write(*,"(A,A26)",advance="no") "reading",layer%tname
721 | end if
722 | !call fseek(handle,layer%offset+file_pos,0)
723 | allocate(d(prod(layer%dims)))
724 | read(handle) d
725 | if (verbose) then
726 |     write(*,"(A)") "... done"
727 | end if
728 | 
729 | end function
730 | 
731 | function read_layer(handle, layer,file_pos) result(d)
732 | integer :: handle
733 | type(ggml_tensor_info) :: layer
734 | type(generic_tensor) :: d
735 | integer :: file_pos
736 | !integer(2), allocatable :: d(:)
737 | !if (verbose) then
738 | ! write(*,"(A,A26)",advance="no") "reading",layer%tname
739 | !end if
740 | call fseek(handle,layer%offset+file_pos-1,0)
741 | d%ttype = layer%ttype
742 | d%ndims = layer%ndim
743 | 
744 | if (d%ttype .eq. 0) then
745 |     if (d%ndims .eq. 1) then
746 |         allocate(d%f321d(layer%dims(1)))
747 |         read(handle) d%f321d
748 |     else if (d%ndims .eq. 2) then
749 |         allocate(d%f322d(layer%dims(1),layer%dims(2)))
750 |         read(handle) d%f322d
751 |     else
752 |         print *, "Ndims not supported", layer%dims
753 |     end if
754 | else if (d%ttype .eq. 1) then
755 |     if (d%ndims .eq. 1) then
756 |         allocate(d%f161d(layer%dims(1)))
757 |         read(handle) d%f161d
758 |     else if (d%ndims .eq. 2) then
759 |         allocate(d%f162d(layer%dims(1),layer%dims(2)))
760 |         read(handle) d%f162d
761 |     else
762 |         print *, "Ndims not supported", layer%dims
763 |     end if
764 | else
765 |     print *, "Type not supported", layer%ttype
766 | end if
767 | 
768 | !if (verbose) then
769 | ! write(*,"(A)") "... done"
770 | !end if
771 | 
772 | end function
773 | 
774 | function read_str(handle)
775 | integer :: handle
776 | integer(8) :: strlen
777 | 
778 | character(:), allocatable :: read_str
779 | read(handle) strlen
780 | allocate(character(strlen) :: read_str)
781 | read(handle) read_str
782 | 
783 | end function
784 | 
785 | recursive function read_val(handle, val_type) result (val)
786 | integer :: handle, val_type, i
787 | character (:), allocatable :: temp
788 | type(multi_type) :: val
789 | integer(4) :: atype
790 | integer(8) :: alen
791 | 
792 | val%type_num = val_type
793 | 
794 | if (val_type .eq. 8) then
795 |     temp = read_str(handle)
796 |     !print *, temp
797 |     val%string = temp
798 | 
799 | else if (val_type .eq. 4) then
800 |     ! read in an int32
801 |     read(handle) val%i32
802 | else if (val_type .eq. 6) then
803 |     read(handle) val%f32
804 | else if (val_type .eq. 5) then
805 |     read(handle) val%i32
806 | else if (val_type .eq. 9) then
807 |     read(handle) atype, alen
808 |     allocate(val%a(alen))
809 |     do i = 1,alen
810 |         val%a(i) = read_val(handle, atype)
811 |     end do
812 | 
813 | else
814 |     print *, "Not implemented", val_type
815 |     stop
816 | end if
817 | 
818 | 
819 | end function
820 | 
821 | subroutine print_multi(m)
822 | type(multi_type) :: m
823 | if (m%type_num .eq. 8) then
824 |     print *, m%string
825 | else if (m%type_num .eq. 4) then
826 |     print *, m%i32
827 | else if (m%type_num .eq. 5) then
828 |     print *, m%i32
829 | else if (m%type_num .eq. 6) then
830 |     print *, m%f32
831 | else if (m%type_num .eq. 9) then
832 |     print *, size(m%a)
833 | end if
834 | 
835 | end subroutine
836 | 
837 | function read_tensor_info(handle) result(info)
838 | integer :: handle, i
839 | type(ggml_tensor_info) :: info
840 | info%tname = read_str(handle)
841 | read(handle) info%ndim
842 | allocate(info%dims(info%ndim))
843 | do i = 1,info%ndim
844 |     read(handle) info%dims(i)
845 | end do
846 | read(handle) info%ttype
847 | read(handle) info%offset
848 | 
849 | end function
850 | 
851 | 
852 | end module
853 | 
--------------------------------------------------------------------------------