├── examples
│   ├── none
│   ├── models.ipynb
│   └── Fine-tuning outputs.ipynb
├── logo
│   ├── logo_credits.txt
│   ├── logo-black.png
│   ├── logo-color.png
│   ├── logo-white.png
│   └── logo-no-background.png
├── requirements.txt
├── LICENSE
├── .gitignore
├── GPT.py
├── MinimalGPT.py
├── MinimalGPT_2.py
└── README.md
/examples/none: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /logo/logo_credits.txt: -------------------------------------------------------------------------------- 1 | https://logo.com/ 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.22.4 2 | pandas==1.4.2 3 | tensorflow==2.9.1 4 | tqdm==4.64.0 5 | -------------------------------------------------------------------------------- /logo/logo-black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhaskumarsinha/MinimalGPT/HEAD/logo/logo-black.png -------------------------------------------------------------------------------- /logo/logo-color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhaskumarsinha/MinimalGPT/HEAD/logo/logo-color.png -------------------------------------------------------------------------------- /logo/logo-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhaskumarsinha/MinimalGPT/HEAD/logo/logo-white.png -------------------------------------------------------------------------------- /logo/logo-no-background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abhaskumarsinha/MinimalGPT/HEAD/logo/logo-no-background.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Abhas Kumar Sinha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /examples/models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "04c725fb", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "from MinimalGPT import MinimalGPT" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "id": "4bed3bd1", 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "(None, 10, 128)\n" 26 | ] 27 | }, 28 | { 29 | "name": "stderr", 30 | "output_type": "stream", 31 | "text": [ 32 | "100%|████████████████████████████████████████████████████████████████████████████████| 190/190 [00:06<00:00, 31.25it/s]\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "model = MinimalGPT(output_length = 200,\n", 38 | " gpt_input = 10,\n", 39 | " d_model = 128,\n", 40 | " h = 8,\n", 41 | " decoder_stacks = 1,\n", 42 | " load_tokenizer = './models/tokenizer3.mgt',\n", 43 | " load_weights = './models/weights3.mgw',\n", 44 | " inference_only = True,\n", 45 | " return_model_and_vectorizer_and_output = True)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "id": "4d77e4ea", 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "(,\n", 58 | " ,\n", 59 | " ' of her own the morning looked at the other of the old place, she had not been careless and their way to her out of the next day’s dinner... or the butcher’s book.... At last even these dim sensations spent themselves in the thickening obscurity which enveloped her; a dusk now filled with pale geometric roses, circling softly, interminably before her, now darkened to a uniform blue-blackness, the hue of a summer night without stars. And into this darkness she felt herself sinking, sinking, with the gentle sense of security of one upheld from beneath. Like a tepid tide it rose around her, gliding ever higher and higher, folding in its velvety embrace her relaxed and tired body, now submerging her breast and shoulders, now creeping gradually, with soft inexorableness, over her throat to her chin, to her ears, to her mouth.... 
Ah, now it was rising too high; the impulse to struggle was renewed;... ')" 60 | ] 61 | }, 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "model" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "id": "620717af", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Model: \"model\"\n", 82 | "_________________________________________________________________\n", 83 | " Layer (type) Output Shape Param # \n", 84 | "=================================================================\n", 85 | " input_1 (InputLayer) [(None, 10)] 0 \n", 86 | " \n", 87 | " embedding (Embedding) (None, 10, 128) 1826816 \n", 88 | " \n", 89 | " positional_embedding (Posit (None, 10, 128) 0 \n", 90 | " ionalEmbedding) \n", 91 | " \n", 92 | " decoder (Decoder) (None, 10, 128) 37160 \n", 93 | " \n", 94 | " flatten (Flatten) (None, 1280) 0 \n", 95 | " \n", 96 | " dense (Dense) (None, 14273) 18283713 \n", 97 | " \n", 98 | " tf.nn.softmax (TFOpLambda) (None, 14273) 0 \n", 99 | " \n", 100 | "=================================================================\n", 101 | "Total params: 20,147,689\n", 102 | "Trainable params: 20,147,689\n", 103 | "Non-trainable params: 0\n", 104 | "_________________________________________________________________\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "model[0].summary()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "id": "3dba19e2", 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "[array([b'ut@@', b'conversation,', b'picturesque', ..., b'mankind',\n", 122 | " b'costu@@', b'bott@@'], dtype=object)]" 123 | ] 124 | }, 125 | "execution_count": 6, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "model[1].get_weights()" 132 | ] 133 | } 134 | ], 135 | "metadata": { 136 | "kernelspec": { 137 | "display_name": "Python 3 (ipykernel)", 138 | "language": "python", 139 | "name": "python3" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 3 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython3", 151 | "version": "3.9.5" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 5 156 | } 157 | -------------------------------------------------------------------------------- /GPT.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import tensorflow as tf 4 | import math 5 | from tqdm import tqdm 6 | 7 | def scaled_dot_product_attention(q, k, v): 8 | # calculate the dot product of query and key 9 | dot_product = tf.matmul(q, k, transpose_b=True) 10 | 11 | 12 | # scale the dot product 13 | scaled_dot_product = dot_product / tf.math.sqrt(tf.cast(tf.shape(k)[-1], dtype=tf.float32)) 14 | 15 | # apply softmax activation to obtain attention weights 16 | attention_weights = tf.nn.softmax(scaled_dot_product, axis=-1) 17 | 18 | # compute the weighted sum of the value vectors with attention weights 19 | output = tf.matmul(attention_weights, v) 20 | 21 | return output 22 | 23 | 24 | class LinearLayer(tf.keras.layers.Layer): 25 | def __init__(self, ix, ox): 26 | super().__init__() 27 | self.ix = ix 28 | self.ox = ox 29 | 30 | 31 | def build(self, 
input_shapes): 32 | self.w1 = self.add_weight(shape=(self.ix, self.ox)) 33 | self.b1 = self.add_weight(shape=(1, self.ox)) 34 | 35 | def call(self, inputs): 36 | bz, key = tf.shape(inputs)[0], tf.shape(inputs)[1] 37 | inputs = tf.reshape(inputs, (-1, self.ix)) 38 | inputs = tf.matmul(inputs, self.w1) + self.b1 39 | inputs = tf.reshape(inputs, (bz, key, self.ox)) 40 | return inputs 41 | 42 | 43 | 44 | class split_heads(tf.keras.layers.Layer): 45 | def __init__(self, num_heads = 10): 46 | super().__init__() 47 | self.num_heads = num_heads 48 | 49 | def call(self, inputs): 50 | bz, key = tf.shape(inputs)[0], tf.shape(inputs)[1] 51 | 52 | inputs = tf.reshape(inputs, (bz, key, self.num_heads, -1)) 53 | inputs = tf.transpose(inputs, (0, 2, 1, 3)) 54 | 55 | return inputs 56 | 57 | 58 | class merge_heads(tf.keras.layers.Layer): 59 | def __init__(self): 60 | super().__init__() 61 | 62 | def call(self, inputs): 63 | bz, key = tf.shape(inputs)[0], tf.shape(inputs)[2] 64 | 65 | inputs = tf.transpose(inputs, (0, 2, 1, 3)) 66 | inputs = tf.reshape(inputs, (bz, key, -1)) 67 | return inputs 68 | 69 | 70 | 71 | class GPT_Attention(tf.keras.layers.Layer): 72 | 73 | def __init__(self, ix, ox, num_heads): 74 | super().__init__() 75 | self.ix = ix 76 | self.ox = ox 77 | self.num_heads = num_heads 78 | self.linear1 = LinearLayer(self.ix, self.ox * 3) 79 | self.split = split_heads(num_heads = self.num_heads) 80 | self.merge = merge_heads() 81 | self.linear2 = LinearLayer(self.ox, self.ix) 82 | 83 | if self.ox % self.num_heads != 0: 84 | raise ValueError('The value ox = '+ str(self.ox) +' SHOULD be divisible by number of heads provided') 85 | 86 | def call(self, inputs): 87 | if len(inputs) > 0: 88 | inputs = inputs[0] 89 | inputs = self.linear1(inputs) 90 | k, q, v = tf.split(inputs, 3, axis = -1) 91 | k = self.split(k) 92 | q = self.split(q) 93 | v = self.split(v) 94 | #k, q, v = tf.split(inputs, 3, axis = -1) 95 | inputs = scaled_dot_product_attention(k, q, v) 96 | inputs = self.merge(inputs) 97 | inputs = self.linear2(inputs) 98 | 99 | return inputs 100 | 101 | 102 | 103 | class MultiHeadAttention(tf.keras.layers.Layer): 104 | def __init__(self, num_heads = 8, key_dim = 64, key_embedding = 512): 105 | super(MultiHeadAttention, self).__init__() 106 | self.num_heads = num_heads 107 | self.key_dim = key_dim 108 | self.key_embedding = key_embedding 109 | self.head_vectors = [] 110 | 111 | def build(self, input_shape): 112 | #print(input_shape) 113 | 114 | self.W_k = self.add_weight(shape=(self.num_heads, self.key_dim, self.key_embedding), name='key') 115 | self.W_q = self.add_weight(shape=(self.num_heads, self.key_dim, self.key_embedding), name='query') 116 | self.W_v = self.add_weight(shape=(self.num_heads, self.key_dim, self.key_embedding), name='value') 117 | 118 | self.W_o = self.add_weight(shape=(self.key_dim, self.key_embedding)) 119 | 120 | 121 | def call(self, inputs): 122 | query, key, value = inputs 123 | 124 | self.head_vectors = [] 125 | head_concat = None 126 | 127 | for i in range(self.num_heads): 128 | q = tf.einsum('bij, ij -> bij', query, self.W_q[i]) 129 | k = tf.einsum('bij, ij -> bij', key, self.W_k[i]) 130 | v = tf.einsum('bij, ij -> bij', value, self.W_v[i]) 131 | 132 | self.head_vectors += [scaled_dot_product_attention(q, k, v)] 133 | 134 | 135 | head_concat = tf.concat(self.head_vectors, -2) 136 | #print(tf.shape(head_concat)) 137 | output =tf.einsum('bij, kj -> bkj', head_concat, self.W_o) 138 | 139 | 140 | return output 141 | 142 | class Decoder(tf.keras.layers.Layer): 143 | def 
__init__(self, num_heads = 8, key_dim = 64, key_embedding = 512, GPT_attention = False): 144 | super(Decoder, self).__init__() 145 | 146 | self.num_heads = num_heads 147 | self.key_dim = key_dim 148 | self.key_embedding = key_embedding 149 | if GPT_attention: 150 | self.attention = GPT_Attention(key_embedding, key_embedding, num_heads) 151 | else: 152 | self.attention = MultiHeadAttention(num_heads = num_heads, key_dim = key_dim, key_embedding = key_embedding) 153 | self.normalize1 = tf.keras.layers.LayerNormalization(axis = -2) 154 | self.normalize2 = tf.keras.layers.LayerNormalization(axis = -2) 155 | 156 | 157 | def build(self, input_shape): 158 | #print(input_shape) 159 | 160 | self.x1 = self.add_weight(shape=(self.key_dim, self.key_embedding), name='vec1') 161 | self.x2 = self.add_weight(shape=(self.key_dim, self.key_embedding), name='vec2') 162 | 163 | self.y1 = self.add_weight(shape=(self.key_dim, self.key_embedding), name='bias1') 164 | self.y2 = self.add_weight(shape=(self.key_dim, self.key_embedding), name='bias2') 165 | 166 | def call(self, inputs): 167 | 168 | first_sublayer_output = self.attention((inputs, inputs, inputs)) 169 | first_sublayer_output = self.normalize1(first_sublayer_output + inputs) 170 | 171 | first_nn = tf.einsum('bij, ij -> bij', first_sublayer_output, self.x1) + self.y1 172 | first_nn = tf.keras.activations.relu(first_nn, alpha=0.0, max_value=None, threshold=0.0) 173 | second_nn = tf.einsum('bij, ij -> bij', first_nn, self.x2) + self.y2 174 | 175 | second_sublayer_output = self.normalize2(second_nn + first_sublayer_output) 176 | 177 | 178 | 179 | return second_sublayer_output 180 | 181 | def positional_function(words, embedding): 182 | pos = np.zeros((words, embedding)) 183 | 184 | for i in range(words): 185 | for j in range(embedding): 186 | if j%2 == 0: 187 | pos[i, j] = math.sin(i/pow(10000, 2*j/(512))) 188 | else: 189 | pos[i, j] = math.cos(i/pow(10000, 2*j/(512))) 190 | 191 | return pos 192 | 193 | 194 | class PositionalEmbedding(tf.keras.layers.Layer): 195 | def __init__(self, positional_function = positional_function, embedding_size = 512, words = 64): 196 | super(PositionalEmbedding, self).__init__() 197 | self.embedding_size = embedding_size 198 | self.words = words 199 | self.pos_mat = tf.cast(tf.convert_to_tensor(positional_function(self.words, self.embedding_size)), tf.float32) 200 | 201 | def build(self, input_sizes): 202 | print(input_sizes) 203 | 204 | def call(self, inputs): 205 | embed = tf.einsum("bij, ij -> bij", inputs, self.pos_mat) 206 | return embed 207 | 208 | def generate_output(model, vectorizer, text_size = 70, gpt_input = 64, input_sequence = []): 209 | 210 | if input_sequence == []: 211 | input_sequence = tf.zeros((1, gpt_input)).numpy() 212 | 213 | text = tf.zeros((1, text_size)).numpy() 214 | text[0][: gpt_input] = input_sequence[0][: gpt_input] 215 | 216 | GPT = model 217 | 218 | 219 | for i in tqdm(range(gpt_input, text_size)): 220 | #print("Iteration number:" + str(i)) 221 | output = tf.argmax(GPT(input_sequence), -1).numpy() 222 | text[0][i - 1] = output 223 | input_sequence = text[0][i - gpt_input : i].reshape(1, gpt_input) 224 | 225 | op = [vectorizer.get_vocabulary()[int(text[0][i])] for i in range(len(text[0]))] 226 | return ' '.join(op) -------------------------------------------------------------------------------- /MinimalGPT.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import tensorflow as tf 4 | from tqdm import tqdm 5 | from GPT import * 6 
| import pickle 7 | import argparse 8 | 9 | 10 | def get_model(gpt_input, d_model, h, vocab_size, decoder_stacks): 11 | input_words = tf.keras.layers.Input((gpt_input)) 12 | embedding = tf.keras.layers.Embedding(vocab_size + 2, d_model)(input_words) 13 | positional_enc = PositionalEmbedding(words = gpt_input, embedding_size = d_model)(embedding) 14 | decoder = Decoder(num_heads = 8, key_dim = gpt_input, key_embedding = d_model)(positional_enc) 15 | 16 | for _ in range(decoder_stacks - 1): 17 | decoder = Decoder(num_heads = 8, key_dim = gpt_input, key_embedding = d_model)(decoder) 18 | 19 | decoder = tf.keras.layers.Flatten()(decoder) 20 | linear_layer = tf.keras.layers.Dense(vocab_size + 3)(decoder) 21 | softmax = tf.nn.softmax(linear_layer) 22 | GPT = tf.keras.Model(inputs = input_words, outputs = softmax) 23 | 24 | return GPT 25 | 26 | 27 | def MinimalGPT(data_path='.', 28 | learning_rate=0, 29 | output_length=0, 30 | epochs = 1, 31 | batch_size = 1, 32 | gpt_input=10, 33 | d_model=128, 34 | h=8, 35 | decoder_stacks=1, 36 | token_start=0, 37 | token_end=40000, 38 | vocabulary_start = 0, 39 | vocabulary_end = 40000, 40 | save=False, 41 | load_tokenizer=None, 42 | load_weights=None, 43 | save_tokenizer=None, 44 | save_weights=None, 45 | optimizer=None, 46 | inference_only = False, 47 | return_model_and_vectorizer = False, 48 | return_model_and_vectorizer_and_output = False): 49 | 50 | 51 | if inference_only == False: 52 | with open(data_path, 'r', encoding = 'utf-8') as file: 53 | corpus = file.read() 54 | file_contents = corpus.split()[token_start : token_end] 55 | print("Total tokens: " + str(len(file_contents))) 56 | 57 | 58 | if load_tokenizer: 59 | with open(load_tokenizer, 'r') as f: 60 | encoded_vocabulary = json.load(f) 61 | 62 | # Decode the encoded vocabulary to original strings 63 | vocabulary = [word.encode('utf-8').decode('unicode_escape') for word in encoded_vocabulary] 64 | vectorizer = tf.keras.layers.TextVectorization(standardize = None, split = 'whitespace') 65 | vectorizer.set_vocabulary(vocabulary) 66 | vocab_size = vectorizer.vocabulary_size() 67 | 68 | else: 69 | vocab = [] 70 | for word in tqdm(corpus.split()[vocabulary_start : vocabulary_end]): 71 | vocab += [word] 72 | vocab = list(set(vocab)) 73 | vocab_size = len(vocab) 74 | vectorizer = tf.keras.layers.TextVectorization(standardize = None, split = 'whitespace', vocabulary = vocab) 75 | print('New Vectorizer created successfully...') 76 | print("Vocabulary Size: " + str(vocab_size)) 77 | 78 | 79 | if inference_only == False: 80 | input_tokens, output_tokens = [], [] 81 | for i in tqdm(range(len(file_contents) - gpt_input - 1)): 82 | input_tokens += [file_contents[i : i + gpt_input]] 83 | output_tokens += [file_contents[i + gpt_input]] 84 | 85 | 86 | X = [' '.join(input_tokens[i]) for i in tqdm(range(len(input_tokens)))] 87 | Y = output_tokens 88 | 89 | del corpus 90 | 91 | X = vectorizer(X) 92 | Y = vectorizer(Y) 93 | 94 | if load_weights: 95 | model = get_model(gpt_input = gpt_input, d_model = d_model, h = h, decoder_stacks = decoder_stacks, vocab_size = vocab_size - 2) 96 | 97 | with open(load_weights, 'rb') as file: 98 | W = pickle.load(file) 99 | model.set_weights(W) 100 | else: 101 | model = get_model(gpt_input = gpt_input, d_model = d_model, h = h, decoder_stacks = decoder_stacks, vocab_size = vocab_size) 102 | 103 | 104 | if inference_only == False: 105 | # Compile the model 106 | if not optimizer: 107 | model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), 
loss='sparse_categorical_crossentropy') 108 | else: 109 | model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy') 110 | 111 | # Train the model 112 | if learning_rate > 0: 113 | model.fit(X, Y, batch_size = batch_size, epochs=epochs) 114 | 115 | 116 | # Print the output of the Model 117 | output_seq = generate_output(gpt_input = gpt_input, model = model, vectorizer = vectorizer, text_size = output_length, input_sequence = []) 118 | 119 | if save: 120 | # Save the GPT Model 121 | with open(save_weights, 'wb') as file: 122 | pickle.dump(model.weights, file) 123 | 124 | #Save the Vectorizer Model 125 | vocabulary = vectorizer.get_vocabulary() 126 | 127 | # Encode the vocabulary as JSON-compatible strings 128 | encoded_vocabulary = [word.encode('unicode_escape').decode('utf-8') for word in vocabulary] 129 | encoded_vocabulary = encoded_vocabulary[2:] 130 | 131 | # Save the encoded vocabulary to a JSON file 132 | with open(save_tokenizer, 'w') as f: 133 | json.dump(encoded_vocabulary, f) 134 | print("Vocabulary size saved: " + str(len(encoded_vocabulary))) 135 | 136 | 137 | if return_model_and_vectorizer: 138 | return model, vectorizer 139 | elif return_model_and_vectorizer_and_output: 140 | return model, vectorizer, output_seq.replace('@@ ', '') 141 | else: 142 | return output_seq.replace('@@ ', '') 143 | 144 | 145 | 146 | # Example code to execute when the script file is called 147 | 148 | def main(): 149 | print("This code is executed when the script file is called directly.") 150 | 151 | # Check if the script is being run as the main module 152 | if __name__ == '__main__': 153 | parser = argparse.ArgumentParser() 154 | parser.add_argument('-d', '--data-path', help='File: Corresponding to corpus or training text [String]') 155 | parser.add_argument('-l', '--learning-rate', help='Float: Learning Rate. 
The model will train ONLY IF the rate is > 0, skip otherwise [Float]', type=float) 156 | parser.add_argument('-ol', '--output-length', help='Length of the output sequence to be generated', type=int) 157 | parser.add_argument('-e', '--epochs', help='Number of training Epochs [Int]', type=int) 158 | parser.add_argument('-b', '--batch-size', help='Size of each batch [Int]', type=int) 159 | parser.add_argument('-s', '--gpt-input', help='Number of Tokens of text the model inputs at a time [Int]', type=int) 160 | parser.add_argument('-dm', '--d-model', help='Embedding layer output dimensions [Int]', type=int) 161 | parser.add_argument('-p', '--multi-head', help='Number of Multi-head Attention layer in parallel [Int]', type=int) 162 | parser.add_argument('-ds', '--decoder-stacks', help='Number of stacked Decoder layer [Int]', type=int) 163 | parser.add_argument('-ts', '--token-start', help='The token number in the corpus to mark it as the starting point of the training [Int]', type=int) 164 | parser.add_argument('-te', '--token-end', help='The token number in the corpus to mark it as the end point of the training [Int]', type=int) 165 | parser.add_argument('-vs', '--vocabulary-start', help='Token number from the corpus to mark the starting point of vocabulary data [Int]', type=int) 166 | parser.add_argument('-ve', '--vocabulary-end', help='Token number from the corpus to mark the end point of vocabulary data [Int]', type=int) 167 | parser.add_argument('-sd', '--save', help='Save the Model and Vectorizer data to disk [True/False]', action='store_true') 168 | parser.add_argument('-lt', '--load-tokenizer', help='File: Vectorization layer [File]') 169 | parser.add_argument('-lw', '--load-weights', help='File: Model Weights [File]') 170 | parser.add_argument('-st', '--save-tokenizer', help='File: Saving Vectorizer File [File]') 171 | parser.add_argument('-sw', '--save-weights', help='File: Saving Model Weights[File]') 172 | parser.add_argument('-ot', '--optimizer', help='Optimizer consistent to TensorFlow optimizer class [tf.keras.optimizers]') 173 | parser.add_argument('-i', '--inference-only', help='Only Print the output of the model in Inference Mode [True/False]', action='store_true') 174 | parser.add_argument('-mv', '--model-vectorizer', help='Return Model, Vectorizer Tuple [True/False]', action='store_true') 175 | parser.add_argument('-mvo', '--model-vectorizer-output', help='Return Model, Vectorizer, Output Tuple [True/False]', action='store_true') 176 | 177 | 178 | args = parser.parse_args() 179 | 180 | 181 | data_path = args.data_path 182 | learning_rate = args.learning_rate 183 | output_length = args.output_length 184 | epochs = args.epochs 185 | batch_size = args.batch_size 186 | gpt_input = args.gpt_input 187 | d_model = args.d_model 188 | h = args.multi_head 189 | stacks = args.decoder_stacks 190 | token_start = args.token_start 191 | token_end = args.token_end 192 | vocabulary_start = args.vocabulary_start 193 | vocabulary_end = args.vocabulary_end 194 | save = args.save 195 | load_tokenizer = args.load_tokenizer 196 | load_weights = args.load_weights 197 | save_tokenizer = args.save_tokenizer 198 | save_weights = args.save_weights 199 | optimizer = args.optimizer 200 | inference_only = args.inference_only 201 | model_and_vectorizer = args.model_vectorizer 202 | model_vectorizer_output = args.model_vectorizer_output 203 | 204 | 205 | 206 | configuration = { 207 | 'data_path': args.data_path, 208 | 'learning_rate': args.learning_rate, 209 | 'output_length': args.output_length, 210 | 
'epochs': args.epochs, 211 | 'batch_size': args.batch_size, 212 | 'gpt_input': args.gpt_input, 213 | 'd_model': args.d_model, 214 | 'h': args.multi_head, 215 | 'stacks': args.decoder_stacks, 216 | 'token_start': args.token_start, 217 | 'token_end': args.token_end, 218 | 'vocabulary_start': args.vocabulary_start, 219 | 'vocabulary_end': args.vocabulary_end, 220 | 'save': args.save, 221 | 'load_tokenizer': args.load_tokenizer, 222 | 'load_weights': args.load_weights, 223 | 'save_tokenizer': args.save_tokenizer, 224 | 'save_weights': args.save_weights, 225 | 'optimizer': args.optimizer, 226 | 'inference_only': args.inference_only, 227 | 'model_and_vectorizer': args.model_vectorizer, 228 | 'model_vectorizer_output': args.model_vectorizer_output 229 | } 230 | 231 | # Save the configuration to a JSON file 232 | with open('last-configuration.json', 'w') as file: 233 | json.dump(configuration, file) 234 | 235 | 236 | 237 | output = MinimalGPT(data_path = data_path, 238 | learning_rate = learning_rate, 239 | output_length = output_length, 240 | epochs = epochs, 241 | batch_size = batch_size, 242 | gpt_input = gpt_input, 243 | d_model = d_model, 244 | h = h, 245 | decoder_stacks = stacks, 246 | token_start = token_start, 247 | token_end = token_end, 248 | vocabulary_start = vocabulary_start, 249 | vocabulary_end = vocabulary_end, 250 | save = save, 251 | load_tokenizer = load_tokenizer, 252 | load_weights = load_weights, 253 | save_tokenizer = save_tokenizer, 254 | save_weights = save_weights, 255 | optimizer = optimizer, 256 | inference_only = inference_only, 257 | return_model_and_vectorizer = model_and_vectorizer, 258 | return_model_and_vectorizer_and_output = model_vectorizer_output) 259 | 260 | print(output) -------------------------------------------------------------------------------- /MinimalGPT_2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import tensorflow as tf 4 | from tqdm import tqdm 5 | from GPT import * 6 | import pickle 7 | import argparse 8 | import sys 9 | 10 | 11 | 12 | def save_module(save_weights, model, vectorizer, save_tokenizer): 13 | 14 | # Save the GPT Model 15 | with open(save_weights, 'wb') as file: 16 | pickle.dump(model.weights, file) 17 | 18 | #Save the Vectorizer Model 19 | vocabulary = vectorizer.get_vocabulary() 20 | 21 | # Encode the vocabulary as JSON-compatible strings 22 | encoded_vocabulary = [word.encode('unicode_escape').decode('utf-8') for word in vocabulary] 23 | encoded_vocabulary = encoded_vocabulary[2:] 24 | 25 | # Save the encoded vocabulary to a JSON file 26 | with open(save_tokenizer, 'w') as f: 27 | json.dump(encoded_vocabulary, f) 28 | print("Vocabulary size saved: " + str(len(encoded_vocabulary))) 29 | 30 | 31 | 32 | 33 | 34 | def read_file(f, vectorizer, chunk_size = 1024, starting_chunk = 0, ending_chunk = 5, gpt_input = 10): 35 | i = 0 36 | chunk = [] 37 | 38 | while True: 39 | data = f.read(chunk_size) 40 | 41 | if not data or i > ending_chunk: 42 | break 43 | 44 | if i >= starting_chunk and i <= ending_chunk: 45 | file_contents = data.split() 46 | input_tokens, output_tokens = [], [] 47 | for j in range(len(file_contents) - gpt_input - 1): 48 | input_tokens += [file_contents[j : j + gpt_input]] 49 | output_tokens += [file_contents[j + gpt_input]] 50 | 51 | 52 | X = [' '.join(input_tokens[j]) for j in range(len(input_tokens))] 53 | Y = output_tokens 54 | 55 | X = vectorizer(X) 56 | Y = vectorizer(Y) 57 | 58 | output = tf.concat([X, Y], 1) 59 | 60 | yield output 61 | 62 
| i += 1 63 | 64 | 65 | def get_model(gpt_input, d_model, h, vocab_size, decoder_stacks, GPT_attention): 66 | input_words = tf.keras.layers.Input((gpt_input)) 67 | embedding = tf.keras.layers.Embedding(vocab_size + 2, d_model)(input_words) 68 | positional_enc = PositionalEmbedding(words = gpt_input, embedding_size = d_model)(embedding) 69 | decoder = Decoder(num_heads = h, key_dim = gpt_input, key_embedding = d_model, GPT_attention = GPT_attention)(positional_enc) 70 | 71 | for _ in range(decoder_stacks - 1): 72 | decoder = Decoder(num_heads = h, key_dim = gpt_input, key_embedding = d_model, GPT_attention = GPT_attention)(decoder) 73 | 74 | decoder = tf.keras.layers.Flatten()(decoder) 75 | linear_layer = tf.keras.layers.Dense(vocab_size + 3)(decoder) 76 | softmax = tf.nn.softmax(linear_layer) 77 | GPT = tf.keras.Model(inputs = input_words, outputs = softmax) 78 | 79 | return GPT 80 | 81 | 82 | def MinimalGPT(data_path='.', 83 | learning_rate=0, 84 | output_length=0, 85 | epochs = 1, 86 | batch_size = 1, 87 | gpt_input=10, 88 | d_model=128, 89 | h=8, 90 | decoder_stacks=1, 91 | starting_chunk = 0, 92 | ending_chunk = 5, 93 | chunk_size = 10, 94 | token_end=40000, 95 | vocabulary_start = 0, 96 | vocabulary_end = 40000, 97 | save=False, 98 | load_tokenizer=None, 99 | load_weights=None, 100 | save_tokenizer=None, 101 | save_weights=None, 102 | optimizer=None, 103 | inference_only = False, 104 | return_model_and_vectorizer = False, 105 | return_model_and_vectorizer_and_output = False, 106 | GPT_attention = False, 107 | TPU = False): 108 | 109 | if chunk_size: 110 | chunk_size *= 1024 111 | 112 | 113 | if inference_only == False: 114 | with open(data_path, 'r', encoding = 'utf-8') as file: 115 | corpus = file.read() 116 | #file_contents = corpus.split()[token_start : token_end] 117 | #print("Total tokens: " + str(len(file_contents))) 118 | 119 | 120 | if load_tokenizer: 121 | with open(load_tokenizer, 'r') as f: 122 | encoded_vocabulary = json.load(f) 123 | 124 | # Decode the encoded vocabulary to original strings 125 | vocabulary = [word.encode('utf-8').decode('unicode_escape') for word in encoded_vocabulary] 126 | vectorizer = tf.keras.layers.TextVectorization(standardize = None, split = 'whitespace') 127 | vectorizer.set_vocabulary(vocabulary) 128 | vocab_size = vectorizer.vocabulary_size() 129 | 130 | else: 131 | vocab = [] 132 | for word in tqdm(corpus.split()[vocabulary_start : vocabulary_end]): 133 | vocab += [word] 134 | vocab = list(set(vocab)) 135 | vocab_size = len(vocab) 136 | vectorizer = tf.keras.layers.TextVectorization(standardize = None, split = 'whitespace', vocabulary = vocab) 137 | print('New Vectorizer created successfully...') 138 | print("Vocabulary Size: " + str(vocab_size)) 139 | del corpus 140 | 141 | 142 | #if inference_only == False: 143 | # input_tokens, output_tokens = [], [] 144 | # for i in tqdm(range(len(file_contents) - gpt_input - 1)): 145 | # input_tokens += [file_contents[i : i + gpt_input]] 146 | # output_tokens += [file_contents[i + gpt_input]] 147 | 148 | 149 | # X = [' '.join(input_tokens[i]) for i in tqdm(range(len(input_tokens)))] 150 | # Y = output_tokens 151 | 152 | # del corpus 153 | 154 | # X = vectorizer(X) 155 | # Y = vectorizer(Y) 156 | 157 | if load_weights: 158 | model = get_model(gpt_input = gpt_input, d_model = d_model, h = h, decoder_stacks = decoder_stacks, vocab_size = vocab_size - 2, GPT_attention = GPT_attention) 159 | 160 | with open(load_weights, 'rb') as file: 161 | W = pickle.load(file) 162 | model.set_weights(W) 163 | else: 164 | 
model = get_model(gpt_input = gpt_input, d_model = d_model, h = h, decoder_stacks = decoder_stacks, vocab_size = vocab_size, GPT_attention = GPT_attention) 165 | 166 | 167 | print(model.summary()) 168 | 169 | 170 | if inference_only == False: 171 | # Compile the model 172 | if not optimizer: 173 | model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy') 174 | else: 175 | model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy') 176 | 177 | # Train the model 178 | if learning_rate > 0: 179 | 180 | for epoch in tqdm(range(epochs)): 181 | 182 | with open(data_path, 'r', encoding='utf-8') as f: 183 | chunk_number = 1 184 | for chunk in read_file(f, 185 | vectorizer, 186 | chunk_size, 187 | starting_chunk, 188 | ending_chunk, 189 | gpt_input): 190 | print('Chunk_size: ' + str(chunk.shape[0])) 191 | model.fit(chunk[:, :gpt_input], tf.reshape(chunk[:, -1], (-1, 1)), batch_size = batch_size, epochs=1) 192 | print("Chunk Number " + str(chunk_number) + "/" +str(ending_chunk - starting_chunk + 1) + " processed!") 193 | chunk_number += 1 194 | 195 | 196 | # Print the output of the Model 197 | output_seq = generate_output(gpt_input = gpt_input, model = model, vectorizer = vectorizer, text_size = output_length, input_sequence = []) 198 | 199 | if save == True and TPU == False: 200 | print('Saveeeeee') 201 | 202 | save_module(save_weights, model, vectorizer, save_tokenizer) 203 | 204 | if save == True and TPU == True: 205 | 206 | return save_weights, model, vectorizer, save_tokenizer, output_seq 207 | # Save the GPT Model 208 | #with open(save_weights, 'wb') as file: 209 | # pickle.dump(model.weights, file) 210 | 211 | #Save the Vectorizer Model 212 | #vocabulary = vectorizer.get_vocabulary() 213 | 214 | # Encode the vocabulary as JSON-compatible strings 215 | #encoded_vocabulary = [word.encode('unicode_escape').decode('utf-8') for word in vocabulary] 216 | #encoded_vocabulary = encoded_vocabulary[2:] 217 | 218 | # Save the encoded vocabulary to a JSON file 219 | #with open(save_tokenizer, 'w') as f: 220 | # json.dump(encoded_vocabulary, f) 221 | # print("Vocabulary size saved: " + str(len(encoded_vocabulary))) 222 | 223 | 224 | if return_model_and_vectorizer: 225 | return model, vectorizer 226 | elif return_model_and_vectorizer_and_output: 227 | return model, vectorizer, output_seq.replace('@@ ', '') 228 | else: 229 | return output_seq.replace('@@ ', '') 230 | 231 | 232 | 233 | # Example code to execute when the script file is called 234 | 235 | def main(): 236 | print("This code is executed when the script file is called directly.") 237 | 238 | # Check if the script is being run as the main module 239 | if __name__ == '__main__': 240 | parser = argparse.ArgumentParser() 241 | parser.add_argument('-d', '--data-path', help='File: Corresponding to corpus or training text [String]') 242 | parser.add_argument('-l', '--learning-rate', help='Float: Learning Rate. 
The model will train ONLY IF the rate is > 0, skip otherwise [Float]', type=float) 243 | parser.add_argument('-ol', '--output-length', help='Length of the output sequence to be generated', type=int) 244 | parser.add_argument('-e', '--epochs', help='Number of training Epochs [Int]', type=int) 245 | parser.add_argument('-b', '--batch-size', help='Size of each batch [Int]', type=int) 246 | parser.add_argument('-s', '--gpt-input', help='Number of Tokens of text the model inputs at a time [Int]', type=int) 247 | parser.add_argument('-dm', '--d-model', help='Embedding layer output dimensions [Int]', type=int) 248 | parser.add_argument('-p', '--multi-head', help='Number of Multi-head Attention layer in parallel [Int]', type=int) 249 | parser.add_argument('-ds', '--decoder-stacks', help='Number of stacked Decoder layer [Int]', type=int) 250 | parser.add_argument('-sc', '--chunk-start', help='The chunk number in the corpus to mark it as the starting point of the training [Int]', type=int) 251 | parser.add_argument('-ec', '--chunk-end', help='The chunk number in the corpus to mark it as the end point of the training [Int]', type=int) 252 | parser.add_argument('-csz', '--chunk-size', help='The size of each chunk in KB.', type=int) 253 | parser.add_argument('-vs', '--vocabulary-start', help='Token number from the corpus to mark the starting point of vocabulary data [Int]', type=int) 254 | parser.add_argument('-ve', '--vocabulary-end', help='Token number from the corpus to mark the end point of vocabulary data [Int]', type=int) 255 | parser.add_argument('-sd', '--save', help='Save the Model and Vectorizer data to disk [True/False]', action='store_true') 256 | parser.add_argument('-lt', '--load-tokenizer', help='File: Vectorization layer [File]') 257 | parser.add_argument('-lw', '--load-weights', help='File: Model Weights [File]') 258 | parser.add_argument('-st', '--save-tokenizer', help='File: Saving Vectorizer File [File]') 259 | parser.add_argument('-sw', '--save-weights', help='File: Saving Model Weights[File]') 260 | parser.add_argument('-ot', '--optimizer', help='Optimizer consistent to TensorFlow optimizer class [tf.keras.optimizers]') 261 | parser.add_argument('-i', '--inference-only', help='Only Print the output of the model in Inference Mode [True/False]', action='store_true') 262 | parser.add_argument('-mv', '--model-vectorizer', help='Return Model, Vectorizer Tuple [True/False]', action='store_true') 263 | parser.add_argument('-mvo', '--model-vectorizer-output', help='Return Model, Vectorizer, Output Tuple [True/False]', action='store_true') 264 | parser.add_argument('-ga', '--gpt-style-attention', help='Uses GPT-styled attention. Note: (d-model) parameter should be divisible by (multi-head), otherwise the program will throw an error! 
[True/False]', action='store_true') 265 | parser.add_argument('-tpu', '--TPU', help='Use Tensor Processor Units (Distributed Learning)', action='store_true') 266 | 267 | 268 | args = parser.parse_args() 269 | 270 | 271 | data_path = args.data_path 272 | learning_rate = args.learning_rate 273 | output_length = args.output_length 274 | epochs = args.epochs 275 | batch_size = args.batch_size 276 | gpt_input = args.gpt_input 277 | d_model = args.d_model 278 | h = args.multi_head 279 | stacks = args.decoder_stacks 280 | chunk_start = args.chunk_start 281 | chunk_end = args.chunk_end 282 | chunk_size = args.chunk_size 283 | vocabulary_start = args.vocabulary_start 284 | vocabulary_end = args.vocabulary_end 285 | save = args.save 286 | load_tokenizer = args.load_tokenizer 287 | load_weights = args.load_weights 288 | save_tokenizer = args.save_tokenizer 289 | save_weights = args.save_weights 290 | optimizer = args.optimizer 291 | inference_only = args.inference_only 292 | model_and_vectorizer = args.model_vectorizer 293 | GPT_attention = args.gpt_style_attention 294 | model_vectorizer_output = args.model_vectorizer_output 295 | 296 | 297 | 298 | configuration = { 299 | 'data_path': args.data_path, 300 | 'learning_rate': args.learning_rate, 301 | 'output_length': args.output_length, 302 | 'epochs': args.epochs, 303 | 'batch_size': args.batch_size, 304 | 'gpt_input': args.gpt_input, 305 | 'd_model': args.d_model, 306 | 'h': args.multi_head, 307 | 'stacks': args.decoder_stacks, 308 | 'chunk_start': args.chunk_start, 309 | 'chunk_end': args.chunk_end, 310 | 'chunk_size': args.chunk_size, 311 | 'vocabulary_start': args.vocabulary_start, 312 | 'vocabulary_end': args.vocabulary_end, 313 | 'save': args.save, 314 | 'load_tokenizer': args.load_tokenizer, 315 | 'load_weights': args.load_weights, 316 | 'save_tokenizer': args.save_tokenizer, 317 | 'save_weights': args.save_weights, 318 | 'optimizer': args.optimizer, 319 | 'inference_only': args.inference_only, 320 | 'model_and_vectorizer': args.model_vectorizer, 321 | 'model_vectorizer_output': args.model_vectorizer_output, 322 | 'GPT_Attention' : args.gpt_style_attention 323 | } 324 | 325 | # Save the configuration to a JSON file 326 | with open('last-configuration.json', 'w') as file: 327 | json.dump(configuration, file) 328 | 329 | 330 | 331 | if args.TPU == True: 332 | 333 | resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='') 334 | tf.config.experimental_connect_to_cluster(resolver) 335 | # This is the TPU initialization code that has to be at the beginning. 
336 | tf.tpu.experimental.initialize_tpu_system(resolver) 337 | print("All devices: ", tf.config.list_logical_devices('TPU')) 338 | 339 | 340 | strategy = tf.distribute.TPUStrategy(resolver) 341 | 342 | with strategy.scope(): 343 | 344 | output = MinimalGPT(data_path = data_path, 345 | learning_rate = learning_rate, 346 | output_length = output_length, 347 | epochs = epochs, 348 | batch_size = batch_size, 349 | gpt_input = gpt_input, 350 | d_model = d_model, 351 | h = h, 352 | decoder_stacks = stacks, 353 | starting_chunk = chunk_start, 354 | ending_chunk = chunk_end, 355 | chunk_size = chunk_size, 356 | vocabulary_start = vocabulary_start, 357 | vocabulary_end = vocabulary_end, 358 | save = save, 359 | load_tokenizer = load_tokenizer, 360 | load_weights = load_weights, 361 | save_tokenizer = save_tokenizer, 362 | save_weights = save_weights, 363 | optimizer = optimizer, 364 | inference_only = inference_only, 365 | return_model_and_vectorizer = model_and_vectorizer, 366 | return_model_and_vectorizer_and_output = model_vectorizer_output, 367 | GPT_attention = GPT_attention, 368 | TPU = True) 369 | 370 | save_module(output[0], output[1], output[2], output[3]) 371 | 372 | print(output[4]) 373 | sys.exit(0) 374 | 375 | 376 | output = MinimalGPT(data_path = data_path, 377 | learning_rate = learning_rate, 378 | output_length = output_length, 379 | epochs = epochs, 380 | batch_size = batch_size, 381 | gpt_input = gpt_input, 382 | d_model = d_model, 383 | h = h, 384 | decoder_stacks = stacks, 385 | starting_chunk = chunk_start, 386 | ending_chunk = chunk_end, 387 | chunk_size = chunk_size, 388 | vocabulary_start = vocabulary_start, 389 | vocabulary_end = vocabulary_end, 390 | save = save, 391 | load_tokenizer = load_tokenizer, 392 | load_weights = load_weights, 393 | save_tokenizer = save_tokenizer, 394 | save_weights = save_weights, 395 | optimizer = optimizer, 396 | inference_only = inference_only, 397 | return_model_and_vectorizer = model_and_vectorizer, 398 | return_model_and_vectorizer_and_output = model_vectorizer_output, 399 | GPT_attention = GPT_attention, 400 | TPU = False) 401 | print(output) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ⚠️ All support for MinimalGPT has ended; the project is deprecated! Use [Corpus2GPT](https://github.com/abhaskumarsinha/Corpus2GPT) going forward! 2 | https://github.com/abhaskumarsinha/Corpus2GPT 3 | 4 | # MinimalGPT: The 'Tiniest and Simplest GPT Model' 5 | 6 | 7 | 8 | MinimalGPT Logo 9 | 10 | 11 | [[`GPT-1 Paper`](https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf)] [[`1002 short stories from project guttenberg`](https://www.kaggle.com/datasets/shubchat/1002-short-stories-from-project-guttenberg)] [[`logo.com`](https://www.logo.com/)] [[`Transformer - Paper`](https://arxiv.org/abs/1706.03762)] [[`Huggingface Transformers`](https://huggingface.co/docs/transformers/index)] [[`TensorFlow`](https://www.tensorflow.org/)] [[`BPE Tokenizer: subword-nmt`](https://github.com/rsennrich/subword-nmt)] 12 | 

MinimalGPT is a concise, adaptable, and streamlined code framework that encompasses the essential components necessary for the construction, training, inference, and fine-tuning of the GPT model. This framework is implemented exclusively using Keras and TensorFlow, ensuring compatibility and coherence within the broader deep learning ecosystem.

14 |

15 |

NEW: CPU/GPU/TPU support, and support for loading large file datasets in chunks (see the example command just below)!
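The large-dataset path is handled by MinimalGPT_2.py, which reads the corpus from disk in fixed-size chunks instead of all at once, driven by the extra -sc/-ec (start/end chunk) and -csz (chunk size, in KB) switches defined in its argument parser. A sketch of such a run (the dataset path and hyperparameters are illustrative, borrowed from the examples further below):

```
python MinimalGPT_2.py -d './dataset/output_dataset.txt' -l 0.001 -ol 200 -e 1 -b 512 -s 10 -dm 128 -p 8 -ds 1 -sc 0 -ec 5 -csz 10 -vs 0 -ve 200000 -sd -st './models/tokenizer.mgt' -sw './models/weights.mgw'
```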

16 |

17 | 18 |

Code Specifications

19 |

20 | The repository introduces the two files that constitute the framework. The first, GPT.py, provides the fundamental building blocks and layers: multi-head attention, the feedforward sublayer, scaled dot-product attention, positional encoding, the softmaxed output, and an inference function for model prediction. The second, MinimalGPT.py, streamlines use of the framework through a concise command-line interface, so that model creation, training, saving, loading, fine-tuning, and inference can all be performed in a single command-line execution. Both files can also be imported into Python code, allowing users to incorporate them into their own projects through a simple function call, as sketched below. 21 |
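As a sketch of the function-call route (assuming MinimalGPT.py is importable from the working directory; the corpus and model paths are illustrative), a training run equivalent to the first command-line example further below looks like this:

```python
from MinimalGPT import MinimalGPT

# Train a small GPT on a whitespace-tokenized corpus, save the tokenizer
# and weights, and receive the generated sample text as the return value.
output = MinimalGPT(data_path='./dataset/output_dataset.txt',
                    learning_rate=0.001,
                    output_length=200,
                    epochs=4,
                    batch_size=512,
                    gpt_input=10,
                    d_model=128,
                    h=8,
                    decoder_stacks=1,
                    token_start=0,
                    token_end=40000,
                    vocabulary_start=0,
                    vocabulary_end=200000,
                    save=True,
                    save_tokenizer='./models/tokenizer.mgt',
                    save_weights='./models/weights.mgw')
print(output)
```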

22 |

Requirements

23 | Run the following command to install the required dependencies from the requirements.txt file: 24 |

 25 | pip install -r requirements.txt
 26 | 
27 | 28 |

Usage

29 | 30 |

31 | The model architecture is governed by several critical parameters, including GPT_INPUT, D_MODEL, MULTI_HEAD, and DECODER_STACKS. Keep these parameters consistent across runs to avoid issues when loading the model for subsequent re-training or inference. When in doubt, consult the configuration file (last-configuration.json) written during the previous run. Furthermore, the VOCABULARY_START and VOCABULARY_END parameters define the window markers for the corpus: the Vectorizer layer extracts its vocabulary from the corpus between these START and END token counts. Note that tokens in the corpus are separated by whitespace, and VOCABULARY_START and VOCABULARY_END are especially relevant when a tokenizer file is not explicitly specified. 32 | 33 | Also note that BOTH the tokenizer file and the model weights are saved/loaded together; the code does not currently support saving/loading these two files separately. 34 | 35 | The inference mode (-i) requires only the model parameters and the saved tokenizer and weights files to generate inference data. It should be used with the (-ol) switch; a Python equivalent is sketched below. 36 |
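The Python equivalent of inference mode mirrors examples/models.ipynb; in this sketch the tokenizer and weights paths are the ones used in that notebook and stand in for your own:

```python
from MinimalGPT import MinimalGPT

# Load a saved tokenizer and weights and run generation only; with
# return_model_and_vectorizer_and_output=True the call returns the
# (model, vectorizer, generated_text) tuple.
model, vectorizer, output = MinimalGPT(output_length=200,
                                       gpt_input=10,
                                       d_model=128,
                                       h=8,
                                       decoder_stacks=1,
                                       load_tokenizer='./models/tokenizer3.mgt',
                                       load_weights='./models/weights3.mgw',
                                       inference_only=True,
                                       return_model_and_vectorizer_and_output=True)
print(output)
```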

37 | 38 | 39 |

 40 | usage: MinimalGPT.py [-h] [-d DATA_PATH] [-l LEARNING_RATE]
 41 |                      [-ol OUTPUT_LENGTH] [-e EPOCHS] [-b BATCH_SIZE]
 42 |                      [-s GPT_INPUT] [-dm D_MODEL] [-p MULTI_HEAD]
 43 |                      [-ds DECODER_STACKS] [-ts TOKEN_START] [-te TOKEN_END]
 44 |                      [-vs VOCABULARY_START] [-ve VOCABULARY_END] [-sd]
 45 |                      [-lt LOAD_TOKENIZER] [-lw LOAD_WEIGHTS]
 46 |                      [-st SAVE_TOKENIZER] [-sw SAVE_WEIGHTS] [-ot OPTIMIZER]
 47 |                      [-i] [-mv] [-mvo]
 48 | 
 49 | optional arguments:
 50 |   -h, --help            show this help message and exit
 51 |   -d DATA_PATH, --data-path DATA_PATH
 52 |                         File: Corresponding to corpus or training text
 53 |                         [String]
 54 |   -l LEARNING_RATE, --learning-rate LEARNING_RATE
 55 |                         Float: Learning Rate. The model will train ONLY IF the
 56 |                         rate is > 0, skip otherwise [Float]
 57 |   -ol OUTPUT_LENGTH, --output-length OUTPUT_LENGTH
 58 |                         Length of the output sequence to be generated
 59 |   -e EPOCHS, --epochs EPOCHS
 60 |                         Number of training Epochs [Int]
 61 |   -b BATCH_SIZE, --batch-size BATCH_SIZE
 62 |                         Size of each batch [Int]
 63 |   -s GPT_INPUT, --gpt-input GPT_INPUT
 64 |                         Number of Tokens of text the model inputs at a time
 65 |                         [Int]
 66 |   -dm D_MODEL, --d-model D_MODEL
 67 |                         Embedding layer output dimensions [Int]
 68 |   -p MULTI_HEAD, --multi-head MULTI_HEAD
 69 |                         Number of Multi-head Attention layer in parallel [Int]
 70 |   -ds DECODER_STACKS, --decoder-stacks DECODER_STACKS
 71 |                         Number of stacked Decoder layer [Int]
 72 |   -ts TOKEN_START, --token-start TOKEN_START
 73 |                         The token number in the corpus to mark it as the
 74 |                         starting point of the training [Int]
 75 |   -te TOKEN_END, --token-end TOKEN_END
 76 |                         The token number in the corpus to mark it as the end
 77 |                         point of the training [Int]
 78 |   -vs VOCABULARY_START, --vocabulary-start VOCABULARY_START
 79 |                         Token number from the corpus to mark the starting
 80 |                         point of vocabulary data [Int]
 81 |   -ve VOCABULARY_END, --vocabulary-end VOCABULARY_END
 82 |                         Token number from the corpus to mark the end point of
 83 |                         vocabulary data [Int]
 84 |   -sd, --save           Save the Model and Vectorizer data to disk
 85 |                         [True/False]
 86 |   -lt LOAD_TOKENIZER, --load-tokenizer LOAD_TOKENIZER
 87 |                         File: Vectorization layer [File]
 88 |   -lw LOAD_WEIGHTS, --load-weights LOAD_WEIGHTS
 89 |                         File: Model Weights [File]
 90 |   -st SAVE_TOKENIZER, --save-tokenizer SAVE_TOKENIZER
 91 |                         File: Saving Vectorizer File [File]
 92 |   -sw SAVE_WEIGHTS, --save-weights SAVE_WEIGHTS
 93 |                         File: Saving Model Weights[File]
 94 |   -ot OPTIMIZER, --optimizer OPTIMIZER
 95 |                         Optimizer consistent to TensorFlow optimizer class
 96 |                         [tf.keras.optimizers]
 97 |   -i, --inference-only  Only Print the output of the model in Inference Mode
 98 |                         [True/False]
 99 |   -mv, --model-vectorizer
100 |                         Return Model, Vectorizer Tuple [True/False]
101 |   -mvo, --model-vectorizer-output
102 |                         Return Model, Vectorizer, Output Tuple [True/False]
103 | 
104 | 105 |

Examples

106 | 107 | 108 |

Example of Model Creation and Training

109 | 110 |

Assume the desired model specification is GPT_INPUT = 10, D_MODEL = 128, MULTI_HEAD = 8, and DECODER_STACKS = 1, the corpus token range for training spans from TOKEN_START = 0 to TOKEN_END = 40000, and the vectorizer layer is generated from the corpus span VOCABULARY_START = 0 to VOCABULARY_END = 200000. The following command initiates model training and saves the resulting weights and tokenizer data to the designated folder; the subsequent output illustrates the outcome of the run.

111 | 112 |

113 | PS C:\gpt> python MinimalGPT.py -d './dataset/output_dataset.txt' -l 0.001 -ol 200 -e 4 -b 512 -s 10 -dm 128 -p 8 -ds 1 -ts 0 -te 40000 -vs 0 -ve 200000 -sd -st './models/tokenizer.mgt' -sw './models/weights.mgw'
114 | Total tokens: 40000
115 | 100%|██████████████████████████████████████████████████████████████████████████████| 200000/200000 [02:02<00:00, 1636.38it/s]
116 | New Vectorizer created successfully...
117 | Vocabulary Size: 14270
118 | 100%|██████████████████████████████████████████████████████████████████████████████| 39989/39989 [00:00<00:00, 302926.25it/s]
119 | 100%|█████████████████████████████████████████████████████████████████████████████| 39989/39989 [00:00<00:00, 1289942.19it/s]
120 | (None, 10, 128)
121 | Epoch 1/4
122 | 79/79 [==============================] - 88s 1s/step - loss: 7.8692
123 | Epoch 2/4
124 | 79/79 [==============================] - 92s 1s/step - loss: 3.8066
125 | Epoch 3/4
126 | 79/79 [==============================] - 93s 1s/step - loss: 1.1487
127 | Epoch 4/4
128 | 79/79 [==============================] - 92s 1s/step - loss: 0.2900
129 | 100%|██████████████████████████████████████████████████████████████████████████████████████| 190/190 [00:05<00:00, 34.70it/s]
130 | Vocabulary size saved: 14270
131 |          and her eyes in the library. She was the rather large woman, although not fat, and when she wore high heels--which sh
132 | e was not prone to do, because although Cutter would not have cared, she kept trying to project into other people's minds and
133 | trying, as she said, "Not to do anything to them, that I wouldn't want them to do you me."--she rose a good inch above Cutter.
134 |  She was pleasant humored, and cooperative, and the one great irritant about her that annoyed Cutter, was the fact that she wa
135 | s not capable of meeting life wholeheartedly and with strength. She steadily worried about other people's feelings and thought
136 | s, so that Cutter wondered if she were capable of the slightest personal conviction. Yet that weakness was an advantage at the
137 |  same time, to him, because she worked constantly toward making him happy. The house was run to his minutest liking, and the s
138 | ervants liked her, so that while she did not use a strong enough
139 | 
140 | 141 |

Fine-tuning

142 | 143 |

Suppose we want to fine-tune the above model (or retrain it). The command below re-loads the saved tokenizer and weights and retrains the model on a new, specified window of the corpus:

144 | 145 |

146 | PS C:\gpt> python MinimalGPT.py -d './dataset/output_dataset.txt' -l 0.00005 -ol 200 -e 1 -b 512 -s 10 -dm 128 -p 8 -ds 1 -ts 80000 -te 120000 -sd -st './models/tokenizer2.mgt' -sw './models/weights2.mgw' -lt './models/tokenizer.mgt' -lw './models/weights.mgw'
147 | Total tokens: 40000
148 | 100%|██████████████████████████████████████████████████████████████████████████████| 39989/39989 [00:00<00:00, 302923.51it/s]
149 | 100%|█████████████████████████████████████████████████████████████████████████████| 39989/39989 [00:00<00:00, 1428099.68it/s]
150 | (None, 10, 128)
151 | 79/79 [==============================] - 81s 993ms/step - loss: 7.9725
152 | 100%|██████████████████████████████████████████████████████████████████████████████████████| 190/190 [00:06<00:00, 30.29it/s]
153 | Vocabulary size saved: 14270
154 |          of her own the black of my own and my wife had could seen the house at the same moment her mind caught the first sugg
155 | estion of the folded paper. “But he must have a name! Where is the paper?” She moved to the desk, and began to turn over the s
156 | cattered documents that littered it. The first that caught her eye was an unfinished letter in her husband’s hand, with his pe
157 | n lying across it, as though dropped there at a sudden summons. “My dear Parvis,”--who was Parvis?--“I have just received your
158 |  letter announcing Elwell’s death, and while I suppose there is now no farther risk of trouble, it might be safer--” That was
159 | all. The “risk of trouble” was easily explained by the newspaper clipping which had apprised Mary of the suit brought against
160 | her husband by one of his associates in the Blue Star enterprise. The only new information conveyed in the letter was the fact
161 |  of its showing Boyne,
162 | 
163 | 164 |

Inference Mode

165 |

Inference mode loads pre-trained weights and a vectorizer, then uses them to run the model and generate output of the specified length.

166 | 167 |
168 | PS C:\gpt> python MinimalGPT.py -i -ol 500 -e 6 -b 512 -s 10 -dm 128 -p 8 -ds 1 -lt './models/tokenizer2.mgt' -lw './models/weights2.mgw'
169 | (None, 10, 128)
170 | 100%|██████████████████████████████████████████████████████████████████████████████████████| 490/490 [00:13<00:00, 35.93it/s]
171 |          of her own “on the other from the inel’--a little sensational, of course. But I guess you’d better look it over.” He
172 | held out a newspaper to Mary, who unfolded it slowly, remembering, as she did so, the evening when, in that same room, the per
173 | usal of a clipping from the “Sentinel” had first shaken the depths of her security. As she opened the paper, her eyes, shrinki
174 | ng from the glaring head-lines, “Widow of Boyne’s Victim Forced to Appeal for Aid,” ran down the column of text to two portrai
175 | ts inserted in it. The first was her husband’s, taken from a photograph made the year they had come to England. It was the pic
176 | ture of him that she liked best, the one that stood on the writing-table up-stairs in her bedroom. As the eyes in the photogra
177 | ph met hers, she felt it would be impossible to read what was said of him, and closed her lids with the sharpness of the pain.
178 |  “I thought if you felt disposed to put your name down--” she heard Parvis continue. She opened her eyes with an effort, and t
179 | hey fell on the other portrait. It was that of a youngish man, slightly built, in rough clothes, with features somewhat blurre
180 | d by the shadow of a projecting hat-brim. Where had she seen that outline before? She stared at it confusedly, her heart hamme
181 | ring in her throat and ears. Then she gave a cry. “This is the man--the man who came for my husband!” She heard Parvis start t
182 | o his feet, and was dimly aware that she had slipped backward into the corner of the sofa, and that he was bending above her i
183 | n alarm. With an intense effort she straightened herself, and reached out for the paper, which she had dropped. “It’s the man!
184 |  I should know him anywhere!” she cried in a voice that sounded in her own ears like a scream. Parvis’s voice seemed to come t
185 | o her from far off, down endless, fog-muffled windings. “Mrs. Boyne, you’re not very well. Shall I call somebody? Shall I get
186 | a glass of water?” “No, no, no!” She threw herself toward him, her hand frantically clenching the newspaper. “I tell you, it’s
187 |  the man! I KNOW him! He spoke to me in the garden!” Parvis took the journal from her, directing his glasses to the portrait.
188 | “It can’t be, Mrs. Boyne. It’s Robert Elwell.” “Robert Elwell?” Her white
189 | 
190 | 191 | 192 | 193 |

Importing the model into a project

194 | 195 |

Incorporating models trained with MinimalGPT.py into your own project is straightforward: import the MinimalGPT function and configure it to your specifications. Set return_model_and_vectorizer = True or return_model_and_vectorizer_and_output = True together with inference_only = True (Inference Mode) to get the trained components back as a tuple. Training, creating, and exporting a model can be done the same way, paralleling the command-line mode. The accompanying Jupyter Notebook demonstrates these procedures in full.

196 |
197 | from MinimalGPT import MinimalGPT
198 | 
199 | 
200 | model = MinimalGPT(output_length = 200, gpt_input = 10, d_model = 128, h = 8, decoder_stacks = 1, load_tokenizer = './models/tokenizer3.mgt', load_weights = './models/weights3.mgw', inference_only = True, return_model_and_vectorizer_and_output = True)
201 | model[0].summary()
202 | 
203 | _________________________________________________________________
204 | Model: "model"
205 | _________________________________________________________________
206 | 
 207 |  Layer (type)                Output Shape              Param #   
208 |  
209 | =================================================================
210 |  input_1 (InputLayer)        [(None, 10)]              0         
211 |                                                                  
212 |  embedding (Embedding)       (None, 10, 128)           1826816   
213 |                                                                  
214 |  positional_embedding (Posit  (None, 10, 128)          0         
215 |  ionalEmbedding)                                                 
216 |                                                                  
217 |  decoder (Decoder)           (None, 10, 128)           37160     
218 |                                                                  
219 |  flatten (Flatten)           (None, 1280)              0         
220 |                                                                  
221 |  dense (Dense)               (None, 14273)             18283713  
222 |                                                                  
223 |  tf.nn.softmax (TFOpLambda)  (None, 14273)             0         
224 |                                                                  
225 | =================================================================
226 | Total params: 20,147,689
227 | Trainable params: 20,147,689
228 | Non-trainable params: 0
229 | _________________________________________________________________
230 | 
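The tuple returned above can also be unpacked directly. The following is a minimal usage sketch, assuming (as the parameter name return_model_and_vectorizer_and_output suggests) that the tuple is ordered as (model, vectorizer, output) — verify the ordering against MinimalGPT.py:

```python
# Assumed tuple ordering: (model, vectorizer, output) -- an assumption
# based on the parameter name, not taken from the repository's code.
gpt, vectorizer, output_text = model

gpt.summary()       # the same Keras model summarized above
print(output_text)  # the text generated during this inference run
```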
231 | 232 |

Implementation Specifications

233 |

The model implemented here differs slightly from the original paper. The matrix formed by concatenating the heads of the scaled dot-product attention output is multiplied by a parameter matrix of size (key dimension x d_model) rather than the usual (d_model x d_model). In practice, this small tweak reduces the number of trainable parameters and therefore yields a modest performance gain.
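To make the parameter saving concrete, the sketch below contrasts the two projection sizes. It is purely illustrative; the variable names and shapes are assumptions based on the description above, not code from this repository:

```python
import tensorflow as tf

# Illustrative sizes matching the first example above (d_model = 128, 8 heads).
d_model, num_heads = 128, 8
key_dim = d_model // num_heads  # 16

# Standard Transformer output projection: W_O has shape (d_model, d_model).
w_o_standard = tf.Variable(tf.random.normal((d_model, d_model)))

# Reduced projection described above: a (key_dim, d_model) parameter.
w_o_reduced = tf.Variable(tf.random.normal((key_dim, d_model)))

print(int(tf.size(w_o_standard)))  # 16384 trainable weights
print(int(tf.size(w_o_reduced)))   # 2048 trainable weights, an 8x reduction
```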

234 | 235 | 236 |

Results

237 | See the examples folder for notebooks containing the samples. 238 | 239 |

Troubleshooting

240 | Feel free to open a ticket in the Issues tab if you encounter an error or have a specific feature request in mind. 241 | 242 | 243 |

References/Further Reading

244 | 245 | 246 | 1. Vaswani, Ashish, et al. "Attention is all you need." Advances in neural information processing systems 30 (2017). 247 | 2. Radford, Alec, et al. "Improving language understanding by generative pre-training." (2018). 248 | 3. Radford, Alec, et al. "Language models are unsupervised multitask learners." OpenAI blog 1.8 (2019): 9. 249 | 4. Brown, Tom, et al. "Language models are few-shot learners." Advances in neural information processing systems 33 (2020): 1877-1901. 250 | 5. Howard, Jeremy, and Sebastian Ruder. "Universal language model fine-tuning for text classification." arXiv preprint arXiv:1801.06146 (2018). 251 | 6. Petroni, Fabio, et al. "Language models as knowledge bases?." arXiv preprint arXiv:1909.01066 (2019). 252 | 253 | -------------------------------------------------------------------------------- /examples/Fine-tuning outputs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "colab": { 8 | "base_uri": "https://localhost:8080/" 9 | }, 10 | "id": "wJU3EvZacdP-", 11 | "outputId": "5a6bce5d-6dbb-446f-e5e5-2b63b3f84a69" 12 | }, 13 | "outputs": [ 14 | { 15 | "name": "stdout", 16 | "output_type": "stream", 17 | "text": [ 18 | "/content/drive/MyDrive/GPT/GPT_models\n" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "%cd /content/drive/MyDrive/GPT/GPT_models" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": { 30 | "colab": { 31 | "base_uri": "https://localhost:8080/" 32 | }, 33 | "id": "EeKZIkb7c-OM", 34 | "outputId": "954d7a52-b30e-471c-9f3c-9888902c906f" 35 | }, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "2023-05-04 06:40:05.233617: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 42 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 43 | "2023-05-04 06:40:06.697240: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 44 | "Total tokens: 1000000\n", 45 | "100% 1000000/1000000 [19:51<00:00, 839.13it/s]\n", 46 | "2023-05-04 07:00:06.711130: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 47 | "2023-05-04 07:00:07.254245: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 48 | "2023-05-04 07:00:07.254519: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 49 | "2023-05-04 07:00:07.255249: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 50 | "2023-05-04 07:00:07.255473: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 51 | "2023-05-04 07:00:07.255697: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 52 | "2023-05-04 07:00:09.591213: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 53 | "2023-05-04 07:00:09.591504: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 54 | "2023-05-04 07:00:09.591743: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 55 | "2023-05-04 07:00:09.591902: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", 56 | "2023-05-04 07:00:09.591951: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13678 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5\n", 57 | "New Vectorizer created successfully...\n", 58 | "Vocabulary Size: 17843\n", 59 | "100% 999989/999989 [00:02<00:00, 491970.07it/s]\n", 60 | "100% 999989/999989 [00:00<00:00, 1180066.01it/s]\n", 61 | "(None, 10, 512)\n", 62 | "Epoch 1/2\n", 63 | "2023-05-04 07:00:40.044301: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x2ca12d50 initialized for platform CUDA (this does not guarantee that XLA will be used). 
Devices:\n", 64 | "2023-05-04 07:00:40.044353: I tensorflow/compiler/xla/service/service.cc:177] StreamExecutor device (0): Tesla T4, Compute Capability 7.5\n", 65 | "2023-05-04 07:00:40.175600: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", 66 | "2023-05-04 07:00:40.849457: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n", 67 | "2023-05-04 07:00:41.322333: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", 68 | "1954/1954 [==============================] - 1011s 503ms/step - loss: 6.3368\n", 69 | "Epoch 2/2\n", 70 | "1954/1954 [==============================] - 973s 498ms/step - loss: 4.2926\n", 71 | "100% 190/190 [00:35<00:00, 5.32it/s]\n", 72 | "Vocabulary size saved: 17843\n", 73 | " at the time of life? It was not to be so deeply to be a real good for a good deal of money!\" He had not done with a good deal of a Forsyte of the world, and the other of the same thing.\" June had gone to the construction of her that she had not yet been so that she had been so much of her own life, and perhaps--it was strange--not to be so that he could not bear to be sure of that he had done with its separate wound to his wife and his daughter of his grandfather. In the way, like the busy with the river, of the upper-middle class. He had literally been forced into the room to be sure of the house completed by the river. The room was full of boiling water, ready for the toilet; and the great quantity of the furrows of the mming to coat, and the mantle, boots, cap--everything. For a moment he had not been a single essential detail of her \n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "!python MinimalGPT.py -d './dataset/dataset.txt' -l 0.001 -ol 200 -e 2 -b 512 -s 10 -dm 512 -p 8 -ds 6 -ts 0 -te 1000000 -vs 0 -ve 1000000 -sd -st './model/tokenizer.mgt' -sw './model/weights.mgw'" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 3, 84 | "metadata": { 85 | "colab": { 86 | "base_uri": "https://localhost:8080/" 87 | }, 88 | "id": "mVKSW8ubdmQu", 89 | "outputId": "b854dbaa-5e3c-43d8-fc3a-aca1e9240142" 90 | }, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "2023-05-04 07:56:47.007855: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 97 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 98 | "2023-05-04 07:56:48.077964: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 99 | "Total tokens: 1000000\n", 100 | "2023-05-04 07:56:53.747198: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 101 | "2023-05-04 07:56:53.780778: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 102 | "2023-05-04 07:56:53.781065: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 103 | "2023-05-04 07:56:53.781823: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 104 | "2023-05-04 07:56:53.782072: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 105 | "2023-05-04 07:56:53.782267: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 106 | "2023-05-04 07:56:54.847807: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 107 | "2023-05-04 07:56:54.848128: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 108 | "2023-05-04 07:56:54.848378: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 109 | "2023-05-04 07:56:54.848530: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. 
Original config value was 0.\n", 110 | "2023-05-04 07:56:54.848592: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13678 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5\n", 111 | "100% 999989/999989 [00:01<00:00, 525301.78it/s]\n", 112 | "100% 999989/999989 [00:00<00:00, 1186173.32it/s]\n", 113 | "(None, 10, 512)\n", 114 | "2023-05-04 07:57:06.646451: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 115 | "2023-05-04 07:57:07.131646: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 116 | "2023-05-04 07:57:07.525222: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 117 | "2023-05-04 07:57:26.104338: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7f71100292f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", 118 | "2023-05-04 07:57:26.104399: I tensorflow/compiler/xla/service/service.cc:177] StreamExecutor device (0): Tesla T4, Compute Capability 7.5\n", 119 | "2023-05-04 07:57:26.111718: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", 120 | "2023-05-04 07:57:26.368503: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n", 121 | "2023-05-04 07:57:26.619686: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", 122 | "1954/1954 [==============================] - 1015s 506ms/step - loss: 2.1494\n", 123 | "100% 190/190 [00:36<00:00, 5.14it/s]\n", 124 | "2023-05-04 08:15:28.905508: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 125 | "Vocabulary size saved: 17843\n", 126 | " at the time of life--he was always by the mute accomplice, the incorruptible cue, diopportunities of mystery. She drank it, almost from the music-room, and walk off the threshold of the inner room. The two old tossed the memories: and started up. “My dear fold of the Posatin bodiment of Nature and a drawing-rooms--of his couldn’t bear the fact that he was sitting on the table, grave, as if he weighed down by the touch of pondered up the lane, and quietly at the door that she was sure he knew that he was ambitious, was uncut his face to face in the wall. 
He was not cruel to give him a farm for having broken off his engagement to Swiss Anna, which had promised so many worldly advantagat birth and when the considered trifle as the Co., Ltacopan, and then grew very still, but he had no intention of letting steps, and he saw the figure of \n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "!python MinimalGPT.py -d './dataset/dataset.txt' -l 0.001 -ol 200 -e 1 -b 512 -s 10 -dm 512 -p 8 -ds 6 -ts 000000 -te 1000000 -sd -st './model/tokenizer2.mgt' -sw './model/weights2.mgw' -lt './model/tokenizer.mgt' -lw './model/weights.mgw'" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 4, 137 | "metadata": { 138 | "colab": { 139 | "base_uri": "https://localhost:8080/" 140 | }, 141 | "id": "OgEg6qDO1d7r", 142 | "outputId": "8c707c95-39b2-4b8b-9bad-4c37c1c3876a" 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "2023-05-04 08:17:30.154386: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 150 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 151 | "2023-05-04 08:17:31.218720: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 152 | "Total tokens: 1000000\n", 153 | "2023-05-04 08:17:37.345497: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 154 | "2023-05-04 08:17:37.384265: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 155 | "2023-05-04 08:17:37.384656: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 156 | "2023-05-04 08:17:37.385542: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 157 | "2023-05-04 08:17:37.385873: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 158 | "2023-05-04 08:17:37.386135: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 159 | "2023-05-04 08:17:38.765131: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 160 | "2023-05-04 08:17:38.765475: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 161 | "2023-05-04 08:17:38.765784: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 162 | "2023-05-04 08:17:38.766003: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", 163 | "2023-05-04 08:17:38.766072: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13678 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5\n", 164 | "100% 999989/999989 [00:01<00:00, 589837.35it/s]\n", 165 | "100% 999989/999989 [00:00<00:00, 2207555.26it/s]\n", 166 | "(None, 10, 512)\n", 167 | "2023-05-04 08:17:48.716521: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 168 | "2023-05-04 08:17:49.246262: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 169 | "2023-05-04 08:17:49.692581: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 170 | "2023-05-04 08:18:08.812829: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x224ab130 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", 171 | "2023-05-04 08:18:08.812881: I tensorflow/compiler/xla/service/service.cc:177] StreamExecutor device (0): Tesla T4, Compute Capability 7.5\n", 172 | "2023-05-04 08:18:08.817623: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", 173 | "2023-05-04 08:18:08.993881: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n", 174 | "2023-05-04 08:18:09.139481: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", 175 | "1954/1954 [==============================] - 1016s 507ms/step - loss: 1.4201\n", 176 | "100% 190/190 [00:37<00:00, 5.09it/s]\n", 177 | "2023-05-04 08:36:13.493331: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 178 | "Vocabulary size saved: 17843\n", 179 | " Monsieur Profond had come and out of window, and busied herself with a bowl of flowers. 
She had heard her humming and kissed her heralded her husband's approach, as though warning the world to recover its good form and all over the sideboard in any of the sal of his last flight. She had not ship was seurable, like the essential of a man who knew the hard on the il of Hanson and the footman who had run with the hare and hunt with the hounds, and then he remembered his trotting and in the sky. It was very lonely! As he entered it--she had no sense of something hard; and then he walked away. Of course, the day. He felt always where the father and was not alarmed at that 'what they had stopped. What was it? What was it--she looked like him with a chuckle. Ben was a flower from her hair; and she was conscious of a man's possessible. He flashed out at the old \n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "!python MinimalGPT.py -d './dataset/dataset.txt' -l 0.001 -ol 200 -e 1 -b 512 -s 10 -dm 512 -p 8 -ds 6 -ts 000000 -te 1000000 -sd -st './model/tokenizer3.mgt' -sw './model/weights3.mgw' -lt './model/tokenizer2.mgt' -lw './model/weights2.mgw'" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 5, 190 | "metadata": { 191 | "colab": { 192 | "base_uri": "https://localhost:8080/" 193 | }, 194 | "id": "UsLyXStH3Vmf", 195 | "outputId": "c0ee6c65-0019-4cfd-c112-26f56df5f982" 196 | }, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "2023-05-04 08:36:17.307478: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 203 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 204 | "2023-05-04 08:36:18.357707: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 205 | "Total tokens: 1000000\n", 206 | "2023-05-04 08:36:23.974820: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 207 | "2023-05-04 08:36:24.015766: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 208 | "2023-05-04 08:36:24.017665: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 209 | "2023-05-04 08:36:24.018575: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 210 | "2023-05-04 08:36:24.018927: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 211 | "2023-05-04 08:36:24.019216: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 212 | "2023-05-04 08:36:26.407877: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 213 | "2023-05-04 08:36:26.408814: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 214 | "2023-05-04 08:36:26.409390: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 215 | "2023-05-04 08:36:26.409890: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", 216 | "2023-05-04 08:36:26.409982: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13678 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5\n", 217 | "100% 999989/999989 [00:02<00:00, 406245.75it/s]\n", 218 | "100% 999989/999989 [00:00<00:00, 2283613.37it/s]\n", 219 | "(None, 10, 512)\n", 220 | "2023-05-04 08:36:37.054402: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 221 | "2023-05-04 08:36:37.539931: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 222 | "2023-05-04 08:36:37.930348: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 223 | "2023-05-04 08:36:56.850900: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x2c1b9680 initialized for platform CUDA (this does not guarantee that XLA will be used). 
Devices:\n", 224 | "2023-05-04 08:36:56.850957: I tensorflow/compiler/xla/service/service.cc:177] StreamExecutor device (0): Tesla T4, Compute Capability 7.5\n", 225 | "2023-05-04 08:36:56.855950: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", 226 | "2023-05-04 08:36:57.021420: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n", 227 | "2023-05-04 08:36:57.170246: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", 228 | "1954/1954 [==============================] - 1013s 505ms/step - loss: 7.1059\n", 229 | "100% 190/190 [00:37<00:00, 5.08it/s]\n", 230 | "2023-05-04 08:55:01.131855: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 231 | "Vocabulary size saved: 17843\n", 232 | " and the girl went to the paddock gate to the farmMayfly filly, and accustomed to the possessive instinct of self-preservation stirred within him. Was it being a moment of death, but he had not gone to experience himself, had tasted too many of those people who have been mistress of France during the fortnight since the summer had been together with the laws of Nature and the benefit of the male lineage of the summer holidays. And the streets of a sort of capital which is the eyebrows down; I shall have to be in the minute.\" \"Don't!\" cried through stamping his foot; \"it's inhuman. Listen! Is there any condition of the seen--will be a happy without a single form, while she was going to see how it was a marvellous country; and got up and went to school. And Gyp said quietly: \"Yes, I believe, this is to be safe.\" \"No,\" muttered me income, and I shall be very different from you.\" Gyp \n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "!python MinimalGPT.py -d './dataset/dataset.txt' -l 0.00001 -ol 200 -e 1 -b 512 -s 10 -dm 512 -p 8 -ds 6 -ts 1000000 -te 2000000 -sd -st './model/tokenizer_fine_tuned_on_2.mgt' -sw './model/weights_fine_tuned_on_2.mgw' -lt './model/tokenizer2.mgt' -lw './model/weights2.mgw'" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 6, 243 | "metadata": { 244 | "colab": { 245 | "base_uri": "https://localhost:8080/" 246 | }, 247 | "id": "6ZfPZICy36q1", 248 | "outputId": "c303f9a4-eb00-4d94-e846-be3768e5cc87" 249 | }, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "2023-05-04 08:55:05.086259: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 256 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 257 | "2023-05-04 08:55:06.170183: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 258 | "Total tokens: 1000000\n", 259 | "2023-05-04 08:55:11.989363: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 260 | "2023-05-04 08:55:12.031155: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 261 | "2023-05-04 08:55:12.031664: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 262 | "2023-05-04 08:55:12.032553: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 263 | "2023-05-04 08:55:12.032855: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 264 | "2023-05-04 08:55:12.033143: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 265 | "2023-05-04 08:55:13.508261: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 266 | "2023-05-04 08:55:13.508752: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 267 | "2023-05-04 08:55:13.509128: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 268 | "2023-05-04 08:55:13.509329: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. 
Original config value was 0.\n", 269 | "2023-05-04 08:55:13.509378: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13678 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5\n", 270 | "100% 999989/999989 [00:02<00:00, 341338.16it/s]\n", 271 | "100% 999989/999989 [00:00<00:00, 1226614.27it/s]\n", 272 | "(None, 10, 512)\n", 273 | "2023-05-04 08:55:25.180820: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 274 | "2023-05-04 08:55:25.665798: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 275 | "2023-05-04 08:55:26.054871: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 276 | "2023-05-04 08:55:45.217674: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x294f4740 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n", 277 | "2023-05-04 08:55:45.217730: I tensorflow/compiler/xla/service/service.cc:177] StreamExecutor device (0): Tesla T4, Compute Capability 7.5\n", 278 | "2023-05-04 08:55:45.222986: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", 279 | "2023-05-04 08:55:45.397868: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n", 280 | "2023-05-04 08:55:45.544886: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", 281 | "1954/1954 [==============================] - 1011s 504ms/step - loss: 6.6840\n", 282 | "100% 190/190 [00:35<00:00, 5.28it/s]\n", 283 | "2023-05-04 09:13:48.840421: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 284 | "Vocabulary size saved: 17843\n", 285 | " [UNK] s of the first time of the first time of the first day in the first of the river, and the house in the young man, and the little room, and the young man with a peculiar to the whole of the old chap like a man of his wife. He had not been so much of his niece, and he had a little of the old man, who had been a good of the world. The little thing was standing with the little white and hollow in the middle of the bright day of the scent of the flower of the [UNK] s of the old man who had been a great cat's premiss, and the rest of the world, the world of the world was so that he could not be a case of the old woman who had been a good to be a little friend of the other man, and the little bright-haired young man, and the young man who had a long time of the last night, and the little bottle. 
The door was opened, and went \n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "!python MinimalGPT.py -d './dataset/dataset.txt' -l 0.00001 -ol 200 -e 1 -b 512 -s 10 -dm 512 -p 8 -ds 6 -ts 1000000 -te 2000000 -sd -st './model/tokenizer_fine_tuned_on_3.mgt' -sw './model/weights_fine_tuned_on_3.mgw' -lt './model/tokenizer3.mgt' -lw './model/weights3.mgw'" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 10, 296 | "metadata": { 297 | "colab": { 298 | "base_uri": "https://localhost:8080/" 299 | }, 300 | "id": "C5uhc45yCmdg", 301 | "outputId": "1d43c7ce-8453-4d27-b123-ce2aaad1591d" 302 | }, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "2023-05-04 09:18:49.547613: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 309 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 310 | "2023-05-04 09:18:50.674122: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 311 | "2023-05-04 09:18:53.039629: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 312 | "2023-05-04 09:18:53.079301: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 313 | "2023-05-04 09:18:53.079660: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 314 | "2023-05-04 09:18:53.080563: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 315 | "2023-05-04 09:18:53.080908: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 316 | "2023-05-04 09:18:53.081170: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 317 | "2023-05-04 09:18:54.353963: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 318 | "2023-05-04 09:18:54.354258: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 319 | "2023-05-04 09:18:54.354473: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 320 | "2023-05-04 09:18:54.354649: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", 321 | "2023-05-04 09:18:54.354698: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13678 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5\n", 322 | "(None, 10, 512)\n", 323 | "2023-05-04 09:18:56.410938: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 324 | "2023-05-04 09:18:56.897877: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 325 | "2023-05-04 09:18:57.289392: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 326 | "100% 4990/4990 [15:58<00:00, 5.21it/s]\n", 327 | " [UNK] s of the first time of the first time of the first day in the first of the river, and the house in the young man, and the little room, and the young man with a peculiar to the whole of the old chap like a man of his wife. He had not been so much of his niece, and he had a little of the old man, who had been a good of the world. The little thing was standing with the little white and hollow in the middle of the bright day of the scent of the flower of the [UNK] s of the old man who had been a great cat's premiss, and the rest of the world, the world of the world was so that he could not be a case of the old woman who had been a good to be a little friend of the other man, and the little bright-haired young man, and the young man who had a long time of the last night, and the little bottle. The door was opened, and went on the door, and went out of the street. The lamps were the little of the window, and sat down in the dark and the little dark eyes of the dark hair, and the light of the moon was the pale sky was the same as though they were all the raceast, and the little of the old man who had been in the service of the old man of the old man's bed. The little of the window and the eyes of the window and the road, and the whitened water. 
She was the little of the little dark and the little dark eyes of the dark water, and the dark eyes of the old woman's face buried in the grey and the girl of the wheels, and the little Publius, and the girl who had been in the name of the soldier that he had been to see him in the nature of the future of the Authorities, and the faint scent of the public feeling that she was not quite a new sensation, awakened by the deep sense of comradeship of the old man had been his grandfather's in the same as he had not yet to have been a young man with a peculiar to the whole of the old chap like a man of his wife. He had not been so much of his niece, and he had a little of the old man, who had been a good of the world. The little thing was standing with the little white and hollow in the middle of the bright day of the scent of the flower of the [UNK] s of the old man who had been a great cat's premiss, and the rest of the world, the world of the world was so that he could not be a case of \n" 328 | ] 329 | } 330 | ], 331 | "source": [ 332 | "!python MinimalGPT.py -l 0 -s 10 -dm 512 -p 8 -ds 6 -lt './model/tokenizer_fine_tuned_on_3.mgt' -lw './model/weights_fine_tuned_on_3.mgw' -i -ol 5000" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 11, 338 | "metadata": { 339 | "colab": { 340 | "base_uri": "https://localhost:8080/" 341 | }, 342 | "id": "QXr9NF-ODyrc", 343 | "outputId": "1a2bee96-fbb8-4c9e-b20a-cc638a43e281" 344 | }, 345 | "outputs": [ 346 | { 347 | "name": "stdout", 348 | "output_type": "stream", 349 | "text": [ 350 | "2023-05-04 09:37:25.203034: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 351 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 352 | "2023-05-04 09:37:26.647824: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", 353 | "2023-05-04 09:37:28.259651: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 354 | "2023-05-04 09:37:28.293171: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 355 | "2023-05-04 09:37:28.293439: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 356 | "2023-05-04 09:37:28.294173: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 357 | "2023-05-04 09:37:28.294434: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 358 | "2023-05-04 09:37:28.294692: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 359 | "2023-05-04 09:37:29.348003: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 360 | "2023-05-04 09:37:29.348306: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 361 | "2023-05-04 09:37:29.348527: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", 362 | "2023-05-04 09:37:29.348693: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", 363 | "2023-05-04 09:37:29.348737: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13678 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5\n", 364 | "(None, 10, 512)\n", 365 | "2023-05-04 09:37:31.425428: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 366 | "2023-05-04 09:37:31.922165: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 367 | "2023-05-04 09:37:32.307755: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 365486080 exceeds 10% of free system memory.\n", 368 | "100% 4990/4990 [15:54<00:00, 5.23it/s]\n", 369 | " and the girl went to the paddock gate to the farmMayfly filly, and accustomed to the possessive instinct of self-preservation stirred within him. Was it being a moment of death, but he had not gone to experience himself, had tasted too many of those people who have been mistress of France during the fortnight since the summer had been together with the laws of Nature and the benefit of the male lineage of the summer holidays. And the streets of a sort of capital which is the eyebrows down; I shall have to be in the minute.\" \"Don't!\" cried through stamping his foot; \"it's inhuman. Listen! 
Is there any condition of the seen--will be a happy without a single form, while she was going to see how it was a marvellous country; and got up and went to school. And Gyp said quietly: \"Yes, I believe, this is to be safe.\" \"No,\" muttered me income, and I shall be very different from you.\" Gyp laughed. \"Ugh! I hope.\" He looked up at his old face. He had not given him to feel his father, and had not been very amiable. He had never become quite celebrated to the fellow he had himself to death. Great In the few village of the drawing-room window. The pire, and the tea-time she examined the fields, and said: \"Am in dress and to-day on the chapel, and dry, which she had been unhappy for the sake of the essential quality of wolves. The train was already hot, when they were born, and the other matters, instead of the free and the same moment of the old value of the same ivory skin, and pinched her eyes with its window and gaze of the last resources of the Empire, and the other boys were not to be sure to be told him that he had not changed his smothered his way he was trembling old boy. Time had been dejected, and his face to the ferocious of his lips. The vision of that dead companion had the broken in--\"that to hear the play polo of the park. Swithin's eye was in emotion. This dumb and took the military look of the little timid man, who had fallen in Paris, with a k; he was strong, silent and of a busy with his ears and the air of a drums. “He is fall,” said the official on horseback, fanning the little of hat. Something had seen her look on war. There was the allel of the tall trees and lengthening shadows, and the blue, with the white clouds, and the soft breeze that the stream of the stillness and the little dark study, and the little timid sun came down, and realised that he was standing in his house, and he had removed his grief and his face melted into a white wall. He ran upstairs to his pale and the bedroom, and sat down on the edge of the shining-hats, and the highest degree of the new creature who had always been in the house, of this house; and what was there in the boat. \"It would have been company. It was a country friend of the best. As if he had not heard the contents of his fortnight as he had passed through the dressing-room he went to the door. The room was crowded everything had a rose-door and went on the little stairway, and ending again rustled the greenish of the sunlight filtering through the screen of goldening tree; the woods and rich and colour, glowerburnished-lookingautumA tiny, helpless fear that she was not so that she had not seen. She therefore opened her eyes and saw the corner of her mouth came to her bedside. \"These are the baths, and the sun was burned beside the pot of the great window, and the deep-sunk into the shining of the fingers of the white wall. \"My dear boy; Mr. Farry was still a little d; the kitchen, and a nook to the feet, and a grey heart. Ah! well, rolled the night of the slinking of the centre of the lawn at the time, he had been watching the Circassian maid. How could I have devoted to me.\" A Berberine servant appeared. \"Tea, Mahommed,\" he said. \"And I am sorry for you!\" As if repeating him learned to speak to the sly to whom he had been so long since he had completely returned to his village, and to say that he was standing by the window. It was the oddest sound, and the day of the act; and I had not been actor: I was ill for the young men. \"Mr. Boyne had never yet been so unfortunate that he could not have been asleep? 
He had not been disturbed, and he pressed himself against the wall and the tortured countenance of his thin, yellow fingers. Then she was a sudden straw: \"I shall have a shirt of you; though--\" Mr. Pogram interrupted in, he was rather high, and answered: \"Twenty years is to cut your lady, I am not very soon after I do you think? It's not OLD knowledge, I have power.\" \"I don't know anything about the price of land about there?\" The two years ago.\" \"Will you write to me downstairs. I can't. I shan't be cold. Look at it! You can see you lunch.\" \"All right,\" Bolen said. \"We met no longer in the way of a portent, and surely the war everybody had not been considered mad as Gordy's had any more, Ismail admired greatly. The girl had just striking the most of a little boy, lying on the flat of his sword and violently bade her be silent. She crouched down, hiding her eyes with her ears. A little cold rain began to fall; the first time, and died in the refreshment of the first posture of the blue sky, and the disagree there,\" said he, too, in the dark goldy soul was still and the knob of the general aversions; I was not to see how much I am going to take your relations I shan't be a good bit of you. I don't know--I don't know what I can expect that!\" But if he opposed her, he didn't know what he would do; for that--having settled on the weight of his diplomacy, and the ash-tree they stood, the village of the little timid shaved of her limbs were so sweet and breathless, and harmless, and harmless, and harmless. Almost every quarter of the century, unawake. “If you had been wont to be told you to know what you're talking about. You're a librarian, remember? And he wrote to school pilot. We must go down to John.\" \"I can't see how they're a pretty?\" \"Yes; but that's what I am going to sit down by a fellah and domestic cattle, and mouth, not even to the other, although it before he could not be suspected that he could not be suspected that she could not be clear, even to herself. Love! What would she do like? Her lips were not to put her hand to her bare shoulder. In the next her eyes looked at her oysters in ened. This was a man in town, he was not remarkable for a man, who, preserving their first-precipice, and the sound of the Lowing-machine, and the humming of many bees. It was beautiful here, and the scent of dried clothes and the lime trees of flower, he had been sitting there with a scent of meadow-less, and harm! He passed on. Though he had been bornness. The so, seemed to be buried in the humble robe of duty. A little piece of furniture was visible in the world. His hands were in his pockets and his head sunk on a breast, his legs outstretched face and a baby's face. \"I think he had grown immense, that I shall be tasting the English are generally and I shall be very glad to see that you will never be read, I thought you mightn't take it but I don't think I can go on everything. I'm sure I do, and I do not?\" The two women were silent, looking at the upper classes. He took it was not a bit of property, and I dare to ask you to forgive me to tell you, you'll have your own impression I swear it.\" \"My dear fellow, my master will come to an end. I don't you think.\" \"Then why did you look at him through the dark. He had abandoned the \"Irene! Irene's greatest power in the complete--a youngsters! Time was a man of imagination. The Orient had the girl had done most hundreds of way. If she had never yet exchanged a word. 
Her face was this: My mother came to the east of England, where, and reason for the lows. We must be taken to the suburban girl, when I was admiring the servants of the tall trees and lengthening shadows, and the blue, with the white clouds, and the soft breeze that the stream of the stillness and the little dark study, and the little timid sun came down, and realised that he was standing in his house, and he had removed his grief and his face melted into a white wall. He ran upstairs to his pale and the bedroom, and sat down on the edge of the shining-hats, and the highest degree of the new creature who had always been in the house, of this house; and what was there in the boat. \"It would have been company. It was a country friend of the best. As if he had not heard the contents of his fortnight as he had passed through the dressing-room he went to the door. The room was crowded everything had a rose-door and went on the little stairway, and ending again rustled the greenish of the sunlight filtering through the screen of goldening tree; the woods and rich and colour, glowerburnished-lookingautumA tiny, helpless fear that she was not so that she had not seen. She therefore opened her eyes and saw the corner of her mouth came to her bedside. \"These are the baths, and the sun was burned beside the pot of the great window, and the deep-sunk into the shining of the fingers of the white wall. \"My dear boy; Mr. Farry was still a little d; the kitchen, and a nook to the feet, and a grey heart. Ah! well, rolled the night of the slinking of the centre of the lawn at the time, he had been watching the Circassian maid. How could I have devoted to me.\" A Berberine servant appeared. \"Tea, Mahommed,\" he said. \"And I am sorry for you!\" As if repeating him learned to speak to the sly to whom he had been so long since he had completely returned to his village, and to say that he was standing by the window. It was the oddest sound, and the day of the act; and I had not been actor: I was ill for the young men. \"Mr. Boyne had never yet been so unfortunate that he could not have been asleep? He had not been disturbed, and he pressed himself against the wall and the tortured countenance of his thin, yellow fingers. Then she was a sudden straw: \"I shall have a shirt of you; though--\" Mr. Pogram interrupted in, he was rather high, and answered: \"Twenty years is to cut your lady, I am not very soon after I do you think? It's not OLD knowledge, I have power.\" \"I don't know anything about the price of land about there?\" The two years ago.\" \"Will you write to me downstairs. I can't. I shan't be cold. Look at it! You can see you lunch.\" \"All right,\" Bolen said. \"We met no longer in the way of a portent, and surely the war everybody had not been considered mad as Gordy's had any more, Ismail admired greatly. The girl had just striking the most of a little boy, lying on the flat of his sword and violently bade her be silent. She crouched down, hiding her eyes with her ears. A little cold rain began to fall; the first time, and died in the refreshment of the first posture of the blue sky, and the disagree there,\" said he, too, in the dark goldy soul was still and the knob of the general aversions; I was not to see how much I am going to take your relations I shan't be a good bit of you. 
You can see you lunch.\" \"All right,\" Bolen said. \"We met no longer in the way of a portent, and surely the war everybody had not been considered mad as Gordy's had any more, Ismail admired greatly. The girl had just striking the most of a little boy, lying on the flat of his sword and violently bade her be silent. She crouched down, hiding her eyes with her ears. A little cold rain began to fall; the first time, and died in the refreshment of the first posture of the blue sky, and the disagree there,\" said he, too, in the dark goldy soul was still and the knob of the general aversions; I was not to see how much I am going to take your relations I shan't be a good bit of you. I don't know--I don't know what I can expect that!\" But if he opposed her, he didn't know what he would do; for that--having settled on the weight of his diplomacy, and the ash-tree they stood, the village of the little timid shaved of her limbs were so sweet and breathless, and harmless, and harmless, and harmless. Almost every quarter of the century, unawake. “If you had been wont to be told you to know what you're talking about. You're a librarian, remember? And he wrote to school pilot. We must go down to John.\" \"I can't see how they're a pretty?\" \"Yes; but that's what I am going to sit down by a fellah and domestic cattle, and mouth, not even to the other, although it before he could not be suspected that he could not be suspected that she could not be clear, even to herself. Love! What would she do like? Her lips were not to put her hand to her bare shoulder. In the next her eyes looked at her oysters in ened. This was a man in town, he was not remarkable for a man, who, preserving their first-precipice, and the sound of the Lowing-machine, and the humming \n" 370 | ] 371 | } 372 | ], 373 | "source": [ 374 | "!python MinimalGPT.py -l 0 -s 10 -dm 512 -p 8 -ds 6 -lt './model/tokenizer_fine_tuned_on_2.mgt' -lw './model/weights_fine_tuned_on_2.mgw' -i -ol 5000" 375 | ] 376 | } 377 | ], 378 | "metadata": { 379 | "accelerator": "GPU", 380 | "colab": { 381 | "provenance": [] 382 | }, 383 | "gpuClass": "standard", 384 | "kernelspec": { 385 | "display_name": "Python 3 (ipykernel)", 386 | "language": "python", 387 | "name": "python3" 388 | }, 389 | "language_info": { 390 | "codemirror_mode": { 391 | "name": "ipython", 392 | "version": 3 393 | }, 394 | "file_extension": ".py", 395 | "mimetype": "text/x-python", 396 | "name": "python", 397 | "nbconvert_exporter": "python", 398 | "pygments_lexer": "ipython3", 399 | "version": "3.9.5" 400 | } 401 | }, 402 | "nbformat": 4, 403 | "nbformat_minor": 1 404 | } 405 | --------------------------------------------------------------------------------
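The two cells above run MinimalGPT.py in inference mode against the checkpoints fine-tuned on corpora 3 and 2; each progress bar counts 4990 steps, which is consistent with an output length of 5000 tokens (-ol 5000) generated over the 10-token context window set by -s 10. Below is a minimal sketch of scripting both runs from Python rather than via shell escapes. It assumes only what the cells show — MinimalGPT.py in the working directory and the .mgt/.mgw checkpoint pairs under ./model/ — and copies every flag verbatim from the commands above; it does not reinterpret what each flag means.

import subprocess

# Flags copied verbatim from the two notebook cells; the architecture flags
# (-s 10, -dm 512, -p 8, -ds 6) must match the checkpoints being loaded,
# -i selects inference, and -ol 5000 requests 5000 output tokens.
BASE_ARGS = [
    "python", "MinimalGPT.py",
    "-l", "0", "-s", "10", "-dm", "512", "-p", "8", "-ds", "6",
    "-i", "-ol", "5000",
]

def generate(tag: str) -> str:
    """Run inference with the tokenizer/weights fine-tuned on corpus `tag`."""
    cmd = BASE_ARGS + [
        "-lt", f"./model/tokenizer_fine_tuned_on_{tag}.mgt",
        "-lw", f"./model/weights_fine_tuned_on_{tag}.mgw",
    ]
    # check=True raises if MinimalGPT.py exits nonzero; stdout carries the
    # generated text along with TensorFlow's startup logs, as seen above.
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    return result.stdout

for tag in ("3", "2"):   # the two checkpoints exercised in this notebook
    print(generate(tag))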