├── .gitignore ├── 001_test_list.sh ├── 002_test_completion.sh ├── 003_completions.sh ├── LICENSE ├── README.md ├── download_model.py ├── openai_server ├── __init__.py ├── __main__.py └── gpt │ ├── encoder.py │ ├── generate_unconditional_samples.py │ ├── interactive_conditional_samples.py │ ├── model.py │ └── sample.py ├── prod.sh ├── requirements.txt └── start.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | *.pyc 4 | /models 5 | -------------------------------------------------------------------------------- /001_test_list.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OPENAI_API_KEY="${OPENAI_API_KEY:-stub}" 3 | set -ex 4 | export PORT="${PORT:-9000}" 5 | export OPENAI_API_BASE="${OPENAI_API_BASE:-http://localhost:${PORT}}" 6 | #export OPENAI_API_BASE="${OPENAI_API_BASE:-https://api.openai.com}" 7 | #export OPENAI_LOG="${OPENAI_LOG:-debug}" 8 | 9 | set +ex 10 | #export OPENAI_LOG="${OPENAI_LOG:-debug}" 11 | set -x 12 | exec openai api engines.list "$@" 13 | -------------------------------------------------------------------------------- /002_test_completion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OPENAI_API_KEY="${OPENAI_API_KEY:-stub}" 3 | set -ex 4 | export PORT="${PORT:-9000}" 5 | export OPENAI_API_BASE="${OPENAI_API_BASE:-http://localhost:${PORT}}" 6 | #export OPENAI_API_BASE="${OPENAI_API_BASE:-https://api.openai.com}" 7 | #export OPENAI_LOG="${OPENAI_LOG:-debug}" 8 | 9 | prompt="${1:-Hello, my name is}" 10 | 11 | if [ -e "$prompt" ] 12 | then 13 | prompt="$(cat "$prompt")" 14 | fi 15 | 16 | ENGINE="${ENGINE:-${E:-davinci}}" 17 | TEMPERATURE="${TEMPERATURE:-${T:-0.9}}" 18 | MAX_TOKENS="${MAX_TOKENS:-${M:-12}}" 19 | N="${N:-1}" 20 | set +ex 21 | shift 1 22 | 23 | set -x 24 | exec openai api completions.create -e "${ENGINE}" -t "${TEMPERATURE}" -M "${MAX_TOKENS}" -n "${N}" -p "${prompt}" "$@" 25 | -------------------------------------------------------------------------------- /003_completions.sh: -------------------------------------------------------------------------------- 1 | export OPENAI_API_KEY="${OPENAI_API_KEY:-stub}" 2 | set -x 3 | export PORT="${PORT:-9000}" 4 | export OPENAI_API_BASE="${OPENAI_API_BASE:-http://localhost:${PORT}}" 5 | #export OPENAI_API_BASE="${OPENAI_API_BASE:-https://api.openai.com}" 6 | #export OPENAI_LOG="${OPENAI_LOG:-debug}" 7 | set +x 8 | 9 | prompt="${1:-Hello, my name is}" 10 | while true 11 | do 12 | prompt="$(openai api completions.create -e davinci -t 0.6 -M 32 -n 1 -p "$prompt")" 13 | printf "\033c" 14 | #echo '----------' 15 | echo "$prompt" 16 | done 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | =============================================================================== 2 | openai-server -- An implementation of the OpenAI API 3 | 4 | Copyright (C) 2020 Shawn Presser. All rights reserved. 
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | 24 | [ MIT license: http://www.opensource.org/licenses/mit-license.php ] 25 | 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # openai-server 2 | 3 | `openai-server` is an implementation of the [OpenAI API](https://openai.com/blog/openai-api/). 4 | 5 | Specifically, we implement `/v1/engines/list` and `/v1/engines/{model_name}/completions` endpoints. 6 | 7 | Both endpoints are mostly feature-complete, with a few differences. The JSON response is identical; any library that works with the OpenAI API will probably work with this. 8 | 9 | To get started, see the [quickstart](#Quickstart) or the [examples](#Examples) or the [JavaScript API](https://github.com/shawwn/tensorfork-openai-api). 10 | 11 | ## Contact 12 | 13 | - Twitter: [@theshawwn](https://twitter.com/theshawwn) 14 | - HN: [sillysaurusx](https://news.ycombinator.com/item?id=23346972) 15 | - ML discord: [https://discordapp.com/invite/x52Xz3y](https://discordapp.com/invite/x52Xz3y) 16 | - Support me on patreon: [patreon.com/shawwn](https://patreon.com/shawwn) 17 | 18 | ## Quickstart 19 | 20 | ```sh 21 | # grab the code. 22 | git clone https://github.com/shawwn/openai-server 23 | cd openai-server 24 | 25 | # install dependencies. 26 | pip3 install -r requirements.txt 27 | 28 | # grab a gpt-2 model. 29 | python3 download_model.py 117M # or 345M, 774M, 1558M 30 | 31 | # start the server. 32 | MODELS=117M bash prod.sh 33 | 34 | # in a new terminal, ask for a completion. 35 | bash 002_test_completion.sh 'Hello there. My name is' 36 | ``` 37 | 38 | Your server is now serving the OpenAI API at localhost:9000. (You can change the port via `export PORT=8000`) 39 | 40 | ## Examples 41 | 42 | ### Generating completions via the `openai` SDK 43 | 44 | You can grab some completions using the official `openai` command-line tool: 45 | ```sh 46 | $ OPENAI_API_BASE=http://localhost:9000 openai api completions.create -e davinci -p 'Hello, world' -t 0.8 -M 16 -n 4 47 | ===== Completion 0 ===== 48 | Hello, world. It seems like a good idea to make a living. The fact that it 49 | ===== Completion 1 ===== 50 | Hello, world. 
This is not the first time you're seeing the same thing at any given 51 | ===== Completion 2 ===== 52 | Hello, world, please do my best to continue the development of Monad and its conforming 53 | ===== Completion 3 ===== 54 | Hello, world controlled enemy. 55 | 56 | "Be careful. We have come across a near total 57 | ``` 58 | 59 | 60 | ### Continuously dump completions to terminal 61 | ```sh 62 | $ bash 003_completions.sh 'Yo dawg, we implemented OpenAI API' 63 | Yo dawg, we implemented OpenAI API. Now, we have the ability to connect to Signal, a cryptographic data store. 64 | 65 | We can now make this secure by using new kid on the block chain, OpenAI. 66 | 67 | OpenAI is the new block chain protocol for the internet. This is a major milestone. As the internet becomes more open and open for everybody, it is important for us to have a robust, high-quality blockchain. It is also important that we never create an untraceable chain. The blockchain is the only way to guarantee that everyone has the same access to the network. 68 | 69 | We are an open consortium and we believe that the blockchain is the bridge between the internet and the rest of the world. We're committed to this project. We believe that the blockchain is a bridge between the internet and 70 | ^C 71 | ``` 72 | 73 | ### Fetch the JSON endpoint manually 74 | ```sh 75 | $ curl 'http://localhost:9000/v1/engines/117M/completions?prompt=Hello,%20my%20name%20is&max_tokens=32&n=4&temperature=0.9&echo=true' 76 | { 77 | "choices": [ 78 | { 79 | "finish-reason": "length", 80 | "index": 0, 81 | "logprobs": null, 82 | "text": "Hello, my name is Loium Chazz, and I have been far from satisfied with your departure. But I will, at least by some chance, give you permission to decide for" 83 | }, 84 | { 85 | "finish-reason": "length", 86 | "index": 1, 87 | "logprobs": null, 88 | "text": "Hello, my name is Tim and my name is Jodie. Yours, Tom.\n\nTim: Oh hello, my name is Tim.\n\nJB: Where?'" 89 | }, 90 | { 91 | "finish-reason": "length", 92 | "index": 2, 93 | "logprobs": null, 94 | "text": "Hello, my name is Rosen Sylvan. That's right, Buck Paoli, who was a member of the Board of Governors for George W. Bush in the 2009 Democratic primary\u2014" 95 | }, 96 | { 97 | "finish-reason": "length", 98 | "index": 3, 99 | "logprobs": null, 100 | "text": "Hello, my name is Nick Martens, I am an English-speaking Canadian, University of Toronto, Mississauga, Canada. I work in a computer software company located in Canada." 101 | } 102 | ], 103 | "created": 1601701785.777768, 104 | "id": "cmpl-3qN8kwW1Ya7_qxWz4h8wuIzN", 105 | "model": "117M", 106 | "object": "text_completion" 107 | } 108 | ``` 109 | 110 | ### Explore via your browser 111 | 112 | You can [open the JSON endpoint in your browser](http://localhost:9000/v1/engines/117M/completions?prompt=Hello,%20my%20name%20is&max_tokens=32&n=4&temperature=0.9&echo=true) and start playing around with the query params. 113 | 114 | ### A simple bash script for dumping completions 115 | 116 | ```sh 117 | $ T=0.8 M=32 bash 002_test_completion.sh 'Hello, my name is' 118 | Hello, my name is Plato and, like many of you, I am very happy with the pre-release. 119 | 120 | The primary goal of the pre-release was to provide 121 | ``` 122 | 123 | The first argument to `002_test_completion.sh` is the prompt: 124 | ```sh 125 | bash 002_test_completion.sh 'Hello there. 
My name is' 126 | ``` 127 | 128 | You can set the temperature using `T=0.8` and the token count using `M=32`: 129 | ```sh 130 | T=0.8 M=32 bash 002_test_completion.sh 'Hello there. My name is' 131 | ``` 132 | 133 | To read a prompt from a file, simply pass in the filename. If the first argument is a valid filename, the file becomes the prompt: 134 | ```sh 135 | T=0.8 M=32 bash 002_test_completion.sh README.md 136 | ``` 137 | 138 | If the prompt is too long, the last `1023 - M` tokens of the prompt are used. **Note**: This means if you request 500 tokens, only the last `1023 - 500 = 523` tokens of the prompt are used. Therefore, to let GPT see as many prompt tokens as possible, request a small number of tokens (e.g. 16). 139 | 140 | ### Setting up everything from scratch 141 | 142 | A complete example of how to go from zero code to a fully functional OpenAI API server: 143 | 144 | ```sh 145 | # grab the code. 146 | git clone https://github.com/shawwn/openai-server 147 | cd openai-server 148 | 149 | # install dependencies. 150 | pip3 install -r requirements.txt 151 | 152 | # grab all models (requires ~8GB of disk space; if low, just download 117M, which only requires 550MB) 153 | python3 download_model.py 117M 154 | python3 download_model.py 345M 155 | python3 download_model.py 774M 156 | python3 download_model.py 1558M 157 | 158 | # then, do *one* of the following: 159 | 160 | # ...serve one specific model: 161 | MODELS=117M bash prod.sh 162 | 163 | # ...or serve multiple models: 164 | MODELS=1558M,117M bash prod.sh 165 | 166 | # ...or serve all models you've downloaded (the default): 167 | bash prod.sh 168 | ``` 169 | 170 | The server listens on port 9000 by default. You can change it via `PORT`: 171 | ```sh 172 | PORT=8080 bash prod.sh 173 | ``` 174 | 175 | Now that the server is running, you can start making API requests. See [examples](#Examples). 176 | 177 | ## Notes 178 | 179 | ### A warning about frequency_penalty 180 | 181 | For 1558M, the best results seem to come from `temperature=0.6` and `frequency_penalty=0.9`: 182 | ```sh 183 | curl 'http://localhost:9000/v1/engines/1558M/completions?prompt=Hello,%20my%20name%20is&max_tokens=32&n=4&temperature=0.4&frequency_penalty=0.9&echo=true' 184 | ``` 185 | 186 | But beware: you shouldn't use `frequency_penalty` unless your model is the largest (1558M, commonly known as "1.5B"). For some reason, `frequency_penalty` causes the output to be scrambled when the model is smaller than 1558M. 187 | 188 | ### Running in production 189 | 190 | For production usage, consider running the server via the following command: 191 | 192 | ```sh 193 | while true; do MODELS=117M bash prod.sh ; sleep 20 ; done 194 | ``` 195 | 196 | That way, if the server terminates for any reason, it will automatically restart. 197 | 198 | For endpoint monitoring, I recommend [updown.io](https://updown.io/). 199 | 200 | ## Community 201 | 202 | ### Join the ML Discord 203 | 204 | If you're an ML enthusiast, join the [ML Discord](https://discordapp.com/invite/x52Xz3y).
205 | There are ~800 members, with ~120 online at any given time: 206 | 207 | ![image](https://user-images.githubusercontent.com/59632/84269906-bc7d2080-aade-11ea-8b4e-f78412855d43.png) 208 | 209 | There are a variety of interesting channels: 210 | 211 | - `#papers` for pointing out interesting research papers 212 | - `#research` for discussing ML research 213 | - `#show` and `#samples` for showing off your work 214 | - `#hardware` for hardware enthusiasts 215 | - `#ideas` for brainstorming 216 | - `#tensorflow` and `#pytorch` 217 | - `#cats`, `#doggos`, and of course `#memes` 218 | - Quite a few more. 219 | 220 | ## Support me 221 | 222 | *If you found this library helpful, consider [joining my patreon](https://patreon.com/shawwn).* 223 | 224 | -------------------------------------------------------------------------------- /download_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import requests 4 | from tqdm import tqdm 5 | 6 | if len(sys.argv) != 2: 7 | print('You must enter the model name as a parameter, e.g.: download_model.py 124M') 8 | sys.exit(1) 9 | 10 | model = sys.argv[1] 11 | 12 | subdir = os.path.join('models', model) 13 | if not os.path.exists(subdir): 14 | os.makedirs(subdir) 15 | subdir = subdir.replace('\\','/') # needed for Windows 16 | 17 | name = 'model.ckpt' 18 | 19 | for filename in ['checkpoint','hparams.json','encoder.json','vocab.bpe','model.ckpt.index', 'model.ckpt.meta', 'model.ckpt.data-00000-of-00001']: 20 | 21 | filename = filename.replace('model.ckpt', name) 22 | 23 | bucket = os.environ.get('BUCKET', 'gpt-2') 24 | path = os.environ.get('MODEL_DIR', 'gs://{bucket}/{subdir}'.format(bucket=bucket, subdir=subdir)).lstrip('gs:').strip('/') 25 | url = "https://openaipublic.blob.core.windows.net/" + path + "/" + filename 26 | r = requests.get(url, stream=True) 27 | if not r.ok and filename == 'checkpoint': 28 | raise FileNotFoundError(url) 29 | 30 | if not r.ok: 31 | continue 32 | 33 | with open(os.path.join(subdir, filename), 'wb') as f: 34 | file_size = int(r.headers["content-length"]) 35 | chunk_size = 1000 36 | with tqdm(ncols=100, desc="Fetching " + filename, total=file_size, unit_scale=True) as pbar: 37 | # 1k for chunk_size, since Ethernet packet size is around 1500 bytes 38 | for chunk in r.iter_content(chunk_size=chunk_size): 39 | f.write(chunk) 40 | pbar.update(chunk_size) 41 | if filename == 'checkpoint': 42 | with open(os.path.join(subdir, filename)) as f: 43 | for line in f: 44 | if line.startswith('model_checkpoint_path'): 45 | name = line.split(':', 1)[1].strip().strip('"') 46 | 47 | 48 | -------------------------------------------------------------------------------- /openai_server/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tqdm 4 | import traceback 5 | import time 6 | import base64 7 | import secrets 8 | sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'gpt')) 9 | from json import loads, dumps 10 | from json import load as json_load 11 | 12 | from sanic import Sanic 13 | from sanic.response import json, text 14 | from sanic_cors import CORS, cross_origin 15 | 16 | from pprint import pprint as pp 17 | 18 | from openai_server.gpt import sample, model, encoder 19 | 20 | import tensorflow as tf 21 | import ftfy 22 | 23 | from tokenizers import Tokenizer 24 | from transformers import GPT2TokenizerFast 25 | 26 | 27 | class GPTEngine: 28 | def __init__(self, api, 
model_name, batch_size=1): 29 | self.api = api 30 | self.id = model_name 31 | self.ckpt = tf.train.latest_checkpoint(os.path.join(api.model_path, model_name)) 32 | if self.ckpt is None: 33 | raise ValueError("Couldn't load checkpoint for {model_name} from {path}".format(model_name=model_name, path=os.path.join(api.model_path, model_name))) 34 | self.graph = tf.Graph() 35 | self.config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True) 36 | self.session = tf.Session(graph=self.graph, config=self.config) 37 | #self.encoder = encoder.get_encoder(model_name, self.api.model_path) 38 | self.encoder = GPT2TokenizerFast.from_pretrained("gpt2") 39 | self.hparams = model.default_hparams() 40 | with open(os.path.join(self.api.model_path, model_name, 'hparams.json')) as f: 41 | params = json_load(f) 42 | self.hparams.override_from_dict(params) 43 | with self.session.as_default() as sess, self.graph.as_default() as graph: 44 | pp(self.session.list_devices()) 45 | if 'CUDA_VISIBLE_DEVICES' in os.environ: 46 | print('Using /gpu:0 on device {}'.format(os.environ['CUDA_VISIBLE_DEVICES'])) 47 | with tf.device('/gpu:0' if 'CUDA_VISIBLE_DEVICES' in os.environ else None): 48 | self.batch_size = batch_size 49 | self.context = tf.placeholder(tf.int32, [self.batch_size, None], name="context") 50 | self.length = tf.placeholder(tf.int32, (), name="length") 51 | self.temperature = tf.placeholder(tf.float32, (), name="temperature") 52 | self.top_k = tf.placeholder(tf.int32, (), name="top_k") 53 | self.top_p = tf.placeholder(tf.float32, (), name="top_p") 54 | self.frequency_penalty = tf.placeholder(tf.float32, (), name="frequency_penalty") 55 | #np.random.seed(seed) 56 | #tf.set_random_seed(seed) 57 | self.output = sample.sample_sequence( 58 | hparams=self.hparams, 59 | length=self.length, 60 | context=self.context, 61 | batch_size=self.batch_size, 62 | temperature=self.temperature, 63 | top_k=self.top_k, 64 | top_p=self.top_p, 65 | frequency_penalty=self.frequency_penalty, 66 | ) 67 | var_list = tf.trainable_variables() 68 | self.saver = tf.train.Saver(var_list=var_list) 69 | for v in var_list: 70 | print(self.ckpt, v) 71 | pp(self.hparams) 72 | print('Restoring from {!r}'.format(self.ckpt)) 73 | self.saver.restore(sess, self.ckpt) 74 | 75 | 76 | def fix(self, text): 77 | fixed = ftfy.fix_text(text) 78 | return fixed 79 | 80 | 81 | # GPT2Tokenizer and Tokenizer has different ways of fetching token ids 82 | def encode(self, text, encoder=None): 83 | if encoder is None: 84 | encoder = self.encoder 85 | result = encoder.encode(text) 86 | if isinstance(result, list): 87 | return result 88 | return result.ids 89 | 90 | 91 | def completion(self, prompt, n=None, max_tokens=None, logprobs=None, stream=False, temperature=None, top_p=None, top_k=None, echo=None, frequency_penalty=None, best_of=None, stop=None, **kws): 92 | if temperature is None: 93 | temperature = 0.9 94 | if top_p is None: 95 | top_p = 1.0 96 | if top_k is None: 97 | top_k = 0 98 | if max_tokens is None: 99 | max_tokens = 16 100 | if max_tokens > int(os.environ.get('MAX_TOKENS', '500')): 101 | max_tokens = int(os.environ.get('MAX_TOKENS', '500')) 102 | if n is None: 103 | n = 1 104 | if n >= int(os.environ.get('MAX_N', '4')): 105 | n = int(os.environ.get('MAX_N', '4')) # cap to 4 choices 106 | if echo is None: 107 | echo = False 108 | if frequency_penalty is None or frequency_penalty <= 0.0: 109 | frequency_penalty = 1.0 110 | if stop is not None: 111 | if isinstance(stop, str): 112 | stop = [stop] 113 | print('Stop: {!r}'.format(stop)) 
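# NOTE: logprobs, stream, and best_of are accepted for API compatibility but are currently ignored (logprobs is always null in the response). The rest of this method does the actual sampling: ftfy-fix and encode the prompt, drop tokens from the front until prompt + max_tokens fits within the model's context window (hparams.n_ctx), run the sampling graph n times, truncate each completion at the first stop sequence (finish-reason 'stop' vs. 'length'), prepend the prompt when echo is set, and yield one OpenAI-style choice dict per completion.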
114 | if len(kws) > 0: 115 | print('Got extra keywords: {!r}'.format(kws)) 116 | prompt = self.fix(prompt) 117 | with self.session.as_default() as sess, self.graph.as_default() as graph: 118 | tokens = self.encode(prompt) 119 | while len(tokens) + max_tokens >= self.hparams.n_ctx: 120 | tokens = tokens[1:] 121 | length = max_tokens 122 | for i in range(n): 123 | params = { 124 | self.temperature: temperature, 125 | self.top_p: top_p, 126 | self.top_k: top_k, 127 | self.frequency_penalty: frequency_penalty, 128 | self.length: length, 129 | } 130 | print(params) 131 | result = self.session.run(self.output, {self.context: [tokens], **params}) 132 | result_tokens = result[0] 133 | completion = result_tokens[len(tokens):] 134 | completion_text = self.encoder.decode(completion) 135 | finish_reason = 'length' 136 | if stop is not None: 137 | for s in stop: 138 | if s in completion_text: 139 | completion_text = completion_text.split(s, 1)[0] 140 | finish_reason = 'stop' 141 | if echo: 142 | text = prompt + completion_text 143 | else: 144 | text = completion_text 145 | print(repr(text)) 146 | yield {'index': i, 'logprobs': None, 'text': text, 'finish-reason': finish_reason} 147 | 148 | class API: 149 | def __init__(self, model_path=None): 150 | if model_path is None: 151 | model_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models') 152 | self.model_path = model_path 153 | self.models = [] 154 | self.engines = {} 155 | if 'MODELS' in os.environ: 156 | models = os.environ['MODELS'].split(',') 157 | else: 158 | models = os.listdir(self.model_path) 159 | for model in tqdm.tqdm(models): 160 | try: 161 | engine = GPTEngine(api=self, model_name=model) 162 | self.engines[model] = engine 163 | self.models.append(model) 164 | except: 165 | traceback.print_exc() 166 | pp(self.engines) 167 | pp(self.models) 168 | 169 | 170 | def engines_list(self): 171 | for model in self.models: 172 | yield { 173 | "id": model, 174 | "object": "engine", 175 | "owner": "openai", 176 | "ready": True, 177 | } 178 | 179 | api = API() 180 | 181 | app = Sanic() 182 | CORS(app) 183 | 184 | 185 | def log_request(request): 186 | #import pdb; pdb.set_trace() 187 | headers = dict(list(request.headers.items())) 188 | if 'authorization' in headers: 189 | del headers['authorization'] 190 | headers['x-openai-client-user-agent'] = loads(headers.get('x-openai-client-user-agent', '{}')) 191 | props = {} 192 | props['url'] = request.url 193 | props['method'] = request.method 194 | props['headers'] = headers 195 | props['request'] = request.json 196 | pp(props) 197 | #print(request.json) 198 | 199 | 200 | @app.route('/v1/engines') 201 | async def v1_engines_list(request): 202 | log_request(request) 203 | res = {"object": "list", "data": []} 204 | for result in api.engines_list(): 205 | res["data"].append(result) 206 | return json(res) 207 | return json({ 208 | "data": [ 209 | { 210 | "id": "ada", 211 | "object": "engine", 212 | "owner": "openai", 213 | "ready": True 214 | }, 215 | { 216 | "id": "ada-beta", 217 | "object": "engine", 218 | "owner": "openai", 219 | "ready": True 220 | }, 221 | { 222 | "id": "babbage", 223 | "object": "engine", 224 | "owner": "openai", 225 | "ready": True 226 | }, 227 | { 228 | "id": "babbage-beta", 229 | "object": "engine", 230 | "owner": "openai", 231 | "ready": True 232 | }, 233 | { 234 | "id": "curie", 235 | "object": "engine", 236 | "owner": "openai", 237 | "ready": True 238 | }, 239 | { 240 | "id": "curie-beta", 241 | "object": "engine", 242 | "owner": "openai", 243 | 
"ready": True 244 | }, 245 | { 246 | "id": "davinci", 247 | "object": "engine", 248 | "owner": "openai", 249 | "ready": True 250 | }, 251 | { 252 | "id": "davinci-beta", 253 | "object": "engine", 254 | "owner": "openai", 255 | "ready": True 256 | } 257 | ], 258 | "object": "list" 259 | }) 260 | 261 | 262 | def random_id(prefix, nbytes=18): 263 | token = secrets.token_bytes(nbytes) 264 | return prefix + '-' + base64.urlsafe_b64encode(token).decode('utf8') 265 | 266 | 267 | def number(x): 268 | if isinstance(x, str): 269 | try: 270 | x = int(x) 271 | except ValueError: 272 | try: 273 | x = float(x) 274 | except ValueError: 275 | pass 276 | return x 277 | 278 | 279 | def json_pretty_dumps(x): 280 | return dumps(x, sort_keys=True, indent=2) 281 | 282 | 283 | from urllib import parse 284 | 285 | @app.route('/v1/engines//completions', methods=['POST', 'GET']) 286 | async def v1_engines_completions(request, engine_name): 287 | log_request(request) 288 | kws = request.json 289 | if kws is None: 290 | url, query = request.url.split('?', 1) if '?' in request.url else (request.url, '') 291 | kws = dict(parse.parse_qsl(query)) 292 | kws = {k: number(v) for k, v in kws.items()} 293 | pp(kws) 294 | engine = None 295 | if engine_name in api.engines: 296 | engine = api.engines[engine_name] 297 | else: 298 | # rather than throw an error when someone attempts to use an 299 | # invalid engine, silently fall back to any valid engine for 300 | # simplicity. E.g. if they try to request 'davinci' but you're 301 | # serving 117M, then automatically fall back to 117M. 302 | for info in api.engines_list(): 303 | print('Warning: attempted to use invalid enngine {!r}; falling back to engine {!r}'.format(engine_name, info['id'])) 304 | engine = api.engines[info['id']] 305 | break 306 | if engine is None: 307 | raise RuntimeError("Not serving any models. Try running `python3 download_model.py 117M` and be sure to `export MODELS=117M` before starting the server.") 308 | 309 | choices = [] 310 | for choice in engine.completion(**kws): 311 | choices.append(choice) 312 | id_ = random_id("cmpl") 313 | return json({"id": id_, "object": "text_completion", "created": time.time(), "model": engine.id, "choices": choices}, dumps=json_pretty_dumps) 314 | #return json({"id": "cmpl-Wt5z1RZglyDHHl0SnSvKWVzA", "object": "text_completion", "created": 1599616871, "model": "davinci:2020-05-03", "choices": [{"text": "Test.SetLayerPropertiesWithNonContainedInvisible (", "index": 0, "logprobs": None, "finish_reason": "length"}]}) 315 | 316 | if __name__ == '__main__': 317 | args = sys.argv[1:] 318 | port = int(args[0] if len(args) > 0 else os.environ.get('PORT', '9000')) 319 | app.run(host='0.0.0.0', port=port) 320 | 321 | -------------------------------------------------------------------------------- /openai_server/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from . 
import app 4 | 5 | if __name__ == '__main__': 6 | args = sys.argv[1:] 7 | port = int(args[0] if len(args) > 0 else os.environ.get('PORT', '9000')) 8 | app.run(host='0.0.0.0', port=port) 9 | 10 | -------------------------------------------------------------------------------- /openai_server/gpt/encoder.py: -------------------------------------------------------------------------------- 1 | """Byte pair encoding utilities""" 2 | 3 | import os 4 | import json 5 | import regex as re 6 | from functools import lru_cache 7 | 8 | @lru_cache() 9 | def bytes_to_unicode(): 10 | """ 11 | Returns list of utf-8 byte and a corresponding list of unicode strings. 12 | The reversible bpe codes work on unicode strings. 13 | This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. 14 | When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. 15 | This is a signficant percentage of your normal, say, 32K bpe vocab. 16 | To avoid that, we want lookup tables between utf-8 bytes and unicode strings. 17 | And avoids mapping to whitespace/control characters the bpe code barfs on. 18 | """ 19 | bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) 20 | cs = bs[:] 21 | n = 0 22 | for b in range(2**8): 23 | if b not in bs: 24 | bs.append(b) 25 | cs.append(2**8+n) 26 | n += 1 27 | cs = [chr(n) for n in cs] 28 | return dict(zip(bs, cs)) 29 | 30 | def get_pairs(word): 31 | """Return set of symbol pairs in a word. 32 | 33 | Word is represented as tuple of symbols (symbols being variable-length strings). 34 | """ 35 | pairs = set() 36 | prev_char = word[0] 37 | for char in word[1:]: 38 | pairs.add((prev_char, char)) 39 | prev_char = char 40 | return pairs 41 | 42 | class Encoder: 43 | def __init__(self, encoder, bpe_merges, errors='replace'): 44 | self.encoder = encoder 45 | self.decoder = {v:k for k,v in self.encoder.items()} 46 | self.errors = errors # how to handle errors in decoding 47 | self.byte_encoder = bytes_to_unicode() 48 | self.byte_decoder = {v:k for k, v in self.byte_encoder.items()} 49 | self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges)))) 50 | self.cache = {} 51 | 52 | # Should haved added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions 53 | self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") 54 | 55 | def bpe(self, token): 56 | if token in self.cache: 57 | return self.cache[token] 58 | word = tuple(token) 59 | pairs = get_pairs(word) 60 | 61 | if not pairs: 62 | return token 63 | 64 | while True: 65 | bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf'))) 66 | if bigram not in self.bpe_ranks: 67 | break 68 | first, second = bigram 69 | new_word = [] 70 | i = 0 71 | while i < len(word): 72 | try: 73 | j = word.index(first, i) 74 | new_word.extend(word[i:j]) 75 | i = j 76 | except: 77 | new_word.extend(word[i:]) 78 | break 79 | 80 | if word[i] == first and i < len(word)-1 and word[i+1] == second: 81 | new_word.append(first+second) 82 | i += 2 83 | else: 84 | new_word.append(word[i]) 85 | i += 1 86 | new_word = tuple(new_word) 87 | word = new_word 88 | if len(word) == 1: 89 | break 90 | else: 91 | pairs = get_pairs(word) 92 | word = ' '.join(word) 93 | self.cache[token] = word 94 | return word 95 | 96 | def encode(self, text): 97 | bpe_tokens = [] 98 | for token in re.findall(self.pat, text): 99 | token = ''.join(self.byte_encoder[b] for b in 
token.encode('utf-8')) 100 | bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) 101 | return bpe_tokens 102 | 103 | def decode(self, tokens): 104 | text = ''.join([self.decoder[token] for token in tokens]) 105 | text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors=self.errors) 106 | return text 107 | 108 | def get_encoder(model_name, models_dir): 109 | with open(os.path.join(models_dir, model_name, 'encoder.json'), 'r') as f: 110 | encoder = json.load(f) 111 | with open(os.path.join(models_dir, model_name, 'vocab.bpe'), 'r', encoding="utf-8") as f: 112 | bpe_data = f.read() 113 | bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split('\n')[1:-1]] 114 | return Encoder( 115 | encoder=encoder, 116 | bpe_merges=bpe_merges, 117 | ) 118 | -------------------------------------------------------------------------------- /openai_server/gpt/generate_unconditional_samples.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import fire 4 | import json 5 | import os 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | import model, sample, encoder 10 | 11 | def sample_model( 12 | model_name='124M', 13 | seed=None, 14 | nsamples=0, 15 | batch_size=1, 16 | length=None, 17 | temperature=1, 18 | top_k=0, 19 | top_p=1, 20 | models_dir='models', 21 | ): 22 | """ 23 | Run the sample_model 24 | :model_name=124M : String, which model to use 25 | :seed=None : Integer seed for random number generators, fix seed to 26 | reproduce results 27 | :nsamples=0 : Number of samples to return, if 0, continues to 28 | generate samples indefinately. 29 | :batch_size=1 : Number of batches (only affects speed/memory). 30 | :length=None : Number of tokens in generated text, if None (default), is 31 | determined by model hyperparameters 32 | :temperature=1 : Float value controlling randomness in boltzmann 33 | distribution. Lower temperature results in less random completions. As the 34 | temperature approaches zero, the model will become deterministic and 35 | repetitive. Higher temperature results in more random completions. 36 | :top_k=0 : Integer value controlling diversity. 1 means only 1 word is 37 | considered for each step (token), resulting in deterministic completions, 38 | while 40 means 40 words are considered at each step. 0 (default) is a 39 | special setting meaning no restrictions. 40 generally is a good value. 40 | :models_dir : path to parent folder containing model subfolders 41 | (i.e. 
contains the folder) 42 | """ 43 | models_dir = os.path.expanduser(os.path.expandvars(models_dir)) 44 | enc = encoder.get_encoder(model_name, models_dir) 45 | hparams = model.default_hparams() 46 | with open(os.path.join(models_dir, model_name, 'hparams.json')) as f: 47 | hparams.override_from_dict(json.load(f)) 48 | 49 | if length is None: 50 | length = hparams.n_ctx 51 | elif length > hparams.n_ctx: 52 | raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx) 53 | 54 | with tf.Session(graph=tf.Graph()) as sess: 55 | np.random.seed(seed) 56 | tf.set_random_seed(seed) 57 | 58 | output = sample.sample_sequence( 59 | hparams=hparams, length=length, 60 | start_token=enc.encoder['<|endoftext|>'], 61 | batch_size=batch_size, 62 | temperature=temperature, top_k=top_k, top_p=top_p 63 | )[:, 1:] 64 | 65 | saver = tf.train.Saver() 66 | ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name)) 67 | saver.restore(sess, ckpt) 68 | 69 | generated = 0 70 | while nsamples == 0 or generated < nsamples: 71 | out = sess.run(output) 72 | for i in range(batch_size): 73 | generated += batch_size 74 | text = enc.decode(out[i]) 75 | print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40) 76 | print(text) 77 | 78 | if __name__ == '__main__': 79 | fire.Fire(sample_model) 80 | 81 | -------------------------------------------------------------------------------- /openai_server/gpt/interactive_conditional_samples.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import fire 4 | import json 5 | import os 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | import model, sample, encoder 10 | 11 | def interact_model( 12 | model_name='124M', 13 | seed=None, 14 | nsamples=1, 15 | batch_size=1, 16 | length=None, 17 | temperature=1, 18 | top_k=0, 19 | top_p=1, 20 | models_dir='models', 21 | ): 22 | """ 23 | Interactively run the model 24 | :model_name=124M : String, which model to use 25 | :seed=None : Integer seed for random number generators, fix seed to reproduce 26 | results 27 | :nsamples=1 : Number of samples to return total 28 | :batch_size=1 : Number of batches (only affects speed/memory). Must divide nsamples. 29 | :length=None : Number of tokens in generated text, if None (default), is 30 | determined by model hyperparameters 31 | :temperature=1 : Float value controlling randomness in boltzmann 32 | distribution. Lower temperature results in less random completions. As the 33 | temperature approaches zero, the model will become deterministic and 34 | repetitive. Higher temperature results in more random completions. 35 | :top_k=0 : Integer value controlling diversity. 1 means only 1 word is 36 | considered for each step (token), resulting in deterministic completions, 37 | while 40 means 40 words are considered at each step. 0 (default) is a 38 | special setting meaning no restrictions. 40 generally is a good value. 39 | :models_dir : path to parent folder containing model subfolders 40 | (i.e. 
contains the folder) 41 | """ 42 | models_dir = os.path.expanduser(os.path.expandvars(models_dir)) 43 | if batch_size is None: 44 | batch_size = 1 45 | assert nsamples % batch_size == 0 46 | 47 | enc = encoder.get_encoder(model_name, models_dir) 48 | hparams = model.default_hparams() 49 | with open(os.path.join(models_dir, model_name, 'hparams.json')) as f: 50 | hparams.override_from_dict(json.load(f)) 51 | 52 | if length is None: 53 | length = hparams.n_ctx // 2 54 | elif length > hparams.n_ctx: 55 | raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx) 56 | 57 | with tf.Session(graph=tf.Graph()) as sess: 58 | context = tf.placeholder(tf.int32, [batch_size, None]) 59 | np.random.seed(seed) 60 | tf.set_random_seed(seed) 61 | output = sample.sample_sequence( 62 | hparams=hparams, length=length, 63 | context=context, 64 | batch_size=batch_size, 65 | temperature=temperature, top_k=top_k, top_p=top_p 66 | ) 67 | 68 | saver = tf.train.Saver() 69 | ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name)) 70 | saver.restore(sess, ckpt) 71 | 72 | while True: 73 | raw_text = input("Model prompt >>> ") 74 | while not raw_text: 75 | print('Prompt should not be empty!') 76 | raw_text = input("Model prompt >>> ") 77 | context_tokens = enc.encode(raw_text) 78 | generated = 0 79 | for _ in range(nsamples // batch_size): 80 | out = sess.run(output, feed_dict={ 81 | context: [context_tokens for _ in range(batch_size)] 82 | })[:, len(context_tokens):] 83 | for i in range(batch_size): 84 | generated += 1 85 | text = enc.decode(out[i]) 86 | print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40) 87 | print(text) 88 | print("=" * 80) 89 | 90 | if __name__ == '__main__': 91 | fire.Fire(interact_model) 92 | 93 | -------------------------------------------------------------------------------- /openai_server/gpt/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tensorflow.contrib.training import HParams 4 | 5 | def default_hparams(): 6 | return HParams( 7 | n_vocab=0, 8 | n_ctx=1024, 9 | n_embd=768, 10 | n_head=12, 11 | n_layer=12, 12 | ) 13 | 14 | def shape_list(x): 15 | """Deal with dynamic shape in tensorflow cleanly.""" 16 | static = x.shape.as_list() 17 | dynamic = tf.shape(x) 18 | return [dynamic[i] if s is None else s for i, s in enumerate(static)] 19 | 20 | def softmax(x, axis=-1): 21 | x = x - tf.reduce_max(x, axis=axis, keepdims=True) 22 | ex = tf.exp(x) 23 | return ex / tf.reduce_sum(ex, axis=axis, keepdims=True) 24 | 25 | def gelu(x): 26 | return 0.5*x*(1+tf.tanh(np.sqrt(2/np.pi)*(x+0.044715*tf.pow(x, 3)))) 27 | 28 | def norm(x, scope, *, axis=-1, epsilon=1e-5): 29 | """Normalize to mean = 0, std = 1, then do a diagonal affine transform.""" 30 | with tf.variable_scope(scope): 31 | n_state = x.shape[-1].value 32 | g = tf.get_variable('g', [n_state], initializer=tf.constant_initializer(1)) 33 | b = tf.get_variable('b', [n_state], initializer=tf.constant_initializer(0)) 34 | u = tf.reduce_mean(x, axis=axis, keepdims=True) 35 | s = tf.reduce_mean(tf.square(x-u), axis=axis, keepdims=True) 36 | x = (x - u) * tf.rsqrt(s + epsilon) 37 | x = x*g + b 38 | return x 39 | 40 | def split_states(x, n): 41 | """Reshape the last dimension of x into [n, x.shape[-1]/n].""" 42 | *start, m = shape_list(x) 43 | return tf.reshape(x, start + [n, m//n]) 44 | 45 | def merge_states(x): 46 | """Smash the last two dimensions of x into a single dimension.""" 47 | *start, a, b = 
shape_list(x) 48 | return tf.reshape(x, start + [a*b]) 49 | 50 | def conv1d(x, scope, nf, *, w_init_stdev=0.02): 51 | with tf.variable_scope(scope): 52 | *start, nx = shape_list(x) 53 | w = tf.get_variable('w', [1, nx, nf], initializer=tf.random_normal_initializer(stddev=w_init_stdev)) 54 | b = tf.get_variable('b', [nf], initializer=tf.constant_initializer(0)) 55 | c = tf.reshape(tf.matmul(tf.reshape(x, [-1, nx]), tf.reshape(w, [-1, nf]))+b, start+[nf]) 56 | return c 57 | 58 | def attention_mask(nd, ns, *, dtype): 59 | """1's in the lower triangle, counting from the lower right corner. 60 | 61 | Same as tf.matrix_band_part(tf.ones([nd, ns]), -1, ns-nd), but doesn't produce garbage on TPUs. 62 | """ 63 | i = tf.range(nd)[:,None] 64 | j = tf.range(ns) 65 | m = i >= j - ns + nd 66 | return tf.cast(m, dtype) 67 | 68 | 69 | def attn(x, scope, n_state, *, past, hparams): 70 | assert x.shape.ndims == 3 # Should be [batch, sequence, features] 71 | assert n_state % hparams.n_head == 0 72 | if past is not None: 73 | assert past.shape.ndims == 5 # Should be [batch, 2, heads, sequence, features], where 2 is [k, v] 74 | 75 | def split_heads(x): 76 | # From [batch, sequence, features] to [batch, heads, sequence, features] 77 | return tf.transpose(split_states(x, hparams.n_head), [0, 2, 1, 3]) 78 | 79 | def merge_heads(x): 80 | # Reverse of split_heads 81 | return merge_states(tf.transpose(x, [0, 2, 1, 3])) 82 | 83 | def mask_attn_weights(w): 84 | # w has shape [batch, heads, dst_sequence, src_sequence], where information flows from src to dst. 85 | _, _, nd, ns = shape_list(w) 86 | b = attention_mask(nd, ns, dtype=w.dtype) 87 | b = tf.reshape(b, [1, 1, nd, ns]) 88 | w = w*b - tf.cast(1e10, w.dtype)*(1-b) 89 | return w 90 | 91 | def multihead_attn(q, k, v): 92 | # q, k, v have shape [batch, heads, sequence, features] 93 | w = tf.matmul(q, k, transpose_b=True) 94 | w = w * tf.rsqrt(tf.cast(v.shape[-1].value, w.dtype)) 95 | 96 | w = mask_attn_weights(w) 97 | w = softmax(w) 98 | a = tf.matmul(w, v) 99 | return a 100 | 101 | with tf.variable_scope(scope): 102 | c = conv1d(x, 'c_attn', n_state*3) 103 | q, k, v = map(split_heads, tf.split(c, 3, axis=2)) 104 | present = tf.stack([k, v], axis=1) 105 | if past is not None: 106 | pk, pv = tf.unstack(past, axis=1) 107 | k = tf.concat([pk, k], axis=-2) 108 | v = tf.concat([pv, v], axis=-2) 109 | a = multihead_attn(q, k, v) 110 | a = merge_heads(a) 111 | a = conv1d(a, 'c_proj', n_state) 112 | return a, present 113 | 114 | 115 | def mlp(x, scope, n_state, *, hparams): 116 | with tf.variable_scope(scope): 117 | nx = x.shape[-1].value 118 | h = gelu(conv1d(x, 'c_fc', n_state)) 119 | h2 = conv1d(h, 'c_proj', nx) 120 | return h2 121 | 122 | 123 | def block(x, scope, *, past, hparams): 124 | with tf.variable_scope(scope): 125 | nx = x.shape[-1].value 126 | a, present = attn(norm(x, 'ln_1'), 'attn', nx, past=past, hparams=hparams) 127 | x = x + a 128 | m = mlp(norm(x, 'ln_2'), 'mlp', nx*4, hparams=hparams) 129 | x = x + m 130 | return x, present 131 | 132 | 133 | # def block(x, scope, *, past, hparams): 134 | # with tf.variable_scope(scope): 135 | # nx = x.shape[-1].value 136 | # x_norm = norm(x, 'ln_1') 137 | # a, present = attn(x_norm, 'attn', nx, past=past, hparams=hparams) 138 | # x1 = tf.add(x, a) 139 | # x1_norm = norm(x1, 'ln_2') 140 | # m = mlp(x1_norm, 'mlp', nx*4, hparams=hparams) 141 | # x2 = tf.add(x1, m) 142 | # return x2, present 143 | # 144 | # 145 | # from functools import partial 146 | # 147 | # 148 | # def block(x, scope, *, past, hparams): 149 | # with 
tf.variable_scope(scope): 150 | # nx = x.shape[-1].value 151 | # o0 = tf.identity 152 | # o1 = partial(norm, scope='ln_1') # x_norm = norm(x, 'ln_1') 153 | # o2 = partial(attn, scope='attn', n_state=nx, past=past, hparams=hparams) # a, present = attn(x_norm, 'attn', nx, past=past, hparams=hparams) 154 | # o3 = partial(tf.add, x) # x1 = tf.add(x, a) 155 | # o4 = partial(norm, scope='ln_2') # x1_norm = norm(x1, 'ln_2') 156 | # o5 = partial(mlp, scope='mlp', n_state=nx*4, hparams=hparams) # m = mlp(x1_norm, 'mlp', nx*4, hparams=hparams) 157 | # o6 = partial(tf.add, o3) # x2 = tf.add(x1, m) 158 | # return o6, present # return x2, present 159 | 160 | 161 | def past_shape(*, hparams, batch_size=None, sequence=None): 162 | return [batch_size, hparams.n_layer, 2, hparams.n_head, sequence, hparams.n_embd // hparams.n_head] 163 | 164 | def expand_tile(value, size): 165 | """Add a new axis of given size.""" 166 | value = tf.convert_to_tensor(value, name='value') 167 | ndims = value.shape.ndims 168 | return tf.tile(tf.expand_dims(value, axis=0), [size] + [1]*ndims) 169 | 170 | def positions_for(tokens, past_length): 171 | batch_size = tf.shape(tokens)[0] 172 | nsteps = tf.shape(tokens)[1] 173 | return expand_tile(past_length + tf.range(nsteps), batch_size) 174 | 175 | 176 | def model(hparams, X, past=None, scope='model', reuse=False): 177 | with tf.variable_scope(scope, reuse=reuse): 178 | results = {} 179 | batch, sequence = shape_list(X) 180 | 181 | wpe = tf.get_variable('wpe', [hparams.n_ctx, hparams.n_embd], 182 | initializer=tf.random_normal_initializer(stddev=0.01)) 183 | wte = tf.get_variable('wte', [hparams.n_vocab, hparams.n_embd], 184 | initializer=tf.random_normal_initializer(stddev=0.02)) 185 | past_length = 0 if past is None else tf.shape(past)[-2] 186 | h = tf.gather(wte, X) + tf.gather(wpe, positions_for(X, past_length)) 187 | 188 | # Transformer 189 | presents = [] 190 | pasts = tf.unstack(past, axis=1) if past is not None else [None] * hparams.n_layer 191 | assert len(pasts) == hparams.n_layer 192 | for layer, past in enumerate(pasts): 193 | h, present = block(h, 'h%d' % layer, past=past, hparams=hparams) 194 | presents.append(present) 195 | results['present'] = tf.stack(presents, axis=1) 196 | h = norm(h, 'ln_f') 197 | 198 | # Language model loss. Do tokens <n predict token n? 199 | h_flat = tf.reshape(h, [batch*sequence, hparams.n_embd]) 200 | flat_logits = tf.matmul(h_flat, wte, transpose_b=True) 201 | logits = tf.reshape(flat_logits, [batch, sequence, hparams.n_vocab]) 202 | results['logits'] = logits 203 | return results 204 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 7 | tensorflow>=1.15.0 8 | openai 9 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | exec python3 -m openai_server "$@" 4 | 5 | --------------------------------------------------------------------------------