├── README.md
├── LICENSE
├── nemo_2b_bf16_tp1_client.ipynb
└── nemo_2b_bf16_tp1_server.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | 🐣 Please follow me for new updates https://twitter.com/camenduru
2 | 🔥 Please join our discord server https://discord.gg/k5BwmmvJJU
3 | 🥳 Please join my patreon community https://patreon.com/camenduru
4 |
5 | ## 🚦 WIP 🚦
6 |
7 | ### 🦒 Colab
8 |
9 | | Colab | Info |
10 | | --- | --- |
11 | [](https://colab.research.google.com/github/camenduru/nvidia-llm-colab/blob/main/nemo_2b_bf16_tp1_server.ipynb) | nemo_2b_bf16_tp1_server (Colab Pro GPU A100 😭)
12 | [](https://colab.research.google.com/github/camenduru/nvidia-llm-colab/blob/main/nemo_2b_bf16_tp1_client.ipynb) | nemo_2b_bf16_tp1_client (CPU)
13 |
14 | ### Tutorial
15 | https://www.youtube.com/watch?v=wER54k32qXE
16 |
17 | ### Main Repo
18 | https://github.com/NVIDIA/apex
19 | https://github.com/NVIDIA/NeMo
20 |
21 | ### Model
22 | https://huggingface.co/nvidia/GPT-2B-001
23 |
24 | ### Model License
25 | License to use this model is covered by the CC-BY-4.0. By downloading the public and release version of the model, you accept the terms and conditions of the CC-BY-4.0 license.
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/nemo_2b_bf16_tp1_client.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "view-in-github"
7 | },
8 | "source": [
9 | "[](https://colab.research.google.com/github/camenduru/nvidia-llm-colab/blob/main/nemo_2b_bf16_tp1_client.ipynb)"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {
16 | "id": "kEh5x3-c8Bs_"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "import json\n",
21 | "import requests\n",
22 | "\n",
23 | "port_num = 80\n",
24 | "headers = {\"Content-Type\": \"application/json\"}\n",
25 | "\n",
26 | "def request_data(data):\n",
27 | " resp = requests.put('http://SERVER_URL.loca.lt:{}/generate'.format(port_num),\n",
28 | " data=json.dumps(data),\n",
29 | " headers=headers)\n",
30 | " sentences = resp.json()['sentences']\n",
31 | " return sentences\n",
32 | "\n",
33 | "data = {\n",
34 | " \"sentences\": [\"Tell me an interesting fact about space travel.\"]*1,\n",
35 | " \"tokens_to_generate\": 50,\n",
36 | " \"temperature\": 1.0,\n",
37 | " \"add_BOS\": True,\n",
38 | " \"top_k\": 0,\n",
39 | " \"top_p\": 0.9,\n",
40 | " \"greedy\": False,\n",
41 | " \"all_probs\": False,\n",
42 | " \"repetition_penalty\": 1.2,\n",
43 | " \"min_tokens_to_generate\": 2,\n",
44 | "}\n",
45 | "\n",
46 | "sentences = request_data(data)\n",
47 | "print(sentences)"
48 | ]
49 | }
50 | ],
51 | "metadata": {
52 | "colab": {
53 | "provenance": []
54 | },
55 | "kernelspec": {
56 | "display_name": "Python 3",
57 | "name": "python3"
58 | },
59 | "language_info": {
60 | "name": "python"
61 | }
62 | },
63 | "nbformat": 4,
64 | "nbformat_minor": 0
65 | }
66 |
--------------------------------------------------------------------------------
/nemo_2b_bf16_tp1_server.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | },
15 | "accelerator": "GPU",
16 | "gpuClass": "premium"
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {
22 | "id": "view-in-github"
23 | },
24 | "source": [
25 | "[](https://colab.research.google.com/github/camenduru/nvidia-llm-colab/blob/main/nemo_2b_bf16_tp1_server.ipynb)"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "source": [
31 | "%cd /content\n",
32 | "!apt -y install -qq aria2\n",
33 | "!npm install -g localtunnel\n",
34 | "!git clone https://huggingface.co/camenduru/apex\n",
35 | "# !git clone https://github.com/NVIDIA/apex.git\n",
36 | "%cd /content/apex\n",
37 | "# !git checkout 03c9d80ed54c0eaa5b581bf42ceca3162f085327\n",
38 | "!pip install -v --disable-pip-version-check --no-cache-dir --global-option=\"--cpp_ext\" --global-option=\"--cuda_ext\" --global-option=\"--fast_layer_norm\" --global-option=\"--distributed_adam\" --global-option=\"--deprecated_fused_adam\" ./\n",
39 | "!pip install nemo_toolkit['nlp']==1.17.0\n",
40 | "\n",
41 | "%cd /content\n",
42 | "!git clone https://github.com/NVIDIA/NeMo.git \n",
43 | "%cd /content/NeMo/examples/nlp/language_modeling\n",
44 | "!git checkout v1.17.0\n",
45 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/nvidia/GPT-2B-001/resolve/main/GPT-2B-001_bf16_tp1.nemo -d /content/NeMo/examples/nlp/language_modeling -o nemo_2b_bf16_tp1.nemo\n",
46 | "\n",
47 | "import subprocess\n",
48 | "import threading\n",
49 | "import time\n",
50 | "import socket\n",
51 | "def iframe_thread(port):\n",
52 | " while True:\n",
53 | " time.sleep(0.5)\n",
54 | " sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n",
55 | " result = sock.connect_ex(('127.0.0.1', port))\n",
56 | " if result == 0:\n",
57 | " break\n",
58 | " sock.close()\n",
59 | " p = subprocess.Popen([\"lt\", \"--port\", \"{}\".format(port)], stdout=subprocess.PIPE)\n",
60 | " for line in p.stdout:\n",
61 | " print(line.decode(), end='')\n",
62 | "threading.Thread(target=iframe_thread, daemon=True, args=(5555,)).start()\n",
63 | "\n",
64 | "!python megatron_gpt_eval.py trainer.precision=bf16 gpt_model_file=nemo_2b_bf16_tp1.nemo server=True tensor_model_parallel_size=1 trainer.devices=1"
65 | ],
66 | "metadata": {
67 | "id": "8fLY9_xXzN9a"
68 | },
69 | "execution_count": null,
70 | "outputs": []
71 | }
72 | ]
73 | }
--------------------------------------------------------------------------------