├── README.md
├── LICENSE
├── nemo_2b_bf16_tp1_client.ipynb
└── nemo_2b_bf16_tp1_server.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | 🐣 Please follow me for new updates https://twitter.com/camenduru
2 | 🔥 Please join our discord server https://discord.gg/k5BwmmvJJU
3 | 🥳 Please join my patreon community https://patreon.com/camenduru
4 |
5 | ## 🚦 WIP 🚦
6 |
7 | ### 🦒 Colab
8 |
9 | | Colab | Info |
10 | | --- | --- |
11 | [](https://colab.research.google.com/github/camenduru/nvidia-llm-colab/blob/main/nemo_2b_bf16_tp1_server.ipynb) | nemo_2b_bf16_tp1_server (Colab Pro GPU A100 😭)
12 | [](https://colab.research.google.com/github/camenduru/nvidia-llm-colab/blob/main/nemo_2b_bf16_tp1_client.ipynb) | nemo_2b_bf16_tp1_client (CPU)
13 |
14 | ### Tutorial
15 | https://www.youtube.com/watch?v=wER54k32qXE
16 |
17 | ### Main Repo
18 | https://github.com/NVIDIA/apex
19 | https://github.com/NVIDIA/NeMo
20 |
21 | ### Model
22 | https://huggingface.co/nvidia/GPT-2B-001
23 |
24 | ### Model License
25 | License to use this model is covered by the CC-BY-4.0. By downloading the public and release version of the model, you accept the terms and conditions of the CC-BY-4.0 license.
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/nemo_2b_bf16_tp1_client.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "view-in-github"
7 | },
8 | "source": [
9 | "[](https://colab.research.google.com/github/camenduru/nvidia-llm-colab/blob/main/nemo_2b_bf16_tp1_client.ipynb)"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {
16 | "id": "kEh5x3-c8Bs_"
17 | },
18 | "outputs": [],
19 | "source": [
20 | "import json\n",
21 | "import requests\n",
22 | "\n",
23 | "port_num = 80\n",
24 | "headers = {\"Content-Type\": \"application/json\"}\n",
25 | "\n",
26 | "def request_data(data):\n",
27 | " resp = requests.put('http://SERVER_URL.loca.lt:{}/generate'.format(port_num),\n",
28 | " data=json.dumps(data),\n",
29 | " headers=headers)\n",
30 | " sentences = resp.json()['sentences']\n",
31 | " return sentences\n",
32 | "\n",
33 | "data = {\n",
34 | " \"sentences\": [\"Tell me an interesting fact about space travel.\"]*1,\n",
35 | " \"tokens_to_generate\": 50,\n",
36 | " \"temperature\": 1.0,\n",
37 | " \"add_BOS\": True,\n",
38 | " \"top_k\": 0,\n",
39 | " \"top_p\": 0.9,\n",
40 | " \"greedy\": False,\n",
41 | " \"all_probs\": False,\n",
42 | " \"repetition_penalty\": 1.2,\n",
43 | " \"min_tokens_to_generate\": 2,\n",
44 | "}\n",
45 | "\n",
46 | "sentences = request_data(data)\n",
47 | "print(sentences)"
48 | ]
49 | }
50 | ],
51 | "metadata": {
52 | "colab": {
53 | "provenance": []
54 | },
55 | "kernelspec": {
56 | "display_name": "Python 3",
57 | "name": "python3"
58 | },
59 | "language_info": {
60 | "name": "python"
61 | }
62 | },
63 | "nbformat": 4,
64 | "nbformat_minor": 0
65 | }
66 |
--------------------------------------------------------------------------------
/nemo_2b_bf16_tp1_server.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | },
15 | "accelerator": "GPU",
16 | "gpuClass": "premium"
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {
22 | "id": "view-in-github"
23 | },
24 | "source": [
25 | "[](https://colab.research.google.com/github/camenduru/nvidia-llm-colab/blob/main/nemo_2b_bf16_tp1_server.ipynb)"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "source": [
31 | "%cd /content\n",
32 | "!apt -y install -qq aria2\n",
33 | "!npm install -g localtunnel\n",
34 | "!git clone https://huggingface.co/camenduru/apex\n",
35 | "# !git clone https://github.com/NVIDIA/apex.git\n",
36 | "%cd /content/apex\n",
37 | "# !git checkout 03c9d80ed54c0eaa5b581bf42ceca3162f085327\n",
38 | "!pip install -v --disable-pip-version-check --no-cache-dir --global-option=\"--cpp_ext\" --global-option=\"--cuda_ext\" --global-option=\"--fast_layer_norm\" --global-option=\"--distributed_adam\" --global-option=\"--deprecated_fused_adam\" ./\n",
39 | "!pip install nemo_toolkit['nlp']==1.17.0\n",
40 | "\n",
41 | "%cd /content\n",
42 | "!git clone https://github.com/NVIDIA/NeMo.git \n",
43 | "%cd /content/NeMo/examples/nlp/language_modeling\n",
44 | "!git checkout v1.17.0\n",
45 | "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/nvidia/GPT-2B-001/resolve/main/GPT-2B-001_bf16_tp1.nemo -d /content/NeMo/examples/nlp/language_modeling -o nemo_2b_bf16_tp1.nemo\n",
46 | "\n",
47 | "import subprocess\n",
48 | "import threading\n",
49 | "import time\n",
50 | "import socket\n",
51 | "def iframe_thread(port):\n",
52 | " while True:\n",
53 | " time.sleep(0.5)\n",
54 | " sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n",
55 | " result = sock.connect_ex(('127.0.0.1', port))\n",
56 | " if result == 0:\n",
57 | " break\n",
58 | " sock.close()\n",
59 | " p = subprocess.Popen([\"lt\", \"--port\", \"{}\".format(port)], stdout=subprocess.PIPE)\n",
60 | " for line in p.stdout:\n",
61 | " print(line.decode(), end='')\n",
62 | "threading.Thread(target=iframe_thread, daemon=True, args=(5555,)).start()\n",
63 | "\n",
64 | "!python megatron_gpt_eval.py trainer.precision=bf16 gpt_model_file=nemo_2b_bf16_tp1.nemo server=True tensor_model_parallel_size=1 trainer.devices=1"
65 | ],
66 | "metadata": {
67 | "id": "8fLY9_xXzN9a"
68 | },
69 | "execution_count": null,
70 | "outputs": []
71 | }
72 | ]
73 | }
--------------------------------------------------------------------------------