├── .gitignore
├── Dockerfile
├── README.md
├── docker-compose.yml
└── nginx.conf


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled files
 2 | __pycache__/
 3 | *.pyc
 4 | *.pyo
 5 | *.egg-info/
 6 | dist/
 7 | 
 8 | # IDEs and editors
 9 | .idea/
10 | .vscode/
11 | 
12 | # OS-specific files
13 | .DS_Store  # macOS
14 | Thumbs.db   # Windows
15 | 
16 | # Docker
17 | *.env  # Environment files
18 | .env.*
19 | docker-compose.override.yml
20 | docker-compose.*.yml
21 | 
22 | # Nginx
23 | logs/
24 | 
25 | # SSL certificates
26 | *.crt
27 | *.key
28 | 
29 | # Miscellaneous
30 | *.swp  # Vim temporary files
31 | *.swo  # Vim temporary files
32 | *.bak  # backup files
33 | *.tmp  # temporary files
34 | 
35 | # Ignore all files in the models folder
36 | models/*
37 | 
38 | # ignore mongo db folder
39 | mongo
40 | 
41 | # ignore chat-ui repo
42 | chat-ui


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Use the official Nginx image
 2 | FROM nginx
 3 | 
 4 | # Copy the self-signed SSL certificates into the container
 5 | COPY ./nginx-selfsigned.crt /etc/nginx/ssl/nginx-selfsigned.crt
 6 | COPY ./nginx-selfsigned.key /etc/nginx/ssl/nginx-selfsigned.key
 7 | 
 8 | # Copy a custom Nginx configuration file into the container
 9 | COPY ./nginx.conf /etc/nginx/nginx.conf
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | This project demonstrates how to securely host a private chat model using text-generation-inference and chat-ui with self-signed SSL certificate via nginx.
  2 | 
  3 | ## Prerequisites
  4 | - Docker
  5 | - Docker Compose
  6 | - OpenSSL
  7 | 
  8 | ## Generate Self-Signed SSL Certificate
  9 | 
 10 | ```
 11 | openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout nginx-selfsigned.key -out nginx-selfsigned.crt
 12 | ```
 13 | 
 14 | ## clone chat ui 
 15 | 
 16 | ```
 17 | git clone https://github.com/huggingface/chat-ui.git
 18 | ```
 19 | 
 20 | Add dependency for chat-ui Dockerfile
 21 | 
 22 | ```
 23 | RUN apt-get update && apt-get install -y libvips-dev
 24 | ```
 25 | 
 26 | Change the base path in the svelte.config.js file in chat-ui
 27 | 
 28 | ```
 29 | import adapter from "@sveltejs/adapter-node";
 30 | import { vitePreprocess } from "@sveltejs/kit/vite";
 31 | import dotenv from "dotenv";
 32 | 
 33 | dotenv.config({ path: "./.env.local" });
 34 | dotenv.config({ path: "./.env" });
 35 | 
 36 | process.env.PUBLIC_VERSION = process.env.npm_package_version;
 37 | 
 38 | /** @type {import('@sveltejs/kit').Config} */
 39 | const config = {
 40 | 	// Consult https://kit.svelte.dev/docs/integrations#preprocessors
 41 | 	// for more information about preprocessors
 42 | 	preprocess: vitePreprocess(),
 43 | 
 44 | 	kit: {
 45 | 		adapter: adapter(),
 46 | 
 47 | 		paths: {
 48 | 			base: process.env.APP_BASE || "/chat",
 49 | 		},
 50 | 		csrf: {
 51 | 			// handled in hooks.server.ts, because we can have multiple valid origins
 52 | 			checkOrigin: false,
 53 | 		},
 54 | 	},
 55 | };
 56 | 
 57 | export default config;
 58 | ```
 59 | 
 60 | Add the `.env.local` file in the chat-ui
 61 | 
 62 | ```
 63 | MONGODB_URL=mongodb://mongodb:27017/
 64 | MODELS=`[
 65 |     {
 66 |         "name": "mzbac/CodeLlama-34b-guanaco-gptq",
 67 |         "datasetName": "CodeLlama-34b-guanaco-gptq",
 68 |         "description": "A good alternative to ChatGPT",
 69 |         "websiteUrl": "https://open-assistant.io",
 70 |         "userMessageToken": "### Human: ",
 71 |         "assistantMessageToken": "### Assistant: ",
 72 |         "userMessageEndToken": "\n",
 73 |         "assistantMessageEndToken": "\n",
 74 |         "preprompt": "Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n",
 75 |         "promptExamples": [
 76 |             {
 77 |                 "title": "Write an email from bullet list",
 78 |                 "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
 79 |                 }, {
 80 |                 "title": "Code a snake game",
 81 |                 "prompt": "Code a basic snake game in python, give explanations for each step."
 82 |                 }, {
 83 |                 "title": "Assist in a task",
 84 |                 "prompt": "How do I make a delicious lemon cheesecake?"
 85 |             }
 86 |         ],
 87 |         "parameters": {
 88 |             "temperature": 0.7,
 89 |             "top_p": 0.95,
 90 |             "repetition_penalty": 1.2,
 91 |             "top_k": 50,
 92 |             "truncate": 1024,
 93 |             "max_new_tokens": 2048,
 94 |             "stop": ["### Human"]
 95 |         },
 96 |         "endpoints": [{"url": "http://text-generation-inference:80"}]
 97 |     }
 98 | ]`
 99 | 
100 | ```
101 | 
102 | ## create volume for mongo db file
103 | ```
104 | mkdir mongo
105 | ```
106 | 
107 | 
108 | ## Usage
109 | Start the services:
110 | ```
111 | docker-compose up --build
112 | ```
113 | 
114 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | services:
 3 |   nginx:
 4 |     build: .
 5 |     ports:
 6 |       - "80:80"
 7 |       - "443:443"
 8 |     depends_on:
 9 |       - chatui
10 |     networks:
11 |       - gateway_network
12 |   mongodb:
13 |     image: mongo:latest
14 |     container_name: mongodb
15 |     volumes:
16 |       - ./mongo:/data/db
17 |     ports:
18 |       - "27017:27017"
19 |     networks:
20 |       - gateway_network
21 |   chatui:
22 |     build: 
23 |       context: ./chat-ui
24 |       dockerfile: Dockerfile
25 |     container_name: chatui
26 |     ports:
27 |       - "3000:3000"
28 |     depends_on:
29 |       - mongodb
30 |       - text-generation-inference
31 |     networks:
32 |       - gateway_network
33 |   text-generation-inference:
34 |     image: ghcr.io/huggingface/text-generation-inference:latest
35 |     command: ["--max-total-tokens", "3080", "--max-input-length", "3072", "--max-batch-prefill-tokens", "3080", "--quantize", "awq", "--model-id", "mzbac/CodeLlama-34b-guanaco-awq"]
36 |     volumes:
37 |       - $PWD/models:/data
38 |     container_name: text-generation-inference
39 |     deploy:
40 |       resources:
41 |         reservations:
42 |           devices:
43 |           - driver: "nvidia"
44 |             device_ids: ["0"]
45 |             capabilities: [gpu]
46 |           memory: 1g
47 |     networks:
48 |       - gateway_network
49 | networks:
50 |   gateway_network:
51 |     driver: bridge
52 | 


--------------------------------------------------------------------------------
/nginx.conf:
--------------------------------------------------------------------------------
 1 | events {
 2 |     worker_connections 1024;
 3 | }
 4 | 
 5 | http {
 6 |     server {
 7 |         listen 80;
 8 |         server_name localhost;
 9 | 
10 |         location / {
11 |             return 301 https://$host$request_uri;
12 |         }
13 |     }
14 | 
15 |     server {
16 |         listen 443 ssl;
17 |         server_name localhost;
18 | 
19 |         ssl_certificate /etc/nginx/ssl/nginx-selfsigned.crt;
20 |         ssl_certificate_key /etc/nginx/ssl/nginx-selfsigned.key;
21 | 
22 |         location / {
23 |             proxy_pass http://text-generation-inference:80; 
24 |             proxy_set_header Host $host;
25 |             proxy_set_header X-Real-IP $remote_addr;
26 |             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
27 |             proxy_set_header X-Forwarded-Proto $scheme;
28 |         }
29 | 
30 |         location /chat {
31 |             proxy_pass http://chatui:3000; 
32 |             proxy_set_header Host $host;
33 |             proxy_set_header X-Real-IP $remote_addr;
34 |             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
35 |             proxy_set_header X-Forwarded-Proto $scheme;
36 |             proxy_buffering off;
37 |             proxy_read_timeout 300s;
38 |             proxy_send_timeout 300s;
39 |         }
40 |     }
41 | }
42 | 


--------------------------------------------------------------------------------