├── .gitignore ├── Dockerfile ├── README.md ├── docker-compose.yml └── nginx.conf /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | __pycache__/ 3 | *.pyc 4 | *.pyo 5 | *.egg-info/ 6 | dist/ 7 | 8 | # IDEs and editors 9 | .idea/ 10 | .vscode/ 11 | 12 | # OS-specific files 13 | .DS_Store # macOS 14 | Thumbs.db # Windows 15 | 16 | # Docker 17 | *.env # Environment files 18 | .env.* 19 | docker-compose.override.yml 20 | docker-compose.*.yml 21 | 22 | # Nginx 23 | logs/ 24 | 25 | # SSL certificates 26 | *.crt 27 | *.key 28 | 29 | # Miscellaneous 30 | *.swp # Vim temporary files 31 | *.swo # Vim temporary files 32 | *.bak # backup files 33 | *.tmp # temporary files 34 | 35 | # Ignore all files in the models folder 36 | models/* 37 | 38 | # ignore mongo db folder 39 | mongo 40 | 41 | # ignore chat-ui repo 42 | chat-ui -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Nginx image 2 | FROM nginx 3 | 4 | # Copy the self-signed SSL certificates into the container 5 | COPY ./nginx-selfsigned.crt /etc/nginx/ssl/nginx-selfsigned.crt 6 | COPY ./nginx-selfsigned.key /etc/nginx/ssl/nginx-selfsigned.key 7 | 8 | # Copy a custom Nginx configuration file into the container 9 | COPY ./nginx.conf /etc/nginx/nginx.conf 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This project demonstrates how to securely host a private chat model using text-generation-inference and chat-ui with self-signed SSL certificate via nginx. 2 | 3 | ## Prerequisites 4 | - Docker 5 | - Docker Compose 6 | - OpenSSL 7 | 8 | ## Generate Self-Signed SSL Certificate 9 | 10 | ``` 11 | openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout nginx-selfsigned.key -out nginx-selfsigned.crt 12 | ``` 13 | 14 | ## clone chat ui 15 | 16 | ``` 17 | git clone https://github.com/huggingface/chat-ui.git 18 | ``` 19 | 20 | Add dependency for chat-ui Dockerfile 21 | 22 | ``` 23 | RUN apt-get update && apt-get install -y libvips-dev 24 | ``` 25 | 26 | Change the base path in the svelte.config.js file in chat-ui 27 | 28 | ``` 29 | import adapter from "@sveltejs/adapter-node"; 30 | import { vitePreprocess } from "@sveltejs/kit/vite"; 31 | import dotenv from "dotenv"; 32 | 33 | dotenv.config({ path: "./.env.local" }); 34 | dotenv.config({ path: "./.env" }); 35 | 36 | process.env.PUBLIC_VERSION = process.env.npm_package_version; 37 | 38 | /** @type {import('@sveltejs/kit').Config} */ 39 | const config = { 40 | // Consult https://kit.svelte.dev/docs/integrations#preprocessors 41 | // for more information about preprocessors 42 | preprocess: vitePreprocess(), 43 | 44 | kit: { 45 | adapter: adapter(), 46 | 47 | paths: { 48 | base: process.env.APP_BASE || "/chat", 49 | }, 50 | csrf: { 51 | // handled in hooks.server.ts, because we can have multiple valid origins 52 | checkOrigin: false, 53 | }, 54 | }, 55 | }; 56 | 57 | export default config; 58 | ``` 59 | 60 | Add the `.env.local` file in the chat-ui 61 | 62 | ``` 63 | MONGODB_URL=mongodb://mongodb:27017/ 64 | MODELS=`[ 65 | { 66 | "name": "mzbac/CodeLlama-34b-guanaco-gptq", 67 | "datasetName": "CodeLlama-34b-guanaco-gptq", 68 | "description": "A good alternative to ChatGPT", 69 | "websiteUrl": "https://open-assistant.io", 70 | "userMessageToken": "### Human: ", 71 | "assistantMessageToken": "### Assistant: ", 72 | "userMessageEndToken": "\n", 73 | "assistantMessageEndToken": "\n", 74 | "preprompt": "Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n", 75 | "promptExamples": [ 76 | { 77 | "title": "Write an email from bullet list", 78 | "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" 79 | }, { 80 | "title": "Code a snake game", 81 | "prompt": "Code a basic snake game in python, give explanations for each step." 82 | }, { 83 | "title": "Assist in a task", 84 | "prompt": "How do I make a delicious lemon cheesecake?" 85 | } 86 | ], 87 | "parameters": { 88 | "temperature": 0.7, 89 | "top_p": 0.95, 90 | "repetition_penalty": 1.2, 91 | "top_k": 50, 92 | "truncate": 1024, 93 | "max_new_tokens": 2048, 94 | "stop": ["### Human"] 95 | }, 96 | "endpoints": [{"url": "http://text-generation-inference:80"}] 97 | } 98 | ]` 99 | 100 | ``` 101 | 102 | ## create volume for mongo db file 103 | ``` 104 | mkdir mongo 105 | ``` 106 | 107 | 108 | ## Usage 109 | Start the services: 110 | ``` 111 | docker-compose up --build 112 | ``` 113 | 114 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | nginx: 4 | build: . 5 | ports: 6 | - "80:80" 7 | - "443:443" 8 | depends_on: 9 | - chatui 10 | networks: 11 | - gateway_network 12 | mongodb: 13 | image: mongo:latest 14 | container_name: mongodb 15 | volumes: 16 | - ./mongo:/data/db 17 | ports: 18 | - "27017:27017" 19 | networks: 20 | - gateway_network 21 | chatui: 22 | build: 23 | context: ./chat-ui 24 | dockerfile: Dockerfile 25 | container_name: chatui 26 | ports: 27 | - "3000:3000" 28 | depends_on: 29 | - mongodb 30 | - text-generation-inference 31 | networks: 32 | - gateway_network 33 | text-generation-inference: 34 | image: ghcr.io/huggingface/text-generation-inference:latest 35 | command: ["--max-total-tokens", "3080", "--max-input-length", "3072", "--max-batch-prefill-tokens", "3080", "--quantize", "awq", "--model-id", "mzbac/CodeLlama-34b-guanaco-awq"] 36 | volumes: 37 | - $PWD/models:/data 38 | container_name: text-generation-inference 39 | deploy: 40 | resources: 41 | reservations: 42 | devices: 43 | - driver: "nvidia" 44 | device_ids: ["0"] 45 | capabilities: [gpu] 46 | memory: 1g 47 | networks: 48 | - gateway_network 49 | networks: 50 | gateway_network: 51 | driver: bridge 52 | -------------------------------------------------------------------------------- /nginx.conf: -------------------------------------------------------------------------------- 1 | events { 2 | worker_connections 1024; 3 | } 4 | 5 | http { 6 | server { 7 | listen 80; 8 | server_name localhost; 9 | 10 | location / { 11 | return 301 https://$host$request_uri; 12 | } 13 | } 14 | 15 | server { 16 | listen 443 ssl; 17 | server_name localhost; 18 | 19 | ssl_certificate /etc/nginx/ssl/nginx-selfsigned.crt; 20 | ssl_certificate_key /etc/nginx/ssl/nginx-selfsigned.key; 21 | 22 | location / { 23 | proxy_pass http://text-generation-inference:80; 24 | proxy_set_header Host $host; 25 | proxy_set_header X-Real-IP $remote_addr; 26 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 27 | proxy_set_header X-Forwarded-Proto $scheme; 28 | } 29 | 30 | location /chat { 31 | proxy_pass http://chatui:3000; 32 | proxy_set_header Host $host; 33 | proxy_set_header X-Real-IP $remote_addr; 34 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 35 | proxy_set_header X-Forwarded-Proto $scheme; 36 | proxy_buffering off; 37 | proxy_read_timeout 300s; 38 | proxy_send_timeout 300s; 39 | } 40 | } 41 | } 42 | --------------------------------------------------------------------------------