├── .llm-proxy.env
├── README.md
├── docker-compose-localai-worker.yml
└── docker-compose.yml

/.llm-proxy.env:
--------------------------------------------------------------------------------
PORT=3001
TARGET_URLS=http://localai:8080,http://lm-studio-ip-address:1234
JWT_SECRET=jwt-secret-here
AUTH_USERNAME=admin
AUTH_PASSWORD=password-here
PAYLOAD_LIMIT=25mb
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Local AI Stack

This is a fairly comprehensive stack of AI tools that you can run locally for whatever you want.

Note that, as configured, this will use 4 GPUs. You'll probably want at least 2 for a decent experience running both LLMs and image models; I haven't tested with one GPU, but it may work. To adjust the number of GPUs, change the `device_ids` under `localai` and `comfyui`. Feel free to remove whatever you don't want to use.

When referencing one service from another (like adding a URL for ComfyUI in Open WebUI), make sure you reference the `hostname` and internal port of the service, like `http://comfyui:7860`.

## URL Reference

- localai: http://machine.ip.address.here:8080
- llm-proxy: http://machine.ip.address.here:3001
- open webui: http://machine.ip.address.here:3000
- searxng: http://machine.ip.address.here:8081
- comfy-ui: http://machine.ip.address.here:7860
- qdrant: http://machine.ip.address.here:6333
- n8n: http://machine.ip.address.here:5678

### LocalAI

Enables running open source LLMs/transformers, with support for distributed inferencing.

[LocalAI docs](https://localai.io/)

Thanks to [RoboTF AI](https://www.youtube.com/@RoboTFAI), who helped me get LocalAI working with distributed inferencing.

Check out `docker-compose-localai-worker.yml` to run a p2p worker for distributed inferencing. You may need to run 1 worker per GPU on the remote machine.

### LLM-Proxy

A simple proxy that makes it easier to interact with local AI models. It aggregates models running on separate machines, and adds TLS and API keys using the same method as OpenAI's API.

To set this up, refer to the [readme](https://github.com/j4ys0n/llm-proxy?tab=readme-ov-file).
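Once it's set up, anything that speaks OpenAI's API can point at the proxy. Here's a rough sketch using the official `openai` Python client - the address, API key, and model name below are placeholders, swap in your own:

```python
from openai import OpenAI

# Placeholders: use your proxy's address, the API key you configured in
# llm-proxy, and whatever model name LocalAI is actually serving.
client = OpenAI(
    base_url="http://machine.ip.address.here:3001/v1",
    api_key="your-llm-proxy-api-key",
)

response = client.chat.completions.create(
    model="your-model-name",
    messages=[{"role": "user", "content": "Hello from the local stack!"}],
)
print(response.choices[0].message.content)
```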
### Open WebUI

A feature-rich UI for chatting with and otherwise interacting with your LLMs.

Open WebUI can use the following services in this stack: SearXNG, ComfyUI, LocalAI, or LocalAI with LLM-Proxy.

[Open WebUI docs](https://docs.openwebui.com/)

### SearXNG

A privacy-respecting, open source metasearch engine.

SearXNG uses the following services in this stack: Redis (Valkey).

[SearXNG docs](https://docs.searxng.org/)
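If you want to query SearXNG programmatically (this is essentially what Open WebUI's web search does), here's a minimal sketch. It assumes you've enabled the `json` output format in `searxng/settings.yml`, which is off by default:

```python
import requests

# Placeholder address; 8081 is the host port mapped in docker-compose.yml.
response = requests.get(
    "http://machine.ip.address.here:8081/search",
    params={"q": "local ai stack", "format": "json"},
    timeout=10,
)
for result in response.json()["results"][:5]:
    print(result["title"], "->", result["url"])
```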
### ComfyUI

A feature-rich and extensible Stable Diffusion UI for generating and modifying images from prompt inputs. It also works with Flux.

You'll need to pull a repo for this - it builds locally: [stable-diffusion-webui-docker](https://github.com/AbdBarho/stable-diffusion-webui-docker). Check out Techno Tim's tutorial for details on what to do: [Techno Tim's Private AI Stack tutorial](https://technotim.live/posts/ai-stack-tutorial/#software-overview#changes-for-comfyui)

This could definitely use some work. I tend to prefer pulling images instead of building locally, but I haven't made the time to do something about that for ComfyUI yet.

[ComfyUI docs](https://blenderneko.github.io/ComfyUI-docs/)

### Qdrant

A vector database for AI applications. Supports similarity search over vectors and can store arbitrary payload data alongside them.
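Here's a quick sketch of talking to Qdrant with the `qdrant-client` Python package - the collection name and vectors are just toy values:

```python
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

# Point the client at the stack's Qdrant instance (port 6333).
client = QdrantClient(url="http://machine.ip.address.here:6333")

# Toy collection: 4-dimensional vectors compared by cosine similarity.
client.recreate_collection(
    collection_name="demo",
    vectors_config=VectorParams(size=4, distance=Distance.COSINE),
)
client.upsert(
    collection_name="demo",
    points=[
        PointStruct(id=1, vector=[0.1, 0.2, 0.3, 0.4], payload={"doc": "hello"}),
        PointStruct(id=2, vector=[0.9, 0.1, 0.1, 0.1], payload={"doc": "world"}),
    ],
)
hits = client.search(collection_name="demo", query_vector=[0.1, 0.2, 0.3, 0.4], limit=1)
print(hits[0].payload)  # -> {'doc': 'hello'}
```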
### n8n

A highly extensible low-code automation and integration platform.

n8n uses the following services in this stack: Postgres, Qdrant, and LocalAI with LLM-Proxy.

[n8n docs](https://docs.n8n.io/)
--------------------------------------------------------------------------------
/docker-compose-localai-worker.yml:
--------------------------------------------------------------------------------
services:
  localai:
    container_name: localai
    hostname: localai
    image: localai/localai:latest-gpu-nvidia-cuda-12
    environment:
      - MODELS_PATH=/models
      - TOKEN=same-token-here # must match the TOKEN on the main localai instance
      # - DEBUG=true
    volumes:
      - ./models:/models:cached
      - ./images/:/tmp/generated/images/
    network_mode: host
    entrypoint:
      - /build/entrypoint.sh
      - worker
      - p2p-llama-cpp-rpc
      - --llama-cpp-args=-m 16380 # set this to the VRAM size in MB
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
x-n8n: &service-n8n
  image: n8nio/n8n:latest
  environment:
    - DB_TYPE=postgresdb
    - DB_POSTGRESDB_HOST=postgres
    - DB_POSTGRESDB_USER=${POSTGRES_USER}
    - DB_POSTGRESDB_PASSWORD=${POSTGRES_PASSWORD}
    - N8N_DIAGNOSTICS_ENABLED=false
    - N8N_PERSONALIZATION_ENABLED=false
    - WEBHOOK_URL=https://n8n.crypto-tech.cloud
    - N8N_ENCRYPTION_KEY
    - N8N_USER_MANAGEMENT_JWT_SECRET
    - N8N_SECURE_COOKIE
    - NODE_FUNCTION_ALLOW_BUILTIN=*
    - NODE_FUNCTION_ALLOW_EXTERNAL=*
  links:
    - postgres

x-logging: &logging
  logging:
    driver: 'json-file'
    options:
      max-size: 100m
      max-file: '2'

services:

  localai:
    container_name: localai
    hostname: localai
    image: localai/localai:latest-gpu-nvidia-cuda-12
    restart: unless-stopped
    # ports:
    #   - 8080:8080
    environment:
      - MODELS_PATH=/models
      - TOKEN=your-token-here
      - FEDERATED=true
      - LOCALAI_P2P=true
      - LOCALAI_PARALLEL_REQUESTS=true
      # - DEBUG=true
    volumes:
      - ./models:/models:cached
      - ./images/:/tmp/generated/images/
    network_mode: host # couldn't get p2p to work without this.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['1','2','3']
              capabilities: [gpu]
    <<: *logging

  llm-proxy:
    container_name: llm-proxy
    hostname: llm-proxy
    image: ghcr.io/j4ys0n/llm-proxy:1.4.7
    restart: unless-stopped
    ports:
      - 3001:3001
    env_file:
      - ./.llm-proxy.env
    volumes:
      - ./.llm-proxy.env:/usr/src/app/.env
    <<: *logging

  # open webui start

  webui:
    container_name: webui
    hostname: webui
    image: ghcr.io/open-webui/open-webui:main
    restart: unless-stopped
    ports:
      - 3000:3000
    environment:
      - PORT=3000
      - OPENAI_API_BASE_URL=http://llm-proxy:3001/v1 # llm-proxy listens on 3001 (see .llm-proxy.env)
    volumes:
      - ./open-webui:/app/backend/data
    <<: *logging

  redis:
    container_name: redis
    hostname: redis
    image: docker.io/valkey/valkey:8-bookworm
    restart: unless-stopped
    command: valkey-server --save 30 1 --loglevel warning
    volumes:
      - ./valkey-data:/data
    cap_drop:
      - ALL
    cap_add:
      - SETGID
      - SETUID
      - DAC_OVERRIDE
    <<: *logging

  searxng:
    container_name: searxng
    hostname: searxng
    image: docker.io/searxng/searxng:latest
    restart: unless-stopped
    ports:
      - 8081:8080
    environment:
      - SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/
      - UWSGI_WORKERS=${SEARXNG_UWSGI_WORKERS:-4}
      - UWSGI_THREADS=${SEARXNG_UWSGI_THREADS:-4}
    cap_drop:
      - ALL
    cap_add:
      - CHOWN
      - SETGID
      - SETUID
    volumes:
      - ./searxng:/etc/searxng:rw
    <<: *logging

  # open webui end
  # comfyui start

  stable-diffusion-download:
    container_name: stable-diffusion-download
    build: ./stable-diffusion-webui-docker/services/download/
    image: comfy-download
    environment:
      - PUID=${PUID:-1000}
      - PGID=${PGID:-1000}
    volumes:
      - ./stable-diffusion-webui-docker/data:/data
    <<: *logging

  comfy-ui:
    container_name: comfy-ui
    hostname: comfy-ui
    build: ./stable-diffusion-webui-docker/services/comfy/
    image: comfy-ui
    restart: unless-stopped
    ports:
      - 7860:7860
    environment:
      - PUID=${PUID:-1000}
      - PGID=${PGID:-1000}
      - CLI_ARGS=
    volumes:
      - ./stable-diffusion-webui-docker/data:/data
      - ./stable-diffusion-webui-docker/output:/output
    stop_signal: SIGKILL
    tty: true
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [compute, utility]
    <<: *logging

  # comfyui end
  # n8n start

  postgres:
    container_name: postgres
    hostname: postgres
    image: postgres:16-alpine
    restart: unless-stopped
    ports:
      - 5432:5432
    environment:
      - POSTGRES_USER
      - POSTGRES_PASSWORD
      - POSTGRES_DB
    volumes:
      - ./postgres:/var/lib/postgresql/data
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -h localhost -U ${POSTGRES_USER} -d ${POSTGRES_DB}']
      interval: 5s
      timeout: 5s
      retries: 10
    <<: *logging

  qdrant:
    container_name: qdrant
    hostname: qdrant
    image: qdrant/qdrant
    restart: unless-stopped
    ports:
      - 6333:6333
    volumes:
      - ./qdrant:/qdrant/storage
    <<: *logging

  n8n-import:
    <<: [*service-n8n, *logging]
    container_name: n8n-import
    entrypoint: /bin/sh
    command:
      - "-c"
      - "n8n import:credentials --separate --input=/backup/credentials && n8n import:workflow --separate --input=/backup/workflows"
    volumes:
      - ./n8n/backup:/backup
    depends_on:
      postgres:
        condition: service_healthy

  n8n:
    <<: [*service-n8n, *logging]
    container_name: n8n
    hostname: n8n
    restart: unless-stopped
    ports:
      - 5678:5678
    volumes:
      - ./n8n/data:/home/node/.n8n
      - ./n8n/backup:/backup
      - ./shared:/data/shared # only needed if you want to work with local files
    depends_on:
      postgres:
        condition: service_healthy
      n8n-import:
        condition: service_completed_successfully

  # n8n end
--------------------------------------------------------------------------------