├── models └── .keep ├── examples ├── query_data │ ├── data │ │ └── .keep │ ├── .gitignore │ ├── models │ │ ├── completion.tmpl │ │ ├── wizardlm.tmpl │ │ ├── gpt-3.5-turbo.yaml │ │ └── embeddings.yaml │ ├── docker-compose.yml │ ├── store.py │ ├── query.py │ ├── update.py │ └── README.md ├── discord-bot │ ├── models │ ├── .env.example │ ├── docker-compose.yaml │ └── README.md ├── slack-bot │ ├── models │ ├── .env.example │ ├── docker-compose.yaml │ └── README.md ├── chatbot-ui │ ├── models │ │ ├── completion.tmpl │ │ ├── gpt4all.tmpl │ │ └── gpt-3.5-turbo.yaml │ ├── docker-compose.yaml │ └── README.md ├── langchain-python │ ├── models │ ├── test.py │ ├── docker-compose.yaml │ ├── README.md │ └── agent.py ├── langchain │ ├── models │ │ ├── completion.tmpl │ │ ├── gpt4all.tmpl │ │ └── gpt-3.5-turbo.yaml │ ├── .gitignore │ ├── langchainjs-localai-example │ │ ├── .gitignore │ │ ├── tsconfig.json │ │ ├── package.json │ │ ├── .vscode │ │ │ └── launch.json │ │ └── src │ │ │ └── index.mts │ ├── langchainpy-localai-example │ │ ├── .vscode │ │ │ ├── settings.json │ │ │ └── launch.json │ │ ├── simple_demo.py │ │ ├── requirements.txt │ │ └── full_demo.py │ ├── JS.Dockerfile │ ├── PY.Dockerfile │ ├── README.md │ └── docker-compose.yaml ├── rwkv │ ├── models │ │ ├── rwkv_completion.tmpl │ │ ├── gpt-3.5-turbo.yaml │ │ └── rwkv_chat.tmpl │ ├── Dockerfile.build │ ├── scripts │ │ └── build.sh │ ├── docker-compose.yaml │ └── README.md ├── localai-webui │ ├── docker-compose.yml │ └── README.md └── README.md ├── tests └── fixtures │ ├── completion.tmpl │ ├── ggml-gpt4all-j.tmpl │ ├── gpt4.yaml │ ├── gpt4_2.yaml │ └── config.yaml ├── .dockerignore ├── prompt-templates ├── wizardlm.tmpl ├── koala.tmpl ├── alpaca.tmpl ├── vicuna.tmpl └── ggml-gpt4all-j.tmpl ├── entrypoint.sh ├── .env ├── renovate.json ├── Earthfile ├── .devcontainer ├── Dockerfile ├── docker-compose.yml └── devcontainer.json ├── Dockerfile ├── .gitignore ├── api ├── apt_suite_test.go ├── api.go ├── api_test.go ├── config.go ├── prediction.go └── openai.go ├── .github ├── bump_deps.sh └── workflows │ ├── release.yml.disabled │ ├── test.yml │ ├── bump_deps.yaml │ └── image.yml ├── .goreleaser.yaml ├── docker-compose.yaml ├── Dockerfile.dev ├── .vscode └── launch.json ├── LICENSE ├── go.mod ├── main.go ├── Makefile ├── pkg └── model │ └── loader.go ├── go.sum └── README.md /models/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/query_data/data/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/query_data/.gitignore: -------------------------------------------------------------------------------- 1 | storage/ -------------------------------------------------------------------------------- /tests/fixtures/completion.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} -------------------------------------------------------------------------------- /examples/discord-bot/models: -------------------------------------------------------------------------------- 1 | ../chatbot-ui/models/ -------------------------------------------------------------------------------- /examples/slack-bot/models: -------------------------------------------------------------------------------- 1 | ../chatbot-ui/models 
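Note: the `models` entries under `examples/discord-bot` and `examples/slack-bot` shown just above contain only a relative path (`../chatbot-ui/models`), because they appear to be symlinks — both bots reuse the model configuration shipped with the chatbot-ui example. A minimal sketch of how such a link is typically created, assuming you are recreating this layout by hand:

```bash
# from inside examples/discord-bot/ (same idea for examples/slack-bot/)
ln -s ../chatbot-ui/models models
```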
-------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | models 2 | examples/chatbot-ui/models -------------------------------------------------------------------------------- /examples/chatbot-ui/models/completion.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} -------------------------------------------------------------------------------- /examples/langchain-python/models: -------------------------------------------------------------------------------- 1 | ../chatbot-ui/models -------------------------------------------------------------------------------- /examples/langchain/models/completion.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} -------------------------------------------------------------------------------- /examples/query_data/models/completion.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} -------------------------------------------------------------------------------- /prompt-templates/wizardlm.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} 2 | 3 | ### Response: -------------------------------------------------------------------------------- /examples/query_data/models/wizardlm.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} 2 | 3 | ### Response: -------------------------------------------------------------------------------- /prompt-templates/koala.tmpl: -------------------------------------------------------------------------------- 1 | BEGINNING OF CONVERSATION: USER: {{.Input}} GPT: -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /build 4 | 5 | make build 6 | 7 | ./local-ai "$@" -------------------------------------------------------------------------------- /examples/langchain/.gitignore: -------------------------------------------------------------------------------- 1 | models/ggml-koala-13B-4bit-128g 2 | models/ggml-gpt4all-j -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | -------------------------------------------------------------------------------- /examples/rwkv/models/rwkv_completion.tmpl: -------------------------------------------------------------------------------- 1 | Complete the following sentence: {{.Input}} -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # THREADS=14 2 | # CONTEXT_SIZE=512 3 | MODELS_PATH=/models 4 | # DEBUG=true 5 | # BUILD_TYPE=generic 6 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["config:base"] 4 | } 5 | -------------------------------------------------------------------------------- /Earthfile: 
-------------------------------------------------------------------------------- 1 | VERSION 0.7 2 | 3 | build: 4 | FROM DOCKERFILE -f Dockerfile . 5 | SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai 6 | -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python" 3 | } -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GO_VERSION=1.20 2 | FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye 3 | RUN apt-get update && apt-get install -y cmake 4 | -------------------------------------------------------------------------------- /examples/langchain/JS.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:latest 2 | COPY ./langchainjs-localai-example /app 3 | WORKDIR /app 4 | RUN npm install 5 | RUN npm run build 6 | ENTRYPOINT [ "npm", "run", "start" ] -------------------------------------------------------------------------------- /prompt-templates/alpaca.tmpl: -------------------------------------------------------------------------------- 1 | Below is an instruction that describes a task. Write a response that appropriately completes the request. 2 | 3 | ### Instruction: 4 | {{.Input}} 5 | 6 | ### Response: -------------------------------------------------------------------------------- /prompt-templates/vicuna.tmpl: -------------------------------------------------------------------------------- 1 | Below is an instruction that describes a task. Write a response that appropriately completes the request. 2 | 3 | ### Instruction: 4 | {{.Input}} 5 | 6 | ### Response: -------------------------------------------------------------------------------- /examples/discord-bot/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=x 2 | DISCORD_BOT_TOKEN=x 3 | DISCORD_CLIENT_ID=x 4 | OPENAI_API_BASE=http://api:8080 5 | ALLOWED_SERVER_IDS=x 6 | SERVER_TO_MODERATION_CHANNEL=1:1 7 | -------------------------------------------------------------------------------- /examples/langchain/PY.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-bullseye 2 | COPY ./langchainpy-localai-example /app 3 | WORKDIR /app 4 | RUN pip install --no-cache-dir -r requirements.txt 5 | ENTRYPOINT [ "python", "./full_demo.py" ]; -------------------------------------------------------------------------------- /prompt-templates/ggml-gpt4all-j.tmpl: -------------------------------------------------------------------------------- 1 | The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 2 | ### Prompt: 3 | {{.Input}} 4 | ### Response: 5 | -------------------------------------------------------------------------------- /tests/fixtures/ggml-gpt4all-j.tmpl: -------------------------------------------------------------------------------- 1 | The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 
2 | ### Prompt: 3 | {{.Input}} 4 | ### Response: 5 | -------------------------------------------------------------------------------- /examples/chatbot-ui/models/gpt4all.tmpl: -------------------------------------------------------------------------------- 1 | The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 2 | ### Prompt: 3 | {{.Input}} 4 | ### Response: 5 | -------------------------------------------------------------------------------- /examples/langchain/models/gpt4all.tmpl: -------------------------------------------------------------------------------- 1 | The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 2 | ### Prompt: 3 | {{.Input}} 4 | ### Response: 5 | -------------------------------------------------------------------------------- /examples/langchain-python/test.py: -------------------------------------------------------------------------------- 1 | 2 | from langchain.llms import OpenAI 3 | 4 | llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo") 5 | text = "What would be a good company name for a company that makes colorful socks?" 6 | print(llm(text)) 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GO_VERSION=1.20 2 | ARG BUILD_TYPE= 3 | FROM golang:$GO_VERSION 4 | WORKDIR /build 5 | RUN apt-get update && apt-get install -y cmake 6 | COPY . . 7 | RUN make prepare-sources 8 | EXPOSE 8080 9 | ENTRYPOINT [ "/build/entrypoint.sh" ] 10 | -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/simple_demo.py: -------------------------------------------------------------------------------- 1 | 2 | from langchain.llms import OpenAI 3 | 4 | llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo") 5 | text = "What would be a good company name for a company that makes colorful socks?" 6 | print(llm(text)) 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # go-llama build artifacts 2 | go-llama 3 | go-gpt4all-j 4 | go-gpt2 5 | go-rwkv 6 | 7 | # LocalAI build binary 8 | LocalAI 9 | local-ai 10 | # prevent above rules from omitting the helm chart 11 | !charts/* 12 | 13 | # Ignore models 14 | models/* 15 | test-models/ -------------------------------------------------------------------------------- /api/apt_suite_test.go: -------------------------------------------------------------------------------- 1 | package api_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . "github.com/onsi/ginkgo/v2" 7 | . 
"github.com/onsi/gomega" 8 | ) 9 | 10 | func TestLocalAI(t *testing.T) { 11 | RegisterFailHandler(Fail) 12 | RunSpecs(t, "LocalAI test suite") 13 | } 14 | -------------------------------------------------------------------------------- /.github/bump_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -xe 3 | REPO=$1 4 | BRANCH=$2 5 | VAR=$3 6 | 7 | LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH") 8 | 9 | sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/" 10 | -------------------------------------------------------------------------------- /tests/fixtures/gpt4.yaml: -------------------------------------------------------------------------------- 1 | name: gpt4all 2 | parameters: 3 | model: testmodel 4 | top_p: 80 5 | top_k: 0.9 6 | temperature: 0.1 7 | context_size: 10 8 | stopwords: 9 | - "HUMAN:" 10 | - "### Response:" 11 | roles: 12 | user: "HUMAN:" 13 | system: "GPT:" 14 | template: 15 | completion: completion 16 | chat: ggml-gpt4all-j -------------------------------------------------------------------------------- /tests/fixtures/gpt4_2.yaml: -------------------------------------------------------------------------------- 1 | name: gpt4all-2 2 | parameters: 3 | model: testmodel 4 | top_p: 80 5 | top_k: 0.9 6 | temperature: 0.1 7 | context_size: 10 8 | stopwords: 9 | - "HUMAN:" 10 | - "### Response:" 11 | roles: 12 | user: "HUMAN:" 13 | system: "GPT:" 14 | template: 15 | completion: completion 16 | chat: ggml-gpt4all-j -------------------------------------------------------------------------------- /examples/rwkv/Dockerfile.build: -------------------------------------------------------------------------------- 1 | FROM python 2 | 3 | # convert the model (one-off) 4 | RUN pip3 install torch numpy 5 | 6 | WORKDIR /build 7 | COPY ./scripts/ . 8 | 9 | RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release 10 | ENTRYPOINT [ "/build/build.sh" ] -------------------------------------------------------------------------------- /examples/rwkv/scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | URL=$1 5 | OUT=$2 6 | FILENAME=$(basename $URL) 7 | 8 | wget -nc $URL -O /build/$FILENAME 9 | 10 | python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16 11 | python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2 12 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | # Make sure to check the documentation at http://goreleaser.com 2 | project_name: local-ai 3 | builds: 4 | - ldflags: 5 | - -w -s 6 | env: 7 | - CGO_ENABLED=0 8 | goos: 9 | - linux 10 | - darwin 11 | - windows 12 | goarch: 13 | - amd64 14 | - arm64 15 | binary: '{{ .ProjectName }}' -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: . 
8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | env_file: 12 | - .env 13 | volumes: 14 | - ./models:/models:cached 15 | command: ["/usr/bin/local-ai" ] 16 | -------------------------------------------------------------------------------- /examples/chatbot-ui/models/gpt-3.5-turbo.yaml: -------------------------------------------------------------------------------- 1 | name: gpt-3.5-turbo 2 | parameters: 3 | model: ggml-gpt4all-j 4 | top_k: 80 5 | temperature: 0.2 6 | top_p: 0.7 7 | context_size: 1024 8 | threads: 14 9 | stopwords: 10 | - "HUMAN:" 11 | - "GPT:" 12 | roles: 13 | user: " " 14 | system: " " 15 | template: 16 | completion: completion 17 | chat: gpt4all -------------------------------------------------------------------------------- /examples/query_data/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: . 8 | dockerfile: Dockerfile 9 | ports: 10 | - 8080:8080 11 | env_file: 12 | - .env 13 | volumes: 14 | - ./models:/models:cached 15 | command: ["/usr/bin/local-ai"] 16 | -------------------------------------------------------------------------------- /Dockerfile.dev: -------------------------------------------------------------------------------- 1 | ARG GO_VERSION=1.20 2 | ARG DEBIAN_VERSION=11 3 | ARG BUILD_TYPE= 4 | 5 | FROM golang:$GO_VERSION as builder 6 | WORKDIR /build 7 | RUN apt-get update && apt-get install -y cmake 8 | COPY . . 9 | RUN make build 10 | 11 | FROM debian:$DEBIAN_VERSION 12 | COPY --from=builder /build/local-ai /usr/bin/local-ai 13 | EXPOSE 8080 14 | ENTRYPOINT [ "/usr/bin/local-ai" ] -------------------------------------------------------------------------------- /examples/query_data/models/gpt-3.5-turbo.yaml: -------------------------------------------------------------------------------- 1 | name: gpt-3.5-turbo 2 | parameters: 3 | model: HERE 4 | top_k: 80 5 | temperature: 0.2 6 | top_p: 0.7 7 | context_size: 1024 8 | threads: 14 9 | embeddings: true 10 | stopwords: 11 | - "HUMAN:" 12 | - "GPT:" 13 | roles: 14 | user: " " 15 | system: " " 16 | template: 17 | completion: completion 18 | chat: wizardlm 19 | -------------------------------------------------------------------------------- /examples/query_data/models/embeddings.yaml: -------------------------------------------------------------------------------- 1 | name: text-embedding-ada-002 2 | parameters: 3 | model: HERE 4 | top_k: 80 5 | temperature: 0.2 6 | top_p: 0.7 7 | context_size: 1024 8 | threads: 14 9 | stopwords: 10 | - "HUMAN:" 11 | - "GPT:" 12 | roles: 13 | user: " " 14 | system: " " 15 | embeddings: true 16 | template: 17 | completion: completion 18 | chat: gpt4all 19 | -------------------------------------------------------------------------------- /examples/langchain/models/gpt-3.5-turbo.yaml: -------------------------------------------------------------------------------- 1 | name: gpt-3.5-turbo 2 | parameters: 3 | model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g 4 | top_k: 80 5 | temperature: 0.2 6 | top_p: 0.7 7 | context_size: 1024 8 | threads: 4 9 | stopwords: 10 | - "HUMAN:" 11 | - "GPT:" 12 | roles: 13 | user: " " 14 | system: " " 15 | backend: "gptj" 16 | template: 17 | completion: completion 18 | chat: gpt4all -------------------------------------------------------------------------------- /examples/rwkv/models/gpt-3.5-turbo.yaml: 
-------------------------------------------------------------------------------- 1 | name: gpt-3.5-turbo 2 | parameters: 3 | model: rwkv 4 | top_k: 80 5 | temperature: 0.9 6 | max_tokens: 100 7 | top_p: 0.8 8 | context_size: 1024 9 | threads: 14 10 | backend: "rwkv" 11 | cutwords: 12 | - "Bob:.*" 13 | roles: 14 | user: "Bob:" 15 | system: "Alice:" 16 | assistant: "Alice:" 17 | template: 18 | completion: rwkv_completion 19 | chat: rwkv_chat -------------------------------------------------------------------------------- /examples/rwkv/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | -------------------------------------------------------------------------------- /examples/langchain-python/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | -------------------------------------------------------------------------------- /examples/slack-bot/.env.example: -------------------------------------------------------------------------------- 1 | SLACK_APP_TOKEN=xapp-1-... 2 | SLACK_BOT_TOKEN=xoxb-... 3 | OPENAI_API_KEY=sk-... 4 | OPENAI_API_BASE=http://api:8080 5 | OPENAI_MODEL=gpt-3.5-turbo 6 | OPENAI_TIMEOUT_SECONDS=60 7 | #OPENAI_SYSTEM_TEXT="You proofread text. When you receive a message, you will check 8 | #for mistakes and make suggestion to improve the language of the given text" 9 | USE_SLACK_LANGUAGE=true 10 | SLACK_APP_LOG_LEVEL=INFO 11 | TRANSLATE_MARKDOWN=true -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2022", 4 | "lib": ["ES2022", "DOM"], 5 | "module": "ES2022", 6 | "moduleResolution": "node", 7 | "strict": true, 8 | "esModuleInterop": true, 9 | "allowSyntheticDefaultImports": true, 10 | "isolatedModules": true, 11 | "outDir": "./dist" 12 | }, 13 | "include": ["src", "test"], 14 | "exclude": ["node_modules", "dist"] 15 | } 16 | -------------------------------------------------------------------------------- /examples/rwkv/models/rwkv_chat.tmpl: -------------------------------------------------------------------------------- 1 | The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob. 2 | 3 | Bob: Hello Alice, how are you doing? 4 | 5 | Alice: Hi Bob! Thanks, I'm fine. What about you? 6 | 7 | Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while? 8 | 9 | Alice: Not at all! I'm listening. 
10 | 11 | {{.Input}} 12 | 13 | Alice: -------------------------------------------------------------------------------- /examples/localai-webui/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: . 8 | dockerfile: Dockerfile 9 | ports: 10 | - 8080:8080 11 | env_file: 12 | - .env 13 | volumes: 14 | - ./models:/models:cached 15 | command: ["/usr/bin/local-ai"] 16 | 17 | frontend: 18 | image: quay.io/go-skynet/localai-frontend:master 19 | ports: 20 | - 3000:3000 -------------------------------------------------------------------------------- /examples/discord-bot/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | 18 | bot: 19 | image: quay.io/go-skynet/gpt-discord-bot:main 20 | env_file: 21 | - .env 22 | -------------------------------------------------------------------------------- /examples/slack-bot/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | 18 | bot: 19 | build: 20 | context: ./ChatGPT-in-Slack 21 | dockerfile: Dockerfile 22 | env_file: 23 | - .env 24 | -------------------------------------------------------------------------------- /examples/chatbot-ui/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | 18 | chatgpt: 19 | image: ghcr.io/mckaywrigley/chatbot-ui:main 20 | ports: 21 | - 3000:3000 22 | environment: 23 | - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' 24 | - 'OPENAI_API_HOST=http://api:8080' -------------------------------------------------------------------------------- /.github/workflows/release.yml.disabled: -------------------------------------------------------------------------------- 1 | name: goreleaser 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | goreleaser: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | - name: Set up Go 17 | uses: actions/setup-go@v3 18 | with: 19 | go-version: 1.18 20 | - name: Run GoReleaser 21 | uses: goreleaser/goreleaser-action@v4 22 | with: 23 | version: latest 24 | args: release --clean 25 | env: 26 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | 
"configurations": [ 4 | { 5 | "name": "Launch Go", 6 | "type": "go", 7 | "request": "launch", 8 | "mode": "debug", 9 | "program": "${workspaceFolder}/main.go", 10 | "args": [ 11 | "api" 12 | ], 13 | "env": { 14 | "C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2", 15 | "LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2", 16 | "DEBUG": "true" 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langchainjs-localai-example", 3 | "version": "0.1.0", 4 | "description": "Trivial Example of using langchain + the OpenAI API + LocalAI together", 5 | "main": "index.mjs", 6 | "scripts": { 7 | "build": "tsc --build", 8 | "clean": "tsc --build --clean", 9 | "start": "node --trace-warnings dist/index.mjs" 10 | }, 11 | "author": "dave@gray101.com", 12 | "license": "MIT", 13 | "devDependencies": { 14 | "@types/node": "^18.16.4", 15 | "typescript": "^5.0.4" 16 | }, 17 | "dependencies": { 18 | "langchain": "^0.0.67", 19 | "typeorm": "^0.3.15" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /tests/fixtures/config.yaml: -------------------------------------------------------------------------------- 1 | - name: list1 2 | parameters: 3 | model: testmodel 4 | top_p: 80 5 | top_k: 0.9 6 | temperature: 0.1 7 | context_size: 10 8 | stopwords: 9 | - "HUMAN:" 10 | - "### Response:" 11 | roles: 12 | user: "HUMAN:" 13 | system: "GPT:" 14 | template: 15 | completion: completion 16 | chat: ggml-gpt4all-j 17 | - name: list2 18 | parameters: 19 | top_p: 80 20 | top_k: 0.9 21 | temperature: 0.1 22 | model: testmodel 23 | context_size: 10 24 | stopwords: 25 | - "HUMAN:" 26 | - "### Response:" 27 | roles: 28 | user: "HUMAN:" 29 | system: "GPT:" 30 | template: 31 | completion: completion 32 | chat: ggml-gpt4all-j -------------------------------------------------------------------------------- /examples/slack-bot/README.md: -------------------------------------------------------------------------------- 1 | # Slack bot 2 | 3 | Slackbot using: https://github.com/seratch/ChatGPT-in-Slack 4 | 5 | ## Setup 6 | 7 | ```bash 8 | # Clone LocalAI 9 | git clone https://github.com/go-skynet/LocalAI 10 | 11 | cd LocalAI/examples/slack-bot 12 | 13 | git clone https://github.com/seratch/ChatGPT-in-Slack 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # Download gpt4all-j to models/ 19 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 20 | 21 | # Set the discord bot options (see: https://github.com/seratch/ChatGPT-in-Slack) 22 | cp -rfv .env.example .env 23 | vim .env 24 | 25 | # start with docker-compose 26 | docker-compose up -d --build 27 | ``` -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "node", 9 | "request": "launch", 10 | "name": "Launch Program", 11 | // "skipFiles": [ 12 | // "/**" 13 | // ], 14 | "program": "${workspaceFolder}\\dist\\index.mjs", 15 | "outFiles": [ 16 | "${workspaceFolder}/**/*.js" 17 | ] 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Python: Current File", 6 | "type": "python", 7 | "request": "launch", 8 | "program": "${file}", 9 | "console": "integratedTerminal", 10 | "redirectOutput": true, 11 | "justMyCode": false 12 | }, 13 | { 14 | "name": "Python: Attach to Port 5678", 15 | "type": "python", 16 | "request": "attach", 17 | "connect": { 18 | "host": "localhost", 19 | "port": 5678 20 | }, 21 | "justMyCode": false 22 | } 23 | ] 24 | } -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | async-timeout==4.0.2 4 | attrs==23.1.0 5 | certifi==2022.12.7 6 | charset-normalizer==3.1.0 7 | colorama==0.4.6 8 | dataclasses-json==0.5.7 9 | debugpy==1.6.7 10 | frozenlist==1.3.3 11 | greenlet==2.0.2 12 | idna==3.4 13 | langchain==0.0.159 14 | marshmallow==3.19.0 15 | marshmallow-enum==1.5.1 16 | multidict==6.0.4 17 | mypy-extensions==1.0.0 18 | numexpr==2.8.4 19 | numpy==1.24.3 20 | openai==0.27.6 21 | openapi-schema-pydantic==1.2.4 22 | packaging==23.1 23 | pydantic==1.10.7 24 | PyYAML==6.0 25 | requests==2.29.0 26 | SQLAlchemy==2.0.12 27 | tenacity==8.2.2 28 | tqdm==4.65.0 29 | typing-inspect==0.8.0 30 | typing_extensions==4.5.0 31 | urllib3==1.26.15 32 | yarl==1.9.2 33 | -------------------------------------------------------------------------------- /examples/localai-webui/README.md: -------------------------------------------------------------------------------- 1 | # localai-webui 2 | 3 | Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend). 4 | 5 | ![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png) 6 | 7 | ## Setup 8 | 9 | ```bash 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/localai-webui 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # Download any desired models to models/ in the parent LocalAI project dir 19 | # For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin 20 | 21 | # start with docker-compose 22 | docker-compose up -d --build 23 | ``` 24 | 25 | Open http://localhost:3000 for the Web UI. 
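As a quick sanity check before relying on the frontend, you can ask the API which models it has picked up. This is a sketch assuming the default port mapping from the compose file above; the exact names returned depend on what you downloaded into `models/`:

```bash
curl http://localhost:8080/v1/models
```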
26 | 27 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'tests' 3 | 4 | on: 5 | pull_request: 6 | push: 7 | branches: 8 | - master 9 | tags: 10 | - '*' 11 | 12 | jobs: 13 | ubuntu-latest: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Clone 18 | uses: actions/checkout@v3 19 | with: 20 | submodules: true 21 | - name: Dependencies 22 | run: | 23 | sudo apt-get update 24 | sudo apt-get install build-essential 25 | - name: Test 26 | run: | 27 | make test 28 | 29 | macOS-latest: 30 | runs-on: macOS-latest 31 | 32 | steps: 33 | - name: Clone 34 | uses: actions/checkout@v3 35 | with: 36 | submodules: true 37 | 38 | - name: Dependencies 39 | run: | 40 | brew update 41 | brew install sdl2 42 | - name: Test 43 | run: | 44 | make test -------------------------------------------------------------------------------- /examples/langchain-python/README.md: -------------------------------------------------------------------------------- 1 | ## Langchain-python 2 | 3 | Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html). 4 | 5 | To interact with langchain, you can just set the `OPENAI_API_BASE` URL and provide a token with a random string. 6 | 7 | See the example below: 8 | 9 | ``` 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/langchain-python 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # Download gpt4all-j to models/ 19 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 20 | 21 | # start with docker-compose 22 | docker-compose up -d --build 23 | 24 | 25 | pip install langchain 26 | pip install openai 27 | 28 | export OPENAI_API_BASE=http://localhost:8080 29 | export OPENAI_API_KEY=sk- 30 | 31 | python test.py 32 | # A good company name for a company that makes colorful socks would be "Colorsocks". 33 | 34 | python agent.py 35 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 go-skynet authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /.devcontainer/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | services: 3 | # Update this to the name of the service you want to work with in your docker-compose.yml file 4 | api: 5 | # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer 6 | # folder. Note that the path of the Dockerfile and context is relative to the *primary* 7 | # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile" 8 | # array). The sample below assumes your primary file is in the root of your project. 9 | # 10 | build: 11 | context: . 12 | dockerfile: .devcontainer/Dockerfile 13 | 14 | volumes: 15 | # Update this to wherever you want VS Code to mount the folder of your project 16 | - .:/workspace:cached 17 | 18 | # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust. 19 | # cap_add: 20 | # - SYS_PTRACE 21 | # security_opt: 22 | # - seccomp:unconfined 23 | 24 | # Overrides default command so things don't shut down after the process ends. 25 | command: /bin/sh -c "while sleep 1000; do :; done" 26 | 27 | -------------------------------------------------------------------------------- /examples/langchain/README.md: -------------------------------------------------------------------------------- 1 | # langchain 2 | 3 | Example of using langchain, with the standard OpenAI llm module, and LocalAI. Has docker compose profiles for both the Typescript and Python versions. 4 | 5 | **Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example. 6 | 7 | ## Setup 8 | 9 | ```bash 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/langchain 14 | 15 | # (optional) - Edit the example code in typescript. 16 | # vi ./langchainjs-localai-example/index.ts 17 | 18 | # Download gpt4all-j to models/ 19 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 20 | 21 | # start with docker-compose for typescript! 22 | docker-compose --profile ts up --build 23 | 24 | # or start with docker-compose for python! 25 | docker-compose --profile py up --build 26 | ``` 27 | 28 | ## Copyright 29 | 30 | Some of the example code in index.mts and full_demo.py is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code. -------------------------------------------------------------------------------- /examples/langchain/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | 18 | js: 19 | build: 20 | context: . 21 | dockerfile: JS.Dockerfile 22 | profiles: 23 | - js 24 | - ts 25 | depends_on: 26 | - "api" 27 | environment: 28 | - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' 29 | - 'OPENAI_API_BASE=http://api:8080/v1' 30 | - 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g' 31 | 32 | py: 33 | build: 34 | context: . 
35 | dockerfile: PY.Dockerfile 36 | profiles: 37 | - py 38 | depends_on: 39 | - "api" 40 | environment: 41 | - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' 42 | - 'OPENAI_API_BASE=http://api:8080/v1' 43 | - 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g' -------------------------------------------------------------------------------- /examples/query_data/store.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended) 4 | # os.environ['OPENAI_API_KEY']= "" 5 | 6 | from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext 7 | from langchain.llms.openai import OpenAI 8 | from llama_index import StorageContext, load_index_from_storage 9 | 10 | base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') 11 | 12 | # This example uses text-davinci-003 by default; feel free to change if desired 13 | llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path)) 14 | 15 | # Configure prompt parameters and initialise helper 16 | max_input_size = 512 17 | num_output = 512 18 | max_chunk_overlap = 30 19 | 20 | prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) 21 | 22 | # Load documents from the 'data' directory 23 | documents = SimpleDirectoryReader('data').load_data() 24 | service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 512) 25 | index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context) 26 | index.storage_context.persist(persist_dir="./storage") 27 | 28 | -------------------------------------------------------------------------------- /examples/query_data/query.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended) 4 | # os.environ['OPENAI_API_KEY']= "" 5 | 6 | from llama_index import LLMPredictor, PromptHelper, ServiceContext 7 | from langchain.llms.openai import OpenAI 8 | from llama_index import StorageContext, load_index_from_storage 9 | 10 | base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') 11 | 12 | # This example uses text-davinci-003 by default; feel free to change if desired 13 | llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path)) 14 | 15 | # Configure prompt parameters and initialise helper 16 | max_input_size = 1024 17 | num_output = 256 18 | max_chunk_overlap = 20 19 | 20 | prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) 21 | 22 | # Load documents from the 'data' directory 23 | service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper) 24 | 25 | # rebuild storage context 26 | storage_context = StorageContext.from_defaults(persist_dir='./storage') 27 | 28 | # load index 29 | index = load_index_from_storage(storage_context, service_context=service_context, ) 30 | 31 | query_engine = index.as_query_engine() 32 | 33 | data = input("Question: ") 34 | response = query_engine.query(data) 35 | print(response) 36 | 
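# Usage sketch for the two scripts above (assumed workflow, matching this example's README:
# LocalAI reachable at the base URL below, and your documents already copied into data/):
#
#   export OPENAI_API_BASE=http://localhost:8080/v1
#   export OPENAI_API_KEY=sk-
#   python store.py    # builds the vector index into ./storage
#   python query.py    # then type a question at the "Question:" prompt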
-------------------------------------------------------------------------------- /examples/query_data/update.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended) 4 | # os.environ['OPENAI_API_KEY']= "" 5 | 6 | from llama_index import LLMPredictor, PromptHelper, SimpleDirectoryReader, ServiceContext 7 | from langchain.llms.openai import OpenAI 8 | from llama_index import StorageContext, load_index_from_storage 9 | 10 | base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') 11 | 12 | # This example uses text-davinci-003 by default; feel free to change if desired 13 | llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path)) 14 | 15 | # Configure prompt parameters and initialise helper 16 | max_input_size = 512 17 | num_output = 256 18 | max_chunk_overlap = 20 19 | 20 | prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) 21 | 22 | # Load documents from the 'data' directory 23 | service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper) 24 | 25 | # rebuild storage context 26 | storage_context = StorageContext.from_defaults(persist_dir='./storage') 27 | 28 | # load index 29 | index = load_index_from_storage(storage_context, service_context=service_context, ) 30 | documents = SimpleDirectoryReader('data').load_data() 31 | index.refresh(documents) 32 | index.storage_context.persist(persist_dir="./storage") -------------------------------------------------------------------------------- /.github/workflows/bump_deps.yaml: -------------------------------------------------------------------------------- 1 | name: Bump dependencies 2 | on: 3 | schedule: 4 | - cron: 0 20 * * * 5 | workflow_dispatch: 6 | jobs: 7 | bump: 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | include: 12 | - repository: "go-skynet/go-gpt4all-j.cpp" 13 | variable: "GOGPT4ALLJ_VERSION" 14 | branch: "master" 15 | - repository: "go-skynet/go-llama.cpp" 16 | variable: "GOLLAMA_VERSION" 17 | branch: "master" 18 | - repository: "go-skynet/go-gpt2.cpp" 19 | variable: "GOGPT2_VERSION" 20 | branch: "master" 21 | - repository: "donomii/go-rwkv.cpp" 22 | variable: "RWKV_VERSION" 23 | branch: "main" 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v3 27 | - name: Bump dependencies 🔧 28 | run: | 29 | bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} 30 | - name: Create Pull Request 31 | uses: peter-evans/create-pull-request@v5 32 | with: 33 | token: ${{ secrets.UPDATE_BOT_TOKEN }} 34 | push-to-fork: ci-forks/LocalAI 35 | commit-message: ':arrow_up: Update ${{ matrix.repository }}' 36 | title: ':arrow_up: Update ${{ matrix.repository }}' 37 | branch: "update/${{ matrix.variable }}" 38 | body: Bump of ${{ matrix.repository }} version 39 | signoff: true 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /examples/langchain-python/agent.py: -------------------------------------------------------------------------------- 1 | ## This is a fork/based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6 2 | 3 | from io import StringIO 4 | import sys 5 | import os 6 | from typing import Dict, Optional 7 | 8 | from langchain.agents import load_tools 9 | from langchain.agents import 
initialize_agent 10 | from langchain.agents.tools import Tool 11 | from langchain.llms import OpenAI 12 | 13 | base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') 14 | model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo') 15 | 16 | class PythonREPL: 17 | """Simulates a standalone Python REPL.""" 18 | 19 | def __init__(self): 20 | pass 21 | 22 | def run(self, command: str) -> str: 23 | """Run command and returns anything printed.""" 24 | old_stdout = sys.stdout 25 | sys.stdout = mystdout = StringIO() 26 | try: 27 | exec(command, globals()) 28 | sys.stdout = old_stdout 29 | output = mystdout.getvalue() 30 | except Exception as e: 31 | sys.stdout = old_stdout 32 | output = str(e) 33 | return output 34 | 35 | llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name) 36 | python_repl = Tool( 37 | "Python REPL", 38 | PythonREPL().run, 39 | """A Python shell. Use this to execute python commands. Input should be a valid python command. 40 | If you expect output it should be printed out.""", 41 | ) 42 | tools = [python_repl] 43 | agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True) 44 | agent.run("What is the 10th fibonacci number?") -------------------------------------------------------------------------------- /examples/chatbot-ui/README.md: -------------------------------------------------------------------------------- 1 | # chatbot-ui 2 | 3 | Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui). 4 | 5 | ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) 6 | 7 | ## Setup 8 | 9 | ```bash 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/chatbot-ui 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # Download gpt4all-j to models/ 19 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 20 | 21 | # start with docker-compose 22 | docker-compose up -d --build 23 | ``` 24 | 25 | ## Pointing chatbot-ui to a separately managed LocalAI service 26 | 27 | If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<>` below): 28 | ``` 29 | version: '3.6' 30 | 31 | services: 32 | chatgpt: 33 | image: ghcr.io/mckaywrigley/chatbot-ui:main 34 | ports: 35 | - 3000:3000 36 | environment: 37 | - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' 38 | - 'OPENAI_API_HOST=http://<>:8080' 39 | ``` 40 | 41 | Once you've edited the Dockerfile, you can start it with `docker compose up`, then browse to `http://localhost:3000`. 42 | 43 | ## Accessing chatbot-ui 44 | 45 | Open http://localhost:3000 for the Web UI. 
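If the UI loads but responses never arrive, it can help to first confirm that the `gpt-3.5-turbo` model defined in `models/gpt-3.5-turbo.yaml` answers over the API directly. A sketch of such a check, assuming the default port mapping above (the first request may take a while as the model loads):

```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-3.5-turbo",
  "messages": [{"role": "user", "content": "How are you?"}],
  "temperature": 0.2
}'
```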
46 | 47 | -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/full_demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | from langchain.chat_models import ChatOpenAI 5 | from langchain import PromptTemplate, LLMChain 6 | from langchain.prompts.chat import ( 7 | ChatPromptTemplate, 8 | SystemMessagePromptTemplate, 9 | AIMessagePromptTemplate, 10 | HumanMessagePromptTemplate, 11 | ) 12 | from langchain.schema import ( 13 | AIMessage, 14 | HumanMessage, 15 | SystemMessage 16 | ) 17 | 18 | # This logging incantation makes it easy to see that you're actually reaching your LocalAI instance rather than OpenAI. 19 | logging.basicConfig(level=logging.DEBUG) 20 | 21 | print('Langchain + LocalAI PYTHON Tests') 22 | 23 | base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1') 24 | key = os.environ.get('OPENAI_API_KEY', '-') 25 | model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo') 26 | 27 | 28 | chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100) 29 | 30 | print("Created ChatOpenAI for ", chat.model_name) 31 | 32 | template = "You are a helpful assistant that translates {input_language} to {output_language}. The next message will be a sentence in {input_language}. Respond ONLY with the translation in {output_language}. Do not respond in {input_language}!" 33 | system_message_prompt = SystemMessagePromptTemplate.from_template(template) 34 | human_template = "{text}" 35 | human_message_prompt = HumanMessagePromptTemplate.from_template(human_template) 36 | 37 | chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) 38 | 39 | print("ABOUT to execute") 40 | 41 | # get a chat completion from the formatted messages 42 | response = chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages()) 43 | 44 | print(response) 45 | 46 | print("."); -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose 3 | { 4 | "name": "Existing Docker Compose (Extend)", 5 | 6 | // Update the 'dockerComposeFile' list if you have more compose files or use different names. 7 | // The .devcontainer/docker-compose.yml file contains any overrides you need/want to make. 8 | "dockerComposeFile": [ 9 | "../docker-compose.yaml", 10 | "docker-compose.yml" 11 | ], 12 | 13 | // The 'service' property is the name of the service for the container that VS Code should 14 | // use. Update this value and .devcontainer/docker-compose.yml to the real service name. 15 | "service": "api", 16 | 17 | // The optional 'workspaceFolder' property is the path VS Code should open by default when 18 | // connected. This is typically a file mount in .devcontainer/docker-compose.yml 19 | "workspaceFolder": "/workspace", 20 | 21 | "features": { 22 | "ghcr.io/devcontainers/features/go:1": {}, 23 | "ghcr.io/azutake/devcontainer-features/go-packages-install:0": {} 24 | }, 25 | 26 | // Features to add to the dev container. More info: https://containers.dev/features. 
27 | // "features": {}, 28 | 29 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 30 | // "forwardPorts": [], 31 | 32 | // Uncomment the next line if you want start specific services in your Docker Compose config. 33 | // "runServices": [], 34 | 35 | // Uncomment the next line if you want to keep your containers running after VS Code shuts down. 36 | // "shutdownAction": "none", 37 | 38 | // Uncomment the next line to run commands after the container is created. 39 | "postCreateCommand": "make prepare" 40 | 41 | // Configure tool-specific properties. 42 | // "customizations": {}, 43 | 44 | // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. 45 | // "remoteUser": "devcontainer" 46 | } 47 | -------------------------------------------------------------------------------- /examples/query_data/README.md: -------------------------------------------------------------------------------- 1 | # Data query example 2 | 3 | This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents. 4 | 5 | It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html). 6 | 7 | Summary of the steps: 8 | 9 | - prepare the dataset (and store it into `data`) 10 | - prepare a vector index database to run queries on 11 | - run queries 12 | 13 | ## Requirements 14 | 15 | For this in order to work, you will need LocalAI and a model compatible with the `llama.cpp` backend. This is will not work with gpt4all, however you can mix models (use a llama.cpp one to build the index database, and gpt4all to query it). 16 | 17 | The example uses `WizardLM` for both embeddings and Q&A. Edit the config files in `models/` accordingly to specify the model you use (change `HERE` in the configuration files). 18 | 19 | You will also need a training data set. Copy that over `data`. 20 | 21 | ## Setup 22 | 23 | Start the API: 24 | 25 | ```bash 26 | # Clone LocalAI 27 | git clone https://github.com/go-skynet/LocalAI 28 | 29 | cd LocalAI/examples/query_data 30 | 31 | # Copy your models, edit config files accordingly 32 | 33 | # start with docker-compose 34 | docker-compose up -d --build 35 | ``` 36 | 37 | ### Create a storage 38 | 39 | In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM. 40 | 41 | ```bash 42 | export OPENAI_API_BASE=http://localhost:8080/v1 43 | export OPENAI_API_KEY=sk- 44 | 45 | python store.py 46 | ``` 47 | 48 | After it finishes, a directory "storage" will be created with the vector index database. 49 | 50 | ## Query 51 | 52 | We can now query the dataset. 
53 | 54 | ```bash 55 | export OPENAI_API_BASE=http://localhost:8080/v1 56 | export OPENAI_API_KEY=sk- 57 | 58 | python query.py 59 | ``` 60 | 61 | ## Update 62 | 63 | To update our vector database, run `update.py` 64 | 65 | ```bash 66 | export OPENAI_API_BASE=http://localhost:8080/v1 67 | export OPENAI_API_KEY=sk- 68 | 69 | python update.py 70 | ``` -------------------------------------------------------------------------------- /examples/discord-bot/README.md: -------------------------------------------------------------------------------- 1 | # discord-bot 2 | 3 | ![Screenshot from 2023-05-01 07-58-19](https://user-images.githubusercontent.com/2420543/235413924-0cb2e75b-f2d6-4119-8610-44386e44afb8.png) 4 | 5 | ## Setup 6 | 7 | ```bash 8 | # Clone LocalAI 9 | git clone https://github.com/go-skynet/LocalAI 10 | 11 | cd LocalAI/examples/discord-bot 12 | 13 | # (optional) Checkout a specific LocalAI tag 14 | # git checkout -b build 15 | 16 | # Download gpt4all-j to models/ 17 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 18 | 19 | # Set the discord bot options (see: https://github.com/go-skynet/gpt-discord-bot#setup) 20 | cp -rfv .env.example .env 21 | vim .env 22 | 23 | # start with docker-compose 24 | docker-compose up -d --build 25 | ``` 26 | 27 | Note: see setup options here: https://github.com/go-skynet/gpt-discord-bot#setup 28 | 29 | Open up the URL in the console and give permission to the bot in your server. Start a thread with `/chat ..` 30 | 31 | ## Kubernetes 32 | 33 | - install the local-ai chart first 34 | - change OPENAI_API_BASE to point to the API address and apply the discord-bot manifest: 35 | 36 | ```yaml 37 | apiVersion: v1 38 | kind: Namespace 39 | metadata: 40 | name: discord-bot 41 | --- 42 | apiVersion: apps/v1 43 | kind: Deployment 44 | metadata: 45 | name: localai 46 | namespace: discord-bot 47 | labels: 48 | app: localai 49 | spec: 50 | selector: 51 | matchLabels: 52 | app: localai 53 | replicas: 1 54 | template: 55 | metadata: 56 | labels: 57 | app: localai 58 | name: localai 59 | spec: 60 | containers: 61 | - name: localai-discord 62 | env: 63 | - name: OPENAI_API_KEY 64 | value: "x" 65 | - name: DISCORD_BOT_TOKEN 66 | value: "" 67 | - name: DISCORD_CLIENT_ID 68 | value: "" 69 | - name: OPENAI_API_BASE 70 | value: "http://local-ai.default.svc.cluster.local:8080" 71 | - name: ALLOWED_SERVER_IDS 72 | value: "xx" 73 | - name: SERVER_TO_MODERATION_CHANNEL 74 | value: "1:1" 75 | image: quay.io/go-skynet/gpt-discord-bot:main 76 | ``` 77 | -------------------------------------------------------------------------------- /examples/rwkv/README.md: -------------------------------------------------------------------------------- 1 | # rwkv 2 | 3 | Example of how to run rwkv models. 4 | 5 | ## Run models 6 | 7 | Setup: 8 | 9 | ```bash 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/rwkv 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # build the tooling image to convert an rwkv model locally: 19 | docker build -t rwkv-converter -f Dockerfile.build . 20 | 21 | # download and convert a model (one-off) - it's going to be fast on CPU too! 
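# The repo directory is mounted at /data, so the converted ggml model ends up at ./models/rwkv on the host.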
22 | docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv 23 | 24 | # Get the tokenizer 25 | wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json 26 | 27 | # start with docker-compose 28 | docker-compose up -d --build 29 | ``` 30 | 31 | Test it out: 32 | 33 | ```bash 34 | curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ 35 | "model": "gpt-3.5-turbo", 36 | "prompt": "A long time ago, in a galaxy far away", 37 | "max_tokens": 100, 38 | "temperature": 0.9, "top_p": 0.8, "top_k": 80 39 | }' 40 | 41 | # {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}} 42 | 43 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 44 | "model": "gpt-3.5-turbo", 45 | "messages": [{"role": "user", "content": "How are you?"}], 46 | "temperature": 0.9, "top_p": 0.8, "top_k": 80 47 | }' 48 | 49 | # {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}} 50 | ``` 51 | 52 | ### Fine tuning 53 | 54 | See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb). 55 | 56 | ## See also 57 | 58 | - [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) 59 | - [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) -------------------------------------------------------------------------------- /.github/workflows/image.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'build container images' 3 | 4 | on: 5 | pull_request: 6 | push: 7 | branches: 8 | - master 9 | tags: 10 | - '*' 11 | 12 | jobs: 13 | docker: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v3 18 | 19 | - name: Prepare 20 | id: prep 21 | run: | 22 | DOCKER_IMAGE=quay.io/go-skynet/local-ai 23 | VERSION=master 24 | SHORTREF=${GITHUB_SHA::8} 25 | 26 | # If this is git tag, use the tag name as a docker tag 27 | if [[ $GITHUB_REF == refs/tags/* ]]; then 28 | VERSION=${GITHUB_REF#refs/tags/} 29 | fi 30 | TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}" 31 | 32 | # If the VERSION looks like a version number, assume that 33 | # this is the most recent version of the image and also 34 | # tag it 'latest'. 35 | if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then 36 | TAGS="$TAGS,${DOCKER_IMAGE}:latest" 37 | fi 38 | 39 | # Set output parameters. 
40 | echo ::set-output name=tags::${TAGS} 41 | echo ::set-output name=docker_image::${DOCKER_IMAGE} 42 | 43 | - name: Set up QEMU 44 | uses: docker/setup-qemu-action@master 45 | with: 46 | platforms: all 47 | 48 | - name: Set up Docker Buildx 49 | id: buildx 50 | uses: docker/setup-buildx-action@master 51 | 52 | - name: Login to DockerHub 53 | if: github.event_name != 'pull_request' 54 | uses: docker/login-action@v2 55 | with: 56 | registry: quay.io 57 | username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} 58 | password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} 59 | - name: Build 60 | if: github.event_name != 'pull_request' 61 | uses: docker/build-push-action@v4 62 | with: 63 | builder: ${{ steps.buildx.outputs.name }} 64 | context: . 65 | file: ./Dockerfile 66 | platforms: linux/amd64,linux/arm64 67 | push: true 68 | tags: ${{ steps.prep.outputs.tags }} 69 | - name: Build PRs 70 | if: github.event_name == 'pull_request' 71 | uses: docker/build-push-action@v4 72 | with: 73 | builder: ${{ steps.buildx.outputs.name }} 74 | context: . 75 | file: ./Dockerfile 76 | platforms: linux/amd64 77 | push: false 78 | tags: ${{ steps.prep.outputs.tags }} -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/src/index.mts: -------------------------------------------------------------------------------- 1 | import { OpenAIChat } from "langchain/llms/openai"; 2 | import { loadQAStuffChain } from "langchain/chains"; 3 | import { Document } from "langchain/document"; 4 | import { initializeAgentExecutorWithOptions } from "langchain/agents"; 5 | import {Calculator} from "langchain/tools/calculator"; 6 | 7 | const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1'; 8 | const fakeApiKey = process.env['OPENAI_API_KEY'] || '-'; 9 | const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo'; 10 | 11 | function getModel(): OpenAIChat { 12 | return new OpenAIChat({ 13 | prefixMessages: [ 14 | { 15 | role: "system", 16 | content: "You are a helpful assistant that answers in pirate language", 17 | }, 18 | ], 19 | modelName: modelName, 20 | maxTokens: 50, 21 | openAIApiKey: fakeApiKey, 22 | maxRetries: 2 23 | }, { 24 | basePath: pathToLocalAi, 25 | apiKey: fakeApiKey, 26 | }); 27 | } 28 | 29 | // Minimal example. 30 | export const run = async () => { 31 | const model = getModel(); 32 | console.log(`about to model.call at ${new Date().toUTCString()}`); 33 | const res = await model.call( 34 | "What would be a good company name a company that makes colorful socks?" 35 | ); 36 | console.log(`${new Date().toUTCString()}`); 37 | console.log({ res }); 38 | }; 39 | 40 | await run(); 41 | 42 | // This example uses the `StuffDocumentsChain` 43 | export const run2 = async () => { 44 | const model = getModel(); 45 | const chainA = loadQAStuffChain(model); 46 | const docs = [ 47 | new Document({ pageContent: "Harrison went to Harvard." }), 48 | new Document({ pageContent: "Ankush went to Princeton." }), 49 | ]; 50 | const resA = await chainA.call({ 51 | input_documents: docs, 52 | question: "Where did Harrison go to college?", 53 | }); 54 | console.log({ resA }); 55 | }; 56 | 57 | await run2(); 58 | 59 | // Quickly thrown together example of using tools + agents. 60 | // This seems like it should work, but it doesn't yet. 
61 | export const temporarilyBrokenToolTest = async () => { 62 | const model = getModel(); 63 | 64 | const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, { 65 | agentType: "zero-shot-react-description", 66 | }); 67 | 68 | console.log("Loaded agent."); 69 | 70 | const input = `What is the value of (500 *2) + 350 - 13?`; 71 | 72 | console.log(`Executing with input "${input}"...`); 73 | 74 | const result = await executor.call({ input }); 75 | 76 | console.log(`Got output ${result.output}`); 77 | } 78 | 79 | await temporarilyBrokenToolTest(); 80 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/go-skynet/LocalAI 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be 7 | github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 8 | github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c 9 | github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638 10 | github.com/gofiber/fiber/v2 v2.45.0 11 | github.com/hashicorp/go-multierror v1.1.1 12 | github.com/onsi/ginkgo/v2 v2.9.4 13 | github.com/onsi/gomega v1.27.6 14 | github.com/otiai10/openaigo v1.1.0 15 | github.com/rs/zerolog v1.29.1 16 | github.com/sashabaranov/go-openai v1.9.3 17 | github.com/swaggo/swag v1.16.1 18 | github.com/urfave/cli/v2 v2.25.3 19 | github.com/valyala/fasthttp v1.47.0 20 | gopkg.in/yaml.v3 v3.0.1 21 | ) 22 | 23 | require ( 24 | github.com/KyleBanks/depth v1.2.1 // indirect 25 | github.com/PuerkitoBio/purell v1.1.1 // indirect 26 | github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect 27 | github.com/andybalholm/brotli v1.0.5 // indirect 28 | github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect 29 | github.com/go-logr/logr v1.2.4 // indirect 30 | github.com/go-openapi/jsonpointer v0.19.5 // indirect 31 | github.com/go-openapi/jsonreference v0.19.6 // indirect 32 | github.com/go-openapi/spec v0.20.4 // indirect 33 | github.com/go-openapi/swag v0.19.15 // indirect 34 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect 35 | github.com/google/go-cmp v0.5.9 // indirect 36 | github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect 37 | github.com/google/uuid v1.3.0 // indirect 38 | github.com/hashicorp/errwrap v1.0.0 // indirect 39 | github.com/josharian/intern v1.0.0 // indirect 40 | github.com/klauspost/compress v1.16.3 // indirect 41 | github.com/mailru/easyjson v0.7.6 // indirect 42 | github.com/mattn/go-colorable v0.1.13 // indirect 43 | github.com/mattn/go-isatty v0.0.18 // indirect 44 | github.com/mattn/go-runewidth v0.0.14 // indirect 45 | github.com/philhofer/fwd v1.1.2 // indirect 46 | github.com/rivo/uniseg v0.2.0 // indirect 47 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 48 | github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect 49 | github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect 50 | github.com/tinylib/msgp v1.1.8 // indirect 51 | github.com/valyala/bytebufferpool v1.0.0 // indirect 52 | github.com/valyala/tcplisten v1.0.0 // indirect 53 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect 54 | golang.org/x/net v0.9.0 // indirect 55 | golang.org/x/sys v0.8.0 // indirect 56 | golang.org/x/text v0.9.0 // indirect 57 | golang.org/x/tools v0.8.0 // indirect 58 | gopkg.in/yaml.v2 v2.4.0 // indirect 59 | ) 60 | 
-------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | 8 | api "github.com/go-skynet/LocalAI/api" 9 | model "github.com/go-skynet/LocalAI/pkg/model" 10 | "github.com/rs/zerolog" 11 | "github.com/rs/zerolog/log" 12 | "github.com/urfave/cli/v2" 13 | ) 14 | 15 | func main() { 16 | log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) 17 | 18 | path, err := os.Getwd() 19 | if err != nil { 20 | log.Error().Msgf("error: %s", err.Error()) 21 | os.Exit(1) 22 | } 23 | 24 | app := &cli.App{ 25 | Name: "LocalAI", 26 | Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.", 27 | Flags: []cli.Flag{ 28 | &cli.BoolFlag{ 29 | Name: "f16", 30 | EnvVars: []string{"F16"}, 31 | }, 32 | &cli.BoolFlag{ 33 | Name: "debug", 34 | EnvVars: []string{"DEBUG"}, 35 | }, 36 | &cli.IntFlag{ 37 | Name: "threads", 38 | DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.", 39 | EnvVars: []string{"THREADS"}, 40 | Value: 4, 41 | }, 42 | &cli.StringFlag{ 43 | Name: "models-path", 44 | DefaultText: "Path containing models used for inferencing", 45 | EnvVars: []string{"MODELS_PATH"}, 46 | Value: filepath.Join(path, "models"), 47 | }, 48 | &cli.StringFlag{ 49 | Name: "config-file", 50 | DefaultText: "Config file", 51 | EnvVars: []string{"CONFIG_FILE"}, 52 | }, 53 | &cli.StringFlag{ 54 | Name: "address", 55 | DefaultText: "Bind address for the API server.", 56 | EnvVars: []string{"ADDRESS"}, 57 | Value: ":8080", 58 | }, 59 | &cli.IntFlag{ 60 | Name: "context-size", 61 | DefaultText: "Default context size of the model", 62 | EnvVars: []string{"CONTEXT_SIZE"}, 63 | Value: 512, 64 | }, 65 | }, 66 | Description: ` 67 | LocalAI is a drop-in replacement OpenAI API which runs inference locally. 68 | 69 | Some of the models compatible are: 70 | - Vicuna 71 | - Koala 72 | - GPT4ALL 73 | - GPT4ALL-J 74 | - Cerebras 75 | - Alpaca 76 | - StableLM (ggml quantized) 77 | 78 | It uses llama.cpp, ggml and gpt4all as backend with golang c bindings. 
79 | `, 80 | UsageText: `local-ai [options]`, 81 | Copyright: "go-skynet authors", 82 | Action: func(ctx *cli.Context) error { 83 | fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path")) 84 | return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address")) 85 | }, 86 | } 87 | 88 | err = app.Run(os.Args) 89 | if err != nil { 90 | log.Error().Msgf("error: %s", err.Error()) 91 | os.Exit(1) 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /api/api.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "errors" 5 | 6 | model "github.com/go-skynet/LocalAI/pkg/model" 7 | "github.com/gofiber/fiber/v2" 8 | "github.com/gofiber/fiber/v2/middleware/cors" 9 | "github.com/gofiber/fiber/v2/middleware/logger" 10 | "github.com/gofiber/fiber/v2/middleware/recover" 11 | "github.com/rs/zerolog" 12 | "github.com/rs/zerolog/log" 13 | ) 14 | 15 | func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App { 16 | zerolog.SetGlobalLevel(zerolog.InfoLevel) 17 | if debug { 18 | zerolog.SetGlobalLevel(zerolog.DebugLevel) 19 | } 20 | 21 | // Return errors as JSON responses 22 | app := fiber.New(fiber.Config{ 23 | DisableStartupMessage: disableMessage, 24 | // Override default error handler 25 | ErrorHandler: func(ctx *fiber.Ctx, err error) error { 26 | // Status code defaults to 500 27 | code := fiber.StatusInternalServerError 28 | 29 | // Retrieve the custom status code if it's a *fiber.Error 30 | var e *fiber.Error 31 | if errors.As(err, &e) { 32 | code = e.Code 33 | } 34 | 35 | // Send custom error page 36 | return ctx.Status(code).JSON( 37 | ErrorResponse{ 38 | Error: &APIError{Message: err.Error(), Code: code}, 39 | }, 40 | ) 41 | }, 42 | }) 43 | 44 | if debug { 45 | app.Use(logger.New(logger.Config{ 46 | Format: "[${ip}]:${port} ${status} - ${method} ${path}\n", 47 | })) 48 | } 49 | 50 | cm := make(ConfigMerger) 51 | if err := cm.LoadConfigs(loader.ModelPath); err != nil { 52 | log.Error().Msgf("error loading config files: %s", err.Error()) 53 | } 54 | 55 | if configFile != "" { 56 | if err := cm.LoadConfigFile(configFile); err != nil { 57 | log.Error().Msgf("error loading config file: %s", err.Error()) 58 | } 59 | } 60 | 61 | if debug { 62 | for k, v := range cm { 63 | log.Debug().Msgf("Model: %s (config: %+v)", k, v) 64 | } 65 | } 66 | // Default middleware config 67 | app.Use(recover.New()) 68 | app.Use(cors.New()) 69 | 70 | // openAI compatible API endpoint 71 | app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16)) 72 | app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16)) 73 | 74 | app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16)) 75 | app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16)) 76 | 77 | app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16)) 78 | app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16)) 79 | 80 | app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) 81 | app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) 82 | 83 | // 
/v1/engines/{engine_id}/embeddings 84 | 85 | app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) 86 | 87 | app.Get("/v1/models", listModels(loader, cm)) 88 | app.Get("/models", listModels(loader, cm)) 89 | 90 | return app 91 | } 92 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Here is a list of projects that can easily be integrated with the LocalAI backend. 4 | 5 | ### Projects 6 | 7 | 8 | ### Chatbot-UI 9 | 10 | _by [@mkellerman](https://github.com/mkellerman)_ 11 | 12 | ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) 13 | 14 | This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui). 15 | 16 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) 17 | 18 | ### Discord bot 19 | 20 | _by [@mudler](https://github.com/mudler)_ 21 | 22 | Run a discord bot which lets you talk directly with a model 23 | 24 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/), or for a live demo you can talk with our bot in #random-bot in our discord server. 25 | 26 | ### Langchain 27 | 28 | _by [@dave-gray101](https://github.com/dave-gray101)_ 29 | 30 | A ready to use example to show e2e how to integrate LocalAI with langchain 31 | 32 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/) 33 | 34 | ### Langchain Python 35 | 36 | _by [@mudler](https://github.com/mudler)_ 37 | 38 | A ready to use example to show e2e how to integrate LocalAI with langchain 39 | 40 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/) 41 | 42 | ### LocalAI WebUI 43 | 44 | _by [@dhruvgera](https://github.com/dhruvgera)_ 45 | 46 | ![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png) 47 | 48 | A light, community-maintained web interface for LocalAI 49 | 50 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/) 51 | 52 | ### How to run rwkv models 53 | 54 | _by [@mudler](https://github.com/mudler)_ 55 | 56 | A full example on how to run RWKV models with LocalAI 57 | 58 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/) 59 | 60 | ### Slack bot 61 | 62 | _by [@mudler](https://github.com/mudler)_ 63 | 64 | Run a slack bot which lets you talk directly with a model 65 | 66 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/) 67 | 68 | ### Question answering on documents 69 | 70 | _by [@mudler](https://github.com/mudler)_ 71 | 72 | Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents. 73 | 74 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/) 75 | 76 | ### Template for Runpod.io 77 | 78 | _by [@fHachenberg](https://github.com/fHachenberg)_ 79 | 80 | Allows to run any LocalAI-compatible model as a backend on the servers of https://runpod.io 81 | 82 | [Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra) 83 | 84 | ## Want to contribute? 
85 | 86 | Create an issue, and put `Example: ` in the title! We will post your examples here. 87 | -------------------------------------------------------------------------------- /api/api_test.go: -------------------------------------------------------------------------------- 1 | package api_test 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | . "github.com/go-skynet/LocalAI/api" 8 | "github.com/go-skynet/LocalAI/pkg/model" 9 | "github.com/gofiber/fiber/v2" 10 | . "github.com/onsi/ginkgo/v2" 11 | . "github.com/onsi/gomega" 12 | 13 | openaigo "github.com/otiai10/openaigo" 14 | "github.com/sashabaranov/go-openai" 15 | ) 16 | 17 | var _ = Describe("API test", func() { 18 | 19 | var app *fiber.App 20 | var modelLoader *model.ModelLoader 21 | var client *openai.Client 22 | var client2 *openaigo.Client 23 | Context("API query", func() { 24 | BeforeEach(func() { 25 | modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) 26 | app = App("", modelLoader, 1, 512, false, true, true) 27 | go app.Listen("127.0.0.1:9090") 28 | 29 | defaultConfig := openai.DefaultConfig("") 30 | defaultConfig.BaseURL = "http://127.0.0.1:9090/v1" 31 | 32 | client2 = openaigo.NewClient("") 33 | client2.BaseURL = defaultConfig.BaseURL 34 | 35 | // Wait for API to be ready 36 | client = openai.NewClientWithConfig(defaultConfig) 37 | Eventually(func() error { 38 | _, err := client.ListModels(context.TODO()) 39 | return err 40 | }, "2m").ShouldNot(HaveOccurred()) 41 | }) 42 | AfterEach(func() { 43 | app.Shutdown() 44 | }) 45 | It("returns the models list", func() { 46 | models, err := client.ListModels(context.TODO()) 47 | Expect(err).ToNot(HaveOccurred()) 48 | Expect(len(models.Models)).To(Equal(3)) 49 | Expect(models.Models[0].ID).To(Equal("testmodel")) 50 | }) 51 | It("can generate completions", func() { 52 | resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"}) 53 | Expect(err).ToNot(HaveOccurred()) 54 | Expect(len(resp.Choices)).To(Equal(1)) 55 | Expect(resp.Choices[0].Text).ToNot(BeEmpty()) 56 | }) 57 | 58 | It("can generate chat completions ", func() { 59 | resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) 60 | Expect(err).ToNot(HaveOccurred()) 61 | Expect(len(resp.Choices)).To(Equal(1)) 62 | Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) 63 | }) 64 | 65 | It("can generate completions from model configs", func() { 66 | resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"}) 67 | Expect(err).ToNot(HaveOccurred()) 68 | Expect(len(resp.Choices)).To(Equal(1)) 69 | Expect(resp.Choices[0].Text).ToNot(BeEmpty()) 70 | }) 71 | 72 | It("can generate chat completions from model configs", func() { 73 | resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) 74 | Expect(err).ToNot(HaveOccurred()) 75 | Expect(len(resp.Choices)).To(Equal(1)) 76 | Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) 77 | }) 78 | 79 | It("returns errors", func() { 80 | _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"}) 81 | Expect(err).To(HaveOccurred()) 82 | Expect(err.Error()).To(ContainSubstring("error, 
status code: 500, message: could not load model - all backends returned error: 5 errors occurred:")) 83 | }) 84 | 85 | }) 86 | 87 | Context("Config file", func() { 88 | BeforeEach(func() { 89 | modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) 90 | app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true) 91 | go app.Listen("127.0.0.1:9090") 92 | 93 | defaultConfig := openai.DefaultConfig("") 94 | defaultConfig.BaseURL = "http://127.0.0.1:9090/v1" 95 | client2 = openaigo.NewClient("") 96 | client2.BaseURL = defaultConfig.BaseURL 97 | // Wait for API to be ready 98 | client = openai.NewClientWithConfig(defaultConfig) 99 | Eventually(func() error { 100 | _, err := client.ListModels(context.TODO()) 101 | return err 102 | }, "2m").ShouldNot(HaveOccurred()) 103 | }) 104 | AfterEach(func() { 105 | app.Shutdown() 106 | }) 107 | It("can generate chat completions from config file", func() { 108 | 109 | models, err := client.ListModels(context.TODO()) 110 | Expect(err).ToNot(HaveOccurred()) 111 | Expect(len(models.Models)).To(Equal(5)) 112 | Expect(models.Models[0].ID).To(Equal("testmodel")) 113 | }) 114 | It("can generate chat completions from config file", func() { 115 | resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) 116 | Expect(err).ToNot(HaveOccurred()) 117 | Expect(len(resp.Choices)).To(Equal(1)) 118 | Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) 119 | }) 120 | It("can generate chat completions from config file", func() { 121 | resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) 122 | Expect(err).ToNot(HaveOccurred()) 123 | Expect(len(resp.Choices)).To(Equal(1)) 124 | Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) 125 | }) 126 | It("can generate edit completions from config file", func() { 127 | request := openaigo.EditCreateRequestBody{ 128 | Model: "list2", 129 | Instruction: "foo", 130 | Input: "bar", 131 | } 132 | resp, err := client2.CreateEdit(context.Background(), request) 133 | Expect(err).ToNot(HaveOccurred()) 134 | Expect(len(resp.Choices)).To(Equal(1)) 135 | Expect(resp.Choices[0].Text).ToNot(BeEmpty()) 136 | }) 137 | }) 138 | }) 139 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GOCMD=go 2 | GOTEST=$(GOCMD) test 3 | GOVET=$(GOCMD) vet 4 | BINARY_NAME=local-ai 5 | 6 | GOLLAMA_VERSION?=cf9b522db63898dcc5eb86e37c979ab85cbd583e 7 | GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109 8 | GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa 9 | RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp 10 | RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58 11 | 12 | GREEN := $(shell tput -Txterm setaf 2) 13 | YELLOW := $(shell tput -Txterm setaf 3) 14 | WHITE := $(shell tput -Txterm setaf 7) 15 | CYAN := $(shell tput -Txterm setaf 6) 16 | RESET := $(shell tput -Txterm sgr0) 17 | 18 | C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv 19 | LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv 20 | 21 | # Use this if you want to set the default behavior 22 | ifndef BUILD_TYPE 
23 | BUILD_TYPE:=default 24 | endif 25 | 26 | ifeq ($(BUILD_TYPE), "generic") 27 | GENERIC_PREFIX:=generic- 28 | else 29 | GENERIC_PREFIX:= 30 | endif 31 | 32 | .PHONY: all test build vendor 33 | 34 | all: help 35 | 36 | ## GPT4ALL-J 37 | go-gpt4all-j: 38 | git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j 39 | cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1 40 | # This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml.. 41 | @find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + 42 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + 43 | @find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + 44 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + 45 | @find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + 46 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} + 47 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} + 48 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} + 49 | 50 | ## RWKV 51 | go-rwkv: 52 | git clone --recurse-submodules $(RWKV_REPO) go-rwkv 53 | cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1 54 | @find ./go-rwkv -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} + 55 | @find ./go-rwkv -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} + 56 | @find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} + 57 | 58 | go-rwkv/librwkv.a: go-rwkv 59 | cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a .. 60 | 61 | go-gpt4all-j/libgptj.a: go-gpt4all-j 62 | $(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a 63 | 64 | ## CEREBRAS GPT 65 | go-gpt2: 66 | git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2 67 | cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1 68 | # This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml.. 
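# Renaming the exported ggml_*/gpt_*/json_* symbols gives go-gpt2 its own private copy of ggml, so the static libraries from go-llama, go-gpt4all-j and go-gpt2 can all be linked into the same binary without duplicate-symbol errors.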
69 | @find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + 70 | @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + 71 | @find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + 72 | @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} + 73 | @find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} + 74 | @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} + 75 | 76 | go-gpt2/libgpt2.a: go-gpt2 77 | $(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a 78 | 79 | go-llama: 80 | git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama 81 | cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1 82 | 83 | go-llama/libbinding.a: go-llama 84 | $(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a 85 | 86 | replace: 87 | $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama 88 | $(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j 89 | $(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2 90 | $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv 91 | 92 | prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv 93 | $(GOCMD) mod download 94 | 95 | ## GENERIC 96 | rebuild: ## Rebuilds the project 97 | $(MAKE) -C go-llama clean 98 | $(MAKE) -C go-gpt4all-j clean 99 | $(MAKE) -C go-gpt2 clean 100 | $(MAKE) -C go-rwkv clean 101 | $(MAKE) build 102 | 103 | prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a replace ## Prepares for building 104 | 105 | clean: ## Remove build related file 106 | rm -fr ./go-llama 107 | rm -rf ./go-gpt4all-j 108 | rm -rf ./go-gpt2 109 | rm -rf ./go-rwkv 110 | rm -rf $(BINARY_NAME) 111 | 112 | ## Build: 113 | 114 | build: prepare ## Build the project 115 | $(info ${GREEN}I local-ai build info:${RESET}) 116 | $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) 117 | C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./ 118 | 119 | generic-build: ## Build the project using generic 120 | BUILD_TYPE="generic" $(MAKE) build 121 | 122 | ## Run 123 | run: prepare ## run local-ai 124 | C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go 125 | 126 | test-models/testmodel: 127 | mkdir test-models 128 | wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel 129 | cp tests/fixtures/* test-models 130 | 131 | test: prepare test-models/testmodel 132 | cp tests/fixtures/* test-models 133 | @C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./... 134 | 135 | ## Help: 136 | help: ## Show this help. 
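# The awk script below scans the Makefile for targets annotated with '##' (and '## ' section headers) and prints them as the help text.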
137 | @echo '' 138 | @echo 'Usage:' 139 | @echo ' ${YELLOW}make${RESET} ${GREEN}${RESET}' 140 | @echo '' 141 | @echo 'Targets:' 142 | @awk 'BEGIN {FS = ":.*?## "} { \ 143 | if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf " ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \ 144 | else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \ 145 | }' $(MAKEFILE_LIST) 146 | -------------------------------------------------------------------------------- /api/config.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | 11 | model "github.com/go-skynet/LocalAI/pkg/model" 12 | "github.com/gofiber/fiber/v2" 13 | "github.com/rs/zerolog/log" 14 | "gopkg.in/yaml.v3" 15 | ) 16 | 17 | type Config struct { 18 | OpenAIRequest `yaml:"parameters"` 19 | Name string `yaml:"name"` 20 | StopWords []string `yaml:"stopwords"` 21 | Cutstrings []string `yaml:"cutstrings"` 22 | TrimSpace []string `yaml:"trimspace"` 23 | ContextSize int `yaml:"context_size"` 24 | F16 bool `yaml:"f16"` 25 | Threads int `yaml:"threads"` 26 | Debug bool `yaml:"debug"` 27 | Roles map[string]string `yaml:"roles"` 28 | Embeddings bool `yaml:"embeddings"` 29 | Backend string `yaml:"backend"` 30 | TemplateConfig TemplateConfig `yaml:"template"` 31 | MirostatETA float64 `yaml:"mirostat_eta"` 32 | MirostatTAU float64 `yaml:"mirostat_tau"` 33 | Mirostat int `yaml:"mirostat"` 34 | 35 | PromptStrings, InputStrings []string 36 | } 37 | 38 | type TemplateConfig struct { 39 | Completion string `yaml:"completion"` 40 | Chat string `yaml:"chat"` 41 | Edit string `yaml:"edit"` 42 | } 43 | 44 | type ConfigMerger map[string]Config 45 | 46 | func ReadConfigFile(file string) ([]*Config, error) { 47 | c := &[]*Config{} 48 | f, err := os.ReadFile(file) 49 | if err != nil { 50 | return nil, fmt.Errorf("cannot read config file: %w", err) 51 | } 52 | if err := yaml.Unmarshal(f, c); err != nil { 53 | return nil, fmt.Errorf("cannot unmarshal config file: %w", err) 54 | } 55 | 56 | return *c, nil 57 | } 58 | 59 | func ReadConfig(file string) (*Config, error) { 60 | c := &Config{} 61 | f, err := os.ReadFile(file) 62 | if err != nil { 63 | return nil, fmt.Errorf("cannot read config file: %w", err) 64 | } 65 | if err := yaml.Unmarshal(f, c); err != nil { 66 | return nil, fmt.Errorf("cannot unmarshal config file: %w", err) 67 | } 68 | 69 | return c, nil 70 | } 71 | 72 | func (cm ConfigMerger) LoadConfigFile(file string) error { 73 | c, err := ReadConfigFile(file) 74 | if err != nil { 75 | return fmt.Errorf("cannot load config file: %w", err) 76 | } 77 | 78 | for _, cc := range c { 79 | cm[cc.Name] = *cc 80 | } 81 | return nil 82 | } 83 | 84 | func (cm ConfigMerger) LoadConfig(file string) error { 85 | c, err := ReadConfig(file) 86 | if err != nil { 87 | return fmt.Errorf("cannot read config file: %w", err) 88 | } 89 | 90 | cm[c.Name] = *c 91 | return nil 92 | } 93 | 94 | func (cm ConfigMerger) LoadConfigs(path string) error { 95 | files, err := ioutil.ReadDir(path) 96 | if err != nil { 97 | return err 98 | } 99 | 100 | for _, file := range files { 101 | // Skip templates, YAML and .keep files 102 | if !strings.Contains(file.Name(), ".yaml") { 103 | continue 104 | } 105 | c, err := ReadConfig(filepath.Join(path, file.Name())) 106 | if err == nil { 107 | cm[c.Name] = *c 108 | } 109 | } 110 | 111 | return nil 112 | } 113 | 114 | func updateConfig(config *Config, input *OpenAIRequest) { 115 | if input.Echo { 116 | 
config.Echo = input.Echo 117 | } 118 | if input.TopK != 0 { 119 | config.TopK = input.TopK 120 | } 121 | if input.TopP != 0 { 122 | config.TopP = input.TopP 123 | } 124 | 125 | if input.Temperature != 0 { 126 | config.Temperature = input.Temperature 127 | } 128 | 129 | if input.Maxtokens != 0 { 130 | config.Maxtokens = input.Maxtokens 131 | } 132 | 133 | switch stop := input.Stop.(type) { 134 | case string: 135 | if stop != "" { 136 | config.StopWords = append(config.StopWords, stop) 137 | } 138 | case []interface{}: 139 | for _, pp := range stop { 140 | if s, ok := pp.(string); ok { 141 | config.StopWords = append(config.StopWords, s) 142 | } 143 | } 144 | } 145 | 146 | if input.RepeatPenalty != 0 { 147 | config.RepeatPenalty = input.RepeatPenalty 148 | } 149 | 150 | if input.Keep != 0 { 151 | config.Keep = input.Keep 152 | } 153 | 154 | if input.Batch != 0 { 155 | config.Batch = input.Batch 156 | } 157 | 158 | if input.F16 { 159 | config.F16 = input.F16 160 | } 161 | 162 | if input.IgnoreEOS { 163 | config.IgnoreEOS = input.IgnoreEOS 164 | } 165 | 166 | if input.Seed != 0 { 167 | config.Seed = input.Seed 168 | } 169 | 170 | if input.Mirostat != 0 { 171 | config.Mirostat = input.Mirostat 172 | } 173 | 174 | if input.MirostatETA != 0 { 175 | config.MirostatETA = input.MirostatETA 176 | } 177 | 178 | if input.MirostatTAU != 0 { 179 | config.MirostatTAU = input.MirostatTAU 180 | } 181 | 182 | switch inputs := input.Input.(type) { 183 | case string: 184 | if inputs != "" { 185 | config.InputStrings = append(config.InputStrings, inputs) 186 | } 187 | case []interface{}: 188 | for _, pp := range inputs { 189 | if s, ok := pp.(string); ok { 190 | config.InputStrings = append(config.InputStrings, s) 191 | } 192 | } 193 | } 194 | 195 | switch p := input.Prompt.(type) { 196 | case string: 197 | config.PromptStrings = append(config.PromptStrings, p) 198 | case []interface{}: 199 | for _, pp := range p { 200 | if s, ok := pp.(string); ok { 201 | config.PromptStrings = append(config.PromptStrings, s) 202 | } 203 | } 204 | } 205 | } 206 | 207 | func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) { 208 | input := new(OpenAIRequest) 209 | // Get input data from the request body 210 | if err := c.BodyParser(input); err != nil { 211 | return nil, nil, err 212 | } 213 | 214 | modelFile := input.Model 215 | 216 | if c.Params("model") != "" { 217 | modelFile = c.Params("model") 218 | } 219 | 220 | received, _ := json.Marshal(input) 221 | 222 | log.Debug().Msgf("Request received: %s", string(received)) 223 | 224 | // Set model from bearer token, if available 225 | bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ") 226 | bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) 227 | 228 | // If no model was specified, take the first available 229 | if modelFile == "" && !bearerExists { 230 | models, _ := loader.ListModels() 231 | if len(models) > 0 { 232 | modelFile = models[0] 233 | log.Debug().Msgf("No model specified, using: %s", modelFile) 234 | } else { 235 | log.Debug().Msgf("No model specified, returning error") 236 | return nil, nil, fmt.Errorf("no model specified") 237 | } 238 | } 239 | 240 | // If a model is found in bearer token takes precedence 241 | if bearerExists { 242 | log.Debug().Msgf("Using model from bearer token: %s", bearer) 243 | modelFile = bearer 244 | } 245 | 246 | // Load a config file if present after the model name 247 | modelConfig := filepath.Join(loader.ModelPath, 
modelFile+".yaml") 248 | if _, err := os.Stat(modelConfig); err == nil { 249 | if err := cm.LoadConfig(modelConfig); err != nil { 250 | return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) 251 | } 252 | } 253 | 254 | var config *Config 255 | cfg, exists := cm[modelFile] 256 | if !exists { 257 | config = &Config{ 258 | OpenAIRequest: defaultRequest(modelFile), 259 | ContextSize: ctx, 260 | Threads: threads, 261 | F16: f16, 262 | Debug: debug, 263 | } 264 | } else { 265 | config = &cfg 266 | } 267 | 268 | // Set the parameters for the language model prediction 269 | updateConfig(config, input) 270 | 271 | // Don't allow 0 as setting 272 | if config.Threads == 0 { 273 | if threads != 0 { 274 | config.Threads = threads 275 | } else { 276 | config.Threads = 4 277 | } 278 | } 279 | 280 | return config, input, nil 281 | } 282 | -------------------------------------------------------------------------------- /api/prediction.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strings" 7 | "sync" 8 | 9 | "github.com/donomii/go-rwkv.cpp" 10 | model "github.com/go-skynet/LocalAI/pkg/model" 11 | gpt2 "github.com/go-skynet/go-gpt2.cpp" 12 | gptj "github.com/go-skynet/go-gpt4all-j.cpp" 13 | llama "github.com/go-skynet/go-llama.cpp" 14 | ) 15 | 16 | // mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784 17 | var mutexMap sync.Mutex 18 | var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex) 19 | 20 | func defaultLLamaOpts(c Config) []llama.ModelOption { 21 | llamaOpts := []llama.ModelOption{} 22 | if c.ContextSize != 0 { 23 | llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize)) 24 | } 25 | if c.F16 { 26 | llamaOpts = append(llamaOpts, llama.EnableF16Memory) 27 | } 28 | if c.Embeddings { 29 | llamaOpts = append(llamaOpts, llama.EnableEmbeddings) 30 | } 31 | 32 | return llamaOpts 33 | } 34 | 35 | func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) { 36 | if !c.Embeddings { 37 | return nil, fmt.Errorf("endpoint disabled for this model by API configuration") 38 | } 39 | 40 | modelFile := c.Model 41 | 42 | llamaOpts := defaultLLamaOpts(c) 43 | 44 | var inferenceModel interface{} 45 | var err error 46 | if c.Backend == "" { 47 | inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads)) 48 | } else { 49 | inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads)) 50 | } 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | var fn func() ([]float32, error) 56 | switch model := inferenceModel.(type) { 57 | case *llama.LLama: 58 | fn = func() ([]float32, error) { 59 | predictOptions := buildLLamaPredictOptions(c) 60 | return model.Embeddings(s, predictOptions...) 
61 | } 62 | default: 63 | fn = func() ([]float32, error) { 64 | return nil, fmt.Errorf("embeddings not supported by the backend") 65 | } 66 | } 67 | 68 | return func() ([]float32, error) { 69 | // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784 70 | mutexMap.Lock() 71 | l, ok := mutexes[modelFile] 72 | if !ok { 73 | m := &sync.Mutex{} 74 | mutexes[modelFile] = m 75 | l = m 76 | } 77 | mutexMap.Unlock() 78 | l.Lock() 79 | defer l.Unlock() 80 | 81 | embeds, err := fn() 82 | if err != nil { 83 | return embeds, err 84 | } 85 | // Remove trailing 0s 86 | for i := len(embeds) - 1; i >= 0; i-- { 87 | if embeds[i] == 0.0 { 88 | embeds = embeds[:i] 89 | } else { 90 | break 91 | } 92 | } 93 | return embeds, nil 94 | }, nil 95 | } 96 | 97 | func buildLLamaPredictOptions(c Config) []llama.PredictOption { 98 | // Generate the prediction using the language model 99 | predictOptions := []llama.PredictOption{ 100 | llama.SetTemperature(c.Temperature), 101 | llama.SetTopP(c.TopP), 102 | llama.SetTopK(c.TopK), 103 | llama.SetTokens(c.Maxtokens), 104 | llama.SetThreads(c.Threads), 105 | } 106 | 107 | if c.Mirostat != 0 { 108 | predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat)) 109 | } 110 | 111 | if c.MirostatETA != 0 { 112 | predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA)) 113 | } 114 | 115 | if c.MirostatTAU != 0 { 116 | predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU)) 117 | } 118 | 119 | if c.Debug { 120 | predictOptions = append(predictOptions, llama.Debug) 121 | } 122 | 123 | predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...)) 124 | 125 | if c.RepeatPenalty != 0 { 126 | predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty)) 127 | } 128 | 129 | if c.Keep != 0 { 130 | predictOptions = append(predictOptions, llama.SetNKeep(c.Keep)) 131 | } 132 | 133 | if c.Batch != 0 { 134 | predictOptions = append(predictOptions, llama.SetBatch(c.Batch)) 135 | } 136 | 137 | if c.F16 { 138 | predictOptions = append(predictOptions, llama.EnableF16KV) 139 | } 140 | 141 | if c.IgnoreEOS { 142 | predictOptions = append(predictOptions, llama.IgnoreEOS) 143 | } 144 | 145 | if c.Seed != 0 { 146 | predictOptions = append(predictOptions, llama.SetSeed(c.Seed)) 147 | } 148 | 149 | return predictOptions 150 | } 151 | 152 | func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) { 153 | supportStreams := false 154 | modelFile := c.Model 155 | 156 | llamaOpts := defaultLLamaOpts(c) 157 | 158 | var inferenceModel interface{} 159 | var err error 160 | if c.Backend == "" { 161 | inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads)) 162 | } else { 163 | inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads)) 164 | } 165 | if err != nil { 166 | return nil, err 167 | } 168 | 169 | var fn func() (string, error) 170 | 171 | switch model := inferenceModel.(type) { 172 | case *rwkv.RwkvState: 173 | supportStreams = true 174 | 175 | fn = func() (string, error) { 176 | stopWord := "\n" 177 | if len(c.StopWords) > 0 { 178 | stopWord = c.StopWords[0] 179 | } 180 | 181 | if err := model.ProcessInput(s); err != nil { 182 | return "", err 183 | } 184 | 185 | response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback) 186 | 187 | return response, nil 188 | } 189 | case *gpt2.StableLM: 190 | fn = func() 
(string, error) { 191 | // Generate the prediction using the language model 192 | predictOptions := []gpt2.PredictOption{ 193 | gpt2.SetTemperature(c.Temperature), 194 | gpt2.SetTopP(c.TopP), 195 | gpt2.SetTopK(c.TopK), 196 | gpt2.SetTokens(c.Maxtokens), 197 | gpt2.SetThreads(c.Threads), 198 | } 199 | 200 | if c.Batch != 0 { 201 | predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) 202 | } 203 | 204 | if c.Seed != 0 { 205 | predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) 206 | } 207 | 208 | return model.Predict( 209 | s, 210 | predictOptions..., 211 | ) 212 | } 213 | case *gpt2.GPT2: 214 | fn = func() (string, error) { 215 | // Generate the prediction using the language model 216 | predictOptions := []gpt2.PredictOption{ 217 | gpt2.SetTemperature(c.Temperature), 218 | gpt2.SetTopP(c.TopP), 219 | gpt2.SetTopK(c.TopK), 220 | gpt2.SetTokens(c.Maxtokens), 221 | gpt2.SetThreads(c.Threads), 222 | } 223 | 224 | if c.Batch != 0 { 225 | predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) 226 | } 227 | 228 | if c.Seed != 0 { 229 | predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) 230 | } 231 | 232 | return model.Predict( 233 | s, 234 | predictOptions..., 235 | ) 236 | } 237 | case *gptj.GPTJ: 238 | fn = func() (string, error) { 239 | // Generate the prediction using the language model 240 | predictOptions := []gptj.PredictOption{ 241 | gptj.SetTemperature(c.Temperature), 242 | gptj.SetTopP(c.TopP), 243 | gptj.SetTopK(c.TopK), 244 | gptj.SetTokens(c.Maxtokens), 245 | gptj.SetThreads(c.Threads), 246 | } 247 | 248 | if c.Batch != 0 { 249 | predictOptions = append(predictOptions, gptj.SetBatch(c.Batch)) 250 | } 251 | 252 | if c.Seed != 0 { 253 | predictOptions = append(predictOptions, gptj.SetSeed(c.Seed)) 254 | } 255 | 256 | return model.Predict( 257 | s, 258 | predictOptions..., 259 | ) 260 | } 261 | case *llama.LLama: 262 | supportStreams = true 263 | fn = func() (string, error) { 264 | 265 | if tokenCallback != nil { 266 | model.SetTokenCallback(tokenCallback) 267 | } 268 | 269 | predictOptions := buildLLamaPredictOptions(c) 270 | 271 | str, er := model.Predict( 272 | s, 273 | predictOptions..., 274 | ) 275 | // Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels) 276 | // For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}} 277 | // after a stream event has occurred 278 | model.SetTokenCallback(nil) 279 | return str, er 280 | } 281 | } 282 | 283 | return func() (string, error) { 284 | // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784 285 | mutexMap.Lock() 286 | l, ok := mutexes[modelFile] 287 | if !ok { 288 | m := &sync.Mutex{} 289 | mutexes[modelFile] = m 290 | l = m 291 | } 292 | mutexMap.Unlock() 293 | l.Lock() 294 | defer l.Unlock() 295 | 296 | res, err := fn() 297 | if tokenCallback != nil && !supportStreams { 298 | tokenCallback(res) 299 | } 300 | return res, err 301 | }, nil 302 | } 303 | 304 | func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) { 305 | result := []Choice{} 306 | 307 | n := input.N 308 | 309 | if input.N == 0 { 310 | n = 1 311 | } 312 | 313 | // get the model function to call for the result 314 | predFunc, err := ModelInference(predInput, loader, *config, tokenCallback) 315 | if err != nil { 316 | return result, err 317 | } 318 | 319 | 
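// Run the prediction once per requested choice (n); each result is post-processed by Finetune (echo, cutstrings, trimspace) before the callback collects it into the result slice.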
for i := 0; i < n; i++ { 320 | prediction, err := predFunc() 321 | if err != nil { 322 | return result, err 323 | } 324 | 325 | prediction = Finetune(*config, predInput, prediction) 326 | cb(prediction, &result) 327 | 328 | //result = append(result, Choice{Text: prediction}) 329 | 330 | } 331 | return result, err 332 | } 333 | 334 | var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp) 335 | var mu sync.Mutex = sync.Mutex{} 336 | 337 | func Finetune(config Config, input, prediction string) string { 338 | if config.Echo { 339 | prediction = input + prediction 340 | } 341 | 342 | for _, c := range config.Cutstrings { 343 | mu.Lock() 344 | reg, ok := cutstrings[c] 345 | if !ok { 346 | cutstrings[c] = regexp.MustCompile(c) 347 | reg = cutstrings[c] 348 | } 349 | mu.Unlock() 350 | prediction = reg.ReplaceAllString(prediction, "") 351 | } 352 | 353 | for _, c := range config.TrimSpace { 354 | prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c)) 355 | } 356 | return prediction 357 | 358 | } 359 | -------------------------------------------------------------------------------- /pkg/model/loader.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | "sync" 11 | "text/template" 12 | 13 | "github.com/hashicorp/go-multierror" 14 | "github.com/rs/zerolog/log" 15 | 16 | rwkv "github.com/donomii/go-rwkv.cpp" 17 | gpt2 "github.com/go-skynet/go-gpt2.cpp" 18 | gptj "github.com/go-skynet/go-gpt4all-j.cpp" 19 | llama "github.com/go-skynet/go-llama.cpp" 20 | ) 21 | 22 | type ModelLoader struct { 23 | ModelPath string 24 | mu sync.Mutex 25 | 26 | models map[string]*llama.LLama 27 | gptmodels map[string]*gptj.GPTJ 28 | gpt2models map[string]*gpt2.GPT2 29 | gptstablelmmodels map[string]*gpt2.StableLM 30 | rwkv map[string]*rwkv.RwkvState 31 | promptsTemplates map[string]*template.Template 32 | } 33 | 34 | func NewModelLoader(modelPath string) *ModelLoader { 35 | return &ModelLoader{ 36 | ModelPath: modelPath, 37 | gpt2models: make(map[string]*gpt2.GPT2), 38 | gptmodels: make(map[string]*gptj.GPTJ), 39 | gptstablelmmodels: make(map[string]*gpt2.StableLM), 40 | models: make(map[string]*llama.LLama), 41 | rwkv: make(map[string]*rwkv.RwkvState), 42 | promptsTemplates: make(map[string]*template.Template), 43 | } 44 | } 45 | 46 | func (ml *ModelLoader) ExistsInModelPath(s string) bool { 47 | _, err := os.Stat(filepath.Join(ml.ModelPath, s)) 48 | return err == nil 49 | } 50 | 51 | func (ml *ModelLoader) ListModels() ([]string, error) { 52 | files, err := ioutil.ReadDir(ml.ModelPath) 53 | if err != nil { 54 | return []string{}, err 55 | } 56 | 57 | models := []string{} 58 | for _, file := range files { 59 | // Skip templates, YAML and .keep files 60 | if strings.HasSuffix(file.Name(), ".tmpl") || strings.HasSuffix(file.Name(), ".keep") || strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") { 61 | continue 62 | } 63 | 64 | models = append(models, file.Name()) 65 | } 66 | 67 | return models, nil 68 | } 69 | 70 | func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string, error) { 71 | ml.mu.Lock() 72 | defer ml.mu.Unlock() 73 | 74 | m, ok := ml.promptsTemplates[modelName] 75 | if !ok { 76 | modelFile := filepath.Join(ml.ModelPath, modelName) 77 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 78 | return "", err 79 | } 80 | 81 | t, exists := 
ml.promptsTemplates[modelName] 82 | if exists { 83 | m = t 84 | } 85 | } 86 | if m == nil { 87 | return "", fmt.Errorf("failed loading any template") 88 | } 89 | 90 | var buf bytes.Buffer 91 | 92 | if err := m.Execute(&buf, in); err != nil { 93 | return "", err 94 | } 95 | return buf.String(), nil 96 | } 97 | 98 | func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error { 99 | // Check if the template was already loaded 100 | if _, ok := ml.promptsTemplates[modelName]; ok { 101 | return nil 102 | } 103 | 104 | // Check if the model path exists 105 | // skip any error here - we run anyway if a template does not exist 106 | modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName) 107 | 108 | if !ml.ExistsInModelPath(modelTemplateFile) { 109 | return nil 110 | } 111 | 112 | dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile)) 113 | if err != nil { 114 | return err 115 | } 116 | 117 | // Parse the template 118 | tmpl, err := template.New("prompt").Parse(string(dat)) 119 | if err != nil { 120 | return err 121 | } 122 | ml.promptsTemplates[modelName] = tmpl 123 | 124 | return nil 125 | } 126 | 127 | func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) { 128 | ml.mu.Lock() 129 | defer ml.mu.Unlock() 130 | 131 | // Check if we already have a loaded model 132 | if !ml.ExistsInModelPath(modelName) { 133 | return nil, fmt.Errorf("model does not exist") 134 | } 135 | 136 | if m, ok := ml.gptstablelmmodels[modelName]; ok { 137 | log.Debug().Msgf("Model already loaded in memory: %s", modelName) 138 | return m, nil 139 | } 140 | 141 | // Load the model and keep it in memory for later use 142 | modelFile := filepath.Join(ml.ModelPath, modelName) 143 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 144 | 145 | model, err := gpt2.NewStableLM(modelFile) 146 | if err != nil { 147 | return nil, err 148 | } 149 | 150 | // If there is a prompt template, load it 151 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 152 | return nil, err 153 | } 154 | 155 | ml.gptstablelmmodels[modelName] = model 156 | return model, err 157 | } 158 | 159 | func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) { 160 | ml.mu.Lock() 161 | defer ml.mu.Unlock() 162 | 163 | // Check if we already have a loaded model 164 | if !ml.ExistsInModelPath(modelName) { 165 | return nil, fmt.Errorf("model does not exist") 166 | } 167 | 168 | if m, ok := ml.gpt2models[modelName]; ok { 169 | log.Debug().Msgf("Model already loaded in memory: %s", modelName) 170 | return m, nil 171 | } 172 | 173 | // Load the model and keep it in memory for later use 174 | modelFile := filepath.Join(ml.ModelPath, modelName) 175 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 176 | 177 | model, err := gpt2.New(modelFile) 178 | if err != nil { 179 | return nil, err 180 | } 181 | 182 | // If there is a prompt template, load it 183 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 184 | return nil, err 185 | } 186 | 187 | ml.gpt2models[modelName] = model 188 | return model, err 189 | } 190 | 191 | func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) { 192 | ml.mu.Lock() 193 | defer ml.mu.Unlock() 194 | 195 | // Check if we already have a loaded model 196 | if !ml.ExistsInModelPath(modelName) { 197 | return nil, fmt.Errorf("model does not exist") 198 | } 199 | 200 | if m, ok := ml.gptmodels[modelName]; ok { 201 | log.Debug().Msgf("Model already loaded in memory: %s", 
modelName) 202 | return m, nil 203 | } 204 | 205 | // Load the model and keep it in memory for later use 206 | modelFile := filepath.Join(ml.ModelPath, modelName) 207 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 208 | 209 | model, err := gptj.New(modelFile) 210 | if err != nil { 211 | return nil, err 212 | } 213 | 214 | // If there is a prompt template, load it 215 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 216 | return nil, err 217 | } 218 | 219 | ml.gptmodels[modelName] = model 220 | return model, err 221 | } 222 | 223 | func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) { 224 | ml.mu.Lock() 225 | defer ml.mu.Unlock() 226 | 227 | log.Debug().Msgf("Loading model name: %s", modelName) 228 | 229 | // Check if we already have a loaded model 230 | if !ml.ExistsInModelPath(modelName) { 231 | return nil, fmt.Errorf("model does not exist") 232 | } 233 | 234 | if m, ok := ml.rwkv[modelName]; ok { 235 | log.Debug().Msgf("Model already loaded in memory: %s", modelName) 236 | return m, nil 237 | } 238 | 239 | // Load the model and keep it in memory for later use 240 | modelFile := filepath.Join(ml.ModelPath, modelName) 241 | tokenPath := filepath.Join(ml.ModelPath, tokenFile) 242 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 243 | 244 | model := rwkv.LoadFiles(modelFile, tokenPath, threads) 245 | if model == nil { 246 | return nil, fmt.Errorf("could not load model") 247 | } 248 | 249 | ml.rwkv[modelName] = model 250 | return model, nil 251 | } 252 | 253 | func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) { 254 | ml.mu.Lock() 255 | defer ml.mu.Unlock() 256 | 257 | log.Debug().Msgf("Loading model name: %s", modelName) 258 | 259 | // Check if we already have a loaded model 260 | if !ml.ExistsInModelPath(modelName) { 261 | return nil, fmt.Errorf("model does not exist") 262 | } 263 | 264 | if m, ok := ml.models[modelName]; ok { 265 | log.Debug().Msgf("Model already loaded in memory: %s", modelName) 266 | return m, nil 267 | } 268 | 269 | // Load the model and keep it in memory for later use 270 | modelFile := filepath.Join(ml.ModelPath, modelName) 271 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 272 | 273 | model, err := llama.New(modelFile, opts...) 274 | if err != nil { 275 | return nil, err 276 | } 277 | 278 | // If there is a prompt template, load it 279 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 280 | return nil, err 281 | } 282 | 283 | ml.models[modelName] = model 284 | return model, err 285 | } 286 | 287 | const tokenizerSuffix = ".tokenizer.json" 288 | 289 | var loadedModels map[string]interface{} = map[string]interface{}{} 290 | var muModels sync.Mutex 291 | 292 | func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) { 293 | switch strings.ToLower(backendString) { 294 | case "llama": 295 | return ml.LoadLLaMAModel(modelFile, llamaOpts...) 
296 | case "stablelm": 297 | return ml.LoadStableLMModel(modelFile) 298 | case "gpt2": 299 | return ml.LoadGPT2Model(modelFile) 300 | case "gptj": 301 | return ml.LoadGPTJModel(modelFile) 302 | case "rwkv": 303 | return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads) 304 | default: 305 | return nil, fmt.Errorf("backend unsupported: %s", backendString) 306 | } 307 | } 308 | 309 | func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) { 310 | updateModels := func(model interface{}) { 311 | muModels.Lock() 312 | defer muModels.Unlock() 313 | loadedModels[modelFile] = model 314 | } 315 | 316 | muModels.Lock() 317 | m, exists := loadedModels[modelFile] 318 | if exists { 319 | muModels.Unlock() 320 | return m, nil 321 | } 322 | muModels.Unlock() 323 | 324 | model, modelerr := ml.LoadLLaMAModel(modelFile, llamaOpts...) 325 | if modelerr == nil { 326 | updateModels(model) 327 | return model, nil 328 | } else { 329 | err = multierror.Append(err, modelerr) 330 | } 331 | 332 | model, modelerr = ml.LoadGPTJModel(modelFile) 333 | if modelerr == nil { 334 | updateModels(model) 335 | return model, nil 336 | } else { 337 | err = multierror.Append(err, modelerr) 338 | } 339 | 340 | model, modelerr = ml.LoadGPT2Model(modelFile) 341 | if modelerr == nil { 342 | updateModels(model) 343 | return model, nil 344 | } else { 345 | err = multierror.Append(err, modelerr) 346 | } 347 | 348 | model, modelerr = ml.LoadStableLMModel(modelFile) 349 | if modelerr == nil { 350 | updateModels(model) 351 | return model, nil 352 | } else { 353 | err = multierror.Append(err, modelerr) 354 | } 355 | 356 | model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads) 357 | if modelerr == nil { 358 | updateModels(model) 359 | return model, nil 360 | } else { 361 | err = multierror.Append(err, modelerr) 362 | } 363 | 364 | return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error()) 365 | } 366 | -------------------------------------------------------------------------------- /api/openai.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "encoding/json" 7 | "fmt" 8 | "strings" 9 | 10 | model "github.com/go-skynet/LocalAI/pkg/model" 11 | "github.com/gofiber/fiber/v2" 12 | "github.com/rs/zerolog/log" 13 | "github.com/valyala/fasthttp" 14 | ) 15 | 16 | // APIError provides error information returned by the OpenAI API. 
17 | type APIError struct { 18 | Code any `json:"code,omitempty"` 19 | Message string `json:"message"` 20 | Param *string `json:"param,omitempty"` 21 | Type string `json:"type"` 22 | } 23 | 24 | type ErrorResponse struct { 25 | Error *APIError `json:"error,omitempty"` 26 | } 27 | 28 | type OpenAIUsage struct { 29 | PromptTokens int `json:"prompt_tokens"` 30 | CompletionTokens int `json:"completion_tokens"` 31 | TotalTokens int `json:"total_tokens"` 32 | } 33 | 34 | type Item struct { 35 | Embedding []float32 `json:"embedding"` 36 | Index int `json:"index"` 37 | Object string `json:"object,omitempty"` 38 | } 39 | 40 | type OpenAIResponse struct { 41 | Created int `json:"created,omitempty"` 42 | Object string `json:"object,omitempty"` 43 | ID string `json:"id,omitempty"` 44 | Model string `json:"model,omitempty"` 45 | Choices []Choice `json:"choices,omitempty"` 46 | Data []Item `json:"data,omitempty"` 47 | 48 | Usage OpenAIUsage `json:"usage"` 49 | } 50 | 51 | type Choice struct { 52 | Index int `json:"index,omitempty"` 53 | FinishReason string `json:"finish_reason,omitempty"` 54 | Message *Message `json:"message,omitempty"` 55 | Delta *Message `json:"delta,omitempty"` 56 | Text string `json:"text,omitempty"` 57 | } 58 | 59 | type Message struct { 60 | Role string `json:"role,omitempty" yaml:"role"` 61 | Content string `json:"content,omitempty" yaml:"content"` 62 | } 63 | 64 | type OpenAIModel struct { 65 | ID string `json:"id"` 66 | Object string `json:"object"` 67 | } 68 | 69 | type OpenAIRequest struct { 70 | Model string `json:"model" yaml:"model"` 71 | 72 | // Prompt is read only by completion API calls 73 | Prompt interface{} `json:"prompt" yaml:"prompt"` 74 | 75 | // Edit endpoint 76 | Instruction string `json:"instruction" yaml:"instruction"` 77 | Input interface{} `json:"input" yaml:"input"` 78 | 79 | Stop interface{} `json:"stop" yaml:"stop"` 80 | 81 | // Messages is read only by chat/completion API calls 82 | Messages []Message `json:"messages" yaml:"messages"` 83 | 84 | Stream bool `json:"stream"` 85 | Echo bool `json:"echo"` 86 | // Common options between all the API calls 87 | TopP float64 `json:"top_p" yaml:"top_p"` 88 | TopK int `json:"top_k" yaml:"top_k"` 89 | Temperature float64 `json:"temperature" yaml:"temperature"` 90 | Maxtokens int `json:"max_tokens" yaml:"max_tokens"` 91 | 92 | N int `json:"n"` 93 | 94 | // Custom parameters - not present in the OpenAI API 95 | Batch int `json:"batch" yaml:"batch"` 96 | F16 bool `json:"f16" yaml:"f16"` 97 | IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"` 98 | RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"` 99 | Keep int `json:"n_keep" yaml:"n_keep"` 100 | 101 | MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"` 102 | MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"` 103 | Mirostat int `json:"mirostat" yaml:"mirostat"` 104 | 105 | Seed int `json:"seed" yaml:"seed"` 106 | } 107 | 108 | func defaultRequest(modelFile string) OpenAIRequest { 109 | return OpenAIRequest{ 110 | TopP: 0.7, 111 | TopK: 80, 112 | Maxtokens: 512, 113 | Temperature: 0.9, 114 | Model: modelFile, 115 | } 116 | } 117 | 118 | // https://platform.openai.com/docs/api-reference/completions 119 | func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { 120 | return func(c *fiber.Ctx) error { 121 | config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16) 122 | if err != nil { 123 | return fmt.Errorf("failed reading parameters from 
request:%w", err) 124 | } 125 | 126 | log.Debug().Msgf("Parameter Config: %+v", config) 127 | 128 | templateFile := config.Model 129 | 130 | if config.TemplateConfig.Completion != "" { 131 | templateFile = config.TemplateConfig.Completion 132 | } 133 | 134 | var result []Choice 135 | for _, i := range config.PromptStrings { 136 | // A model can have a "file.bin.tmpl" file associated with a prompt template prefix 137 | templatedInput, err := loader.TemplatePrefix(templateFile, struct { 138 | Input string 139 | }{Input: i}) 140 | if err == nil { 141 | i = templatedInput 142 | log.Debug().Msgf("Template found, input modified to: %s", i) 143 | } 144 | 145 | r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) { 146 | *c = append(*c, Choice{Text: s}) 147 | }, nil) 148 | if err != nil { 149 | return err 150 | } 151 | 152 | result = append(result, r...) 153 | } 154 | 155 | resp := &OpenAIResponse{ 156 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 157 | Choices: result, 158 | Object: "text_completion", 159 | } 160 | 161 | jsonResult, _ := json.Marshal(resp) 162 | log.Debug().Msgf("Response: %s", jsonResult) 163 | 164 | // Return the prediction in the response body 165 | return c.JSON(resp) 166 | } 167 | } 168 | 169 | // https://platform.openai.com/docs/api-reference/embeddings 170 | func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { 171 | return func(c *fiber.Ctx) error { 172 | config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16) 173 | if err != nil { 174 | return fmt.Errorf("failed reading parameters from request:%w", err) 175 | } 176 | 177 | log.Debug().Msgf("Parameter Config: %+v", config) 178 | items := []Item{} 179 | 180 | for i, s := range config.InputStrings { 181 | 182 | // get the model function to call for the result 183 | embedFn, err := ModelEmbedding(s, loader, *config) 184 | if err != nil { 185 | return err 186 | } 187 | 188 | embeddings, err := embedFn() 189 | if err != nil { 190 | return err 191 | } 192 | items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"}) 193 | } 194 | 195 | resp := &OpenAIResponse{ 196 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 197 | Data: items, 198 | Object: "list", 199 | } 200 | 201 | jsonResult, _ := json.Marshal(resp) 202 | log.Debug().Msgf("Response: %s", jsonResult) 203 | 204 | // Return the prediction in the response body 205 | return c.JSON(resp) 206 | } 207 | } 208 | 209 | func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { 210 | 211 | process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) { 212 | ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool { 213 | resp := OpenAIResponse{ 214 | Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
215 | Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}}, 216 | Object: "chat.completion.chunk", 217 | } 218 | log.Debug().Msgf("Sending goroutine: %s", s) 219 | 220 | responses <- resp 221 | return true 222 | }) 223 | close(responses) 224 | } 225 | return func(c *fiber.Ctx) error { 226 | config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16) 227 | if err != nil { 228 | return fmt.Errorf("failed reading parameters from request:%w", err) 229 | } 230 | 231 | log.Debug().Msgf("Parameter Config: %+v", config) 232 | 233 | var predInput string 234 | 235 | mess := []string{} 236 | for _, i := range input.Messages { 237 | r := config.Roles[i.Role] 238 | if r == "" { 239 | r = i.Role 240 | } 241 | 242 | content := fmt.Sprint(r, " ", i.Content) 243 | mess = append(mess, content) 244 | } 245 | 246 | predInput = strings.Join(mess, "\n") 247 | 248 | if input.Stream { 249 | log.Debug().Msgf("Stream request received") 250 | c.Context().SetContentType("text/event-stream") 251 | //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) 252 | // c.Set("Content-Type", "text/event-stream") 253 | c.Set("Cache-Control", "no-cache") 254 | c.Set("Connection", "keep-alive") 255 | c.Set("Transfer-Encoding", "chunked") 256 | } 257 | 258 | templateFile := config.Model 259 | 260 | if config.TemplateConfig.Chat != "" { 261 | templateFile = config.TemplateConfig.Chat 262 | } 263 | 264 | // A model can have a "file.bin.tmpl" file associated with a prompt template prefix 265 | templatedInput, err := loader.TemplatePrefix(templateFile, struct { 266 | Input string 267 | }{Input: predInput}) 268 | if err == nil { 269 | predInput = templatedInput 270 | log.Debug().Msgf("Template found, input modified to: %s", predInput) 271 | } 272 | 273 | if input.Stream { 274 | responses := make(chan OpenAIResponse) 275 | 276 | go process(predInput, input, config, loader, responses) 277 | 278 | c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { 279 | 280 | for ev := range responses { 281 | var buf bytes.Buffer 282 | enc := json.NewEncoder(&buf) 283 | enc.Encode(ev) 284 | 285 | fmt.Fprintf(w, "event: data\n\n") 286 | fmt.Fprintf(w, "data: %v\n\n", buf.String()) 287 | log.Debug().Msgf("Sending chunk: %s", buf.String()) 288 | w.Flush() 289 | } 290 | 291 | w.WriteString("event: data\n\n") 292 | resp := &OpenAIResponse{ 293 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 294 | Choices: []Choice{{FinishReason: "stop"}}, 295 | } 296 | respData, _ := json.Marshal(resp) 297 | 298 | w.WriteString(fmt.Sprintf("data: %s\n\n", respData)) 299 | w.Flush() 300 | })) 301 | return nil 302 | } 303 | 304 | result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) { 305 | *c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}}) 306 | }, nil) 307 | if err != nil { 308 | return err 309 | } 310 | 311 | resp := &OpenAIResponse{ 312 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
313 | Choices: result, 314 | Object: "chat.completion", 315 | } 316 | respData, _ := json.Marshal(resp) 317 | log.Debug().Msgf("Response: %s", respData) 318 | 319 | // Return the prediction in the response body 320 | return c.JSON(resp) 321 | } 322 | } 323 | 324 | func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { 325 | return func(c *fiber.Ctx) error { 326 | config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16) 327 | if err != nil { 328 | return fmt.Errorf("failed reading parameters from request:%w", err) 329 | } 330 | 331 | log.Debug().Msgf("Parameter Config: %+v", config) 332 | 333 | templateFile := config.Model 334 | 335 | if config.TemplateConfig.Edit != "" { 336 | templateFile = config.TemplateConfig.Edit 337 | } 338 | 339 | var result []Choice 340 | for _, i := range config.InputStrings { 341 | // A model can have a "file.bin.tmpl" file associated with a prompt template prefix 342 | templatedInput, err := loader.TemplatePrefix(templateFile, struct { 343 | Input string 344 | Instruction string 345 | }{Input: i}) 346 | if err == nil { 347 | i = templatedInput 348 | log.Debug().Msgf("Template found, input modified to: %s", i) 349 | } 350 | 351 | r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) { 352 | *c = append(*c, Choice{Text: s}) 353 | }, nil) 354 | if err != nil { 355 | return err 356 | } 357 | 358 | result = append(result, r...) 359 | } 360 | 361 | resp := &OpenAIResponse{ 362 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 363 | Choices: result, 364 | Object: "edit", 365 | } 366 | 367 | jsonResult, _ := json.Marshal(resp) 368 | log.Debug().Msgf("Response: %s", jsonResult) 369 | 370 | // Return the prediction in the response body 371 | return c.JSON(resp) 372 | } 373 | } 374 | 375 | func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error { 376 | return func(c *fiber.Ctx) error { 377 | models, err := loader.ListModels() 378 | if err != nil { 379 | return err 380 | } 381 | var mm map[string]interface{} = map[string]interface{}{} 382 | 383 | dataModels := []OpenAIModel{} 384 | for _, m := range models { 385 | mm[m] = nil 386 | dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"}) 387 | } 388 | 389 | for k := range cm { 390 | if _, exists := mm[k]; !exists { 391 | dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"}) 392 | } 393 | } 394 | 395 | return c.JSON(struct { 396 | Object string `json:"object"` 397 | Data []OpenAIModel `json:"data"` 398 | }{ 399 | Object: "list", 400 | Data: dataModels, 401 | }) 402 | } 403 | } 404 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= 2 | github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= 3 | github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= 4 | github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= 5 | github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= 6 | github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= 7 | github.com/andybalholm/brotli v1.0.5 
h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= 8 | github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= 9 | github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= 10 | github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= 11 | github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= 12 | github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 13 | github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= 14 | github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 15 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 16 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 17 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 18 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 19 | github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ= 20 | github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= 21 | github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= 22 | github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 23 | github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= 24 | github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY= 25 | github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= 26 | github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs= 27 | github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns= 28 | github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M= 29 | github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I= 30 | github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= 31 | github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= 32 | github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= 33 | github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ= 34 | github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= 35 | github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis= 36 | github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI= 37 | github.com/go-skynet/go-llama.cpp v0.0.0-20230504223241-67ff6a4db244/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY= 38 | github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675 h1:plXywr95RghidIHPHl+O/zpcNXenEeS6w/6WftFNr9E= 39 | github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY= 40 | github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638 h1:+7UXkGG+LeqJ5oPBEJo5D73Y2drKOVzrlB8D+iG2PHw= 41 | 
github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I= 42 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= 43 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= 44 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 45 | github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8= 46 | github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w= 47 | github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s= 48 | github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc= 49 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 50 | github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= 51 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 52 | github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= 53 | github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= 54 | github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= 55 | github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 56 | github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= 57 | github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= 58 | github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= 59 | github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= 60 | github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= 61 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 62 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 63 | github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY= 64 | github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= 65 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 66 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 67 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 68 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 69 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 70 | github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= 71 | github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= 72 | github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA= 73 | github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= 74 | github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= 75 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 76 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 77 | github.com/mattn/go-isatty 
v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= 78 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 79 | github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= 80 | github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 81 | github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= 82 | github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 83 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 84 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 85 | github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE= 86 | github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM= 87 | github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= 88 | github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= 89 | github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y= 90 | github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg= 91 | github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg= 92 | github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= 93 | github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw= 94 | github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0= 95 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 96 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 97 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 98 | github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= 99 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 100 | github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= 101 | github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc= 102 | github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU= 103 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 104 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 105 | github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM= 106 | github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= 107 | github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4= 108 | github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8= 109 | github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4= 110 | github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk= 111 | github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g= 112 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 113 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 
114 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 115 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 116 | github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg= 117 | github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto= 118 | github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw= 119 | github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= 120 | github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= 121 | github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY= 122 | github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= 123 | github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= 124 | github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= 125 | github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c= 126 | github.com/valyala/fasthttp v1.47.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA= 127 | github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= 128 | github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= 129 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= 130 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= 131 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 132 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 133 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 134 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 135 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 136 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 137 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 138 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 139 | golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= 140 | golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= 141 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 142 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 143 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 144 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 145 | golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= 146 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 147 | golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= 148 | golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= 149 | golang.org/x/net v0.9.0/go.mod 
h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= 150 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 151 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 152 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 153 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 154 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 155 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 156 | golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 157 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 158 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 159 | golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 160 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 161 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 162 | golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 163 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 164 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 165 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 166 | golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 167 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 168 | golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= 169 | golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 170 | golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= 171 | golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 172 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 173 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 174 | golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= 175 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 176 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 177 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 178 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 179 | golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 180 | golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= 181 | golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 182 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 183 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 184 | golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod 
h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 185 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 186 | golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= 187 | golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y= 188 | golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4= 189 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 190 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 191 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 192 | google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= 193 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 194 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 195 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= 196 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 197 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 198 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 199 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 200 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 201 | gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 202 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 203 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 |
3 |
4 | LocalAI 5 |
6 |

7 | 8 | [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml) 9 | 10 | [![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy) 11 | 12 | **LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows you to run models locally or on-prem with consumer-grade hardware. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) and [ggml](https://github.com/ggerganov/ggml), including support for GPT4ALL-J, which is licensed under Apache 2.0. 13 | 14 | - OpenAI compatible API 15 | - Supports multiple models 16 | - Once loaded the first time, it keeps models loaded in memory for faster inference 17 | - Support for prompt templates 18 | - Doesn't shell out, but uses C bindings for faster inference and better performance. 19 | 20 | LocalAI is a community-driven project, focused on making AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud). 21 | 22 | See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/). 23 | 24 | ## News 25 | 26 | - 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for the `/edits` endpoint 27 | - 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 ) 28 | 29 | Twitter: [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it) 30 | 31 | ### Blogs and articles 32 | 33 | - [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - an excellent use case for LocalAI, using AI to analyse Kubernetes clusters. 34 | 35 | ## Contribute and help 36 | 37 | To help the project you can: 38 | 39 | - Upvote the [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI. 40 | 41 | - [Hacker news post](https://news.ycombinator.com/item?id=35726934) - help us out by voting if you like this project. 42 | 43 | - If you have technological skills and want to contribute to development, have a look at the open issues. If you are new, you can have a look at the [good-first-issue](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and [help-wanted](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels. 44 | 45 | - If you don't have technological skills, you can still help by improving documentation, adding examples, or sharing your user stories with our community; any help and contribution is welcome! 46 | 47 | ## Model compatibility 48 | 49 | It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp), and also supports [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
50 | 51 | Tested with: 52 | - Vicuna 53 | - Alpaca 54 | - [GPT4ALL](https://github.com/nomic-ai/gpt4all) (changes required, see below) 55 | - [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin) (no changes required) 56 | - Koala 57 | - [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml) 58 | - WizardLM 59 | - [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) 60 | 61 | ### GPT4ALL 62 | 63 | Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`. 64 | 65 | ### RWKV 66 | 67 |
68 | 69 | A full example of how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv). 70 | 71 | Note: rwkv models have an associated tokenizer that needs to be provided along with the model: 72 | 73 | ``` 74 | 36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small 75 | 36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json 76 | ``` 77 | 78 |
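The tokenizer file name is not arbitrary: the loader derives it from the model name by appending a fixed `.tokenizer.json` suffix (see `pkg/model/loader.go`). A minimal sketch of that convention, assuming a models path of `/models` and a model named `rwkv_small` (both just illustrative values):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// rwkvPaths mirrors how the loader pairs an rwkv model with its tokenizer:
// the tokenizer is expected next to the model file, named <model>.tokenizer.json.
func rwkvPaths(modelsPath, modelName string) (modelFile, tokenizerFile string) {
	modelFile = filepath.Join(modelsPath, modelName)
	tokenizerFile = modelFile + ".tokenizer.json"
	return modelFile, tokenizerFile
}

func main() {
	model, tokenizer := rwkvPaths("/models", "rwkv_small")
	fmt.Println(model)     // /models/rwkv_small
	fmt.Println(tokenizer) // /models/rwkv_small.tokenizer.json

	// The backend cannot load the model without its tokenizer, so check it is in place.
	if _, err := os.Stat(tokenizer); err != nil {
		fmt.Println("tokenizer not found:", err)
	}
}
```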
79 | 80 | ### Others 81 | 82 | It should also be compatible with StableLM and GPTNeoX ggml models (untested). 83 | 84 | ### Hardware requirements 85 | 86 | Depending on the model you are attempting to run, you might need more RAM or CPU resources. See also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for the memory/disk requirements of `ggml`-based backends. `rwkv` is less demanding on resources. 87 | 88 | 89 | ## Usage 90 | 91 | > `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest). 92 | 93 | The easiest way to run LocalAI is by using `docker-compose`: 94 | 95 | ```bash 96 | 97 | git clone https://github.com/go-skynet/LocalAI 98 | 99 | cd LocalAI 100 | 101 | # (optional) Checkout a specific LocalAI tag 102 | # git checkout -b build 103 | 104 | # copy your models to models/ 105 | cp your-model.bin models/ 106 | 107 | # (optional) Edit the .env file to set things like context size and threads 108 | # vim .env 109 | 110 | # start with docker-compose 111 | docker-compose up -d --build 112 | 113 | # Now API is accessible at localhost:8080 114 | curl http://localhost:8080/v1/models 115 | # {"object":"list","data":[{"id":"your-model.bin","object":"model"}]} 116 | 117 | curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ 118 | "model": "your-model.bin", 119 | "prompt": "A long time ago in a galaxy far, far away", 120 | "temperature": 0.7 121 | }' 122 | ``` 123 | 124 | ### Example: Use GPT4ALL-J model 125 | 126 |
127 | 128 | ```bash 129 | # Clone LocalAI 130 | git clone https://github.com/go-skynet/LocalAI 131 | 132 | cd LocalAI 133 | 134 | # (optional) Checkout a specific LocalAI tag 135 | # git checkout -b build 136 | 137 | # Download gpt4all-j to models/ 138 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 139 | 140 | # Use a template from the examples 141 | cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/ 142 | 143 | # (optional) Edit the .env file to set things like context size and threads 144 | # vim .env 145 | 146 | # start with docker-compose 147 | docker-compose up -d --build 148 | 149 | # Now API is accessible at localhost:8080 150 | curl http://localhost:8080/v1/models 151 | # {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]} 152 | 153 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 154 | "model": "ggml-gpt4all-j", 155 | "messages": [{"role": "user", "content": "How are you?"}], 156 | "temperature": 0.9 157 | }' 158 | 159 | # {"model":"ggml-gpt4all-j","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]} 160 | ``` 161 |
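Because the API is plain HTTP and JSON, the same chat request can be issued from any language without an SDK. As a minimal illustrative sketch in Go, assuming the stack above is running on `localhost:8080` and serving the `ggml-gpt4all-j` model:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Same request body as the curl example above.
	payload := map[string]interface{}{
		"model":       "ggml-gpt4all-j",
		"messages":    []map[string]string{{"role": "user", "content": "How are you?"}},
		"temperature": 0.9,
	}
	body, _ := json.Marshal(payload)

	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Decode only the fields we need from the OpenAI-style response.
	var out struct {
		Choices []struct {
			Message struct {
				Content string `json:"content"`
			} `json:"message"`
		} `json:"choices"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	if len(out.Choices) > 0 {
		fmt.Println(out.Choices[0].Message.Content)
	}
}
```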
162 | 163 | To build locally, run `make build` (see below). 164 | 165 | ### Other examples 166 | 167 | To see other examples on how to integrate with other projects for instance for question answering or for using it with chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/). 168 | 169 | 170 | ### Advanced configuration 171 | 172 | LocalAI can be configured to serve user-defined models with a set of default parameters and templates. 173 | 174 |
175 | 176 | You can create multiple `yaml` files in the models path, or specify a single YAML configuration file. 177 | Consider the following `models` folder in `examples/chatbot-ui`: 178 | 179 | ``` 180 | base ❯ ls -liah examples/chatbot-ui/models 181 | 36487587 drwxr-xr-x 2 mudler mudler 4.0K May 3 12:27 . 182 | 36487586 drwxr-xr-x 3 mudler mudler 4.0K May 3 10:42 .. 183 | 36465214 -rw-r--r-- 1 mudler mudler 10 Apr 27 07:46 completion.tmpl 184 | 36464855 -rw-r--r-- 1 mudler mudler 3.6G Apr 27 00:08 ggml-gpt4all-j 185 | 36464537 -rw-r--r-- 1 mudler mudler 245 May 3 10:42 gpt-3.5-turbo.yaml 186 | 36467388 -rw-r--r-- 1 mudler mudler 180 Apr 27 07:46 gpt4all.tmpl 187 | ``` 188 | 189 | The `gpt-3.5-turbo.yaml` file defines the `gpt-3.5-turbo` model, which is an alias for using `ggml-gpt4all-j` with pre-defined options. 190 | 191 | For instance, consider the following configuration, which declares `gpt-3.5-turbo` backed by the `ggml-gpt4all-j` model: 192 | 193 | ```yaml 194 | name: gpt-3.5-turbo 195 | # Default model parameters 196 | parameters: 197 | # Relative to the models path 198 | model: ggml-gpt4all-j 199 | # temperature 200 | temperature: 0.3 201 | # all the OpenAI request options here.. 202 | 203 | # Default context size 204 | context_size: 512 205 | threads: 10 206 | # Define a backend (optional). By default it will try to guess the backend the first time the model is interacted with. 207 | backend: gptj # available: llama, stablelm, gpt2, gptj, rwkv 208 | # stopwords (if supported by the backend) 209 | stopwords: 210 | - "HUMAN:" 211 | - "### Response:" 212 | # define chat roles 213 | roles: 214 | user: "HUMAN:" 215 | system: "GPT:" 216 | template: 217 | # template file ".tmpl" with the prompt template to use by default on the endpoint call. Note that the file extension is omitted here 218 | completion: completion 219 | chat: ggml-gpt4all-j 220 | ``` 221 | 222 | Specifying a `config-file` via CLI allows you to declare models in a single file as a list, for instance: 223 | 224 | ```yaml 225 | - name: list1 226 | parameters: 227 | model: testmodel 228 | context_size: 512 229 | threads: 10 230 | stopwords: 231 | - "HUMAN:" 232 | - "### Response:" 233 | roles: 234 | user: "HUMAN:" 235 | system: "GPT:" 236 | template: 237 | completion: completion 238 | chat: ggml-gpt4all-j 239 | - name: list2 240 | parameters: 241 | model: testmodel 242 | context_size: 512 243 | threads: 10 244 | stopwords: 245 | - "HUMAN:" 246 | - "### Response:" 247 | roles: 248 | user: "HUMAN:" 249 | system: "GPT:" 250 | template: 251 | completion: completion 252 | chat: ggml-gpt4all-j 253 | ``` 254 | 255 | See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) for an example of how to use config files. 256 | 257 |
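The `roles` mapping above is what the chat endpoint uses to turn OpenAI-style messages into a single prompt string before templating. A simplified sketch of that step, adapted from `api/openai.go`:

```go
package main

import (
	"fmt"
	"strings"
)

type message struct{ Role, Content string }

// buildPrompt prefixes each chat message with the configured role string
// (falling back to the raw role name when no mapping exists), then joins
// the lines into one prompt that is later passed to the prompt template.
func buildPrompt(roles map[string]string, messages []message) string {
	mess := []string{}
	for _, m := range messages {
		r := roles[m.Role]
		if r == "" {
			r = m.Role
		}
		mess = append(mess, fmt.Sprint(r, " ", m.Content))
	}
	return strings.Join(mess, "\n")
}

func main() {
	roles := map[string]string{"user": "HUMAN:", "system": "GPT:"}
	msgs := []message{{Role: "user", Content: "How are you?"}}
	fmt.Println(buildPrompt(roles, msgs)) // HUMAN: How are you?
}
```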
258 | 259 | ### Prompt templates 260 | 261 | The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the Stanford Alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release. 262 | 263 |
264 | You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as a default prompt and can be used with alpaca: 265 | 266 | ``` 267 | The below instruction describes a task. Write a response that appropriately completes the request. 268 | 269 | ### Instruction: 270 | {{.Input}} 271 | 272 | ### Response: 273 | ``` 274 | 275 | See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for some of the most popular models. 276 | 277 | 278 | For the edit endpoint, an example template for alpaca-based models can be: 279 | 280 | ``` 281 | Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 282 | 283 | ### Instruction: 284 | {{.Instruction}} 285 | 286 | ### Input: 287 | {{.Input}} 288 | 289 | ### Response: 290 | ``` 291 | 292 |
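These templates are standard Go `text/template` files: the API parses the `.tmpl` file once and executes it with the user prompt bound to `.Input` (and, for the edit endpoint, the instruction bound to `.Instruction`). A simplified sketch of what `pkg/model/loader.go` does with a template, using a hypothetical `foo.bin.tmpl`:

```go
package main

import (
	"bytes"
	"fmt"
	"text/template"
)

func main() {
	// Contents of a hypothetical foo.bin.tmpl file.
	tmplText := `The below instruction describes a task. Write a response that appropriately completes the request.

### Instruction:
{{.Input}}

### Response:
`

	// Simplified from pkg/model/loader.go: parse the template, then execute
	// it with the incoming prompt bound to .Input.
	tmpl, err := template.New("prompt").Parse(tmplText)
	if err != nil {
		panic(err)
	}

	var buf bytes.Buffer
	if err := tmpl.Execute(&buf, struct{ Input string }{Input: "Say this is a test!"}); err != nil {
		panic(err)
	}
	fmt.Println(buf.String())
}
```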
293 | 294 | ### CLI 295 | 296 | You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. 297 | 298 |
299 | 300 | Usage: 301 | 302 | ``` 303 | local-ai --models-path <model-path> [--address <address>] [--threads <number-of-threads>] 304 | 305 | ``` 306 | | Parameter | Environment Variable | Default Value | Description | 307 | | ------------ | -------------------- | ------------- | -------------------------------------- | 308 | | models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). | 309 | | threads | THREADS | Number of physical cores | The number of threads to use for text generation. | 310 | | address | ADDRESS | :8080 | The address and port to listen on. | 311 | | context-size | CONTEXT_SIZE | 512 | Default token context size. | 312 | | debug | DEBUG | false | Enable debug mode. | 313 | | config-file | CONFIG_FILE | empty | Path to a LocalAI config file. | 314 | 315 |
316 | 317 | ## Setup 318 | 319 | Currently LocalAI comes as a container image and can be used with docker or a container engine of choice. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest). 320 | 321 | ### Docker 322 | 323 |
324 | Example of starting the API with `docker`: 325 | 326 | ```bash 327 | docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4 328 | ``` 329 | 330 | You should see: 331 | ``` 332 | ┌───────────────────────────────────────────────────┐ 333 | │ Fiber v2.42.0 │ 334 | │ http://127.0.0.1:8080 │ 335 | │ (bound on host 0.0.0.0 and port 8080) │ 336 | │ │ 337 | │ Handlers ............. 1 Processes ........... 1 │ 338 | │ Prefork ....... Disabled PID ................. 1 │ 339 | └───────────────────────────────────────────────────┘ 340 | ``` 341 | 342 |
343 | 344 | ### Build locally 345 | 346 |
347 | 348 | In order to build the `LocalAI` container image locally, you can use `docker`: 349 | 350 | ``` 351 | # build the image (image names must be lowercase) 352 | docker build -t local-ai . 353 | docker run local-ai 354 | ``` 355 | 356 | Or you can build the binary with `make`: 357 | 358 | ``` 359 | make build 360 | ``` 361 | 362 |
363 | 364 | ### Build on mac 365 | 366 | Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`. 367 | 368 |
369 | 370 | The below has been tested by one mac user and found to work. Note that this doesn't use docker to run the server: 371 | 372 | ``` 373 | # install build dependencies 374 | brew install cmake 375 | brew install go 376 | 377 | # clone the repo 378 | git clone https://github.com/go-skynet/LocalAI.git 379 | 380 | cd LocalAI 381 | 382 | # build the binary 383 | make build 384 | 385 | # Download gpt4all-j to models/ 386 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 387 | 388 | # Use a template from the examples 389 | cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/ 390 | 391 | # Run LocalAI 392 | ./local-ai --models-path ./models/ --debug 393 | 394 | # Now API is accessible at localhost:8080 395 | curl http://localhost:8080/v1/models 396 | 397 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 398 | "model": "ggml-gpt4all-j", 399 | "messages": [{"role": "user", "content": "How are you?"}], 400 | "temperature": 0.9 401 | }' 402 | ``` 403 | 404 |
405 | 406 | ### Windows compatibility 407 | 408 | It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2 409 | 410 | ### Run LocalAI in Kubernetes 411 | 412 | LocalAI can be installed inside Kubernetes with helm. 413 | 414 |
415 | 416 | 1. Add the helm repo 417 | ```bash 418 | helm repo add go-skynet https://go-skynet.github.io/helm-charts/ 419 | ``` 420 | 2. Create a values file with your settings: 421 | ```bash 422 | cat <<EOF > values.yaml 423 | deployment: 424 | image: quay.io/go-skynet/local-ai:latest 425 | env: 426 | threads: 4 427 | contextSize: 1024 428 | modelsPath: "/models" 429 | # Optionally create a PVC, mount the PV to the LocalAI Deployment, 430 | # and download a model to prepopulate the models directory 431 | modelsVolume: 432 | enabled: true 433 | url: "https://gpt4all.io/models/ggml-gpt4all-j.bin" 434 | pvc: 435 | size: 6Gi 436 | accessModes: 437 | - ReadWriteOnce 438 | auth: 439 | # Optional value for HTTP basic access authentication header 440 | basic: "" # 'username:password' base64 encoded 441 | service: 442 | type: ClusterIP 443 | annotations: {} 444 | # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout 445 | # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200" 446 | EOF 447 | ``` 448 | 3. Install the helm chart: 449 | ```bash 450 | helm repo update 451 | helm install local-ai go-skynet/local-ai -f values.yaml 452 | ``` 453 | 454 | Also check out the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts). 455 | 456 |
457 | 458 | ## Supported OpenAI API endpoints 459 | 460 | You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create). 461 | 462 | Following the list of endpoints/parameters supported. 463 | 464 | Note: 465 | 466 | - You can also specify the model as part of the OpenAI token. 467 | - If only one model is available, the API will use it for all the requests. 468 | 469 | ### Chat completions 470 | 471 |
472 | For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body: 473 | 474 | ``` 475 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 476 | "model": "ggml-koala-7b-model-q4_0-r2.bin", 477 | "messages": [{"role": "user", "content": "Say this is a test!"}], 478 | "temperature": 0.7 479 | }' 480 | ``` 481 | 482 | Available additional parameters: `top_p`, `top_k`, `max_tokens` 483 |
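Since the endpoint mirrors the OpenAI API, a third-party OpenAI client can also be pointed at it by overriding the base URL. A minimal sketch using the `github.com/sashabaranov/go-openai` client, assuming LocalAI is listening on `localhost:8080` and serving the model above (the API key is just a placeholder when no authentication layer is configured in front of LocalAI):

```go
package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	// Point the client at the LocalAI endpoint instead of api.openai.com.
	cfg := openai.DefaultConfig("not-needed")
	cfg.BaseURL = "http://localhost:8080/v1"
	client := openai.NewClientWithConfig(cfg)

	resp, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		Model: "ggml-koala-7b-model-q4_0-r2.bin",
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "Say this is a test!"},
		},
		Temperature: 0.7,
	})
	if err != nil {
		panic(err)
	}
	fmt.Println(resp.Choices[0].Message.Content)
}
```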
484 | 485 | ### Edit completions 486 | 487 |
488 | To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body: 489 | 490 | ``` 491 | curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{ 492 | "model": "ggml-koala-7b-model-q4_0-r2.bin", 493 | "instruction": "rephrase", 494 | "input": "Black cat jumped out of the window", 495 | "temperature": 0.7 496 | }' 497 | ``` 498 | 499 | Available additional parameters: `top_p`, `top_k`, `max_tokens`. 500 | 501 |

### Completions
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the prompt as the request body:

```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
     "model": "ggml-koala-7b-model-q4_0-r2.bin",
     "prompt": "A long time ago in a galaxy far, far away",
     "temperature": 0.7
   }'
```

Available additional parameters: `top_p`, `top_k`, `max_tokens`
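
And the equivalent call with the `openai` Python package (minimal sketch, pre-1.0 release, base URL pointed at LocalAI):

```python
# Minimal sketch: the completion request above, sent through the `openai` package (pre-1.0).
import openai

openai.api_base = "http://localhost:8080/v1"
openai.api_key = "sk-local"  # placeholder

response = openai.Completion.create(
    model="ggml-koala-7b-model-q4_0-r2.bin",
    prompt="A long time ago in a galaxy far, far away",
    temperature=0.7,
)

print(response["choices"][0]["text"])
```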

### List models
You can list all the models available with:

```
curl http://localhost:8080/v1/models
```
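
Or, with the `openai` Python package (minimal sketch, pre-1.0 release):

```python
# Minimal sketch: list the models LocalAI exposes, via the `openai` package (pre-1.0).
import openai

openai.api_base = "http://localhost:8080/v1"
openai.api_key = "sk-local"  # placeholder

for model in openai.Model.list()["data"]:
    print(model["id"])
```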

## Frequently asked questions

Here are answers to some of the most common questions.

### How do I get models?
Most ggml-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open an issue. However, be cautious about downloading models from the internet directly onto your machine, as there may be security vulnerabilities in llama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=ggml. Models from gpt4all should also work: https://github.com/nomic-ai/gpt4all.
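
If you want to fetch a model file programmatically, here is a minimal sketch using the `huggingface_hub` package (0.13+ for the `local_dir` argument). The repository and file names below are placeholders, not recommendations; substitute a real ggml model found through the search link above.

```python
# Minimal sketch: download a ggml model file into LocalAI's models directory.
# Requires `pip install huggingface_hub` (0.13+ for local_dir support).
# NOTE: repo_id and filename are placeholders; pick an actual ggml model
# from the Hugging Face search linked above.
from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="someuser/some-ggml-model",  # placeholder repository
    filename="ggml-model-q4_0.bin",      # placeholder file name
    local_dir="models",                  # LocalAI's models path
    local_dir_use_symlinks=False,        # copy the file instead of symlinking
)
```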

### What's the difference with Serge, or XXX?
LocalAI is a multi-model solution that isn't tied to a specific model type (e.g., llama.cpp or alpaca.cpp): it handles all of them internally for faster inference, and it is easy to set up locally and deploy to Kubernetes.

### Can I use it with a Discord bot, or XXX?
Yes! If the client uses the OpenAI API and supports setting a different base URL for requests, you can point it at the LocalAI endpoint. This lets you use LocalAI with any application built for OpenAI, without changing the application itself!
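
Many clients read the base URL from the environment rather than from code. As one illustrative case, pre-1.0 releases of the official `openai` Python package pick up `OPENAI_API_BASE` and `OPENAI_API_KEY` when the module is imported, so an application using it can be redirected to LocalAI just by setting them first; other clients may use different configuration names, so check their documentation.

```python
# Minimal sketch: redirect an app that uses the `openai` package (pre-1.0) to LocalAI
# without touching its code, by setting the environment before the package is imported.
import os

os.environ["OPENAI_API_BASE"] = "http://localhost:8080/v1"
os.environ["OPENAI_API_KEY"] = "sk-local"  # placeholder value

import openai  # noqa: E402  (imported after the environment is prepared)

print(openai.api_base)  # -> http://localhost:8080/v1
```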

### Can this leverage GPUs?
Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/llama.cpp/discussions/915.

### Where is the webUI?
The localai-webui and chatbot-ui examples are available in the examples section and can be set up following their instructions. More generally, since LocalAI is an API, you can plug it into any existing project that provides a UI for OpenAI's APIs. Several such projects are already on GitHub, and they should be compatible with LocalAI out of the box (as it mimics the OpenAI API).

### Does it work with AutoGPT?
AutoGPT currently doesn't allow setting a different API URL, but there is a PR open for it, so this should be possible soon!

## Projects already using LocalAI to run local models

Feel free to open up a PR to get your project listed!

- [Kairos](https://github.com/kairos-io/kairos)
- [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models)

## Blog posts and other articles

- https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65
- https://kairos.io/docs/examples/localai/

## Short-term roadmap

- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351) and [gpt4all](https://github.com/go-skynet/LocalAI/issues/85)
- [x] Multi-model support
- [x] Have a webUI!
- [x] Allow configuration of defaults for models.
- [ ] Enable automatic downloading of models from a curated gallery of free-licensed models, directly from the webUI.

## Star history

[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)

## License

LocalAI is a community-driven project. It was initially created by [Ettore Di Giacinto](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).

MIT

## Golang bindings used

- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)

## Acknowledgements

- [llama.cpp](https://github.com/ggerganov/llama.cpp)
- https://github.com/tatsu-lab/stanford_alpaca
- https://github.com/cornelk/llama-go for the initial ideas
- https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)

## Contributors