├── models └── .keep ├── examples ├── query_data │ ├── data │ │ └── .keep │ ├── .gitignore │ ├── models │ │ ├── completion.tmpl │ │ ├── wizardlm.tmpl │ │ ├── gpt-3.5-turbo.yaml │ │ └── embeddings.yaml │ ├── docker-compose.yml │ ├── store.py │ ├── query.py │ ├── update.py │ └── README.md ├── discord-bot │ ├── models │ ├── .env.example │ ├── docker-compose.yaml │ └── README.md ├── slack-bot │ ├── models │ ├── .env.example │ ├── docker-compose.yaml │ └── README.md ├── chatbot-ui │ ├── models │ │ ├── completion.tmpl │ │ ├── gpt4all.tmpl │ │ └── gpt-3.5-turbo.yaml │ ├── docker-compose.yaml │ └── README.md ├── langchain-python │ ├── models │ ├── test.py │ ├── docker-compose.yaml │ ├── README.md │ └── agent.py ├── langchain │ ├── models │ │ ├── completion.tmpl │ │ ├── gpt4all.tmpl │ │ └── gpt-3.5-turbo.yaml │ ├── .gitignore │ ├── langchainjs-localai-example │ │ ├── .gitignore │ │ ├── tsconfig.json │ │ ├── package.json │ │ ├── .vscode │ │ │ └── launch.json │ │ └── src │ │ │ └── index.mts │ ├── langchainpy-localai-example │ │ ├── .vscode │ │ │ ├── settings.json │ │ │ └── launch.json │ │ ├── simple_demo.py │ │ ├── requirements.txt │ │ └── full_demo.py │ ├── JS.Dockerfile │ ├── PY.Dockerfile │ ├── README.md │ └── docker-compose.yaml ├── rwkv │ ├── models │ │ ├── rwkv_completion.tmpl │ │ ├── gpt-3.5-turbo.yaml │ │ └── rwkv_chat.tmpl │ ├── Dockerfile.build │ ├── scripts │ │ └── build.sh │ ├── docker-compose.yaml │ └── README.md ├── localai-webui │ ├── docker-compose.yml │ └── README.md └── README.md ├── tests └── fixtures │ ├── completion.tmpl │ ├── ggml-gpt4all-j.tmpl │ ├── gpt4.yaml │ ├── gpt4_2.yaml │ └── config.yaml ├── .dockerignore ├── prompt-templates ├── wizardlm.tmpl ├── koala.tmpl ├── alpaca.tmpl ├── vicuna.tmpl └── ggml-gpt4all-j.tmpl ├── entrypoint.sh ├── .env ├── renovate.json ├── Earthfile ├── .devcontainer ├── Dockerfile ├── docker-compose.yml └── devcontainer.json ├── Dockerfile ├── .gitignore ├── api ├── apt_suite_test.go ├── api.go ├── api_test.go ├── config.go ├── prediction.go └── openai.go ├── .github ├── bump_deps.sh └── workflows │ ├── release.yml.disabled │ ├── test.yml │ ├── bump_deps.yaml │ └── image.yml ├── .goreleaser.yaml ├── docker-compose.yaml ├── Dockerfile.dev ├── .vscode └── launch.json ├── LICENSE ├── go.mod ├── main.go ├── Makefile ├── pkg └── model │ └── loader.go ├── go.sum └── README.md /models/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/query_data/data/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/query_data/.gitignore: -------------------------------------------------------------------------------- 1 | storage/ -------------------------------------------------------------------------------- /tests/fixtures/completion.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} -------------------------------------------------------------------------------- /examples/discord-bot/models: -------------------------------------------------------------------------------- 1 | ../chatbot-ui/models/ -------------------------------------------------------------------------------- /examples/slack-bot/models: -------------------------------------------------------------------------------- 1 | ../chatbot-ui/models 
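Note: the `models` entries under `examples/discord-bot` and `examples/slack-bot` shown just above contain only a relative path (`../chatbot-ui/models`), because they appear to be symlinks — both bots reuse the model configuration shipped with the chatbot-ui example. A minimal sketch of how such a link is typically created, assuming you are recreating this layout by hand:

```bash
# from inside examples/discord-bot/ (same idea for examples/slack-bot/)
ln -s ../chatbot-ui/models models
```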
-------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | models 2 | examples/chatbot-ui/models -------------------------------------------------------------------------------- /examples/chatbot-ui/models/completion.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} -------------------------------------------------------------------------------- /examples/langchain-python/models: -------------------------------------------------------------------------------- 1 | ../chatbot-ui/models -------------------------------------------------------------------------------- /examples/langchain/models/completion.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} -------------------------------------------------------------------------------- /examples/query_data/models/completion.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} -------------------------------------------------------------------------------- /prompt-templates/wizardlm.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} 2 | 3 | ### Response: -------------------------------------------------------------------------------- /examples/query_data/models/wizardlm.tmpl: -------------------------------------------------------------------------------- 1 | {{.Input}} 2 | 3 | ### Response: -------------------------------------------------------------------------------- /prompt-templates/koala.tmpl: -------------------------------------------------------------------------------- 1 | BEGINNING OF CONVERSATION: USER: {{.Input}} GPT: -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /build 4 | 5 | make build 6 | 7 | ./local-ai "$@" -------------------------------------------------------------------------------- /examples/langchain/.gitignore: -------------------------------------------------------------------------------- 1 | models/ggml-koala-13B-4bit-128g 2 | models/ggml-gpt4all-j -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | -------------------------------------------------------------------------------- /examples/rwkv/models/rwkv_completion.tmpl: -------------------------------------------------------------------------------- 1 | Complete the following sentence: {{.Input}} -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # THREADS=14 2 | # CONTEXT_SIZE=512 3 | MODELS_PATH=/models 4 | # DEBUG=true 5 | # BUILD_TYPE=generic 6 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["config:base"] 4 | } 5 | -------------------------------------------------------------------------------- /Earthfile: 
-------------------------------------------------------------------------------- 1 | VERSION 0.7 2 | 3 | build: 4 | FROM DOCKERFILE -f Dockerfile . 5 | SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai 6 | -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python" 3 | } -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GO_VERSION=1.20 2 | FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye 3 | RUN apt-get update && apt-get install -y cmake 4 | -------------------------------------------------------------------------------- /examples/langchain/JS.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:latest 2 | COPY ./langchainjs-localai-example /app 3 | WORKDIR /app 4 | RUN npm install 5 | RUN npm run build 6 | ENTRYPOINT [ "npm", "run", "start" ] -------------------------------------------------------------------------------- /prompt-templates/alpaca.tmpl: -------------------------------------------------------------------------------- 1 | Below is an instruction that describes a task. Write a response that appropriately completes the request. 2 | 3 | ### Instruction: 4 | {{.Input}} 5 | 6 | ### Response: -------------------------------------------------------------------------------- /prompt-templates/vicuna.tmpl: -------------------------------------------------------------------------------- 1 | Below is an instruction that describes a task. Write a response that appropriately completes the request. 2 | 3 | ### Instruction: 4 | {{.Input}} 5 | 6 | ### Response: -------------------------------------------------------------------------------- /examples/discord-bot/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=x 2 | DISCORD_BOT_TOKEN=x 3 | DISCORD_CLIENT_ID=x 4 | OPENAI_API_BASE=http://api:8080 5 | ALLOWED_SERVER_IDS=x 6 | SERVER_TO_MODERATION_CHANNEL=1:1 7 | -------------------------------------------------------------------------------- /examples/langchain/PY.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-bullseye 2 | COPY ./langchainpy-localai-example /app 3 | WORKDIR /app 4 | RUN pip install --no-cache-dir -r requirements.txt 5 | ENTRYPOINT [ "python", "./full_demo.py" ]; -------------------------------------------------------------------------------- /prompt-templates/ggml-gpt4all-j.tmpl: -------------------------------------------------------------------------------- 1 | The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 2 | ### Prompt: 3 | {{.Input}} 4 | ### Response: 5 | -------------------------------------------------------------------------------- /tests/fixtures/ggml-gpt4all-j.tmpl: -------------------------------------------------------------------------------- 1 | The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 
2 | ### Prompt: 3 | {{.Input}} 4 | ### Response: 5 | -------------------------------------------------------------------------------- /examples/chatbot-ui/models/gpt4all.tmpl: -------------------------------------------------------------------------------- 1 | The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 2 | ### Prompt: 3 | {{.Input}} 4 | ### Response: 5 | -------------------------------------------------------------------------------- /examples/langchain/models/gpt4all.tmpl: -------------------------------------------------------------------------------- 1 | The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 2 | ### Prompt: 3 | {{.Input}} 4 | ### Response: 5 | -------------------------------------------------------------------------------- /examples/langchain-python/test.py: -------------------------------------------------------------------------------- 1 | 2 | from langchain.llms import OpenAI 3 | 4 | llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo") 5 | text = "What would be a good company name for a company that makes colorful socks?" 6 | print(llm(text)) 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GO_VERSION=1.20 2 | ARG BUILD_TYPE= 3 | FROM golang:$GO_VERSION 4 | WORKDIR /build 5 | RUN apt-get update && apt-get install -y cmake 6 | COPY . . 7 | RUN make prepare-sources 8 | EXPOSE 8080 9 | ENTRYPOINT [ "/build/entrypoint.sh" ] 10 | -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/simple_demo.py: -------------------------------------------------------------------------------- 1 | 2 | from langchain.llms import OpenAI 3 | 4 | llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo") 5 | text = "What would be a good company name for a company that makes colorful socks?" 6 | print(llm(text)) 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # go-llama build artifacts 2 | go-llama 3 | go-gpt4all-j 4 | go-gpt2 5 | go-rwkv 6 | 7 | # LocalAI build binary 8 | LocalAI 9 | local-ai 10 | # prevent above rules from omitting the helm chart 11 | !charts/* 12 | 13 | # Ignore models 14 | models/* 15 | test-models/ -------------------------------------------------------------------------------- /api/apt_suite_test.go: -------------------------------------------------------------------------------- 1 | package api_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . "github.com/onsi/ginkgo/v2" 7 | . 
"github.com/onsi/gomega" 8 | ) 9 | 10 | func TestLocalAI(t *testing.T) { 11 | RegisterFailHandler(Fail) 12 | RunSpecs(t, "LocalAI test suite") 13 | } 14 | -------------------------------------------------------------------------------- /.github/bump_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -xe 3 | REPO=$1 4 | BRANCH=$2 5 | VAR=$3 6 | 7 | LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH") 8 | 9 | sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/" 10 | -------------------------------------------------------------------------------- /tests/fixtures/gpt4.yaml: -------------------------------------------------------------------------------- 1 | name: gpt4all 2 | parameters: 3 | model: testmodel 4 | top_p: 80 5 | top_k: 0.9 6 | temperature: 0.1 7 | context_size: 10 8 | stopwords: 9 | - "HUMAN:" 10 | - "### Response:" 11 | roles: 12 | user: "HUMAN:" 13 | system: "GPT:" 14 | template: 15 | completion: completion 16 | chat: ggml-gpt4all-j -------------------------------------------------------------------------------- /tests/fixtures/gpt4_2.yaml: -------------------------------------------------------------------------------- 1 | name: gpt4all-2 2 | parameters: 3 | model: testmodel 4 | top_p: 80 5 | top_k: 0.9 6 | temperature: 0.1 7 | context_size: 10 8 | stopwords: 9 | - "HUMAN:" 10 | - "### Response:" 11 | roles: 12 | user: "HUMAN:" 13 | system: "GPT:" 14 | template: 15 | completion: completion 16 | chat: ggml-gpt4all-j -------------------------------------------------------------------------------- /examples/rwkv/Dockerfile.build: -------------------------------------------------------------------------------- 1 | FROM python 2 | 3 | # convert the model (one-off) 4 | RUN pip3 install torch numpy 5 | 6 | WORKDIR /build 7 | COPY ./scripts/ . 8 | 9 | RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release 10 | ENTRYPOINT [ "/build/build.sh" ] -------------------------------------------------------------------------------- /examples/rwkv/scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | URL=$1 5 | OUT=$2 6 | FILENAME=$(basename $URL) 7 | 8 | wget -nc $URL -O /build/$FILENAME 9 | 10 | python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16 11 | python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2 12 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | # Make sure to check the documentation at http://goreleaser.com 2 | project_name: local-ai 3 | builds: 4 | - ldflags: 5 | - -w -s 6 | env: 7 | - CGO_ENABLED=0 8 | goos: 9 | - linux 10 | - darwin 11 | - windows 12 | goarch: 13 | - amd64 14 | - arm64 15 | binary: '{{ .ProjectName }}' -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: . 
8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | env_file: 12 | - .env 13 | volumes: 14 | - ./models:/models:cached 15 | command: ["/usr/bin/local-ai" ] 16 | -------------------------------------------------------------------------------- /examples/chatbot-ui/models/gpt-3.5-turbo.yaml: -------------------------------------------------------------------------------- 1 | name: gpt-3.5-turbo 2 | parameters: 3 | model: ggml-gpt4all-j 4 | top_k: 80 5 | temperature: 0.2 6 | top_p: 0.7 7 | context_size: 1024 8 | threads: 14 9 | stopwords: 10 | - "HUMAN:" 11 | - "GPT:" 12 | roles: 13 | user: " " 14 | system: " " 15 | template: 16 | completion: completion 17 | chat: gpt4all -------------------------------------------------------------------------------- /examples/query_data/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: . 8 | dockerfile: Dockerfile 9 | ports: 10 | - 8080:8080 11 | env_file: 12 | - .env 13 | volumes: 14 | - ./models:/models:cached 15 | command: ["/usr/bin/local-ai"] 16 | -------------------------------------------------------------------------------- /Dockerfile.dev: -------------------------------------------------------------------------------- 1 | ARG GO_VERSION=1.20 2 | ARG DEBIAN_VERSION=11 3 | ARG BUILD_TYPE= 4 | 5 | FROM golang:$GO_VERSION as builder 6 | WORKDIR /build 7 | RUN apt-get update && apt-get install -y cmake 8 | COPY . . 9 | RUN make build 10 | 11 | FROM debian:$DEBIAN_VERSION 12 | COPY --from=builder /build/local-ai /usr/bin/local-ai 13 | EXPOSE 8080 14 | ENTRYPOINT [ "/usr/bin/local-ai" ] -------------------------------------------------------------------------------- /examples/query_data/models/gpt-3.5-turbo.yaml: -------------------------------------------------------------------------------- 1 | name: gpt-3.5-turbo 2 | parameters: 3 | model: HERE 4 | top_k: 80 5 | temperature: 0.2 6 | top_p: 0.7 7 | context_size: 1024 8 | threads: 14 9 | embeddings: true 10 | stopwords: 11 | - "HUMAN:" 12 | - "GPT:" 13 | roles: 14 | user: " " 15 | system: " " 16 | template: 17 | completion: completion 18 | chat: wizardlm 19 | -------------------------------------------------------------------------------- /examples/query_data/models/embeddings.yaml: -------------------------------------------------------------------------------- 1 | name: text-embedding-ada-002 2 | parameters: 3 | model: HERE 4 | top_k: 80 5 | temperature: 0.2 6 | top_p: 0.7 7 | context_size: 1024 8 | threads: 14 9 | stopwords: 10 | - "HUMAN:" 11 | - "GPT:" 12 | roles: 13 | user: " " 14 | system: " " 15 | embeddings: true 16 | template: 17 | completion: completion 18 | chat: gpt4all 19 | -------------------------------------------------------------------------------- /examples/langchain/models/gpt-3.5-turbo.yaml: -------------------------------------------------------------------------------- 1 | name: gpt-3.5-turbo 2 | parameters: 3 | model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g 4 | top_k: 80 5 | temperature: 0.2 6 | top_p: 0.7 7 | context_size: 1024 8 | threads: 4 9 | stopwords: 10 | - "HUMAN:" 11 | - "GPT:" 12 | roles: 13 | user: " " 14 | system: " " 15 | backend: "gptj" 16 | template: 17 | completion: completion 18 | chat: gpt4all -------------------------------------------------------------------------------- /examples/rwkv/models/gpt-3.5-turbo.yaml: 
-------------------------------------------------------------------------------- 1 | name: gpt-3.5-turbo 2 | parameters: 3 | model: rwkv 4 | top_k: 80 5 | temperature: 0.9 6 | max_tokens: 100 7 | top_p: 0.8 8 | context_size: 1024 9 | threads: 14 10 | backend: "rwkv" 11 | cutwords: 12 | - "Bob:.*" 13 | roles: 14 | user: "Bob:" 15 | system: "Alice:" 16 | assistant: "Alice:" 17 | template: 18 | completion: rwkv_completion 19 | chat: rwkv_chat -------------------------------------------------------------------------------- /examples/rwkv/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | -------------------------------------------------------------------------------- /examples/langchain-python/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | -------------------------------------------------------------------------------- /examples/slack-bot/.env.example: -------------------------------------------------------------------------------- 1 | SLACK_APP_TOKEN=xapp-1-... 2 | SLACK_BOT_TOKEN=xoxb-... 3 | OPENAI_API_KEY=sk-... 4 | OPENAI_API_BASE=http://api:8080 5 | OPENAI_MODEL=gpt-3.5-turbo 6 | OPENAI_TIMEOUT_SECONDS=60 7 | #OPENAI_SYSTEM_TEXT="You proofread text. When you receive a message, you will check 8 | #for mistakes and make suggestion to improve the language of the given text" 9 | USE_SLACK_LANGUAGE=true 10 | SLACK_APP_LOG_LEVEL=INFO 11 | TRANSLATE_MARKDOWN=true -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2022", 4 | "lib": ["ES2022", "DOM"], 5 | "module": "ES2022", 6 | "moduleResolution": "node", 7 | "strict": true, 8 | "esModuleInterop": true, 9 | "allowSyntheticDefaultImports": true, 10 | "isolatedModules": true, 11 | "outDir": "./dist" 12 | }, 13 | "include": ["src", "test"], 14 | "exclude": ["node_modules", "dist"] 15 | } 16 | -------------------------------------------------------------------------------- /examples/rwkv/models/rwkv_chat.tmpl: -------------------------------------------------------------------------------- 1 | The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob. 2 | 3 | Bob: Hello Alice, how are you doing? 4 | 5 | Alice: Hi Bob! Thanks, I'm fine. What about you? 6 | 7 | Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while? 8 | 9 | Alice: Not at all! I'm listening. 
10 | 11 | {{.Input}} 12 | 13 | Alice: -------------------------------------------------------------------------------- /examples/localai-webui/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: . 8 | dockerfile: Dockerfile 9 | ports: 10 | - 8080:8080 11 | env_file: 12 | - .env 13 | volumes: 14 | - ./models:/models:cached 15 | command: ["/usr/bin/local-ai"] 16 | 17 | frontend: 18 | image: quay.io/go-skynet/localai-frontend:master 19 | ports: 20 | - 3000:3000 -------------------------------------------------------------------------------- /examples/discord-bot/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | 18 | bot: 19 | image: quay.io/go-skynet/gpt-discord-bot:main 20 | env_file: 21 | - .env 22 | -------------------------------------------------------------------------------- /examples/slack-bot/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | 18 | bot: 19 | build: 20 | context: ./ChatGPT-in-Slack 21 | dockerfile: Dockerfile 22 | env_file: 23 | - .env 24 | -------------------------------------------------------------------------------- /examples/chatbot-ui/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | 18 | chatgpt: 19 | image: ghcr.io/mckaywrigley/chatbot-ui:main 20 | ports: 21 | - 3000:3000 22 | environment: 23 | - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' 24 | - 'OPENAI_API_HOST=http://api:8080' -------------------------------------------------------------------------------- /.github/workflows/release.yml.disabled: -------------------------------------------------------------------------------- 1 | name: goreleaser 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | goreleaser: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | - name: Set up Go 17 | uses: actions/setup-go@v3 18 | with: 19 | go-version: 1.18 20 | - name: Run GoReleaser 21 | uses: goreleaser/goreleaser-action@v4 22 | with: 23 | version: latest 24 | args: release --clean 25 | env: 26 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | 
"configurations": [ 4 | { 5 | "name": "Launch Go", 6 | "type": "go", 7 | "request": "launch", 8 | "mode": "debug", 9 | "program": "${workspaceFolder}/main.go", 10 | "args": [ 11 | "api" 12 | ], 13 | "env": { 14 | "C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2", 15 | "LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2", 16 | "DEBUG": "true" 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langchainjs-localai-example", 3 | "version": "0.1.0", 4 | "description": "Trivial Example of using langchain + the OpenAI API + LocalAI together", 5 | "main": "index.mjs", 6 | "scripts": { 7 | "build": "tsc --build", 8 | "clean": "tsc --build --clean", 9 | "start": "node --trace-warnings dist/index.mjs" 10 | }, 11 | "author": "dave@gray101.com", 12 | "license": "MIT", 13 | "devDependencies": { 14 | "@types/node": "^18.16.4", 15 | "typescript": "^5.0.4" 16 | }, 17 | "dependencies": { 18 | "langchain": "^0.0.67", 19 | "typeorm": "^0.3.15" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /tests/fixtures/config.yaml: -------------------------------------------------------------------------------- 1 | - name: list1 2 | parameters: 3 | model: testmodel 4 | top_p: 80 5 | top_k: 0.9 6 | temperature: 0.1 7 | context_size: 10 8 | stopwords: 9 | - "HUMAN:" 10 | - "### Response:" 11 | roles: 12 | user: "HUMAN:" 13 | system: "GPT:" 14 | template: 15 | completion: completion 16 | chat: ggml-gpt4all-j 17 | - name: list2 18 | parameters: 19 | top_p: 80 20 | top_k: 0.9 21 | temperature: 0.1 22 | model: testmodel 23 | context_size: 10 24 | stopwords: 25 | - "HUMAN:" 26 | - "### Response:" 27 | roles: 28 | user: "HUMAN:" 29 | system: "GPT:" 30 | template: 31 | completion: completion 32 | chat: ggml-gpt4all-j -------------------------------------------------------------------------------- /examples/slack-bot/README.md: -------------------------------------------------------------------------------- 1 | # Slack bot 2 | 3 | Slackbot using: https://github.com/seratch/ChatGPT-in-Slack 4 | 5 | ## Setup 6 | 7 | ```bash 8 | # Clone LocalAI 9 | git clone https://github.com/go-skynet/LocalAI 10 | 11 | cd LocalAI/examples/slack-bot 12 | 13 | git clone https://github.com/seratch/ChatGPT-in-Slack 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # Download gpt4all-j to models/ 19 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 20 | 21 | # Set the discord bot options (see: https://github.com/seratch/ChatGPT-in-Slack) 22 | cp -rfv .env.example .env 23 | vim .env 24 | 25 | # start with docker-compose 26 | docker-compose up -d --build 27 | ``` -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "node", 9 | "request": "launch", 10 | "name": "Launch Program", 11 | // "skipFiles": [ 12 | // "/**" 13 | // ], 14 | "program": "${workspaceFolder}\\dist\\index.mjs", 15 | "outFiles": [ 16 | "${workspaceFolder}/**/*.js" 17 | ] 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Python: Current File", 6 | "type": "python", 7 | "request": "launch", 8 | "program": "${file}", 9 | "console": "integratedTerminal", 10 | "redirectOutput": true, 11 | "justMyCode": false 12 | }, 13 | { 14 | "name": "Python: Attach to Port 5678", 15 | "type": "python", 16 | "request": "attach", 17 | "connect": { 18 | "host": "localhost", 19 | "port": 5678 20 | }, 21 | "justMyCode": false 22 | } 23 | ] 24 | } -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | async-timeout==4.0.2 4 | attrs==23.1.0 5 | certifi==2022.12.7 6 | charset-normalizer==3.1.0 7 | colorama==0.4.6 8 | dataclasses-json==0.5.7 9 | debugpy==1.6.7 10 | frozenlist==1.3.3 11 | greenlet==2.0.2 12 | idna==3.4 13 | langchain==0.0.159 14 | marshmallow==3.19.0 15 | marshmallow-enum==1.5.1 16 | multidict==6.0.4 17 | mypy-extensions==1.0.0 18 | numexpr==2.8.4 19 | numpy==1.24.3 20 | openai==0.27.6 21 | openapi-schema-pydantic==1.2.4 22 | packaging==23.1 23 | pydantic==1.10.7 24 | PyYAML==6.0 25 | requests==2.29.0 26 | SQLAlchemy==2.0.12 27 | tenacity==8.2.2 28 | tqdm==4.65.0 29 | typing-inspect==0.8.0 30 | typing_extensions==4.5.0 31 | urllib3==1.26.15 32 | yarl==1.9.2 33 | -------------------------------------------------------------------------------- /examples/localai-webui/README.md: -------------------------------------------------------------------------------- 1 | # localai-webui 2 | 3 | Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend). 4 | 5 | ![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png) 6 | 7 | ## Setup 8 | 9 | ```bash 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/localai-webui 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # Download any desired models to models/ in the parent LocalAI project dir 19 | # For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin 20 | 21 | # start with docker-compose 22 | docker-compose up -d --build 23 | ``` 24 | 25 | Open http://localhost:3000 for the Web UI. 
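As a quick sanity check before relying on the frontend, you can ask the API which models it has picked up. This is a sketch assuming the default port mapping from the compose file above; the exact names returned depend on what you downloaded into `models/`:

```bash
curl http://localhost:8080/v1/models
```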
26 | 27 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'tests' 3 | 4 | on: 5 | pull_request: 6 | push: 7 | branches: 8 | - master 9 | tags: 10 | - '*' 11 | 12 | jobs: 13 | ubuntu-latest: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Clone 18 | uses: actions/checkout@v3 19 | with: 20 | submodules: true 21 | - name: Dependencies 22 | run: | 23 | sudo apt-get update 24 | sudo apt-get install build-essential 25 | - name: Test 26 | run: | 27 | make test 28 | 29 | macOS-latest: 30 | runs-on: macOS-latest 31 | 32 | steps: 33 | - name: Clone 34 | uses: actions/checkout@v3 35 | with: 36 | submodules: true 37 | 38 | - name: Dependencies 39 | run: | 40 | brew update 41 | brew install sdl2 42 | - name: Test 43 | run: | 44 | make test -------------------------------------------------------------------------------- /examples/langchain-python/README.md: -------------------------------------------------------------------------------- 1 | ## Langchain-python 2 | 3 | Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html). 4 | 5 | To interact with langchain, you can just set the `OPENAI_API_BASE` URL and provide a token with a random string. 6 | 7 | See the example below: 8 | 9 | ``` 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/langchain-python 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # Download gpt4all-j to models/ 19 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 20 | 21 | # start with docker-compose 22 | docker-compose up -d --build 23 | 24 | 25 | pip install langchain 26 | pip install openai 27 | 28 | export OPENAI_API_BASE=http://localhost:8080 29 | export OPENAI_API_KEY=sk- 30 | 31 | python test.py 32 | # A good company name for a company that makes colorful socks would be "Colorsocks". 33 | 34 | python agent.py 35 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 go-skynet authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /.devcontainer/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | services: 3 | # Update this to the name of the service you want to work with in your docker-compose.yml file 4 | api: 5 | # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer 6 | # folder. Note that the path of the Dockerfile and context is relative to the *primary* 7 | # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile" 8 | # array). The sample below assumes your primary file is in the root of your project. 9 | # 10 | build: 11 | context: . 12 | dockerfile: .devcontainer/Dockerfile 13 | 14 | volumes: 15 | # Update this to wherever you want VS Code to mount the folder of your project 16 | - .:/workspace:cached 17 | 18 | # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust. 19 | # cap_add: 20 | # - SYS_PTRACE 21 | # security_opt: 22 | # - seccomp:unconfined 23 | 24 | # Overrides default command so things don't shut down after the process ends. 25 | command: /bin/sh -c "while sleep 1000; do :; done" 26 | 27 | -------------------------------------------------------------------------------- /examples/langchain/README.md: -------------------------------------------------------------------------------- 1 | # langchain 2 | 3 | Example of using langchain, with the standard OpenAI llm module, and LocalAI. Has docker compose profiles for both the Typescript and Python versions. 4 | 5 | **Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example. 6 | 7 | ## Setup 8 | 9 | ```bash 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/langchain 14 | 15 | # (optional) - Edit the example code in typescript. 16 | # vi ./langchainjs-localai-example/index.ts 17 | 18 | # Download gpt4all-j to models/ 19 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 20 | 21 | # start with docker-compose for typescript! 22 | docker-compose --profile ts up --build 23 | 24 | # or start with docker-compose for python! 25 | docker-compose --profile py up --build 26 | ``` 27 | 28 | ## Copyright 29 | 30 | Some of the example code in index.mts and full_demo.py is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code. -------------------------------------------------------------------------------- /examples/langchain/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.6' 2 | 3 | services: 4 | api: 5 | image: quay.io/go-skynet/local-ai:latest 6 | build: 7 | context: ../../ 8 | dockerfile: Dockerfile.dev 9 | ports: 10 | - 8080:8080 11 | environment: 12 | - DEBUG=true 13 | - MODELS_PATH=/models 14 | volumes: 15 | - ./models:/models:cached 16 | command: ["/usr/bin/local-ai" ] 17 | 18 | js: 19 | build: 20 | context: . 21 | dockerfile: JS.Dockerfile 22 | profiles: 23 | - js 24 | - ts 25 | depends_on: 26 | - "api" 27 | environment: 28 | - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' 29 | - 'OPENAI_API_BASE=http://api:8080/v1' 30 | - 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g' 31 | 32 | py: 33 | build: 34 | context: . 
35 | dockerfile: PY.Dockerfile 36 | profiles: 37 | - py 38 | depends_on: 39 | - "api" 40 | environment: 41 | - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' 42 | - 'OPENAI_API_BASE=http://api:8080/v1' 43 | - 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g' -------------------------------------------------------------------------------- /examples/query_data/store.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended) 4 | # os.environ['OPENAI_API_KEY']= "" 5 | 6 | from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext 7 | from langchain.llms.openai import OpenAI 8 | from llama_index import StorageContext, load_index_from_storage 9 | 10 | base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') 11 | 12 | # This example uses text-davinci-003 by default; feel free to change if desired 13 | llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path)) 14 | 15 | # Configure prompt parameters and initialise helper 16 | max_input_size = 512 17 | num_output = 512 18 | max_chunk_overlap = 30 19 | 20 | prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) 21 | 22 | # Load documents from the 'data' directory 23 | documents = SimpleDirectoryReader('data').load_data() 24 | service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 512) 25 | index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context) 26 | index.storage_context.persist(persist_dir="./storage") 27 | 28 | -------------------------------------------------------------------------------- /examples/query_data/query.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended) 4 | # os.environ['OPENAI_API_KEY']= "" 5 | 6 | from llama_index import LLMPredictor, PromptHelper, ServiceContext 7 | from langchain.llms.openai import OpenAI 8 | from llama_index import StorageContext, load_index_from_storage 9 | 10 | base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') 11 | 12 | # This example uses text-davinci-003 by default; feel free to change if desired 13 | llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path)) 14 | 15 | # Configure prompt parameters and initialise helper 16 | max_input_size = 1024 17 | num_output = 256 18 | max_chunk_overlap = 20 19 | 20 | prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) 21 | 22 | # Load documents from the 'data' directory 23 | service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper) 24 | 25 | # rebuild storage context 26 | storage_context = StorageContext.from_defaults(persist_dir='./storage') 27 | 28 | # load index 29 | index = load_index_from_storage(storage_context, service_context=service_context, ) 30 | 31 | query_engine = index.as_query_engine() 32 | 33 | data = input("Question: ") 34 | response = query_engine.query(data) 35 | print(response) 36 | 
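# Usage sketch for the two scripts above (assumed workflow, matching this example's README:
# LocalAI reachable at the base URL below, and your documents already copied into data/):
#
#   export OPENAI_API_BASE=http://localhost:8080/v1
#   export OPENAI_API_KEY=sk-
#   python store.py    # builds the vector index into ./storage
#   python query.py    # then type a question at the "Question:" prompt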
-------------------------------------------------------------------------------- /examples/query_data/update.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended) 4 | # os.environ['OPENAI_API_KEY']= "" 5 | 6 | from llama_index import LLMPredictor, PromptHelper, SimpleDirectoryReader, ServiceContext 7 | from langchain.llms.openai import OpenAI 8 | from llama_index import StorageContext, load_index_from_storage 9 | 10 | base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') 11 | 12 | # This example uses text-davinci-003 by default; feel free to change if desired 13 | llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path)) 14 | 15 | # Configure prompt parameters and initialise helper 16 | max_input_size = 512 17 | num_output = 256 18 | max_chunk_overlap = 20 19 | 20 | prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) 21 | 22 | # Load documents from the 'data' directory 23 | service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper) 24 | 25 | # rebuild storage context 26 | storage_context = StorageContext.from_defaults(persist_dir='./storage') 27 | 28 | # load index 29 | index = load_index_from_storage(storage_context, service_context=service_context, ) 30 | documents = SimpleDirectoryReader('data').load_data() 31 | index.refresh(documents) 32 | index.storage_context.persist(persist_dir="./storage") -------------------------------------------------------------------------------- /.github/workflows/bump_deps.yaml: -------------------------------------------------------------------------------- 1 | name: Bump dependencies 2 | on: 3 | schedule: 4 | - cron: 0 20 * * * 5 | workflow_dispatch: 6 | jobs: 7 | bump: 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | include: 12 | - repository: "go-skynet/go-gpt4all-j.cpp" 13 | variable: "GOGPT4ALLJ_VERSION" 14 | branch: "master" 15 | - repository: "go-skynet/go-llama.cpp" 16 | variable: "GOLLAMA_VERSION" 17 | branch: "master" 18 | - repository: "go-skynet/go-gpt2.cpp" 19 | variable: "GOGPT2_VERSION" 20 | branch: "master" 21 | - repository: "donomii/go-rwkv.cpp" 22 | variable: "RWKV_VERSION" 23 | branch: "main" 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v3 27 | - name: Bump dependencies 🔧 28 | run: | 29 | bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} 30 | - name: Create Pull Request 31 | uses: peter-evans/create-pull-request@v5 32 | with: 33 | token: ${{ secrets.UPDATE_BOT_TOKEN }} 34 | push-to-fork: ci-forks/LocalAI 35 | commit-message: ':arrow_up: Update ${{ matrix.repository }}' 36 | title: ':arrow_up: Update ${{ matrix.repository }}' 37 | branch: "update/${{ matrix.variable }}" 38 | body: Bump of ${{ matrix.repository }} version 39 | signoff: true 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /examples/langchain-python/agent.py: -------------------------------------------------------------------------------- 1 | ## This is a fork/based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6 2 | 3 | from io import StringIO 4 | import sys 5 | import os 6 | from typing import Dict, Optional 7 | 8 | from langchain.agents import load_tools 9 | from langchain.agents import 
initialize_agent 10 | from langchain.agents.tools import Tool 11 | from langchain.llms import OpenAI 12 | 13 | base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') 14 | model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo') 15 | 16 | class PythonREPL: 17 | """Simulates a standalone Python REPL.""" 18 | 19 | def __init__(self): 20 | pass 21 | 22 | def run(self, command: str) -> str: 23 | """Run command and returns anything printed.""" 24 | old_stdout = sys.stdout 25 | sys.stdout = mystdout = StringIO() 26 | try: 27 | exec(command, globals()) 28 | sys.stdout = old_stdout 29 | output = mystdout.getvalue() 30 | except Exception as e: 31 | sys.stdout = old_stdout 32 | output = str(e) 33 | return output 34 | 35 | llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name) 36 | python_repl = Tool( 37 | "Python REPL", 38 | PythonREPL().run, 39 | """A Python shell. Use this to execute python commands. Input should be a valid python command. 40 | If you expect output it should be printed out.""", 41 | ) 42 | tools = [python_repl] 43 | agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True) 44 | agent.run("What is the 10th fibonacci number?") -------------------------------------------------------------------------------- /examples/chatbot-ui/README.md: -------------------------------------------------------------------------------- 1 | # chatbot-ui 2 | 3 | Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui). 4 | 5 | ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) 6 | 7 | ## Setup 8 | 9 | ```bash 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/chatbot-ui 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # Download gpt4all-j to models/ 19 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 20 | 21 | # start with docker-compose 22 | docker-compose up -d --build 23 | ``` 24 | 25 | ## Pointing chatbot-ui to a separately managed LocalAI service 26 | 27 | If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<>` below): 28 | ``` 29 | version: '3.6' 30 | 31 | services: 32 | chatgpt: 33 | image: ghcr.io/mckaywrigley/chatbot-ui:main 34 | ports: 35 | - 3000:3000 36 | environment: 37 | - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' 38 | - 'OPENAI_API_HOST=http://<>:8080' 39 | ``` 40 | 41 | Once you've edited the Dockerfile, you can start it with `docker compose up`, then browse to `http://localhost:3000`. 42 | 43 | ## Accessing chatbot-ui 44 | 45 | Open http://localhost:3000 for the Web UI. 
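If the UI loads but responses never arrive, it can help to first confirm that the `gpt-3.5-turbo` model defined in `models/gpt-3.5-turbo.yaml` answers over the API directly. A sketch of such a check, assuming the default port mapping above (the first request may take a while as the model loads):

```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-3.5-turbo",
  "messages": [{"role": "user", "content": "How are you?"}],
  "temperature": 0.2
}'
```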
46 | 47 | -------------------------------------------------------------------------------- /examples/langchain/langchainpy-localai-example/full_demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | from langchain.chat_models import ChatOpenAI 5 | from langchain import PromptTemplate, LLMChain 6 | from langchain.prompts.chat import ( 7 | ChatPromptTemplate, 8 | SystemMessagePromptTemplate, 9 | AIMessagePromptTemplate, 10 | HumanMessagePromptTemplate, 11 | ) 12 | from langchain.schema import ( 13 | AIMessage, 14 | HumanMessage, 15 | SystemMessage 16 | ) 17 | 18 | # This logging incantation makes it easy to see that you're actually reaching your LocalAI instance rather than OpenAI. 19 | logging.basicConfig(level=logging.DEBUG) 20 | 21 | print('Langchain + LocalAI PYTHON Tests') 22 | 23 | base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1') 24 | key = os.environ.get('OPENAI_API_KEY', '-') 25 | model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo') 26 | 27 | 28 | chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100) 29 | 30 | print("Created ChatOpenAI for ", chat.model_name) 31 | 32 | template = "You are a helpful assistant that translates {input_language} to {output_language}. The next message will be a sentence in {input_language}. Respond ONLY with the translation in {output_language}. Do not respond in {input_language}!" 33 | system_message_prompt = SystemMessagePromptTemplate.from_template(template) 34 | human_template = "{text}" 35 | human_message_prompt = HumanMessagePromptTemplate.from_template(human_template) 36 | 37 | chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) 38 | 39 | print("ABOUT to execute") 40 | 41 | # get a chat completion from the formatted messages 42 | response = chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages()) 43 | 44 | print(response) 45 | 46 | print("."); -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose 3 | { 4 | "name": "Existing Docker Compose (Extend)", 5 | 6 | // Update the 'dockerComposeFile' list if you have more compose files or use different names. 7 | // The .devcontainer/docker-compose.yml file contains any overrides you need/want to make. 8 | "dockerComposeFile": [ 9 | "../docker-compose.yaml", 10 | "docker-compose.yml" 11 | ], 12 | 13 | // The 'service' property is the name of the service for the container that VS Code should 14 | // use. Update this value and .devcontainer/docker-compose.yml to the real service name. 15 | "service": "api", 16 | 17 | // The optional 'workspaceFolder' property is the path VS Code should open by default when 18 | // connected. This is typically a file mount in .devcontainer/docker-compose.yml 19 | "workspaceFolder": "/workspace", 20 | 21 | "features": { 22 | "ghcr.io/devcontainers/features/go:1": {}, 23 | "ghcr.io/azutake/devcontainer-features/go-packages-install:0": {} 24 | }, 25 | 26 | // Features to add to the dev container. More info: https://containers.dev/features. 
27 | // "features": {}, 28 | 29 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 30 | // "forwardPorts": [], 31 | 32 | // Uncomment the next line if you want start specific services in your Docker Compose config. 33 | // "runServices": [], 34 | 35 | // Uncomment the next line if you want to keep your containers running after VS Code shuts down. 36 | // "shutdownAction": "none", 37 | 38 | // Uncomment the next line to run commands after the container is created. 39 | "postCreateCommand": "make prepare" 40 | 41 | // Configure tool-specific properties. 42 | // "customizations": {}, 43 | 44 | // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. 45 | // "remoteUser": "devcontainer" 46 | } 47 | -------------------------------------------------------------------------------- /examples/query_data/README.md: -------------------------------------------------------------------------------- 1 | # Data query example 2 | 3 | This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents. 4 | 5 | It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html). 6 | 7 | Summary of the steps: 8 | 9 | - prepare the dataset (and store it into `data`) 10 | - prepare a vector index database to run queries on 11 | - run queries 12 | 13 | ## Requirements 14 | 15 | For this in order to work, you will need LocalAI and a model compatible with the `llama.cpp` backend. This is will not work with gpt4all, however you can mix models (use a llama.cpp one to build the index database, and gpt4all to query it). 16 | 17 | The example uses `WizardLM` for both embeddings and Q&A. Edit the config files in `models/` accordingly to specify the model you use (change `HERE` in the configuration files). 18 | 19 | You will also need a training data set. Copy that over `data`. 20 | 21 | ## Setup 22 | 23 | Start the API: 24 | 25 | ```bash 26 | # Clone LocalAI 27 | git clone https://github.com/go-skynet/LocalAI 28 | 29 | cd LocalAI/examples/query_data 30 | 31 | # Copy your models, edit config files accordingly 32 | 33 | # start with docker-compose 34 | docker-compose up -d --build 35 | ``` 36 | 37 | ### Create a storage 38 | 39 | In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM. 40 | 41 | ```bash 42 | export OPENAI_API_BASE=http://localhost:8080/v1 43 | export OPENAI_API_KEY=sk- 44 | 45 | python store.py 46 | ``` 47 | 48 | After it finishes, a directory "storage" will be created with the vector index database. 49 | 50 | ## Query 51 | 52 | We can now query the dataset. 
53 | 54 | ```bash 55 | export OPENAI_API_BASE=http://localhost:8080/v1 56 | export OPENAI_API_KEY=sk- 57 | 58 | python query.py 59 | ``` 60 | 61 | ## Update 62 | 63 | To update our vector database, run `update.py` 64 | 65 | ```bash 66 | export OPENAI_API_BASE=http://localhost:8080/v1 67 | export OPENAI_API_KEY=sk- 68 | 69 | python update.py 70 | ``` -------------------------------------------------------------------------------- /examples/discord-bot/README.md: -------------------------------------------------------------------------------- 1 | # discord-bot 2 | 3 | ![Screenshot from 2023-05-01 07-58-19](https://user-images.githubusercontent.com/2420543/235413924-0cb2e75b-f2d6-4119-8610-44386e44afb8.png) 4 | 5 | ## Setup 6 | 7 | ```bash 8 | # Clone LocalAI 9 | git clone https://github.com/go-skynet/LocalAI 10 | 11 | cd LocalAI/examples/discord-bot 12 | 13 | # (optional) Checkout a specific LocalAI tag 14 | # git checkout -b build 15 | 16 | # Download gpt4all-j to models/ 17 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 18 | 19 | # Set the discord bot options (see: https://github.com/go-skynet/gpt-discord-bot#setup) 20 | cp -rfv .env.example .env 21 | vim .env 22 | 23 | # start with docker-compose 24 | docker-compose up -d --build 25 | ``` 26 | 27 | Note: see setup options here: https://github.com/go-skynet/gpt-discord-bot#setup 28 | 29 | Open up the URL in the console and give permission to the bot in your server. Start a thread with `/chat ..` 30 | 31 | ## Kubernetes 32 | 33 | - install the local-ai chart first 34 | - change OPENAI_API_BASE to point to the API address and apply the discord-bot manifest: 35 | 36 | ```yaml 37 | apiVersion: v1 38 | kind: Namespace 39 | metadata: 40 | name: discord-bot 41 | --- 42 | apiVersion: apps/v1 43 | kind: Deployment 44 | metadata: 45 | name: localai 46 | namespace: discord-bot 47 | labels: 48 | app: localai 49 | spec: 50 | selector: 51 | matchLabels: 52 | app: localai 53 | replicas: 1 54 | template: 55 | metadata: 56 | labels: 57 | app: localai 58 | name: localai 59 | spec: 60 | containers: 61 | - name: localai-discord 62 | env: 63 | - name: OPENAI_API_KEY 64 | value: "x" 65 | - name: DISCORD_BOT_TOKEN 66 | value: "" 67 | - name: DISCORD_CLIENT_ID 68 | value: "" 69 | - name: OPENAI_API_BASE 70 | value: "http://local-ai.default.svc.cluster.local:8080" 71 | - name: ALLOWED_SERVER_IDS 72 | value: "xx" 73 | - name: SERVER_TO_MODERATION_CHANNEL 74 | value: "1:1" 75 | image: quay.io/go-skynet/gpt-discord-bot:main 76 | ``` 77 | -------------------------------------------------------------------------------- /examples/rwkv/README.md: -------------------------------------------------------------------------------- 1 | # rwkv 2 | 3 | Example of how to run rwkv models. 4 | 5 | ## Run models 6 | 7 | Setup: 8 | 9 | ```bash 10 | # Clone LocalAI 11 | git clone https://github.com/go-skynet/LocalAI 12 | 13 | cd LocalAI/examples/rwkv 14 | 15 | # (optional) Checkout a specific LocalAI tag 16 | # git checkout -b build 17 | 18 | # build the tooling image to convert an rwkv model locally: 19 | docker build -t rwkv-converter -f Dockerfile.build . 20 | 21 | # download and convert a model (one-off) - it's going to be fast on CPU too! 
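# The repo directory is mounted at /data, so the converted ggml model ends up at ./models/rwkv on the host.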
22 | docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv 23 | 24 | # Get the tokenizer 25 | wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json 26 | 27 | # start with docker-compose 28 | docker-compose up -d --build 29 | ``` 30 | 31 | Test it out: 32 | 33 | ```bash 34 | curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ 35 | "model": "gpt-3.5-turbo", 36 | "prompt": "A long time ago, in a galaxy far away", 37 | "max_tokens": 100, 38 | "temperature": 0.9, "top_p": 0.8, "top_k": 80 39 | }' 40 | 41 | # {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}} 42 | 43 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 44 | "model": "gpt-3.5-turbo", 45 | "messages": [{"role": "user", "content": "How are you?"}], 46 | "temperature": 0.9, "top_p": 0.8, "top_k": 80 47 | }' 48 | 49 | # {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}} 50 | ``` 51 | 52 | ### Fine tuning 53 | 54 | See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb). 55 | 56 | ## See also 57 | 58 | - [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) 59 | - [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) -------------------------------------------------------------------------------- /.github/workflows/image.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'build container images' 3 | 4 | on: 5 | pull_request: 6 | push: 7 | branches: 8 | - master 9 | tags: 10 | - '*' 11 | 12 | jobs: 13 | docker: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v3 18 | 19 | - name: Prepare 20 | id: prep 21 | run: | 22 | DOCKER_IMAGE=quay.io/go-skynet/local-ai 23 | VERSION=master 24 | SHORTREF=${GITHUB_SHA::8} 25 | 26 | # If this is git tag, use the tag name as a docker tag 27 | if [[ $GITHUB_REF == refs/tags/* ]]; then 28 | VERSION=${GITHUB_REF#refs/tags/} 29 | fi 30 | TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}" 31 | 32 | # If the VERSION looks like a version number, assume that 33 | # this is the most recent version of the image and also 34 | # tag it 'latest'. 35 | if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then 36 | TAGS="$TAGS,${DOCKER_IMAGE}:latest" 37 | fi 38 | 39 | # Set output parameters. 
40 | echo ::set-output name=tags::${TAGS} 41 | echo ::set-output name=docker_image::${DOCKER_IMAGE} 42 | 43 | - name: Set up QEMU 44 | uses: docker/setup-qemu-action@master 45 | with: 46 | platforms: all 47 | 48 | - name: Set up Docker Buildx 49 | id: buildx 50 | uses: docker/setup-buildx-action@master 51 | 52 | - name: Login to DockerHub 53 | if: github.event_name != 'pull_request' 54 | uses: docker/login-action@v2 55 | with: 56 | registry: quay.io 57 | username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }} 58 | password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }} 59 | - name: Build 60 | if: github.event_name != 'pull_request' 61 | uses: docker/build-push-action@v4 62 | with: 63 | builder: ${{ steps.buildx.outputs.name }} 64 | context: . 65 | file: ./Dockerfile 66 | platforms: linux/amd64,linux/arm64 67 | push: true 68 | tags: ${{ steps.prep.outputs.tags }} 69 | - name: Build PRs 70 | if: github.event_name == 'pull_request' 71 | uses: docker/build-push-action@v4 72 | with: 73 | builder: ${{ steps.buildx.outputs.name }} 74 | context: . 75 | file: ./Dockerfile 76 | platforms: linux/amd64 77 | push: false 78 | tags: ${{ steps.prep.outputs.tags }} -------------------------------------------------------------------------------- /examples/langchain/langchainjs-localai-example/src/index.mts: -------------------------------------------------------------------------------- 1 | import { OpenAIChat } from "langchain/llms/openai"; 2 | import { loadQAStuffChain } from "langchain/chains"; 3 | import { Document } from "langchain/document"; 4 | import { initializeAgentExecutorWithOptions } from "langchain/agents"; 5 | import {Calculator} from "langchain/tools/calculator"; 6 | 7 | const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1'; 8 | const fakeApiKey = process.env['OPENAI_API_KEY'] || '-'; 9 | const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo'; 10 | 11 | function getModel(): OpenAIChat { 12 | return new OpenAIChat({ 13 | prefixMessages: [ 14 | { 15 | role: "system", 16 | content: "You are a helpful assistant that answers in pirate language", 17 | }, 18 | ], 19 | modelName: modelName, 20 | maxTokens: 50, 21 | openAIApiKey: fakeApiKey, 22 | maxRetries: 2 23 | }, { 24 | basePath: pathToLocalAi, 25 | apiKey: fakeApiKey, 26 | }); 27 | } 28 | 29 | // Minimal example. 30 | export const run = async () => { 31 | const model = getModel(); 32 | console.log(`about to model.call at ${new Date().toUTCString()}`); 33 | const res = await model.call( 34 | "What would be a good company name a company that makes colorful socks?" 35 | ); 36 | console.log(`${new Date().toUTCString()}`); 37 | console.log({ res }); 38 | }; 39 | 40 | await run(); 41 | 42 | // This example uses the `StuffDocumentsChain` 43 | export const run2 = async () => { 44 | const model = getModel(); 45 | const chainA = loadQAStuffChain(model); 46 | const docs = [ 47 | new Document({ pageContent: "Harrison went to Harvard." }), 48 | new Document({ pageContent: "Ankush went to Princeton." }), 49 | ]; 50 | const resA = await chainA.call({ 51 | input_documents: docs, 52 | question: "Where did Harrison go to college?", 53 | }); 54 | console.log({ resA }); 55 | }; 56 | 57 | await run2(); 58 | 59 | // Quickly thrown together example of using tools + agents. 60 | // This seems like it should work, but it doesn't yet. 
61 | export const temporarilyBrokenToolTest = async () => { 62 | const model = getModel(); 63 | 64 | const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, { 65 | agentType: "zero-shot-react-description", 66 | }); 67 | 68 | console.log("Loaded agent."); 69 | 70 | const input = `What is the value of (500 *2) + 350 - 13?`; 71 | 72 | console.log(`Executing with input "${input}"...`); 73 | 74 | const result = await executor.call({ input }); 75 | 76 | console.log(`Got output ${result.output}`); 77 | } 78 | 79 | await temporarilyBrokenToolTest(); 80 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/go-skynet/LocalAI 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be 7 | github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 8 | github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c 9 | github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638 10 | github.com/gofiber/fiber/v2 v2.45.0 11 | github.com/hashicorp/go-multierror v1.1.1 12 | github.com/onsi/ginkgo/v2 v2.9.4 13 | github.com/onsi/gomega v1.27.6 14 | github.com/otiai10/openaigo v1.1.0 15 | github.com/rs/zerolog v1.29.1 16 | github.com/sashabaranov/go-openai v1.9.3 17 | github.com/swaggo/swag v1.16.1 18 | github.com/urfave/cli/v2 v2.25.3 19 | github.com/valyala/fasthttp v1.47.0 20 | gopkg.in/yaml.v3 v3.0.1 21 | ) 22 | 23 | require ( 24 | github.com/KyleBanks/depth v1.2.1 // indirect 25 | github.com/PuerkitoBio/purell v1.1.1 // indirect 26 | github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect 27 | github.com/andybalholm/brotli v1.0.5 // indirect 28 | github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect 29 | github.com/go-logr/logr v1.2.4 // indirect 30 | github.com/go-openapi/jsonpointer v0.19.5 // indirect 31 | github.com/go-openapi/jsonreference v0.19.6 // indirect 32 | github.com/go-openapi/spec v0.20.4 // indirect 33 | github.com/go-openapi/swag v0.19.15 // indirect 34 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect 35 | github.com/google/go-cmp v0.5.9 // indirect 36 | github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect 37 | github.com/google/uuid v1.3.0 // indirect 38 | github.com/hashicorp/errwrap v1.0.0 // indirect 39 | github.com/josharian/intern v1.0.0 // indirect 40 | github.com/klauspost/compress v1.16.3 // indirect 41 | github.com/mailru/easyjson v0.7.6 // indirect 42 | github.com/mattn/go-colorable v0.1.13 // indirect 43 | github.com/mattn/go-isatty v0.0.18 // indirect 44 | github.com/mattn/go-runewidth v0.0.14 // indirect 45 | github.com/philhofer/fwd v1.1.2 // indirect 46 | github.com/rivo/uniseg v0.2.0 // indirect 47 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 48 | github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect 49 | github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect 50 | github.com/tinylib/msgp v1.1.8 // indirect 51 | github.com/valyala/bytebufferpool v1.0.0 // indirect 52 | github.com/valyala/tcplisten v1.0.0 // indirect 53 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect 54 | golang.org/x/net v0.9.0 // indirect 55 | golang.org/x/sys v0.8.0 // indirect 56 | golang.org/x/text v0.9.0 // indirect 57 | golang.org/x/tools v0.8.0 // indirect 58 | gopkg.in/yaml.v2 v2.4.0 // indirect 59 | ) 60 | 
-------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | 8 | api "github.com/go-skynet/LocalAI/api" 9 | model "github.com/go-skynet/LocalAI/pkg/model" 10 | "github.com/rs/zerolog" 11 | "github.com/rs/zerolog/log" 12 | "github.com/urfave/cli/v2" 13 | ) 14 | 15 | func main() { 16 | log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) 17 | 18 | path, err := os.Getwd() 19 | if err != nil { 20 | log.Error().Msgf("error: %s", err.Error()) 21 | os.Exit(1) 22 | } 23 | 24 | app := &cli.App{ 25 | Name: "LocalAI", 26 | Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.", 27 | Flags: []cli.Flag{ 28 | &cli.BoolFlag{ 29 | Name: "f16", 30 | EnvVars: []string{"F16"}, 31 | }, 32 | &cli.BoolFlag{ 33 | Name: "debug", 34 | EnvVars: []string{"DEBUG"}, 35 | }, 36 | &cli.IntFlag{ 37 | Name: "threads", 38 | DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.", 39 | EnvVars: []string{"THREADS"}, 40 | Value: 4, 41 | }, 42 | &cli.StringFlag{ 43 | Name: "models-path", 44 | DefaultText: "Path containing models used for inferencing", 45 | EnvVars: []string{"MODELS_PATH"}, 46 | Value: filepath.Join(path, "models"), 47 | }, 48 | &cli.StringFlag{ 49 | Name: "config-file", 50 | DefaultText: "Config file", 51 | EnvVars: []string{"CONFIG_FILE"}, 52 | }, 53 | &cli.StringFlag{ 54 | Name: "address", 55 | DefaultText: "Bind address for the API server.", 56 | EnvVars: []string{"ADDRESS"}, 57 | Value: ":8080", 58 | }, 59 | &cli.IntFlag{ 60 | Name: "context-size", 61 | DefaultText: "Default context size of the model", 62 | EnvVars: []string{"CONTEXT_SIZE"}, 63 | Value: 512, 64 | }, 65 | }, 66 | Description: ` 67 | LocalAI is a drop-in replacement OpenAI API which runs inference locally. 68 | 69 | Some of the models compatible are: 70 | - Vicuna 71 | - Koala 72 | - GPT4ALL 73 | - GPT4ALL-J 74 | - Cerebras 75 | - Alpaca 76 | - StableLM (ggml quantized) 77 | 78 | It uses llama.cpp, ggml and gpt4all as backend with golang c bindings. 
79 | `, 80 | UsageText: `local-ai [options]`, 81 | Copyright: "go-skynet authors", 82 | Action: func(ctx *cli.Context) error { 83 | fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path")) 84 | return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address")) 85 | }, 86 | } 87 | 88 | err = app.Run(os.Args) 89 | if err != nil { 90 | log.Error().Msgf("error: %s", err.Error()) 91 | os.Exit(1) 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /api/api.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "errors" 5 | 6 | model "github.com/go-skynet/LocalAI/pkg/model" 7 | "github.com/gofiber/fiber/v2" 8 | "github.com/gofiber/fiber/v2/middleware/cors" 9 | "github.com/gofiber/fiber/v2/middleware/logger" 10 | "github.com/gofiber/fiber/v2/middleware/recover" 11 | "github.com/rs/zerolog" 12 | "github.com/rs/zerolog/log" 13 | ) 14 | 15 | func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App { 16 | zerolog.SetGlobalLevel(zerolog.InfoLevel) 17 | if debug { 18 | zerolog.SetGlobalLevel(zerolog.DebugLevel) 19 | } 20 | 21 | // Return errors as JSON responses 22 | app := fiber.New(fiber.Config{ 23 | DisableStartupMessage: disableMessage, 24 | // Override default error handler 25 | ErrorHandler: func(ctx *fiber.Ctx, err error) error { 26 | // Status code defaults to 500 27 | code := fiber.StatusInternalServerError 28 | 29 | // Retrieve the custom status code if it's a *fiber.Error 30 | var e *fiber.Error 31 | if errors.As(err, &e) { 32 | code = e.Code 33 | } 34 | 35 | // Send custom error page 36 | return ctx.Status(code).JSON( 37 | ErrorResponse{ 38 | Error: &APIError{Message: err.Error(), Code: code}, 39 | }, 40 | ) 41 | }, 42 | }) 43 | 44 | if debug { 45 | app.Use(logger.New(logger.Config{ 46 | Format: "[${ip}]:${port} ${status} - ${method} ${path}\n", 47 | })) 48 | } 49 | 50 | cm := make(ConfigMerger) 51 | if err := cm.LoadConfigs(loader.ModelPath); err != nil { 52 | log.Error().Msgf("error loading config files: %s", err.Error()) 53 | } 54 | 55 | if configFile != "" { 56 | if err := cm.LoadConfigFile(configFile); err != nil { 57 | log.Error().Msgf("error loading config file: %s", err.Error()) 58 | } 59 | } 60 | 61 | if debug { 62 | for k, v := range cm { 63 | log.Debug().Msgf("Model: %s (config: %+v)", k, v) 64 | } 65 | } 66 | // Default middleware config 67 | app.Use(recover.New()) 68 | app.Use(cors.New()) 69 | 70 | // openAI compatible API endpoint 71 | app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16)) 72 | app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16)) 73 | 74 | app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16)) 75 | app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16)) 76 | 77 | app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16)) 78 | app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16)) 79 | 80 | app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) 81 | app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) 82 | 83 | // 
/v1/engines/{engine_id}/embeddings 84 | 85 | app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) 86 | 87 | app.Get("/v1/models", listModels(loader, cm)) 88 | app.Get("/models", listModels(loader, cm)) 89 | 90 | return app 91 | } 92 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Here is a list of projects that can easily be integrated with the LocalAI backend. 4 | 5 | ### Projects 6 | 7 | 8 | ### Chatbot-UI 9 | 10 | _by [@mkellerman](https://github.com/mkellerman)_ 11 | 12 | ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) 13 | 14 | This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui). 15 | 16 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) 17 | 18 | ### Discord bot 19 | 20 | _by [@mudler](https://github.com/mudler)_ 21 | 22 | Run a discord bot which lets you talk directly with a model 23 | 24 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/), or for a live demo you can talk with our bot in #random-bot in our discord server. 25 | 26 | ### Langchain 27 | 28 | _by [@dave-gray101](https://github.com/dave-gray101)_ 29 | 30 | A ready to use example to show e2e how to integrate LocalAI with langchain 31 | 32 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/) 33 | 34 | ### Langchain Python 35 | 36 | _by [@mudler](https://github.com/mudler)_ 37 | 38 | A ready to use example to show e2e how to integrate LocalAI with langchain 39 | 40 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/) 41 | 42 | ### LocalAI WebUI 43 | 44 | _by [@dhruvgera](https://github.com/dhruvgera)_ 45 | 46 | ![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png) 47 | 48 | A light, community-maintained web interface for LocalAI 49 | 50 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/) 51 | 52 | ### How to run rwkv models 53 | 54 | _by [@mudler](https://github.com/mudler)_ 55 | 56 | A full example on how to run RWKV models with LocalAI 57 | 58 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/) 59 | 60 | ### Slack bot 61 | 62 | _by [@mudler](https://github.com/mudler)_ 63 | 64 | Run a slack bot which lets you talk directly with a model 65 | 66 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/) 67 | 68 | ### Question answering on documents 69 | 70 | _by [@mudler](https://github.com/mudler)_ 71 | 72 | Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents. 73 | 74 | [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/) 75 | 76 | ### Template for Runpod.io 77 | 78 | _by [@fHachenberg](https://github.com/fHachenberg)_ 79 | 80 | Allows to run any LocalAI-compatible model as a backend on the servers of https://runpod.io 81 | 82 | [Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra) 83 | 84 | ## Want to contribute? 
85 | 86 | Create an issue, and put `Example: ` in the title! We will post your examples here. 87 | -------------------------------------------------------------------------------- /api/api_test.go: -------------------------------------------------------------------------------- 1 | package api_test 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | . "github.com/go-skynet/LocalAI/api" 8 | "github.com/go-skynet/LocalAI/pkg/model" 9 | "github.com/gofiber/fiber/v2" 10 | . "github.com/onsi/ginkgo/v2" 11 | . "github.com/onsi/gomega" 12 | 13 | openaigo "github.com/otiai10/openaigo" 14 | "github.com/sashabaranov/go-openai" 15 | ) 16 | 17 | var _ = Describe("API test", func() { 18 | 19 | var app *fiber.App 20 | var modelLoader *model.ModelLoader 21 | var client *openai.Client 22 | var client2 *openaigo.Client 23 | Context("API query", func() { 24 | BeforeEach(func() { 25 | modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) 26 | app = App("", modelLoader, 1, 512, false, true, true) 27 | go app.Listen("127.0.0.1:9090") 28 | 29 | defaultConfig := openai.DefaultConfig("") 30 | defaultConfig.BaseURL = "http://127.0.0.1:9090/v1" 31 | 32 | client2 = openaigo.NewClient("") 33 | client2.BaseURL = defaultConfig.BaseURL 34 | 35 | // Wait for API to be ready 36 | client = openai.NewClientWithConfig(defaultConfig) 37 | Eventually(func() error { 38 | _, err := client.ListModels(context.TODO()) 39 | return err 40 | }, "2m").ShouldNot(HaveOccurred()) 41 | }) 42 | AfterEach(func() { 43 | app.Shutdown() 44 | }) 45 | It("returns the models list", func() { 46 | models, err := client.ListModels(context.TODO()) 47 | Expect(err).ToNot(HaveOccurred()) 48 | Expect(len(models.Models)).To(Equal(3)) 49 | Expect(models.Models[0].ID).To(Equal("testmodel")) 50 | }) 51 | It("can generate completions", func() { 52 | resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"}) 53 | Expect(err).ToNot(HaveOccurred()) 54 | Expect(len(resp.Choices)).To(Equal(1)) 55 | Expect(resp.Choices[0].Text).ToNot(BeEmpty()) 56 | }) 57 | 58 | It("can generate chat completions ", func() { 59 | resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) 60 | Expect(err).ToNot(HaveOccurred()) 61 | Expect(len(resp.Choices)).To(Equal(1)) 62 | Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) 63 | }) 64 | 65 | It("can generate completions from model configs", func() { 66 | resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"}) 67 | Expect(err).ToNot(HaveOccurred()) 68 | Expect(len(resp.Choices)).To(Equal(1)) 69 | Expect(resp.Choices[0].Text).ToNot(BeEmpty()) 70 | }) 71 | 72 | It("can generate chat completions from model configs", func() { 73 | resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) 74 | Expect(err).ToNot(HaveOccurred()) 75 | Expect(len(resp.Choices)).To(Equal(1)) 76 | Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) 77 | }) 78 | 79 | It("returns errors", func() { 80 | _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"}) 81 | Expect(err).To(HaveOccurred()) 82 | Expect(err.Error()).To(ContainSubstring("error, 
status code: 500, message: could not load model - all backends returned error: 5 errors occurred:")) 83 | }) 84 | 85 | }) 86 | 87 | Context("Config file", func() { 88 | BeforeEach(func() { 89 | modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) 90 | app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true) 91 | go app.Listen("127.0.0.1:9090") 92 | 93 | defaultConfig := openai.DefaultConfig("") 94 | defaultConfig.BaseURL = "http://127.0.0.1:9090/v1" 95 | client2 = openaigo.NewClient("") 96 | client2.BaseURL = defaultConfig.BaseURL 97 | // Wait for API to be ready 98 | client = openai.NewClientWithConfig(defaultConfig) 99 | Eventually(func() error { 100 | _, err := client.ListModels(context.TODO()) 101 | return err 102 | }, "2m").ShouldNot(HaveOccurred()) 103 | }) 104 | AfterEach(func() { 105 | app.Shutdown() 106 | }) 107 | It("can generate chat completions from config file", func() { 108 | 109 | models, err := client.ListModels(context.TODO()) 110 | Expect(err).ToNot(HaveOccurred()) 111 | Expect(len(models.Models)).To(Equal(5)) 112 | Expect(models.Models[0].ID).To(Equal("testmodel")) 113 | }) 114 | It("can generate chat completions from config file", func() { 115 | resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) 116 | Expect(err).ToNot(HaveOccurred()) 117 | Expect(len(resp.Choices)).To(Equal(1)) 118 | Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) 119 | }) 120 | It("can generate chat completions from config file", func() { 121 | resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) 122 | Expect(err).ToNot(HaveOccurred()) 123 | Expect(len(resp.Choices)).To(Equal(1)) 124 | Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) 125 | }) 126 | It("can generate edit completions from config file", func() { 127 | request := openaigo.EditCreateRequestBody{ 128 | Model: "list2", 129 | Instruction: "foo", 130 | Input: "bar", 131 | } 132 | resp, err := client2.CreateEdit(context.Background(), request) 133 | Expect(err).ToNot(HaveOccurred()) 134 | Expect(len(resp.Choices)).To(Equal(1)) 135 | Expect(resp.Choices[0].Text).ToNot(BeEmpty()) 136 | }) 137 | }) 138 | }) 139 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GOCMD=go 2 | GOTEST=$(GOCMD) test 3 | GOVET=$(GOCMD) vet 4 | BINARY_NAME=local-ai 5 | 6 | GOLLAMA_VERSION?=cf9b522db63898dcc5eb86e37c979ab85cbd583e 7 | GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109 8 | GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa 9 | RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp 10 | RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58 11 | 12 | GREEN := $(shell tput -Txterm setaf 2) 13 | YELLOW := $(shell tput -Txterm setaf 3) 14 | WHITE := $(shell tput -Txterm setaf 7) 15 | CYAN := $(shell tput -Txterm setaf 6) 16 | RESET := $(shell tput -Txterm sgr0) 17 | 18 | C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv 19 | LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv 20 | 21 | # Use this if you want to set the default behavior 22 | ifndef BUILD_TYPE 
23 | BUILD_TYPE:=default 24 | endif 25 | 26 | ifeq ($(BUILD_TYPE), "generic") 27 | GENERIC_PREFIX:=generic- 28 | else 29 | GENERIC_PREFIX:= 30 | endif 31 | 32 | .PHONY: all test build vendor 33 | 34 | all: help 35 | 36 | ## GPT4ALL-J 37 | go-gpt4all-j: 38 | git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j 39 | cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1 40 | # This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml.. 41 | @find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + 42 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + 43 | @find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + 44 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + 45 | @find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + 46 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} + 47 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} + 48 | @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} + 49 | 50 | ## RWKV 51 | go-rwkv: 52 | git clone --recurse-submodules $(RWKV_REPO) go-rwkv 53 | cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1 54 | @find ./go-rwkv -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} + 55 | @find ./go-rwkv -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} + 56 | @find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} + 57 | 58 | go-rwkv/librwkv.a: go-rwkv 59 | cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a .. 60 | 61 | go-gpt4all-j/libgptj.a: go-gpt4all-j 62 | $(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a 63 | 64 | ## CEREBRAS GPT 65 | go-gpt2: 66 | git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2 67 | cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1 68 | # This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml.. 
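# Renaming the exported ggml_*/gpt_*/json_* symbols gives go-gpt2 its own private copy of ggml, so the static libraries from go-llama, go-gpt4all-j and go-gpt2 can all be linked into the same binary without duplicate-symbol errors.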
69 | @find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + 70 | @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + 71 | @find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} + 72 | @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} + 73 | @find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} + 74 | @find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} + 75 | 76 | go-gpt2/libgpt2.a: go-gpt2 77 | $(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a 78 | 79 | go-llama: 80 | git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama 81 | cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1 82 | 83 | go-llama/libbinding.a: go-llama 84 | $(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a 85 | 86 | replace: 87 | $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama 88 | $(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j 89 | $(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2 90 | $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv 91 | 92 | prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv 93 | $(GOCMD) mod download 94 | 95 | ## GENERIC 96 | rebuild: ## Rebuilds the project 97 | $(MAKE) -C go-llama clean 98 | $(MAKE) -C go-gpt4all-j clean 99 | $(MAKE) -C go-gpt2 clean 100 | $(MAKE) -C go-rwkv clean 101 | $(MAKE) build 102 | 103 | prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a replace ## Prepares for building 104 | 105 | clean: ## Remove build related file 106 | rm -fr ./go-llama 107 | rm -rf ./go-gpt4all-j 108 | rm -rf ./go-gpt2 109 | rm -rf ./go-rwkv 110 | rm -rf $(BINARY_NAME) 111 | 112 | ## Build: 113 | 114 | build: prepare ## Build the project 115 | $(info ${GREEN}I local-ai build info:${RESET}) 116 | $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) 117 | C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./ 118 | 119 | generic-build: ## Build the project using generic 120 | BUILD_TYPE="generic" $(MAKE) build 121 | 122 | ## Run 123 | run: prepare ## run local-ai 124 | C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go 125 | 126 | test-models/testmodel: 127 | mkdir test-models 128 | wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel 129 | cp tests/fixtures/* test-models 130 | 131 | test: prepare test-models/testmodel 132 | cp tests/fixtures/* test-models 133 | @C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./... 134 | 135 | ## Help: 136 | help: ## Show this help. 
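# The awk script below scans the Makefile for targets annotated with '##' (and '## ' section headers) and prints them as the help text.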
137 | @echo '' 138 | @echo 'Usage:' 139 | @echo ' ${YELLOW}make${RESET} ${GREEN}${RESET}' 140 | @echo '' 141 | @echo 'Targets:' 142 | @awk 'BEGIN {FS = ":.*?## "} { \ 143 | if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf " ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \ 144 | else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \ 145 | }' $(MAKEFILE_LIST) 146 | -------------------------------------------------------------------------------- /api/config.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | 11 | model "github.com/go-skynet/LocalAI/pkg/model" 12 | "github.com/gofiber/fiber/v2" 13 | "github.com/rs/zerolog/log" 14 | "gopkg.in/yaml.v3" 15 | ) 16 | 17 | type Config struct { 18 | OpenAIRequest `yaml:"parameters"` 19 | Name string `yaml:"name"` 20 | StopWords []string `yaml:"stopwords"` 21 | Cutstrings []string `yaml:"cutstrings"` 22 | TrimSpace []string `yaml:"trimspace"` 23 | ContextSize int `yaml:"context_size"` 24 | F16 bool `yaml:"f16"` 25 | Threads int `yaml:"threads"` 26 | Debug bool `yaml:"debug"` 27 | Roles map[string]string `yaml:"roles"` 28 | Embeddings bool `yaml:"embeddings"` 29 | Backend string `yaml:"backend"` 30 | TemplateConfig TemplateConfig `yaml:"template"` 31 | MirostatETA float64 `yaml:"mirostat_eta"` 32 | MirostatTAU float64 `yaml:"mirostat_tau"` 33 | Mirostat int `yaml:"mirostat"` 34 | 35 | PromptStrings, InputStrings []string 36 | } 37 | 38 | type TemplateConfig struct { 39 | Completion string `yaml:"completion"` 40 | Chat string `yaml:"chat"` 41 | Edit string `yaml:"edit"` 42 | } 43 | 44 | type ConfigMerger map[string]Config 45 | 46 | func ReadConfigFile(file string) ([]*Config, error) { 47 | c := &[]*Config{} 48 | f, err := os.ReadFile(file) 49 | if err != nil { 50 | return nil, fmt.Errorf("cannot read config file: %w", err) 51 | } 52 | if err := yaml.Unmarshal(f, c); err != nil { 53 | return nil, fmt.Errorf("cannot unmarshal config file: %w", err) 54 | } 55 | 56 | return *c, nil 57 | } 58 | 59 | func ReadConfig(file string) (*Config, error) { 60 | c := &Config{} 61 | f, err := os.ReadFile(file) 62 | if err != nil { 63 | return nil, fmt.Errorf("cannot read config file: %w", err) 64 | } 65 | if err := yaml.Unmarshal(f, c); err != nil { 66 | return nil, fmt.Errorf("cannot unmarshal config file: %w", err) 67 | } 68 | 69 | return c, nil 70 | } 71 | 72 | func (cm ConfigMerger) LoadConfigFile(file string) error { 73 | c, err := ReadConfigFile(file) 74 | if err != nil { 75 | return fmt.Errorf("cannot load config file: %w", err) 76 | } 77 | 78 | for _, cc := range c { 79 | cm[cc.Name] = *cc 80 | } 81 | return nil 82 | } 83 | 84 | func (cm ConfigMerger) LoadConfig(file string) error { 85 | c, err := ReadConfig(file) 86 | if err != nil { 87 | return fmt.Errorf("cannot read config file: %w", err) 88 | } 89 | 90 | cm[c.Name] = *c 91 | return nil 92 | } 93 | 94 | func (cm ConfigMerger) LoadConfigs(path string) error { 95 | files, err := ioutil.ReadDir(path) 96 | if err != nil { 97 | return err 98 | } 99 | 100 | for _, file := range files { 101 | // Skip templates, YAML and .keep files 102 | if !strings.Contains(file.Name(), ".yaml") { 103 | continue 104 | } 105 | c, err := ReadConfig(filepath.Join(path, file.Name())) 106 | if err == nil { 107 | cm[c.Name] = *c 108 | } 109 | } 110 | 111 | return nil 112 | } 113 | 114 | func updateConfig(config *Config, input *OpenAIRequest) { 115 | if input.Echo { 116 | 
config.Echo = input.Echo 117 | } 118 | if input.TopK != 0 { 119 | config.TopK = input.TopK 120 | } 121 | if input.TopP != 0 { 122 | config.TopP = input.TopP 123 | } 124 | 125 | if input.Temperature != 0 { 126 | config.Temperature = input.Temperature 127 | } 128 | 129 | if input.Maxtokens != 0 { 130 | config.Maxtokens = input.Maxtokens 131 | } 132 | 133 | switch stop := input.Stop.(type) { 134 | case string: 135 | if stop != "" { 136 | config.StopWords = append(config.StopWords, stop) 137 | } 138 | case []interface{}: 139 | for _, pp := range stop { 140 | if s, ok := pp.(string); ok { 141 | config.StopWords = append(config.StopWords, s) 142 | } 143 | } 144 | } 145 | 146 | if input.RepeatPenalty != 0 { 147 | config.RepeatPenalty = input.RepeatPenalty 148 | } 149 | 150 | if input.Keep != 0 { 151 | config.Keep = input.Keep 152 | } 153 | 154 | if input.Batch != 0 { 155 | config.Batch = input.Batch 156 | } 157 | 158 | if input.F16 { 159 | config.F16 = input.F16 160 | } 161 | 162 | if input.IgnoreEOS { 163 | config.IgnoreEOS = input.IgnoreEOS 164 | } 165 | 166 | if input.Seed != 0 { 167 | config.Seed = input.Seed 168 | } 169 | 170 | if input.Mirostat != 0 { 171 | config.Mirostat = input.Mirostat 172 | } 173 | 174 | if input.MirostatETA != 0 { 175 | config.MirostatETA = input.MirostatETA 176 | } 177 | 178 | if input.MirostatTAU != 0 { 179 | config.MirostatTAU = input.MirostatTAU 180 | } 181 | 182 | switch inputs := input.Input.(type) { 183 | case string: 184 | if inputs != "" { 185 | config.InputStrings = append(config.InputStrings, inputs) 186 | } 187 | case []interface{}: 188 | for _, pp := range inputs { 189 | if s, ok := pp.(string); ok { 190 | config.InputStrings = append(config.InputStrings, s) 191 | } 192 | } 193 | } 194 | 195 | switch p := input.Prompt.(type) { 196 | case string: 197 | config.PromptStrings = append(config.PromptStrings, p) 198 | case []interface{}: 199 | for _, pp := range p { 200 | if s, ok := pp.(string); ok { 201 | config.PromptStrings = append(config.PromptStrings, s) 202 | } 203 | } 204 | } 205 | } 206 | 207 | func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) { 208 | input := new(OpenAIRequest) 209 | // Get input data from the request body 210 | if err := c.BodyParser(input); err != nil { 211 | return nil, nil, err 212 | } 213 | 214 | modelFile := input.Model 215 | 216 | if c.Params("model") != "" { 217 | modelFile = c.Params("model") 218 | } 219 | 220 | received, _ := json.Marshal(input) 221 | 222 | log.Debug().Msgf("Request received: %s", string(received)) 223 | 224 | // Set model from bearer token, if available 225 | bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ") 226 | bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) 227 | 228 | // If no model was specified, take the first available 229 | if modelFile == "" && !bearerExists { 230 | models, _ := loader.ListModels() 231 | if len(models) > 0 { 232 | modelFile = models[0] 233 | log.Debug().Msgf("No model specified, using: %s", modelFile) 234 | } else { 235 | log.Debug().Msgf("No model specified, returning error") 236 | return nil, nil, fmt.Errorf("no model specified") 237 | } 238 | } 239 | 240 | // If a model is found in bearer token takes precedence 241 | if bearerExists { 242 | log.Debug().Msgf("Using model from bearer token: %s", bearer) 243 | modelFile = bearer 244 | } 245 | 246 | // Load a config file if present after the model name 247 | modelConfig := filepath.Join(loader.ModelPath, 
modelFile+".yaml") 248 | if _, err := os.Stat(modelConfig); err == nil { 249 | if err := cm.LoadConfig(modelConfig); err != nil { 250 | return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) 251 | } 252 | } 253 | 254 | var config *Config 255 | cfg, exists := cm[modelFile] 256 | if !exists { 257 | config = &Config{ 258 | OpenAIRequest: defaultRequest(modelFile), 259 | ContextSize: ctx, 260 | Threads: threads, 261 | F16: f16, 262 | Debug: debug, 263 | } 264 | } else { 265 | config = &cfg 266 | } 267 | 268 | // Set the parameters for the language model prediction 269 | updateConfig(config, input) 270 | 271 | // Don't allow 0 as setting 272 | if config.Threads == 0 { 273 | if threads != 0 { 274 | config.Threads = threads 275 | } else { 276 | config.Threads = 4 277 | } 278 | } 279 | 280 | return config, input, nil 281 | } 282 | -------------------------------------------------------------------------------- /api/prediction.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strings" 7 | "sync" 8 | 9 | "github.com/donomii/go-rwkv.cpp" 10 | model "github.com/go-skynet/LocalAI/pkg/model" 11 | gpt2 "github.com/go-skynet/go-gpt2.cpp" 12 | gptj "github.com/go-skynet/go-gpt4all-j.cpp" 13 | llama "github.com/go-skynet/go-llama.cpp" 14 | ) 15 | 16 | // mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784 17 | var mutexMap sync.Mutex 18 | var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex) 19 | 20 | func defaultLLamaOpts(c Config) []llama.ModelOption { 21 | llamaOpts := []llama.ModelOption{} 22 | if c.ContextSize != 0 { 23 | llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize)) 24 | } 25 | if c.F16 { 26 | llamaOpts = append(llamaOpts, llama.EnableF16Memory) 27 | } 28 | if c.Embeddings { 29 | llamaOpts = append(llamaOpts, llama.EnableEmbeddings) 30 | } 31 | 32 | return llamaOpts 33 | } 34 | 35 | func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) { 36 | if !c.Embeddings { 37 | return nil, fmt.Errorf("endpoint disabled for this model by API configuration") 38 | } 39 | 40 | modelFile := c.Model 41 | 42 | llamaOpts := defaultLLamaOpts(c) 43 | 44 | var inferenceModel interface{} 45 | var err error 46 | if c.Backend == "" { 47 | inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads)) 48 | } else { 49 | inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads)) 50 | } 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | var fn func() ([]float32, error) 56 | switch model := inferenceModel.(type) { 57 | case *llama.LLama: 58 | fn = func() ([]float32, error) { 59 | predictOptions := buildLLamaPredictOptions(c) 60 | return model.Embeddings(s, predictOptions...) 
61 | } 62 | default: 63 | fn = func() ([]float32, error) { 64 | return nil, fmt.Errorf("embeddings not supported by the backend") 65 | } 66 | } 67 | 68 | return func() ([]float32, error) { 69 | // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784 70 | mutexMap.Lock() 71 | l, ok := mutexes[modelFile] 72 | if !ok { 73 | m := &sync.Mutex{} 74 | mutexes[modelFile] = m 75 | l = m 76 | } 77 | mutexMap.Unlock() 78 | l.Lock() 79 | defer l.Unlock() 80 | 81 | embeds, err := fn() 82 | if err != nil { 83 | return embeds, err 84 | } 85 | // Remove trailing 0s 86 | for i := len(embeds) - 1; i >= 0; i-- { 87 | if embeds[i] == 0.0 { 88 | embeds = embeds[:i] 89 | } else { 90 | break 91 | } 92 | } 93 | return embeds, nil 94 | }, nil 95 | } 96 | 97 | func buildLLamaPredictOptions(c Config) []llama.PredictOption { 98 | // Generate the prediction using the language model 99 | predictOptions := []llama.PredictOption{ 100 | llama.SetTemperature(c.Temperature), 101 | llama.SetTopP(c.TopP), 102 | llama.SetTopK(c.TopK), 103 | llama.SetTokens(c.Maxtokens), 104 | llama.SetThreads(c.Threads), 105 | } 106 | 107 | if c.Mirostat != 0 { 108 | predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat)) 109 | } 110 | 111 | if c.MirostatETA != 0 { 112 | predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA)) 113 | } 114 | 115 | if c.MirostatTAU != 0 { 116 | predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU)) 117 | } 118 | 119 | if c.Debug { 120 | predictOptions = append(predictOptions, llama.Debug) 121 | } 122 | 123 | predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...)) 124 | 125 | if c.RepeatPenalty != 0 { 126 | predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty)) 127 | } 128 | 129 | if c.Keep != 0 { 130 | predictOptions = append(predictOptions, llama.SetNKeep(c.Keep)) 131 | } 132 | 133 | if c.Batch != 0 { 134 | predictOptions = append(predictOptions, llama.SetBatch(c.Batch)) 135 | } 136 | 137 | if c.F16 { 138 | predictOptions = append(predictOptions, llama.EnableF16KV) 139 | } 140 | 141 | if c.IgnoreEOS { 142 | predictOptions = append(predictOptions, llama.IgnoreEOS) 143 | } 144 | 145 | if c.Seed != 0 { 146 | predictOptions = append(predictOptions, llama.SetSeed(c.Seed)) 147 | } 148 | 149 | return predictOptions 150 | } 151 | 152 | func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) { 153 | supportStreams := false 154 | modelFile := c.Model 155 | 156 | llamaOpts := defaultLLamaOpts(c) 157 | 158 | var inferenceModel interface{} 159 | var err error 160 | if c.Backend == "" { 161 | inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads)) 162 | } else { 163 | inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads)) 164 | } 165 | if err != nil { 166 | return nil, err 167 | } 168 | 169 | var fn func() (string, error) 170 | 171 | switch model := inferenceModel.(type) { 172 | case *rwkv.RwkvState: 173 | supportStreams = true 174 | 175 | fn = func() (string, error) { 176 | stopWord := "\n" 177 | if len(c.StopWords) > 0 { 178 | stopWord = c.StopWords[0] 179 | } 180 | 181 | if err := model.ProcessInput(s); err != nil { 182 | return "", err 183 | } 184 | 185 | response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback) 186 | 187 | return response, nil 188 | } 189 | case *gpt2.StableLM: 190 | fn = func() 
(string, error) { 191 | // Generate the prediction using the language model 192 | predictOptions := []gpt2.PredictOption{ 193 | gpt2.SetTemperature(c.Temperature), 194 | gpt2.SetTopP(c.TopP), 195 | gpt2.SetTopK(c.TopK), 196 | gpt2.SetTokens(c.Maxtokens), 197 | gpt2.SetThreads(c.Threads), 198 | } 199 | 200 | if c.Batch != 0 { 201 | predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) 202 | } 203 | 204 | if c.Seed != 0 { 205 | predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) 206 | } 207 | 208 | return model.Predict( 209 | s, 210 | predictOptions..., 211 | ) 212 | } 213 | case *gpt2.GPT2: 214 | fn = func() (string, error) { 215 | // Generate the prediction using the language model 216 | predictOptions := []gpt2.PredictOption{ 217 | gpt2.SetTemperature(c.Temperature), 218 | gpt2.SetTopP(c.TopP), 219 | gpt2.SetTopK(c.TopK), 220 | gpt2.SetTokens(c.Maxtokens), 221 | gpt2.SetThreads(c.Threads), 222 | } 223 | 224 | if c.Batch != 0 { 225 | predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch)) 226 | } 227 | 228 | if c.Seed != 0 { 229 | predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed)) 230 | } 231 | 232 | return model.Predict( 233 | s, 234 | predictOptions..., 235 | ) 236 | } 237 | case *gptj.GPTJ: 238 | fn = func() (string, error) { 239 | // Generate the prediction using the language model 240 | predictOptions := []gptj.PredictOption{ 241 | gptj.SetTemperature(c.Temperature), 242 | gptj.SetTopP(c.TopP), 243 | gptj.SetTopK(c.TopK), 244 | gptj.SetTokens(c.Maxtokens), 245 | gptj.SetThreads(c.Threads), 246 | } 247 | 248 | if c.Batch != 0 { 249 | predictOptions = append(predictOptions, gptj.SetBatch(c.Batch)) 250 | } 251 | 252 | if c.Seed != 0 { 253 | predictOptions = append(predictOptions, gptj.SetSeed(c.Seed)) 254 | } 255 | 256 | return model.Predict( 257 | s, 258 | predictOptions..., 259 | ) 260 | } 261 | case *llama.LLama: 262 | supportStreams = true 263 | fn = func() (string, error) { 264 | 265 | if tokenCallback != nil { 266 | model.SetTokenCallback(tokenCallback) 267 | } 268 | 269 | predictOptions := buildLLamaPredictOptions(c) 270 | 271 | str, er := model.Predict( 272 | s, 273 | predictOptions..., 274 | ) 275 | // Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels) 276 | // For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}} 277 | // after a stream event has occurred 278 | model.SetTokenCallback(nil) 279 | return str, er 280 | } 281 | } 282 | 283 | return func() (string, error) { 284 | // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784 285 | mutexMap.Lock() 286 | l, ok := mutexes[modelFile] 287 | if !ok { 288 | m := &sync.Mutex{} 289 | mutexes[modelFile] = m 290 | l = m 291 | } 292 | mutexMap.Unlock() 293 | l.Lock() 294 | defer l.Unlock() 295 | 296 | res, err := fn() 297 | if tokenCallback != nil && !supportStreams { 298 | tokenCallback(res) 299 | } 300 | return res, err 301 | }, nil 302 | } 303 | 304 | func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) { 305 | result := []Choice{} 306 | 307 | n := input.N 308 | 309 | if input.N == 0 { 310 | n = 1 311 | } 312 | 313 | // get the model function to call for the result 314 | predFunc, err := ModelInference(predInput, loader, *config, tokenCallback) 315 | if err != nil { 316 | return result, err 317 | } 318 | 319 | 
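// Run the prediction once per requested choice (n); each result is post-processed by Finetune (echo, cutstrings, trimspace) before the callback collects it into the result slice.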
for i := 0; i < n; i++ { 320 | prediction, err := predFunc() 321 | if err != nil { 322 | return result, err 323 | } 324 | 325 | prediction = Finetune(*config, predInput, prediction) 326 | cb(prediction, &result) 327 | 328 | //result = append(result, Choice{Text: prediction}) 329 | 330 | } 331 | return result, err 332 | } 333 | 334 | var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp) 335 | var mu sync.Mutex = sync.Mutex{} 336 | 337 | func Finetune(config Config, input, prediction string) string { 338 | if config.Echo { 339 | prediction = input + prediction 340 | } 341 | 342 | for _, c := range config.Cutstrings { 343 | mu.Lock() 344 | reg, ok := cutstrings[c] 345 | if !ok { 346 | cutstrings[c] = regexp.MustCompile(c) 347 | reg = cutstrings[c] 348 | } 349 | mu.Unlock() 350 | prediction = reg.ReplaceAllString(prediction, "") 351 | } 352 | 353 | for _, c := range config.TrimSpace { 354 | prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c)) 355 | } 356 | return prediction 357 | 358 | } 359 | -------------------------------------------------------------------------------- /pkg/model/loader.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | "sync" 11 | "text/template" 12 | 13 | "github.com/hashicorp/go-multierror" 14 | "github.com/rs/zerolog/log" 15 | 16 | rwkv "github.com/donomii/go-rwkv.cpp" 17 | gpt2 "github.com/go-skynet/go-gpt2.cpp" 18 | gptj "github.com/go-skynet/go-gpt4all-j.cpp" 19 | llama "github.com/go-skynet/go-llama.cpp" 20 | ) 21 | 22 | type ModelLoader struct { 23 | ModelPath string 24 | mu sync.Mutex 25 | 26 | models map[string]*llama.LLama 27 | gptmodels map[string]*gptj.GPTJ 28 | gpt2models map[string]*gpt2.GPT2 29 | gptstablelmmodels map[string]*gpt2.StableLM 30 | rwkv map[string]*rwkv.RwkvState 31 | promptsTemplates map[string]*template.Template 32 | } 33 | 34 | func NewModelLoader(modelPath string) *ModelLoader { 35 | return &ModelLoader{ 36 | ModelPath: modelPath, 37 | gpt2models: make(map[string]*gpt2.GPT2), 38 | gptmodels: make(map[string]*gptj.GPTJ), 39 | gptstablelmmodels: make(map[string]*gpt2.StableLM), 40 | models: make(map[string]*llama.LLama), 41 | rwkv: make(map[string]*rwkv.RwkvState), 42 | promptsTemplates: make(map[string]*template.Template), 43 | } 44 | } 45 | 46 | func (ml *ModelLoader) ExistsInModelPath(s string) bool { 47 | _, err := os.Stat(filepath.Join(ml.ModelPath, s)) 48 | return err == nil 49 | } 50 | 51 | func (ml *ModelLoader) ListModels() ([]string, error) { 52 | files, err := ioutil.ReadDir(ml.ModelPath) 53 | if err != nil { 54 | return []string{}, err 55 | } 56 | 57 | models := []string{} 58 | for _, file := range files { 59 | // Skip templates, YAML and .keep files 60 | if strings.HasSuffix(file.Name(), ".tmpl") || strings.HasSuffix(file.Name(), ".keep") || strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") { 61 | continue 62 | } 63 | 64 | models = append(models, file.Name()) 65 | } 66 | 67 | return models, nil 68 | } 69 | 70 | func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string, error) { 71 | ml.mu.Lock() 72 | defer ml.mu.Unlock() 73 | 74 | m, ok := ml.promptsTemplates[modelName] 75 | if !ok { 76 | modelFile := filepath.Join(ml.ModelPath, modelName) 77 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 78 | return "", err 79 | } 80 | 81 | t, exists := 
ml.promptsTemplates[modelName] 82 | if exists { 83 | m = t 84 | } 85 | } 86 | if m == nil { 87 | return "", fmt.Errorf("failed loading any template") 88 | } 89 | 90 | var buf bytes.Buffer 91 | 92 | if err := m.Execute(&buf, in); err != nil { 93 | return "", err 94 | } 95 | return buf.String(), nil 96 | } 97 | 98 | func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error { 99 | // Check if the template was already loaded 100 | if _, ok := ml.promptsTemplates[modelName]; ok { 101 | return nil 102 | } 103 | 104 | // Check if the model path exists 105 | // skip any error here - we run anyway if a template does not exist 106 | modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName) 107 | 108 | if !ml.ExistsInModelPath(modelTemplateFile) { 109 | return nil 110 | } 111 | 112 | dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile)) 113 | if err != nil { 114 | return err 115 | } 116 | 117 | // Parse the template 118 | tmpl, err := template.New("prompt").Parse(string(dat)) 119 | if err != nil { 120 | return err 121 | } 122 | ml.promptsTemplates[modelName] = tmpl 123 | 124 | return nil 125 | } 126 | 127 | func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) { 128 | ml.mu.Lock() 129 | defer ml.mu.Unlock() 130 | 131 | // Check if we already have a loaded model 132 | if !ml.ExistsInModelPath(modelName) { 133 | return nil, fmt.Errorf("model does not exist") 134 | } 135 | 136 | if m, ok := ml.gptstablelmmodels[modelName]; ok { 137 | log.Debug().Msgf("Model already loaded in memory: %s", modelName) 138 | return m, nil 139 | } 140 | 141 | // Load the model and keep it in memory for later use 142 | modelFile := filepath.Join(ml.ModelPath, modelName) 143 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 144 | 145 | model, err := gpt2.NewStableLM(modelFile) 146 | if err != nil { 147 | return nil, err 148 | } 149 | 150 | // If there is a prompt template, load it 151 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 152 | return nil, err 153 | } 154 | 155 | ml.gptstablelmmodels[modelName] = model 156 | return model, err 157 | } 158 | 159 | func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) { 160 | ml.mu.Lock() 161 | defer ml.mu.Unlock() 162 | 163 | // Check if we already have a loaded model 164 | if !ml.ExistsInModelPath(modelName) { 165 | return nil, fmt.Errorf("model does not exist") 166 | } 167 | 168 | if m, ok := ml.gpt2models[modelName]; ok { 169 | log.Debug().Msgf("Model already loaded in memory: %s", modelName) 170 | return m, nil 171 | } 172 | 173 | // Load the model and keep it in memory for later use 174 | modelFile := filepath.Join(ml.ModelPath, modelName) 175 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 176 | 177 | model, err := gpt2.New(modelFile) 178 | if err != nil { 179 | return nil, err 180 | } 181 | 182 | // If there is a prompt template, load it 183 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 184 | return nil, err 185 | } 186 | 187 | ml.gpt2models[modelName] = model 188 | return model, err 189 | } 190 | 191 | func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) { 192 | ml.mu.Lock() 193 | defer ml.mu.Unlock() 194 | 195 | // Check if we already have a loaded model 196 | if !ml.ExistsInModelPath(modelName) { 197 | return nil, fmt.Errorf("model does not exist") 198 | } 199 | 200 | if m, ok := ml.gptmodels[modelName]; ok { 201 | log.Debug().Msgf("Model already loaded in memory: %s", 
modelName) 202 | return m, nil 203 | } 204 | 205 | // Load the model and keep it in memory for later use 206 | modelFile := filepath.Join(ml.ModelPath, modelName) 207 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 208 | 209 | model, err := gptj.New(modelFile) 210 | if err != nil { 211 | return nil, err 212 | } 213 | 214 | // If there is a prompt template, load it 215 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 216 | return nil, err 217 | } 218 | 219 | ml.gptmodels[modelName] = model 220 | return model, err 221 | } 222 | 223 | func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) { 224 | ml.mu.Lock() 225 | defer ml.mu.Unlock() 226 | 227 | log.Debug().Msgf("Loading model name: %s", modelName) 228 | 229 | // Check if we already have a loaded model 230 | if !ml.ExistsInModelPath(modelName) { 231 | return nil, fmt.Errorf("model does not exist") 232 | } 233 | 234 | if m, ok := ml.rwkv[modelName]; ok { 235 | log.Debug().Msgf("Model already loaded in memory: %s", modelName) 236 | return m, nil 237 | } 238 | 239 | // Load the model and keep it in memory for later use 240 | modelFile := filepath.Join(ml.ModelPath, modelName) 241 | tokenPath := filepath.Join(ml.ModelPath, tokenFile) 242 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 243 | 244 | model := rwkv.LoadFiles(modelFile, tokenPath, threads) 245 | if model == nil { 246 | return nil, fmt.Errorf("could not load model") 247 | } 248 | 249 | ml.rwkv[modelName] = model 250 | return model, nil 251 | } 252 | 253 | func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) { 254 | ml.mu.Lock() 255 | defer ml.mu.Unlock() 256 | 257 | log.Debug().Msgf("Loading model name: %s", modelName) 258 | 259 | // Check if we already have a loaded model 260 | if !ml.ExistsInModelPath(modelName) { 261 | return nil, fmt.Errorf("model does not exist") 262 | } 263 | 264 | if m, ok := ml.models[modelName]; ok { 265 | log.Debug().Msgf("Model already loaded in memory: %s", modelName) 266 | return m, nil 267 | } 268 | 269 | // Load the model and keep it in memory for later use 270 | modelFile := filepath.Join(ml.ModelPath, modelName) 271 | log.Debug().Msgf("Loading model in memory from file: %s", modelFile) 272 | 273 | model, err := llama.New(modelFile, opts...) 274 | if err != nil { 275 | return nil, err 276 | } 277 | 278 | // If there is a prompt template, load it 279 | if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil { 280 | return nil, err 281 | } 282 | 283 | ml.models[modelName] = model 284 | return model, err 285 | } 286 | 287 | const tokenizerSuffix = ".tokenizer.json" 288 | 289 | var loadedModels map[string]interface{} = map[string]interface{}{} 290 | var muModels sync.Mutex 291 | 292 | func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) { 293 | switch strings.ToLower(backendString) { 294 | case "llama": 295 | return ml.LoadLLaMAModel(modelFile, llamaOpts...) 
296 | case "stablelm": 297 | return ml.LoadStableLMModel(modelFile) 298 | case "gpt2": 299 | return ml.LoadGPT2Model(modelFile) 300 | case "gptj": 301 | return ml.LoadGPTJModel(modelFile) 302 | case "rwkv": 303 | return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads) 304 | default: 305 | return nil, fmt.Errorf("backend unsupported: %s", backendString) 306 | } 307 | } 308 | 309 | func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) { 310 | updateModels := func(model interface{}) { 311 | muModels.Lock() 312 | defer muModels.Unlock() 313 | loadedModels[modelFile] = model 314 | } 315 | 316 | muModels.Lock() 317 | m, exists := loadedModels[modelFile] 318 | if exists { 319 | muModels.Unlock() 320 | return m, nil 321 | } 322 | muModels.Unlock() 323 | 324 | model, modelerr := ml.LoadLLaMAModel(modelFile, llamaOpts...) 325 | if modelerr == nil { 326 | updateModels(model) 327 | return model, nil 328 | } else { 329 | err = multierror.Append(err, modelerr) 330 | } 331 | 332 | model, modelerr = ml.LoadGPTJModel(modelFile) 333 | if modelerr == nil { 334 | updateModels(model) 335 | return model, nil 336 | } else { 337 | err = multierror.Append(err, modelerr) 338 | } 339 | 340 | model, modelerr = ml.LoadGPT2Model(modelFile) 341 | if modelerr == nil { 342 | updateModels(model) 343 | return model, nil 344 | } else { 345 | err = multierror.Append(err, modelerr) 346 | } 347 | 348 | model, modelerr = ml.LoadStableLMModel(modelFile) 349 | if modelerr == nil { 350 | updateModels(model) 351 | return model, nil 352 | } else { 353 | err = multierror.Append(err, modelerr) 354 | } 355 | 356 | model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads) 357 | if modelerr == nil { 358 | updateModels(model) 359 | return model, nil 360 | } else { 361 | err = multierror.Append(err, modelerr) 362 | } 363 | 364 | return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error()) 365 | } 366 | -------------------------------------------------------------------------------- /api/openai.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "encoding/json" 7 | "fmt" 8 | "strings" 9 | 10 | model "github.com/go-skynet/LocalAI/pkg/model" 11 | "github.com/gofiber/fiber/v2" 12 | "github.com/rs/zerolog/log" 13 | "github.com/valyala/fasthttp" 14 | ) 15 | 16 | // APIError provides error information returned by the OpenAI API. 
17 | type APIError struct { 18 | Code any `json:"code,omitempty"` 19 | Message string `json:"message"` 20 | Param *string `json:"param,omitempty"` 21 | Type string `json:"type"` 22 | } 23 | 24 | type ErrorResponse struct { 25 | Error *APIError `json:"error,omitempty"` 26 | } 27 | 28 | type OpenAIUsage struct { 29 | PromptTokens int `json:"prompt_tokens"` 30 | CompletionTokens int `json:"completion_tokens"` 31 | TotalTokens int `json:"total_tokens"` 32 | } 33 | 34 | type Item struct { 35 | Embedding []float32 `json:"embedding"` 36 | Index int `json:"index"` 37 | Object string `json:"object,omitempty"` 38 | } 39 | 40 | type OpenAIResponse struct { 41 | Created int `json:"created,omitempty"` 42 | Object string `json:"object,omitempty"` 43 | ID string `json:"id,omitempty"` 44 | Model string `json:"model,omitempty"` 45 | Choices []Choice `json:"choices,omitempty"` 46 | Data []Item `json:"data,omitempty"` 47 | 48 | Usage OpenAIUsage `json:"usage"` 49 | } 50 | 51 | type Choice struct { 52 | Index int `json:"index,omitempty"` 53 | FinishReason string `json:"finish_reason,omitempty"` 54 | Message *Message `json:"message,omitempty"` 55 | Delta *Message `json:"delta,omitempty"` 56 | Text string `json:"text,omitempty"` 57 | } 58 | 59 | type Message struct { 60 | Role string `json:"role,omitempty" yaml:"role"` 61 | Content string `json:"content,omitempty" yaml:"content"` 62 | } 63 | 64 | type OpenAIModel struct { 65 | ID string `json:"id"` 66 | Object string `json:"object"` 67 | } 68 | 69 | type OpenAIRequest struct { 70 | Model string `json:"model" yaml:"model"` 71 | 72 | // Prompt is read only by completion API calls 73 | Prompt interface{} `json:"prompt" yaml:"prompt"` 74 | 75 | // Edit endpoint 76 | Instruction string `json:"instruction" yaml:"instruction"` 77 | Input interface{} `json:"input" yaml:"input"` 78 | 79 | Stop interface{} `json:"stop" yaml:"stop"` 80 | 81 | // Messages is read only by chat/completion API calls 82 | Messages []Message `json:"messages" yaml:"messages"` 83 | 84 | Stream bool `json:"stream"` 85 | Echo bool `json:"echo"` 86 | // Common options between all the API calls 87 | TopP float64 `json:"top_p" yaml:"top_p"` 88 | TopK int `json:"top_k" yaml:"top_k"` 89 | Temperature float64 `json:"temperature" yaml:"temperature"` 90 | Maxtokens int `json:"max_tokens" yaml:"max_tokens"` 91 | 92 | N int `json:"n"` 93 | 94 | // Custom parameters - not present in the OpenAI API 95 | Batch int `json:"batch" yaml:"batch"` 96 | F16 bool `json:"f16" yaml:"f16"` 97 | IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"` 98 | RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"` 99 | Keep int `json:"n_keep" yaml:"n_keep"` 100 | 101 | MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"` 102 | MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"` 103 | Mirostat int `json:"mirostat" yaml:"mirostat"` 104 | 105 | Seed int `json:"seed" yaml:"seed"` 106 | } 107 | 108 | func defaultRequest(modelFile string) OpenAIRequest { 109 | return OpenAIRequest{ 110 | TopP: 0.7, 111 | TopK: 80, 112 | Maxtokens: 512, 113 | Temperature: 0.9, 114 | Model: modelFile, 115 | } 116 | } 117 | 118 | // https://platform.openai.com/docs/api-reference/completions 119 | func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { 120 | return func(c *fiber.Ctx) error { 121 | config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16) 122 | if err != nil { 123 | return fmt.Errorf("failed reading parameters from 
request:%w", err) 124 | } 125 | 126 | log.Debug().Msgf("Parameter Config: %+v", config) 127 | 128 | templateFile := config.Model 129 | 130 | if config.TemplateConfig.Completion != "" { 131 | templateFile = config.TemplateConfig.Completion 132 | } 133 | 134 | var result []Choice 135 | for _, i := range config.PromptStrings { 136 | // A model can have a "file.bin.tmpl" file associated with a prompt template prefix 137 | templatedInput, err := loader.TemplatePrefix(templateFile, struct { 138 | Input string 139 | }{Input: i}) 140 | if err == nil { 141 | i = templatedInput 142 | log.Debug().Msgf("Template found, input modified to: %s", i) 143 | } 144 | 145 | r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) { 146 | *c = append(*c, Choice{Text: s}) 147 | }, nil) 148 | if err != nil { 149 | return err 150 | } 151 | 152 | result = append(result, r...) 153 | } 154 | 155 | resp := &OpenAIResponse{ 156 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 157 | Choices: result, 158 | Object: "text_completion", 159 | } 160 | 161 | jsonResult, _ := json.Marshal(resp) 162 | log.Debug().Msgf("Response: %s", jsonResult) 163 | 164 | // Return the prediction in the response body 165 | return c.JSON(resp) 166 | } 167 | } 168 | 169 | // https://platform.openai.com/docs/api-reference/embeddings 170 | func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { 171 | return func(c *fiber.Ctx) error { 172 | config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16) 173 | if err != nil { 174 | return fmt.Errorf("failed reading parameters from request:%w", err) 175 | } 176 | 177 | log.Debug().Msgf("Parameter Config: %+v", config) 178 | items := []Item{} 179 | 180 | for i, s := range config.InputStrings { 181 | 182 | // get the model function to call for the result 183 | embedFn, err := ModelEmbedding(s, loader, *config) 184 | if err != nil { 185 | return err 186 | } 187 | 188 | embeddings, err := embedFn() 189 | if err != nil { 190 | return err 191 | } 192 | items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"}) 193 | } 194 | 195 | resp := &OpenAIResponse{ 196 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 197 | Data: items, 198 | Object: "list", 199 | } 200 | 201 | jsonResult, _ := json.Marshal(resp) 202 | log.Debug().Msgf("Response: %s", jsonResult) 203 | 204 | // Return the prediction in the response body 205 | return c.JSON(resp) 206 | } 207 | } 208 | 209 | func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { 210 | 211 | process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) { 212 | ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool { 213 | resp := OpenAIResponse{ 214 | Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
215 | Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}}, 216 | Object: "chat.completion.chunk", 217 | } 218 | log.Debug().Msgf("Sending goroutine: %s", s) 219 | 220 | responses <- resp 221 | return true 222 | }) 223 | close(responses) 224 | } 225 | return func(c *fiber.Ctx) error { 226 | config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16) 227 | if err != nil { 228 | return fmt.Errorf("failed reading parameters from request:%w", err) 229 | } 230 | 231 | log.Debug().Msgf("Parameter Config: %+v", config) 232 | 233 | var predInput string 234 | 235 | mess := []string{} 236 | for _, i := range input.Messages { 237 | r := config.Roles[i.Role] 238 | if r == "" { 239 | r = i.Role 240 | } 241 | 242 | content := fmt.Sprint(r, " ", i.Content) 243 | mess = append(mess, content) 244 | } 245 | 246 | predInput = strings.Join(mess, "\n") 247 | 248 | if input.Stream { 249 | log.Debug().Msgf("Stream request received") 250 | c.Context().SetContentType("text/event-stream") 251 | //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8) 252 | // c.Set("Content-Type", "text/event-stream") 253 | c.Set("Cache-Control", "no-cache") 254 | c.Set("Connection", "keep-alive") 255 | c.Set("Transfer-Encoding", "chunked") 256 | } 257 | 258 | templateFile := config.Model 259 | 260 | if config.TemplateConfig.Chat != "" { 261 | templateFile = config.TemplateConfig.Chat 262 | } 263 | 264 | // A model can have a "file.bin.tmpl" file associated with a prompt template prefix 265 | templatedInput, err := loader.TemplatePrefix(templateFile, struct { 266 | Input string 267 | }{Input: predInput}) 268 | if err == nil { 269 | predInput = templatedInput 270 | log.Debug().Msgf("Template found, input modified to: %s", predInput) 271 | } 272 | 273 | if input.Stream { 274 | responses := make(chan OpenAIResponse) 275 | 276 | go process(predInput, input, config, loader, responses) 277 | 278 | c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) { 279 | 280 | for ev := range responses { 281 | var buf bytes.Buffer 282 | enc := json.NewEncoder(&buf) 283 | enc.Encode(ev) 284 | 285 | fmt.Fprintf(w, "event: data\n\n") 286 | fmt.Fprintf(w, "data: %v\n\n", buf.String()) 287 | log.Debug().Msgf("Sending chunk: %s", buf.String()) 288 | w.Flush() 289 | } 290 | 291 | w.WriteString("event: data\n\n") 292 | resp := &OpenAIResponse{ 293 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 294 | Choices: []Choice{{FinishReason: "stop"}}, 295 | } 296 | respData, _ := json.Marshal(resp) 297 | 298 | w.WriteString(fmt.Sprintf("data: %s\n\n", respData)) 299 | w.Flush() 300 | })) 301 | return nil 302 | } 303 | 304 | result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) { 305 | *c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}}) 306 | }, nil) 307 | if err != nil { 308 | return err 309 | } 310 | 311 | resp := &OpenAIResponse{ 312 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 
313 | Choices: result, 314 | Object: "chat.completion", 315 | } 316 | respData, _ := json.Marshal(resp) 317 | log.Debug().Msgf("Response: %s", respData) 318 | 319 | // Return the prediction in the response body 320 | return c.JSON(resp) 321 | } 322 | } 323 | 324 | func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { 325 | return func(c *fiber.Ctx) error { 326 | config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16) 327 | if err != nil { 328 | return fmt.Errorf("failed reading parameters from request:%w", err) 329 | } 330 | 331 | log.Debug().Msgf("Parameter Config: %+v", config) 332 | 333 | templateFile := config.Model 334 | 335 | if config.TemplateConfig.Edit != "" { 336 | templateFile = config.TemplateConfig.Edit 337 | } 338 | 339 | var result []Choice 340 | for _, i := range config.InputStrings { 341 | // A model can have a "file.bin.tmpl" file associated with a prompt template prefix 342 | templatedInput, err := loader.TemplatePrefix(templateFile, struct { 343 | Input string 344 | Instruction string 345 | }{Input: i}) 346 | if err == nil { 347 | i = templatedInput 348 | log.Debug().Msgf("Template found, input modified to: %s", i) 349 | } 350 | 351 | r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) { 352 | *c = append(*c, Choice{Text: s}) 353 | }, nil) 354 | if err != nil { 355 | return err 356 | } 357 | 358 | result = append(result, r...) 359 | } 360 | 361 | resp := &OpenAIResponse{ 362 | Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. 363 | Choices: result, 364 | Object: "edit", 365 | } 366 | 367 | jsonResult, _ := json.Marshal(resp) 368 | log.Debug().Msgf("Response: %s", jsonResult) 369 | 370 | // Return the prediction in the response body 371 | return c.JSON(resp) 372 | } 373 | } 374 | 375 | func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error { 376 | return func(c *fiber.Ctx) error { 377 | models, err := loader.ListModels() 378 | if err != nil { 379 | return err 380 | } 381 | var mm map[string]interface{} = map[string]interface{}{} 382 | 383 | dataModels := []OpenAIModel{} 384 | for _, m := range models { 385 | mm[m] = nil 386 | dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"}) 387 | } 388 | 389 | for k := range cm { 390 | if _, exists := mm[k]; !exists { 391 | dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"}) 392 | } 393 | } 394 | 395 | return c.JSON(struct { 396 | Object string `json:"object"` 397 | Data []OpenAIModel `json:"data"` 398 | }{ 399 | Object: "list", 400 | Data: dataModels, 401 | }) 402 | } 403 | } 404 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= 2 | github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= 3 | github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= 4 | github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= 5 | github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= 6 | github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= 7 | github.com/andybalholm/brotli v1.0.5 
h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= 8 | github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= 9 | github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= 10 | github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= 11 | github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= 12 | github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 13 | github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= 14 | github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 15 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 16 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 17 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 18 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 19 | github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ= 20 | github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= 21 | github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= 22 | github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 23 | github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= 24 | github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY= 25 | github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= 26 | github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs= 27 | github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns= 28 | github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M= 29 | github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I= 30 | github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= 31 | github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= 32 | github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= 33 | github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ= 34 | github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= 35 | github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis= 36 | github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI= 37 | github.com/go-skynet/go-llama.cpp v0.0.0-20230504223241-67ff6a4db244/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY= 38 | github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675 h1:plXywr95RghidIHPHl+O/zpcNXenEeS6w/6WftFNr9E= 39 | github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY= 40 | github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638 h1:+7UXkGG+LeqJ5oPBEJo5D73Y2drKOVzrlB8D+iG2PHw= 41 | 
github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I= 42 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= 43 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= 44 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 45 | github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8= 46 | github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w= 47 | github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s= 48 | github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc= 49 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 50 | github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= 51 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 52 | github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= 53 | github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= 54 | github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= 55 | github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 56 | github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= 57 | github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= 58 | github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= 59 | github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= 60 | github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= 61 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 62 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 63 | github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY= 64 | github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= 65 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 66 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 67 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 68 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 69 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 70 | github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= 71 | github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= 72 | github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA= 73 | github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= 74 | github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= 75 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 76 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 77 | github.com/mattn/go-isatty 
v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= 78 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 79 | github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= 80 | github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 81 | github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= 82 | github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 83 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 84 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 85 | github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE= 86 | github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM= 87 | github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= 88 | github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= 89 | github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y= 90 | github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg= 91 | github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg= 92 | github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= 93 | github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw= 94 | github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0= 95 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 96 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 97 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 98 | github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= 99 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 100 | github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= 101 | github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc= 102 | github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU= 103 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 104 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 105 | github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM= 106 | github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= 107 | github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4= 108 | github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8= 109 | github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4= 110 | github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk= 111 | github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g= 112 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 113 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 
114 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 115 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 116 | github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg= 117 | github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto= 118 | github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw= 119 | github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= 120 | github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= 121 | github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY= 122 | github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= 123 | github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= 124 | github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= 125 | github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c= 126 | github.com/valyala/fasthttp v1.47.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA= 127 | github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= 128 | github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= 129 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= 130 | github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= 131 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 132 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 133 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 134 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 135 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 136 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 137 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 138 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 139 | golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= 140 | golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= 141 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 142 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 143 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 144 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 145 | golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= 146 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 147 | golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= 148 | golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= 149 | golang.org/x/net v0.9.0/go.mod 
h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= 150 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 151 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 152 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 153 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 154 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 155 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 156 | golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 157 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 158 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 159 | golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 160 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 161 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 162 | golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 163 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 164 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 165 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 166 | golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 167 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 168 | golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= 169 | golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 170 | golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= 171 | golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 172 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 173 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 174 | golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= 175 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 176 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 177 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 178 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 179 | golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 180 | golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= 181 | golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 182 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 183 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 184 | golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod 
h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 185 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 186 | golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= 187 | golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y= 188 | golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4= 189 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 190 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 191 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 192 | google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= 193 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 194 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 195 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= 196 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 197 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 198 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 199 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 200 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 201 | gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 202 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 203 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 |
3 |
4 | LocalAI 5 |
6 |

7 | 8 | [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml) 9 | 10 | [![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy) 11 | 12 | **LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows you to run models locally or on-prem with consumer-grade hardware. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) and [ggml](https://github.com/ggerganov/ggml), including support for GPT4ALL-J, which is licensed under Apache 2.0. 13 | 14 | - OpenAI compatible API 15 | - Supports multiple models 16 | - Once loaded the first time, it keeps models loaded in memory for faster inference 17 | - Support for prompt templates 18 | - Doesn't shell out, but uses C bindings for faster inference and better performance. 19 | 20 | LocalAI is a community-driven project, focused on making AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud). 21 | 22 | See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/). 23 | 24 | ## News 25 | 26 | - 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for the `/edits` endpoint 27 | - 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 ) 28 | 29 | Twitter: [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it) 30 | 31 | ### Blogs and articles 32 | 33 | - [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - an excellent use case for LocalAI, using AI to analyse Kubernetes clusters. 34 | 35 | ## Contribute and help 36 | 37 | To help the project you can: 38 | 39 | - Upvote the [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI. 40 | 41 | - [Hacker news post](https://news.ycombinator.com/item?id=35726934) - help us out by voting if you like this project. 42 | 43 | - If you have technological skills and want to contribute to development, have a look at the open issues. If you are new, you can have a look at the [good-first-issue](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and [help-wanted](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels. 44 | 45 | - If you don't have technological skills, you can still help by improving documentation, adding examples, or sharing your user stories with our community; any help and contribution is welcome! 46 | 47 | ## Model compatibility 48 | 49 | It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp), and also supports [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
50 | 51 | Tested with: 52 | - Vicuna 53 | - Alpaca 54 | - [GPT4ALL](https://github.com/nomic-ai/gpt4all) (changes required, see below) 55 | - [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin) (no changes required) 56 | - Koala 57 | - [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml) 58 | - WizardLM 59 | - [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) 60 | 61 | ### GPT4ALL 62 | 63 | Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`. 64 | 65 | ### RWKV 66 | 67 |
68 | 69 | A full example of how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv). 70 | 71 | Note: rwkv models have an associated tokenizer that needs to be provided along with the model: 72 | 73 | ``` 74 | 36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small 75 | 36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json 76 | ``` 77 | 78 |
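The tokenizer file name is not arbitrary: the loader derives it from the model name by appending a fixed `.tokenizer.json` suffix (see `pkg/model/loader.go`). A minimal sketch of that convention, assuming a models path of `/models` and a model named `rwkv_small` (both just illustrative values):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// rwkvPaths mirrors how the loader pairs an rwkv model with its tokenizer:
// the tokenizer is expected next to the model file, named <model>.tokenizer.json.
func rwkvPaths(modelsPath, modelName string) (modelFile, tokenizerFile string) {
	modelFile = filepath.Join(modelsPath, modelName)
	tokenizerFile = modelFile + ".tokenizer.json"
	return modelFile, tokenizerFile
}

func main() {
	model, tokenizer := rwkvPaths("/models", "rwkv_small")
	fmt.Println(model)     // /models/rwkv_small
	fmt.Println(tokenizer) // /models/rwkv_small.tokenizer.json

	// The backend cannot load the model without its tokenizer, so check it is in place.
	if _, err := os.Stat(tokenizer); err != nil {
		fmt.Println("tokenizer not found:", err)
	}
}
```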
79 | 80 | ### Others 81 | 82 | It should also be compatible with StableLM and GPTNeoX ggml models (untested). 83 | 84 | ### Hardware requirements 85 | 86 | Depending on the model you are attempting to run, you might need more RAM or CPU resources. See also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for the memory/disk requirements of `ggml`-based backends. `rwkv` is less demanding on resources. 87 | 88 | 89 | ## Usage 90 | 91 | > `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest). 92 | 93 | The easiest way to run LocalAI is by using `docker-compose`: 94 | 95 | ```bash 96 | 97 | git clone https://github.com/go-skynet/LocalAI 98 | 99 | cd LocalAI 100 | 101 | # (optional) Checkout a specific LocalAI tag 102 | # git checkout -b build 103 | 104 | # copy your models to models/ 105 | cp your-model.bin models/ 106 | 107 | # (optional) Edit the .env file to set things like context size and threads 108 | # vim .env 109 | 110 | # start with docker-compose 111 | docker-compose up -d --build 112 | 113 | # Now API is accessible at localhost:8080 114 | curl http://localhost:8080/v1/models 115 | # {"object":"list","data":[{"id":"your-model.bin","object":"model"}]} 116 | 117 | curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ 118 | "model": "your-model.bin", 119 | "prompt": "A long time ago in a galaxy far, far away", 120 | "temperature": 0.7 121 | }' 122 | ``` 123 | 124 | ### Example: Use GPT4ALL-J model 125 | 126 |
127 | 128 | ```bash 129 | # Clone LocalAI 130 | git clone https://github.com/go-skynet/LocalAI 131 | 132 | cd LocalAI 133 | 134 | # (optional) Checkout a specific LocalAI tag 135 | # git checkout -b build 136 | 137 | # Download gpt4all-j to models/ 138 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 139 | 140 | # Use a template from the examples 141 | cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/ 142 | 143 | # (optional) Edit the .env file to set things like context size and threads 144 | # vim .env 145 | 146 | # start with docker-compose 147 | docker-compose up -d --build 148 | 149 | # Now API is accessible at localhost:8080 150 | curl http://localhost:8080/v1/models 151 | # {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]} 152 | 153 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 154 | "model": "ggml-gpt4all-j", 155 | "messages": [{"role": "user", "content": "How are you?"}], 156 | "temperature": 0.9 157 | }' 158 | 159 | # {"model":"ggml-gpt4all-j","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]} 160 | ``` 161 |
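Because the API is plain HTTP and JSON, the same chat request can be issued from any language without an SDK. As a minimal illustrative sketch in Go, assuming the stack above is running on `localhost:8080` and serving the `ggml-gpt4all-j` model:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Same request body as the curl example above.
	payload := map[string]interface{}{
		"model":       "ggml-gpt4all-j",
		"messages":    []map[string]string{{"role": "user", "content": "How are you?"}},
		"temperature": 0.9,
	}
	body, _ := json.Marshal(payload)

	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Decode only the fields we need from the OpenAI-style response.
	var out struct {
		Choices []struct {
			Message struct {
				Content string `json:"content"`
			} `json:"message"`
		} `json:"choices"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	if len(out.Choices) > 0 {
		fmt.Println(out.Choices[0].Message.Content)
	}
}
```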
162 | 163 | To build locally, run `make build` (see below). 164 | 165 | ### Other examples 166 | 167 | To see other examples on how to integrate with other projects for instance for question answering or for using it with chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/). 168 | 169 | 170 | ### Advanced configuration 171 | 172 | LocalAI can be configured to serve user-defined models with a set of default parameters and templates. 173 | 174 |
175 | 176 | You can create multiple `yaml` files in the models path, or specify a single YAML configuration file. 177 | Consider the following `models` folder in `examples/chatbot-ui`: 178 | 179 | ``` 180 | base ❯ ls -liah examples/chatbot-ui/models 181 | 36487587 drwxr-xr-x 2 mudler mudler 4.0K May 3 12:27 . 182 | 36487586 drwxr-xr-x 3 mudler mudler 4.0K May 3 10:42 .. 183 | 36465214 -rw-r--r-- 1 mudler mudler 10 Apr 27 07:46 completion.tmpl 184 | 36464855 -rw-r--r-- 1 mudler mudler 3.6G Apr 27 00:08 ggml-gpt4all-j 185 | 36464537 -rw-r--r-- 1 mudler mudler 245 May 3 10:42 gpt-3.5-turbo.yaml 186 | 36467388 -rw-r--r-- 1 mudler mudler 180 Apr 27 07:46 gpt4all.tmpl 187 | ``` 188 | 189 | The `gpt-3.5-turbo.yaml` file defines the `gpt-3.5-turbo` model, which is an alias for using `ggml-gpt4all-j` with pre-defined options. 190 | 191 | For instance, consider the following configuration, which declares `gpt-3.5-turbo` backed by the `ggml-gpt4all-j` model: 192 | 193 | ```yaml 194 | name: gpt-3.5-turbo 195 | # Default model parameters 196 | parameters: 197 | # Relative to the models path 198 | model: ggml-gpt4all-j 199 | # temperature 200 | temperature: 0.3 201 | # all the OpenAI request options here.. 202 | 203 | # Default context size 204 | context_size: 512 205 | threads: 10 206 | # Define a backend (optional). By default it will try to guess the backend the first time the model is interacted with. 207 | backend: gptj # available: llama, stablelm, gpt2, gptj, rwkv 208 | # stopwords (if supported by the backend) 209 | stopwords: 210 | - "HUMAN:" 211 | - "### Response:" 212 | # define chat roles 213 | roles: 214 | user: "HUMAN:" 215 | system: "GPT:" 216 | template: 217 | # template file ".tmpl" with the prompt template to use by default on the endpoint call. Note that the file extension is omitted here 218 | completion: completion 219 | chat: ggml-gpt4all-j 220 | ``` 221 | 222 | Specifying a `config-file` via CLI allows you to declare models in a single file as a list, for instance: 223 | 224 | ```yaml 225 | - name: list1 226 | parameters: 227 | model: testmodel 228 | context_size: 512 229 | threads: 10 230 | stopwords: 231 | - "HUMAN:" 232 | - "### Response:" 233 | roles: 234 | user: "HUMAN:" 235 | system: "GPT:" 236 | template: 237 | completion: completion 238 | chat: ggml-gpt4all-j 239 | - name: list2 240 | parameters: 241 | model: testmodel 242 | context_size: 512 243 | threads: 10 244 | stopwords: 245 | - "HUMAN:" 246 | - "### Response:" 247 | roles: 248 | user: "HUMAN:" 249 | system: "GPT:" 250 | template: 251 | completion: completion 252 | chat: ggml-gpt4all-j 253 | ``` 254 | 255 | See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) for an example of how to use config files. 256 | 257 |
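The `roles` mapping above is what the chat endpoint uses to turn OpenAI-style messages into a single prompt string before templating. A simplified sketch of that step, adapted from `api/openai.go`:

```go
package main

import (
	"fmt"
	"strings"
)

type message struct{ Role, Content string }

// buildPrompt prefixes each chat message with the configured role string
// (falling back to the raw role name when no mapping exists), then joins
// the lines into one prompt that is later passed to the prompt template.
func buildPrompt(roles map[string]string, messages []message) string {
	mess := []string{}
	for _, m := range messages {
		r := roles[m.Role]
		if r == "" {
			r = m.Role
		}
		mess = append(mess, fmt.Sprint(r, " ", m.Content))
	}
	return strings.Join(mess, "\n")
}

func main() {
	roles := map[string]string{"user": "HUMAN:", "system": "GPT:"}
	msgs := []message{{Role: "user", Content: "How are you?"}}
	fmt.Println(buildPrompt(roles, msgs)) // HUMAN: How are you?
}
```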
258 | 259 | ### Prompt templates 260 | 261 | The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the Stanford Alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release. 262 | 263 |
264 | You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as a default prompt and can be used with alpaca: 265 | 266 | ``` 267 | The below instruction describes a task. Write a response that appropriately completes the request. 268 | 269 | ### Instruction: 270 | {{.Input}} 271 | 272 | ### Response: 273 | ``` 274 | 275 | See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for some of the most popular models. 276 | 277 | 278 | For the edit endpoint, an example template for alpaca-based models can be: 279 | 280 | ``` 281 | Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 282 | 283 | ### Instruction: 284 | {{.Instruction}} 285 | 286 | ### Input: 287 | {{.Input}} 288 | 289 | ### Response: 290 | ``` 291 | 292 |
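These templates are standard Go `text/template` files: the API parses the `.tmpl` file once and executes it with the user prompt bound to `.Input` (and, for the edit endpoint, the instruction bound to `.Instruction`). A simplified sketch of what `pkg/model/loader.go` does with a template, using a hypothetical `foo.bin.tmpl`:

```go
package main

import (
	"bytes"
	"fmt"
	"text/template"
)

func main() {
	// Contents of a hypothetical foo.bin.tmpl file.
	tmplText := `The below instruction describes a task. Write a response that appropriately completes the request.

### Instruction:
{{.Input}}

### Response:
`

	// Simplified from pkg/model/loader.go: parse the template, then execute
	// it with the incoming prompt bound to .Input.
	tmpl, err := template.New("prompt").Parse(tmplText)
	if err != nil {
		panic(err)
	}

	var buf bytes.Buffer
	if err := tmpl.Execute(&buf, struct{ Input string }{Input: "Say this is a test!"}); err != nil {
		panic(err)
	}
	fmt.Println(buf.String())
}
```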
293 | 294 | ### CLI 295 | 296 | You can control LocalAI with command line arguments, to specify a binding address, or the number of threads. 297 | 298 |
299 | 300 | Usage: 301 | 302 | ``` 303 | local-ai --models-path <model-path> [--address <address>] [--threads <number-of-threads>] 304 | 305 | ``` 306 | | Parameter | Environment Variable | Default Value | Description | 307 | | ------------ | -------------------- | ------------- | -------------------------------------- | 308 | | models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). | 309 | | threads | THREADS | Number of physical cores | The number of threads to use for text generation. | 310 | | address | ADDRESS | :8080 | The address and port to listen on. | 311 | | context-size | CONTEXT_SIZE | 512 | Default token context size. | 312 | | debug | DEBUG | false | Enable debug mode. | 313 | | config-file | CONFIG_FILE | empty | Path to a LocalAI config file. | 314 | 315 |
316 | 317 | ## Setup 318 | 319 | Currently LocalAI comes as a container image and can be used with docker or a container engine of choice. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest). 320 | 321 | ### Docker 322 | 323 |
324 | Example of starting the API with `docker`: 325 | 326 | ```bash 327 | docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4 328 | ``` 329 | 330 | You should see: 331 | ``` 332 | ┌───────────────────────────────────────────────────┐ 333 | │ Fiber v2.42.0 │ 334 | │ http://127.0.0.1:8080 │ 335 | │ (bound on host 0.0.0.0 and port 8080) │ 336 | │ │ 337 | │ Handlers ............. 1 Processes ........... 1 │ 338 | │ Prefork ....... Disabled PID ................. 1 │ 339 | └───────────────────────────────────────────────────┘ 340 | ``` 341 | 342 |
343 | 344 | ### Build locally 345 | 346 |
347 | 348 | In order to build the `LocalAI` container image locally, you can use `docker`: 349 | 350 | ``` 351 | # build the image (image names must be lowercase) 352 | docker build -t local-ai . 353 | docker run local-ai 354 | ``` 355 | 356 | Or you can build the binary with `make`: 357 | 358 | ``` 359 | make build 360 | ``` 361 | 362 |
363 | 364 | ### Build on mac 365 | 366 | Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`. 367 | 368 |
369 | 370 | The below has been tested by one mac user and found to work. Note that this doesn't use docker to run the server: 371 | 372 | ``` 373 | # install build dependencies 374 | brew install cmake 375 | brew install go 376 | 377 | # clone the repo 378 | git clone https://github.com/go-skynet/LocalAI.git 379 | 380 | cd LocalAI 381 | 382 | # build the binary 383 | make build 384 | 385 | # Download gpt4all-j to models/ 386 | wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j 387 | 388 | # Use a template from the examples 389 | cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/ 390 | 391 | # Run LocalAI 392 | ./local-ai --models-path ./models/ --debug 393 | 394 | # Now API is accessible at localhost:8080 395 | curl http://localhost:8080/v1/models 396 | 397 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 398 | "model": "ggml-gpt4all-j", 399 | "messages": [{"role": "user", "content": "How are you?"}], 400 | "temperature": 0.9 401 | }' 402 | ``` 403 | 404 |
405 | 406 | ### Windows compatibility 407 | 408 | It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2 409 | 410 | ### Run LocalAI in Kubernetes 411 | 412 | LocalAI can be installed inside Kubernetes with helm. 413 | 414 |
415 | 416 | 1. Add the helm repo 417 | ```bash 418 | helm repo add go-skynet https://go-skynet.github.io/helm-charts/ 419 | ``` 420 | 2. Create a values file with your settings: 421 | ```bash 422 | cat <<EOF > values.yaml 423 | deployment: 424 | image: quay.io/go-skynet/local-ai:latest 425 | env: 426 | threads: 4 427 | contextSize: 1024 428 | modelsPath: "/models" 429 | # Optionally create a PVC, mount the PV to the LocalAI Deployment, 430 | # and download a model to prepopulate the models directory 431 | modelsVolume: 432 | enabled: true 433 | url: "https://gpt4all.io/models/ggml-gpt4all-j.bin" 434 | pvc: 435 | size: 6Gi 436 | accessModes: 437 | - ReadWriteOnce 438 | auth: 439 | # Optional value for HTTP basic access authentication header 440 | basic: "" # 'username:password' base64 encoded 441 | service: 442 | type: ClusterIP 443 | annotations: {} 444 | # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout 445 | # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200" 446 | EOF 447 | ``` 448 | 3. Install the helm chart: 449 | ```bash 450 | helm repo update 451 | helm install local-ai go-skynet/local-ai -f values.yaml 452 | ``` 453 | 454 | Also check out the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts). 455 | 456 |
457 | 458 | ## Supported OpenAI API endpoints 459 | 460 | You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create). 461 | 462 | Following the list of endpoints/parameters supported. 463 | 464 | Note: 465 | 466 | - You can also specify the model as part of the OpenAI token. 467 | - If only one model is available, the API will use it for all the requests. 468 | 469 | ### Chat completions 470 | 471 |
472 | For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body: 473 | 474 | ``` 475 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 476 | "model": "ggml-koala-7b-model-q4_0-r2.bin", 477 | "messages": [{"role": "user", "content": "Say this is a test!"}], 478 | "temperature": 0.7 479 | }' 480 | ``` 481 | 482 | Available additional parameters: `top_p`, `top_k`, `max_tokens` 483 |
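Since the endpoint mirrors the OpenAI API, a third-party OpenAI client can also be pointed at it by overriding the base URL. A minimal sketch using the `github.com/sashabaranov/go-openai` client, assuming LocalAI is listening on `localhost:8080` and serving the model above (the API key is just a placeholder when no authentication layer is configured in front of LocalAI):

```go
package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	// Point the client at the LocalAI endpoint instead of api.openai.com.
	cfg := openai.DefaultConfig("not-needed")
	cfg.BaseURL = "http://localhost:8080/v1"
	client := openai.NewClientWithConfig(cfg)

	resp, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		Model: "ggml-koala-7b-model-q4_0-r2.bin",
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "Say this is a test!"},
		},
		Temperature: 0.7,
	})
	if err != nil {
		panic(err)
	}
	fmt.Println(resp.Choices[0].Message.Content)
}
```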
484 | 485 | ### Edit completions 486 | 487 |
488 | To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body: 489 | 490 | ``` 491 | curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{ 492 | "model": "ggml-koala-7b-model-q4_0-r2.bin", 493 | "instruction": "rephrase", 494 | "input": "Black cat jumped out of the window", 495 | "temperature": 0.7 496 | }' 497 | ``` 498 | 499 | Available additional parameters: `top_p`, `top_k`, `max_tokens`. 500 | 501 |

### Completions
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the prompt as the request body:

```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
     "model": "ggml-koala-7b-model-q4_0-r2.bin",
     "prompt": "A long time ago in a galaxy far, far away",
     "temperature": 0.7
   }'
```

Available additional parameters: `top_p`, `top_k`, `max_tokens`
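
And the equivalent call with the `openai` Python package (minimal sketch, pre-1.0 release, base URL pointed at LocalAI):

```python
# Minimal sketch: the completion request above, sent through the `openai` package (pre-1.0).
import openai

openai.api_base = "http://localhost:8080/v1"
openai.api_key = "sk-local"  # placeholder

response = openai.Completion.create(
    model="ggml-koala-7b-model-q4_0-r2.bin",
    prompt="A long time ago in a galaxy far, far away",
    temperature=0.7,
)

print(response["choices"][0]["text"])
```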

### List models
You can list all the models available with:

```
curl http://localhost:8080/v1/models
```
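
Or, with the `openai` Python package (minimal sketch, pre-1.0 release):

```python
# Minimal sketch: list the models LocalAI exposes, via the `openai` package (pre-1.0).
import openai

openai.api_base = "http://localhost:8080/v1"
openai.api_key = "sk-local"  # placeholder

for model in openai.Model.list()["data"]:
    print(model["id"])
```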

## Frequently asked questions

Here are answers to some of the most common questions.

### How do I get models?
Most ggml-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open an issue. However, be cautious about downloading models from the internet directly onto your machine, as there may be security vulnerabilities in llama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=ggml. Models from gpt4all should also work: https://github.com/nomic-ai/gpt4all.
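
If you want to fetch a model file programmatically, here is a minimal sketch using the `huggingface_hub` package (0.13+ for the `local_dir` argument). The repository and file names below are placeholders, not recommendations; substitute a real ggml model found through the search link above.

```python
# Minimal sketch: download a ggml model file into LocalAI's models directory.
# Requires `pip install huggingface_hub` (0.13+ for local_dir support).
# NOTE: repo_id and filename are placeholders; pick an actual ggml model
# from the Hugging Face search linked above.
from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="someuser/some-ggml-model",  # placeholder repository
    filename="ggml-model-q4_0.bin",      # placeholder file name
    local_dir="models",                  # LocalAI's models path
    local_dir_use_symlinks=False,        # copy the file instead of symlinking
)
```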

### What's the difference with Serge, or XXX?
LocalAI is a multi-model solution that isn't tied to a specific model type (e.g., llama.cpp or alpaca.cpp): it handles all of them internally for faster inference, and it is easy to set up locally and deploy to Kubernetes.

### Can I use it with a Discord bot, or XXX?
Yes! If the client uses the OpenAI API and supports setting a different base URL for requests, you can point it at the LocalAI endpoint. This lets you use LocalAI with any application built for OpenAI, without changing the application itself!
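
Many clients read the base URL from the environment rather than from code. As one illustrative case, pre-1.0 releases of the official `openai` Python package pick up `OPENAI_API_BASE` and `OPENAI_API_KEY` when the module is imported, so an application using it can be redirected to LocalAI just by setting them first; other clients may use different configuration names, so check their documentation.

```python
# Minimal sketch: redirect an app that uses the `openai` package (pre-1.0) to LocalAI
# without touching its code, by setting the environment before the package is imported.
import os

os.environ["OPENAI_API_BASE"] = "http://localhost:8080/v1"
os.environ["OPENAI_API_KEY"] = "sk-local"  # placeholder value

import openai  # noqa: E402  (imported after the environment is prepared)

print(openai.api_base)  # -> http://localhost:8080/v1
```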

### Can this leverage GPUs?
Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/llama.cpp/discussions/915.

### Where is the webUI?
The localai-webui and chatbot-ui examples are available in the examples section and can be set up following their instructions. More generally, since LocalAI is an API, you can plug it into any existing project that provides a UI for OpenAI's APIs. Several such projects are already on GitHub, and they should be compatible with LocalAI out of the box (as it mimics the OpenAI API).

### Does it work with AutoGPT?
AutoGPT currently doesn't allow setting a different API URL, but there is a PR open for it, so this should be possible soon!

## Projects already using LocalAI to run local models

Feel free to open up a PR to get your project listed!

- [Kairos](https://github.com/kairos-io/kairos)
- [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models)

## Blog posts and other articles

- https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65
- https://kairos.io/docs/examples/localai/

## Short-term roadmap

- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351) and [gpt4all](https://github.com/go-skynet/LocalAI/issues/85)
- [x] Multi-model support
- [x] Have a webUI!
- [x] Allow configuration of defaults for models.
- [ ] Enable automatic downloading of models from a curated gallery of free-licensed models, directly from the webUI.

## Star history

[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)

## License

LocalAI is a community-driven project. It was initially created by [Ettore Di Giacinto](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).

MIT

## Golang bindings used

- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)

## Acknowledgements

- [llama.cpp](https://github.com/ggerganov/llama.cpp)
- https://github.com/tatsu-lab/stanford_alpaca
- https://github.com/cornelk/llama-go for the initial ideas
- https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)

## Contributors