├── .dockerignore
├── .gitattributes
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── 10_bug_report.yml
│   │   ├── 20_feature_request.md
│   │   ├── 30_model_request.md
│   │   └── config.yml
│   └── workflows
│       ├── latest.yaml
│       ├── release.yaml
│       └── test.yaml
├── .gitignore
├── .gitmodules
├── .golangci.yaml
├── .prettierrc.json
├── Dockerfile
├── LICENSE
├── README.md
├── api
│   ├── client.go
│   ├── client_test.go
│   ├── types.go
│   └── types_test.go
├── app
│   ├── .gitignore
│   ├── README.md
│   ├── assets
│   │   ├── app.ico
│   │   ├── assets.go
│   │   ├── setup.bmp
│   │   ├── tray.ico
│   │   └── tray_upgrade.ico
│   ├── lifecycle
│   │   ├── getstarted_nonwindows.go
│   │   ├── getstarted_windows.go
│   │   ├── lifecycle.go
│   │   ├── logging.go
│   │   ├── logging_nonwindows.go
│   │   ├── logging_windows.go
│   │   ├── paths.go
│   │   ├── server.go
│   │   ├── server_unix.go
│   │   ├── server_windows.go
│   │   ├── updater.go
│   │   ├── updater_nonwindows.go
│   │   └── updater_windows.go
│   ├── main.go
│   ├── ollama.iss
│   ├── ollama.rc
│   ├── ollama_welcome.ps1
│   ├── store
│   │   ├── store.go
│   │   ├── store_darwin.go
│   │   ├── store_linux.go
│   │   └── store_windows.go
│   └── tray
│       ├── commontray
│       │   └── types.go
│       ├── tray.go
│       ├── tray_nonwindows.go
│       ├── tray_windows.go
│       └── wintray
│           ├── eventloop.go
│           ├── menus.go
│           ├── messages.go
│           ├── notifyicon.go
│           ├── tray.go
│           ├── w32api.go
│           └── winclass.go
├── auth
│   └── auth.go
├── cmd
│   ├── cmd.go
│   ├── interactive.go
│   ├── interactive_test.go
│   ├── start_darwin.go
│   ├── start_default.go
│   └── start_windows.go
├── convert
│   ├── convert.go
│   ├── convert_test.go
│   ├── gemma.go
│   ├── llama.go
│   ├── mistral.go
│   ├── mixtral.go
│   ├── safetensors.go
│   ├── sentencepiece
│   │   └── sentencepiece_model.pb.go
│   ├── sentencepiece_model.proto
│   ├── tokenizer.go
│   └── torch.go
├── docs
│   ├── README.md
│   ├── api.md
│   ├── development.md
│   ├── docker.md
│   ├── faq.md
│   ├── gpu.md
│   ├── import.md
│   ├── linux.md
│   ├── modelfile.md
│   ├── openai.md
│   ├── troubleshooting.md
│   ├── tutorials.md
│   ├── tutorials
│   │   ├── fly-gpu.md
│   │   ├── langchainjs.md
│   │   ├── langchainpy.md
│   │   └── nvidia-jetson.md
│   └── windows.md
├── envconfig
│   ├── config.go
│   └── config_test.go
├── examples
│   ├── .gitignore
│   ├── README.md
│   ├── flyio
│   │   ├── .gitignore
│   │   └── README.md
│   ├── go-chat
│   │   └── main.go
│   ├── go-generate-streaming
│   │   └── main.go
│   ├── go-generate
│   │   └── main.go
│   ├── go-http-generate
│   │   ├── README.md
│   │   └── main.go
│   ├── go-multimodal
│   │   └── main.go
│   ├── go-pull-progress
│   │   └── main.go
│   ├── jupyter-notebook
│   │   ├── README.md
│   │   └── ollama.ipynb
│   ├── kubernetes
│   │   ├── README.md
│   │   ├── cpu.yaml
│   │   └── gpu.yaml
│   ├── langchain-python-rag-document
│   │   ├── README.md
│   │   ├── main.py
│   │   └── requirements.txt
│   ├── langchain-python-rag-privategpt
│   │   ├── .gitignore
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── constants.py
│   │   ├── ingest.py
│   │   ├── poetry.lock
│   │   ├── privateGPT.py
│   │   ├── pyproject.toml
│   │   └── requirements.txt
│   ├── langchain-python-rag-websummary
│   │   ├── README.md
│   │   ├── main.py
│   │   └── requirements.txt
│   ├── langchain-python-simple
│   │   ├── README.md
│   │   ├── main.py
│   │   └── requirements.txt
│   ├── langchain-typescript-simple
│   │   ├── README.md
│   │   ├── main.ts
│   │   ├── package-lock.json
│   │   └── package.json
│   ├── modelfile-mario
│   │   ├── Modelfile
│   │   ├── logo.png
│   │   └── readme.md
│   ├── python-dockerit
│   │   ├── Modelfile
│   │   ├── README.md
│   │   ├── dockerit.py
│   │   └── requirements.txt
│   ├── python-json-datagenerator
│   │   ├── predefinedschema.py
│   │   ├── randomaddresses.py
│   │   ├── readme.md
│   │   └── requirements.txt
│   ├── python-loganalysis
│   │   ├── Modelfile
│   │   ├── loganalysis.py
│   │   ├── logtest.logfile
│   │   ├── readme.md
│   │   └── requirements.txt
│   ├── python-rag-newssummary
│   │   ├── README.md
│   │   ├── requirements.txt
│   │   ├── summ.py
│   │   └── utils.py
│   ├── python-simplechat
│   │   ├── client.py
│   │   ├── readme.md
│   │   └── requirements.txt
│   ├── python-simplegenerate
│   │   ├── README.md
│   │   ├── client.py
│   │   └── requirements.txt
│   ├── typescript-functioncalling
│   │   ├── extractemail.ts
│   │   ├── extractwp.ts
│   │   ├── info.txt
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── readme.md
│   │   └── wp.txt
│   ├── typescript-mentors
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── character-generator.ts
│   │   ├── mentors.ts
│   │   └── package.json
│   └── typescript-simplechat
│       ├── client.ts
│       ├── package.json
│       └── readme.md
├── format
│   ├── bytes.go
│   ├── format.go
│   ├── format_test.go
│   ├── time.go
│   └── time_test.go
├── go.mod
├── go.sum
├── gpu
│   ├── amd_common.go
│   ├── amd_hip_windows.go
│   ├── amd_linux.go
│   ├── amd_windows.go
│   ├── assets.go
│   ├── cpu_common.go
│   ├── cuda_common.go
│   ├── gpu.go
│   ├── gpu_darwin.go
│   ├── gpu_info.h
│   ├── gpu_info_cpu.c
│   ├── gpu_info_cudart.c
│   ├── gpu_info_cudart.h
│   ├── gpu_info_darwin.h
│   ├── gpu_info_darwin.m
│   ├── gpu_info_nvcuda.c
│   ├── gpu_info_nvcuda.h
│   ├── gpu_info_oneapi.c
│   ├── gpu_info_oneapi.h
│   ├── gpu_oneapi.go
│   ├── gpu_test.go
│   └── types.go
├── integration
│   ├── README.md
│   ├── basic_test.go
│   ├── concurrency_test.go
│   ├── context_test.go
│   ├── llm_image_test.go
│   ├── llm_test.go
│   ├── max_queue_test.go
│   └── utils_test.go
├── llm
│   ├── ext_server
│   │   ├── CMakeLists.txt
│   │   ├── httplib.h
│   │   ├── json.hpp
│   │   ├── server.cpp
│   │   └── utils.hpp
│   ├── filetype.go
│   ├── generate
│   │   ├── gen_common.sh
│   │   ├── gen_darwin.sh
│   │   ├── gen_linux.sh
│   │   ├── gen_windows.ps1
│   │   ├── generate_darwin.go
│   │   ├── generate_linux.go
│   │   └── generate_windows.go
│   ├── ggla.go
│   ├── ggml.go
│   ├── gguf.go
│   ├── llm.go
│   ├── llm_darwin_amd64.go
│   ├── llm_darwin_arm64.go
│   ├── llm_linux.go
│   ├── llm_windows.go
│   ├── memory.go
│   ├── patches
│   │   ├── 01-load-progress.diff
│   │   ├── 02-clip-log.diff
│   │   ├── 03-load_exception.diff
│   │   ├── 04-metal.diff
│   │   └── 05-default-pretokenizer.diff
│   ├── payload.go
│   ├── server.go
│   └── status.go
├── macapp
│   ├── .eslintrc.json
│   ├── .gitignore
│   ├── README.md
│   ├── assets
│   │   ├── icon.icns
│   │   ├── iconDarkTemplate.png
│   │   ├── iconDarkTemplate@2x.png
│   │   ├── iconDarkUpdateTemplate.png
│   │   ├── iconDarkUpdateTemplate@2x.png
│   │   ├── iconTemplate.png
│   │   ├── iconTemplate@2x.png
│   │   ├── iconUpdateTemplate.png
│   │   └── iconUpdateTemplate@2x.png
│   ├── forge.config.ts
│   ├── package-lock.json
│   ├── package.json
│   ├── postcss.config.js
│   ├── src
│   │   ├── app.css
│   │   ├── app.tsx
│   │   ├── declarations.d.ts
│   │   ├── index.html
│   │   ├── index.ts
│   │   ├── install.ts
│   │   ├── ollama.svg
│   │   ├── preload.ts
│   │   └── renderer.tsx
│   ├── tailwind.config.js
│   ├── tsconfig.json
│   ├── webpack.main.config.ts
│   ├── webpack.plugins.ts
│   ├── webpack.renderer.config.ts
│   └── webpack.rules.ts
├── main.go
├── openai
│   └── openai.go
├── parser
│   ├── parser.go
│   └── parser_test.go
├── progress
│   ├── bar.go
│   ├── progress.go
│   └── spinner.go
├── readline
│   ├── buffer.go
│   ├── errors.go
│   ├── history.go
│   ├── readline.go
│   ├── readline_unix.go
│   ├── readline_windows.go
│   ├── term.go
│   ├── term_bsd.go
│   ├── term_linux.go
│   ├── term_windows.go
│   └── types.go
├── scripts
│   ├── build.sh
│   ├── build_darwin.sh
│   ├── build_docker.sh
│   ├── build_linux.sh
│   ├── build_remote.py
│   ├── build_windows.ps1
│   ├── install.sh
│   ├── publish.sh
│   ├── push_docker.sh
│   ├── rh_linux_deps.sh
│   └── tag_latest.sh
├── server
│   ├── auth.go
│   ├── download.go
│   ├── fixblobs.go
│   ├── fixblobs_test.go
│   ├── images.go
│   ├── layer.go
│   ├── manifest.go
│   ├── manifest_test.go
│   ├── model.go
│   ├── modelpath.go
│   ├── modelpath_test.go
│   ├── prompt.go
│   ├── prompt_test.go
│   ├── routes.go
│   ├── routes_create_test.go
│   ├── routes_delete_test.go
│   ├── routes_list_test.go
│   ├── routes_test.go
│   ├── sched.go
│   ├── sched_test.go
│   └── upload.go
├── types
│   ├── errtypes
│   │   └── errtypes.go
│   └── model
│       ├── name.go
│       ├── name_test.go
│       └── testdata
│           └── fuzz
│               └── FuzzName
│                   └── d37463aa416f6bab
└── version
    └── version.go
/.dockerignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | ollama 3 | app 4 | macapp 5 | dist 6 | llm/llama.cpp 7 | .env 8 | .cache 9 | test_data 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | llm/ext_server/* linguist-vendored 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/10_bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | labels: [bug] 3 | description: Something isn't working right. 4 | body: 5 | - type: textarea 6 | id: description 7 | attributes: 8 | label: What is the issue? 9 | description: What happened? What did you expect to happen? 10 | validations: 11 | required: true 12 | - type: dropdown 13 | id: os 14 | attributes: 15 | label: OS 16 | description: Which operating system are you using? 17 | multiple: true 18 | options: 19 | - Linux 20 | - macOS 21 | - Windows 22 | - Docker 23 | - WSL2 24 | validations: 25 | required: false 26 | - type: dropdown 27 | id: gpu 28 | attributes: 29 | label: GPU 30 | description: Which GPU are you using? 31 | multiple: true 32 | options: 33 | - Nvidia 34 | - AMD 35 | - Intel 36 | - Apple 37 | - Other 38 | validations: 39 | required: false 40 | - type: dropdown 41 | id: cpu 42 | attributes: 43 | label: CPU 44 | description: Which CPU are you using? 45 | multiple: true 46 | options: 47 | - Intel 48 | - AMD 49 | - Apple 50 | - Other 51 | validations: 52 | required: false 53 | - type: input 54 | id: version 55 | attributes: 56 | label: Ollama version 57 | description: What version of Ollama are you using? 
(`ollama --version`) 58 | placeholder: e.g., 0.1.32 59 | validations: 60 | required: false 61 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/20_feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Request a new feature 4 | labels: feature request 5 | --- 6 | 7 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/30_model_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Model request 3 | about: Request support for a new model to be added to Ollama 4 | labels: model request 5 | --- -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Help 4 | url: https://discord.com/invite/ollama 5 | about: Please join our Discord server for help using Ollama 6 | - name: Troubleshooting 7 | url: https://github.com/ollama/ollama/blob/main/docs/faq.md#faq 8 | about: See the FAQ for common issues and solutions 9 | -------------------------------------------------------------------------------- /.github/workflows/latest.yaml: -------------------------------------------------------------------------------- 1 | name: latest 2 | 3 | on: 4 | release: 5 | types: [released] 6 | 7 | jobs: 8 | update-latest: 9 | environment: release 10 | runs-on: linux 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Login to Docker Hub 14 | uses: docker/login-action@v3 15 | with: 16 | username: ${{ vars.DOCKER_USER }} 17 | password: ${{ secrets.DOCKER_ACCESS_TOKEN }} 18 | - name: Tag images as latest 19 | env: 20 | PUSH: "1" 21 | shell: bash 22 | run: | 23 | export "VERSION=${GITHUB_REF_NAME#v}" 24 | ./scripts/tag_latest.sh 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .vscode 3 | .env 4 | .venv 5 | .swp 6 | dist 7 | ollama 8 | ggml-metal.metal 9 | .cache 10 | *.exe 11 | .idea 12 | test_data 13 | *.crt 14 | llm/build 15 | __debug_bin* -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "llama.cpp"] 2 | path = llm/llama.cpp 3 | url = https://github.com/ggerganov/llama.cpp.git 4 | shallow = true -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | run: 2 | timeout: 5m 3 | linters: 4 | enable: 5 | - asasalint 6 | - bidichk 7 | - bodyclose 8 | - containedctx 9 | - contextcheck 10 | - exportloopref 11 | - gocheckcompilerdirectives 12 | # FIXME: for some reason this errors on windows 13 | # - gofmt 14 | # - goimports 15 | - misspell 16 | - nilerr 17 | - unused 18 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "es5", 3 | "tabWidth": 2, 4 | "useTabs": false, 5 | "semi": false, 6 | "singleQuote": true, 7 | "jsxSingleQuote": true, 8 | "printWidth": 120, 9 | "arrowParens": "avoid" 10 | } 11 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Ollama 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /api/types_test.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "encoding/json" 5 | "math" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestKeepAliveParsingFromJSON(t *testing.T) { 14 | tests := []struct { 15 | name string 16 | req string 17 | exp *Duration 18 | }{ 19 | { 20 | name: "Positive Integer", 21 | req: `{ "keep_alive": 42 }`, 22 | exp: &Duration{42 * time.Second}, 23 | }, 24 | { 25 | name: "Positive Float", 26 | req: `{ "keep_alive": 42.5 }`, 27 | exp: &Duration{42 * time.Second}, 28 | }, 29 | { 30 | name: "Positive Integer String", 31 | req: `{ "keep_alive": "42m" }`, 32 | exp: &Duration{42 * time.Minute}, 33 | }, 34 | { 35 | name: "Negative Integer", 36 | req: `{ "keep_alive": -1 }`, 37 | exp: &Duration{math.MaxInt64}, 38 | }, 39 | { 40 | name: "Negative Float", 41 | req: `{ "keep_alive": -3.14 }`, 42 | exp: &Duration{math.MaxInt64}, 43 | }, 44 | { 45 | name: "Negative Integer String", 46 | req: `{ "keep_alive": "-1m" }`, 47 | exp: &Duration{math.MaxInt64}, 48 | }, 49 | } 50 | 51 | for _, test := range tests { 52 | t.Run(test.name, func(t *testing.T) { 53 | var dec ChatRequest 54 | err := json.Unmarshal([]byte(test.req), &dec) 55 | require.NoError(t, err) 56 | 57 | assert.Equal(t, test.exp, dec.KeepAlive) 58 | }) 59 | } 60 | } 61 | 62 | func TestDurationMarshalUnmarshal(t *testing.T) { 63 | tests := []struct { 64 | name string 65 | input time.Duration 66 | expected time.Duration 67 | }{ 68 | { 69 | "negative duration", 70 | time.Duration(-1), 71 | time.Duration(math.MaxInt64), 72 | }, 73 | { 74 | "positive duration", 75 | time.Duration(42 * time.Second), 76 | time.Duration(42 * time.Second), 77 | }, 78 | { 79 | "another positive duration", 80 | time.Duration(42 * time.Minute), 81 | time.Duration(42 * time.Minute), 82 | }, 83 | { 84 | "zero duration", 85 | time.Duration(0), 86 | time.Duration(0), 87 | }, 88 | { 89 | "max duration", 90 | time.Duration(math.MaxInt64), 91 | time.Duration(math.MaxInt64), 92 | }, 93 | } 94 | 95 | for _, test 
:= range tests { 96 | t.Run(test.name, func(t *testing.T) { 97 | b, err := json.Marshal(Duration{test.input}) 98 | require.NoError(t, err) 99 | 100 | var d Duration 101 | err = json.Unmarshal(b, &d) 102 | require.NoError(t, err) 103 | 104 | assert.Equal(t, test.expected, d.Duration, "input %v, marshalled %v, got %v", test.input, string(b), d.Duration) 105 | }) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /app/.gitignore: -------------------------------------------------------------------------------- 1 | ollama.syso 2 | -------------------------------------------------------------------------------- /app/README.md: -------------------------------------------------------------------------------- 1 | # Ollama App 2 | 3 | ## Linux 4 | 5 | TODO 6 | 7 | ## macOS 8 | 9 | TODO 10 | 11 | ## Windows 12 | 13 | If you want to build the installer, you'll need to install 14 | - https://jrsoftware.org/isinfo.php 15 | 16 | 17 | In the top directory of this repo, run the following PowerShell script 18 | to build the ollama CLI, ollama app, and ollama installer. 19 | 20 | ``` 21 | powershell -ExecutionPolicy Bypass -File .\scripts\build_windows.ps1 22 | ``` 23 | -------------------------------------------------------------------------------- /app/assets/app.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/app/assets/app.ico -------------------------------------------------------------------------------- /app/assets/assets.go: -------------------------------------------------------------------------------- 1 | package assets 2 | 3 | import ( 4 | "embed" 5 | "io/fs" 6 | ) 7 | 8 | //go:embed *.ico 9 | var icons embed.FS 10 | 11 | func ListIcons() ([]string, error) { 12 | return fs.Glob(icons, "*") 13 | } 14 | 15 | func GetIcon(filename string) ([]byte, error) { 16 | return icons.ReadFile(filename) 17 | } 18 | -------------------------------------------------------------------------------- /app/assets/setup.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/app/assets/setup.bmp -------------------------------------------------------------------------------- /app/assets/tray.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/app/assets/tray.ico -------------------------------------------------------------------------------- /app/assets/tray_upgrade.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/app/assets/tray_upgrade.ico -------------------------------------------------------------------------------- /app/lifecycle/getstarted_nonwindows.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package lifecycle 4 | 5 | import "fmt" 6 | 7 | func GetStarted() error { 8 | return fmt.Errorf("GetStarted not implemented") 9 | } 10 | -------------------------------------------------------------------------------- /app/lifecycle/getstarted_windows.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "os" 7 | 
"os/exec" 8 | "path/filepath" 9 | "syscall" 10 | ) 11 | 12 | func GetStarted() error { 13 | const CREATE_NEW_CONSOLE = 0x00000010 14 | var err error 15 | bannerScript := filepath.Join(AppDir, "ollama_welcome.ps1") 16 | args := []string{ 17 | // TODO once we're signed, the execution policy bypass should be removed 18 | "powershell", "-noexit", "-ExecutionPolicy", "Bypass", "-nologo", "-file", bannerScript, 19 | } 20 | args[0], err = exec.LookPath(args[0]) 21 | if err != nil { 22 | return err 23 | } 24 | 25 | // Make sure the script actually exists 26 | _, err = os.Stat(bannerScript) 27 | if err != nil { 28 | return fmt.Errorf("getting started banner script error %s", err) 29 | } 30 | 31 | slog.Info(fmt.Sprintf("opening getting started terminal with %v", args)) 32 | attrs := &os.ProcAttr{ 33 | Files: []*os.File{os.Stdin, os.Stdout, os.Stderr}, 34 | Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false}, 35 | } 36 | proc, err := os.StartProcess(args[0], args, attrs) 37 | 38 | if err != nil { 39 | return fmt.Errorf("unable to start getting started shell %w", err) 40 | } 41 | 42 | slog.Debug(fmt.Sprintf("getting started terminal PID: %d", proc.Pid)) 43 | return proc.Release() 44 | } 45 | -------------------------------------------------------------------------------- /app/lifecycle/lifecycle.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "log/slog" 8 | "os" 9 | "os/signal" 10 | "syscall" 11 | 12 | "github.com/ollama/ollama/app/store" 13 | "github.com/ollama/ollama/app/tray" 14 | ) 15 | 16 | func Run() { 17 | InitLogging() 18 | 19 | ctx, cancel := context.WithCancel(context.Background()) 20 | var done chan int 21 | 22 | t, err := tray.NewTray() 23 | if err != nil { 24 | log.Fatalf("Failed to start: %s", err) 25 | } 26 | callbacks := t.GetCallbacks() 27 | 28 | signals := make(chan os.Signal, 1) 29 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 30 | 31 | go func() { 32 | slog.Debug("starting callback loop") 33 | for { 34 | select { 35 | case <-callbacks.Quit: 36 | slog.Debug("quit called") 37 | t.Quit() 38 | case <-signals: 39 | slog.Debug("shutting down due to signal") 40 | t.Quit() 41 | case <-callbacks.Update: 42 | err := DoUpgrade(cancel, done) 43 | if err != nil { 44 | slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err)) 45 | } 46 | case <-callbacks.ShowLogs: 47 | ShowLogs() 48 | case <-callbacks.DoFirstUse: 49 | err := GetStarted() 50 | if err != nil { 51 | slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err)) 52 | } 53 | } 54 | } 55 | }() 56 | 57 | // Are we first use? 58 | if !store.GetFirstTimeRun() { 59 | slog.Debug("First time run") 60 | err = t.DisplayFirstUseNotification() 61 | if err != nil { 62 | slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err)) 63 | } 64 | store.SetFirstTimeRun(true) 65 | } else { 66 | slog.Debug("Not first time, skipping first run notification") 67 | } 68 | 69 | if IsServerRunning(ctx) { 70 | slog.Info("Detected another instance of ollama running, exiting") 71 | os.Exit(1) 72 | } else { 73 | done, err = SpawnServer(ctx, CLIName) 74 | if err != nil { 75 | // TODO - should we retry in a backoff loop? 76 | // TODO - should we pop up a warning and maybe add a menu item to view application logs? 
77 | slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err)) 78 | done = make(chan int, 1) 79 | done <- 1 80 | } 81 | } 82 | 83 | StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable) 84 | 85 | t.Run() 86 | cancel() 87 | slog.Info("Waiting for ollama server to shutdown...") 88 | if done != nil { 89 | <-done 90 | } 91 | slog.Info("Ollama app exiting") 92 | } 93 | -------------------------------------------------------------------------------- /app/lifecycle/logging.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "os" 7 | "path/filepath" 8 | 9 | "github.com/ollama/ollama/envconfig" 10 | ) 11 | 12 | func InitLogging() { 13 | level := slog.LevelInfo 14 | 15 | if envconfig.Debug { 16 | level = slog.LevelDebug 17 | } 18 | 19 | var logFile *os.File 20 | var err error 21 | // Detect if we're a GUI app on windows, and if not, send logs to console 22 | if os.Stderr.Fd() != 0 { 23 | // Console app detected 24 | logFile = os.Stderr 25 | // TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion 26 | } else { 27 | logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755) 28 | if err != nil { 29 | slog.Error(fmt.Sprintf("failed to create server log %v", err)) 30 | return 31 | } 32 | } 33 | handler := slog.NewTextHandler(logFile, &slog.HandlerOptions{ 34 | Level: level, 35 | AddSource: true, 36 | ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr { 37 | if attr.Key == slog.SourceKey { 38 | source := attr.Value.Any().(*slog.Source) 39 | source.File = filepath.Base(source.File) 40 | } 41 | return attr 42 | }, 43 | }) 44 | 45 | slog.SetDefault(slog.New(handler)) 46 | 47 | slog.Info("ollama app started") 48 | } 49 | -------------------------------------------------------------------------------- /app/lifecycle/logging_nonwindows.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package lifecycle 4 | 5 | import "log/slog" 6 | 7 | func ShowLogs() { 8 | slog.Warn("ShowLogs not yet implemented") 9 | } 10 | -------------------------------------------------------------------------------- /app/lifecycle/logging_windows.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "os/exec" 7 | "syscall" 8 | ) 9 | 10 | func ShowLogs() { 11 | cmd_path := "c:\\Windows\\system32\\cmd.exe" 12 | slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir)) 13 | cmd := exec.Command(cmd_path, "/c", "start", AppDataDir) 14 | cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000} 15 | err := cmd.Start() 16 | if err != nil { 17 | slog.Error(fmt.Sprintf("Failed to open log dir: %s", err)) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /app/lifecycle/paths.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "log/slog" 7 | "os" 8 | "path/filepath" 9 | "runtime" 10 | "strings" 11 | ) 12 | 13 | var ( 14 | AppName = "ollama app" 15 | CLIName = "ollama" 16 | AppDir = "/opt/Ollama" 17 | AppDataDir = "/opt/Ollama" 18 | // TODO - should there be a distinct log dir? 
19 | UpdateStageDir = "/tmp" 20 | AppLogFile = "/tmp/ollama_app.log" 21 | ServerLogFile = "/tmp/ollama.log" 22 | UpgradeLogFile = "/tmp/ollama_update.log" 23 | Installer = "OllamaSetup.exe" 24 | ) 25 | 26 | func init() { 27 | if runtime.GOOS == "windows" { 28 | AppName += ".exe" 29 | CLIName += ".exe" 30 | // Logs, configs, downloads go to LOCALAPPDATA 31 | localAppData := os.Getenv("LOCALAPPDATA") 32 | AppDataDir = filepath.Join(localAppData, "Ollama") 33 | UpdateStageDir = filepath.Join(AppDataDir, "updates") 34 | AppLogFile = filepath.Join(AppDataDir, "app.log") 35 | ServerLogFile = filepath.Join(AppDataDir, "server.log") 36 | UpgradeLogFile = filepath.Join(AppDataDir, "upgrade.log") 37 | 38 | // Executables are stored in APPDATA 39 | AppDir = filepath.Join(localAppData, "Programs", "Ollama") 40 | 41 | // Make sure we have PATH set correctly for any spawned children 42 | paths := strings.Split(os.Getenv("PATH"), ";") 43 | // Start with whatever we find in the PATH/LD_LIBRARY_PATH 44 | found := false 45 | for _, path := range paths { 46 | d, err := filepath.Abs(path) 47 | if err != nil { 48 | continue 49 | } 50 | if strings.EqualFold(AppDir, d) { 51 | found = true 52 | } 53 | } 54 | if !found { 55 | paths = append(paths, AppDir) 56 | 57 | pathVal := strings.Join(paths, ";") 58 | slog.Debug("setting PATH=" + pathVal) 59 | err := os.Setenv("PATH", pathVal) 60 | if err != nil { 61 | slog.Error(fmt.Sprintf("failed to update PATH: %s", err)) 62 | } 63 | } 64 | 65 | // Make sure our logging dir exists 66 | _, err := os.Stat(AppDataDir) 67 | if errors.Is(err, os.ErrNotExist) { 68 | if err := os.MkdirAll(AppDataDir, 0o755); err != nil { 69 | slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err)) 70 | } 71 | } 72 | 73 | } else if runtime.GOOS == "darwin" { 74 | // TODO 75 | AppName += ".app" 76 | // } else if runtime.GOOS == "linux" { 77 | // TODO 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /app/lifecycle/server_unix.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package lifecycle 4 | 5 | import ( 6 | "context" 7 | "errors" 8 | "fmt" 9 | "os" 10 | "os/exec" 11 | "syscall" 12 | ) 13 | 14 | func getCmd(ctx context.Context, cmd string) *exec.Cmd { 15 | return exec.CommandContext(ctx, cmd, "serve") 16 | } 17 | 18 | func terminate(cmd *exec.Cmd) error { 19 | return cmd.Process.Signal(os.Interrupt) 20 | } 21 | 22 | func isProcessExited(pid int) (bool, error) { 23 | proc, err := os.FindProcess(pid) 24 | if err != nil { 25 | return false, fmt.Errorf("failed to find process: %v", err) 26 | } 27 | 28 | err = proc.Signal(syscall.Signal(0)) 29 | if err != nil { 30 | if errors.Is(err, os.ErrProcessDone) || errors.Is(err, syscall.ESRCH) { 31 | return true, nil 32 | } 33 | 34 | return false, fmt.Errorf("error signaling process: %v", err) 35 | } 36 | 37 | return false, nil 38 | } 39 | -------------------------------------------------------------------------------- /app/lifecycle/server_windows.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os/exec" 7 | "syscall" 8 | 9 | "golang.org/x/sys/windows" 10 | ) 11 | 12 | func getCmd(ctx context.Context, exePath string) *exec.Cmd { 13 | cmd := exec.CommandContext(ctx, exePath, "serve") 14 | cmd.SysProcAttr = &syscall.SysProcAttr{ 15 | HideWindow: true, 16 | CreationFlags: windows.CREATE_NEW_PROCESS_GROUP, 17 | } 18 | 19 | return 
cmd 20 | } 21 | 22 | func terminate(cmd *exec.Cmd) error { 23 | dll, err := windows.LoadDLL("kernel32.dll") 24 | if err != nil { 25 | return err 26 | } 27 | defer dll.Release() // nolint: errcheck 28 | 29 | pid := cmd.Process.Pid 30 | 31 | f, err := dll.FindProc("AttachConsole") 32 | if err != nil { 33 | return err 34 | } 35 | 36 | r1, _, err := f.Call(uintptr(pid)) 37 | if r1 == 0 && err != syscall.ERROR_ACCESS_DENIED { 38 | return err 39 | } 40 | 41 | f, err = dll.FindProc("SetConsoleCtrlHandler") 42 | if err != nil { 43 | return err 44 | } 45 | 46 | r1, _, err = f.Call(0, 1) 47 | if r1 == 0 { 48 | return err 49 | } 50 | 51 | f, err = dll.FindProc("GenerateConsoleCtrlEvent") 52 | if err != nil { 53 | return err 54 | } 55 | 56 | r1, _, err = f.Call(windows.CTRL_BREAK_EVENT, uintptr(pid)) 57 | if r1 == 0 { 58 | return err 59 | } 60 | 61 | r1, _, err = f.Call(windows.CTRL_C_EVENT, uintptr(pid)) 62 | if r1 == 0 { 63 | return err 64 | } 65 | 66 | return nil 67 | } 68 | 69 | const STILL_ACTIVE = 259 70 | 71 | func isProcessExited(pid int) (bool, error) { 72 | hProcess, err := windows.OpenProcess(windows.PROCESS_QUERY_INFORMATION, false, uint32(pid)) 73 | if err != nil { 74 | return false, fmt.Errorf("failed to open process: %v", err) 75 | } 76 | defer windows.CloseHandle(hProcess) // nolint: errcheck 77 | 78 | var exitCode uint32 79 | err = windows.GetExitCodeProcess(hProcess, &exitCode) 80 | if err != nil { 81 | return false, fmt.Errorf("failed to get exit code: %v", err) 82 | } 83 | 84 | if exitCode == STILL_ACTIVE { 85 | return false, nil 86 | } 87 | 88 | return true, nil 89 | } 90 | -------------------------------------------------------------------------------- /app/lifecycle/updater_nonwindows.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package lifecycle 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | ) 9 | 10 | func DoUpgrade(cancel context.CancelFunc, done chan int) error { 11 | return fmt.Errorf("DoUpgrade not yet implemented") 12 | } 13 | -------------------------------------------------------------------------------- /app/lifecycle/updater_windows.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log/slog" 7 | "os" 8 | "os/exec" 9 | "path/filepath" 10 | ) 11 | 12 | func DoUpgrade(cancel context.CancelFunc, done chan int) error { 13 | files, err := filepath.Glob(filepath.Join(UpdateStageDir, "*", "*.exe")) // TODO generalize for multiplatform 14 | if err != nil { 15 | return fmt.Errorf("failed to lookup downloads: %s", err) 16 | } 17 | if len(files) == 0 { 18 | return fmt.Errorf("no update downloads found") 19 | } else if len(files) > 1 { 20 | // Shouldn't happen 21 | slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files)) 22 | } 23 | installerExe := files[0] 24 | 25 | slog.Info("starting upgrade with " + installerExe) 26 | slog.Info("upgrade log file " + UpgradeLogFile) 27 | 28 | // When running in debug mode, we'll be "verbose" and let the installer pop up and prompt 29 | installArgs := []string{ 30 | "/CLOSEAPPLICATIONS", // Quit the tray app if it's still running 31 | "/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd 32 | "/FORCECLOSEAPPLICATIONS", // Force close the tray app - might be needed 33 | } 34 | // make the upgrade as quiet as possible (no GUI, no prompts) 35 | installArgs = append(installArgs, 36 | "/SP", // Skip the "This will install... 
Do you wish to continue" prompt 37 | "/SUPPRESSMSGBOXES", 38 | "/SILENT", 39 | "/VERYSILENT", 40 | ) 41 | 42 | // Safeguard in case we have requests in flight that need to drain... 43 | slog.Info("Waiting for server to shutdown") 44 | cancel() 45 | if done != nil { 46 | <-done 47 | } else { 48 | // Shouldn't happen 49 | slog.Warn("done chan was nil, not actually waiting") 50 | } 51 | 52 | slog.Debug(fmt.Sprintf("starting installer: %s %v", installerExe, installArgs)) 53 | os.Chdir(filepath.Dir(UpgradeLogFile)) //nolint:errcheck 54 | cmd := exec.Command(installerExe, installArgs...) 55 | 56 | if err := cmd.Start(); err != nil { 57 | return fmt.Errorf("unable to start ollama app %w", err) 58 | } 59 | 60 | if cmd.Process != nil { 61 | err = cmd.Process.Release() 62 | if err != nil { 63 | slog.Error(fmt.Sprintf("failed to release server process: %s", err)) 64 | } 65 | } else { 66 | // TODO - some details about why it didn't start, or is this a pedantic error case? 67 | return fmt.Errorf("installer process did not start") 68 | } 69 | 70 | // TODO should we linger for a moment and check to make sure it's actually running by checking the pid? 71 | 72 | slog.Info("Installer started in background, exiting") 73 | 74 | os.Exit(0) 75 | // Not reached 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /app/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // Compile with the following to get rid of the cmd pop up on windows 4 | // go build -ldflags="-H windowsgui" . 5 | 6 | import ( 7 | "github.com/ollama/ollama/app/lifecycle" 8 | ) 9 | 10 | func main() { 11 | lifecycle.Run() 12 | } 13 | -------------------------------------------------------------------------------- /app/ollama.rc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | VS_VERSION_INFO VERSIONINFO 4 | FILEFLAGSMASK 0x3fL 5 | #ifdef _DEBUG 6 | FILEFLAGS 0x1L 7 | #else 8 | FILEFLAGS 0x0L 9 | #endif 10 | FILEOS 0x40004L 11 | FILETYPE 0x1L 12 | FILESUBTYPE 0x0L 13 | BEGIN 14 | BLOCK "StringFileInfo" 15 | BEGIN 16 | BLOCK "040904b0" 17 | BEGIN 18 | VALUE "FileDescription", "Ollama" 19 | VALUE "InternalName", "Ollama" 20 | VALUE "OriginalFilename", "ollama app.exe" 21 | VALUE "ProductName", "Ollama" 22 | END 23 | END 24 | 25 | BLOCK "VarFileInfo" 26 | BEGIN 27 | VALUE "Translation", 0x409, 1200 28 | END 29 | END 30 | -------------------------------------------------------------------------------- /app/ollama_welcome.ps1: -------------------------------------------------------------------------------- 1 | # TODO - consider ANSI colors and maybe ASCII art... 2 | write-host "" 3 | write-host "Welcome to Ollama!" 
4 | write-host "" 5 | write-host "Run your first model:" 6 | write-host "" 7 | write-host "`tollama run llama3" 8 | write-host "" -------------------------------------------------------------------------------- /app/store/store.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "log/slog" 8 | "os" 9 | "path/filepath" 10 | "sync" 11 | 12 | "github.com/google/uuid" 13 | ) 14 | 15 | type Store struct { 16 | ID string `json:"id"` 17 | FirstTimeRun bool `json:"first-time-run"` 18 | } 19 | 20 | var ( 21 | lock sync.Mutex 22 | store Store 23 | ) 24 | 25 | func GetID() string { 26 | lock.Lock() 27 | defer lock.Unlock() 28 | if store.ID == "" { 29 | initStore() 30 | } 31 | return store.ID 32 | 33 | } 34 | 35 | func GetFirstTimeRun() bool { 36 | lock.Lock() 37 | defer lock.Unlock() 38 | if store.ID == "" { 39 | initStore() 40 | } 41 | return store.FirstTimeRun 42 | } 43 | 44 | func SetFirstTimeRun(val bool) { 45 | lock.Lock() 46 | defer lock.Unlock() 47 | if store.FirstTimeRun == val { 48 | return 49 | } 50 | store.FirstTimeRun = val 51 | writeStore(getStorePath()) 52 | } 53 | 54 | // lock must be held 55 | func initStore() { 56 | storeFile, err := os.Open(getStorePath()) 57 | if err == nil { 58 | defer storeFile.Close() 59 | err = json.NewDecoder(storeFile).Decode(&store) 60 | if err == nil { 61 | slog.Debug(fmt.Sprintf("loaded existing store %s - ID: %s", getStorePath(), store.ID)) 62 | return 63 | } 64 | } else if !errors.Is(err, os.ErrNotExist) { 65 | slog.Debug(fmt.Sprintf("unexpected error searching for store: %s", err)) 66 | } 67 | slog.Debug("initializing new store") 68 | store.ID = uuid.New().String() 69 | writeStore(getStorePath()) 70 | } 71 | 72 | func writeStore(storeFilename string) { 73 | ollamaDir := filepath.Dir(storeFilename) 74 | _, err := os.Stat(ollamaDir) 75 | if errors.Is(err, os.ErrNotExist) { 76 | if err := os.MkdirAll(ollamaDir, 0o755); err != nil { 77 | slog.Error(fmt.Sprintf("create ollama dir %s: %v", ollamaDir, err)) 78 | return 79 | } 80 | } 81 | payload, err := json.Marshal(store) 82 | if err != nil { 83 | slog.Error(fmt.Sprintf("failed to marshal store: %s", err)) 84 | return 85 | } 86 | fp, err := os.OpenFile(storeFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) 87 | if err != nil { 88 | slog.Error(fmt.Sprintf("write store payload %s: %v", storeFilename, err)) 89 | return 90 | } 91 | defer fp.Close() 92 | if n, err := fp.Write(payload); err != nil || n != len(payload) { 93 | slog.Error(fmt.Sprintf("write store payload %s: %d vs %d -- %v", storeFilename, n, len(payload), err)) 94 | return 95 | } 96 | slog.Debug("Store contents: " + string(payload)) 97 | slog.Info(fmt.Sprintf("wrote store: %s", storeFilename)) 98 | } 99 | -------------------------------------------------------------------------------- /app/store/store_darwin.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | ) 7 | 8 | func getStorePath() string { 9 | // TODO - system wide location? 
10 | 11 | home := os.Getenv("HOME") 12 | return filepath.Join(home, "Library", "Application Support", "Ollama", "config.json") 13 | } 14 | -------------------------------------------------------------------------------- /app/store/store_linux.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | ) 7 | 8 | func getStorePath() string { 9 | if os.Geteuid() == 0 { 10 | // TODO where should we store this on linux for system-wide operation? 11 | return "/etc/ollama/config.json" 12 | } 13 | 14 | home := os.Getenv("HOME") 15 | return filepath.Join(home, ".ollama", "config.json") 16 | } 17 | -------------------------------------------------------------------------------- /app/store/store_windows.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | ) 7 | 8 | func getStorePath() string { 9 | localAppData := os.Getenv("LOCALAPPDATA") 10 | return filepath.Join(localAppData, "Ollama", "config.json") 11 | } 12 | -------------------------------------------------------------------------------- /app/tray/commontray/types.go: -------------------------------------------------------------------------------- 1 | package commontray 2 | 3 | var ( 4 | Title = "Ollama" 5 | ToolTip = "Ollama" 6 | 7 | UpdateIconName = "tray_upgrade" 8 | IconName = "tray" 9 | ) 10 | 11 | type Callbacks struct { 12 | Quit chan struct{} 13 | Update chan struct{} 14 | DoFirstUse chan struct{} 15 | ShowLogs chan struct{} 16 | } 17 | 18 | type OllamaTray interface { 19 | GetCallbacks() Callbacks 20 | Run() 21 | UpdateAvailable(ver string) error 22 | DisplayFirstUseNotification() error 23 | Quit() 24 | } 25 | -------------------------------------------------------------------------------- /app/tray/tray.go: -------------------------------------------------------------------------------- 1 | package tray 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | 7 | "github.com/ollama/ollama/app/assets" 8 | "github.com/ollama/ollama/app/tray/commontray" 9 | ) 10 | 11 | func NewTray() (commontray.OllamaTray, error) { 12 | extension := ".png" 13 | if runtime.GOOS == "windows" { 14 | extension = ".ico" 15 | } 16 | iconName := commontray.UpdateIconName + extension 17 | updateIcon, err := assets.GetIcon(iconName) 18 | if err != nil { 19 | return nil, fmt.Errorf("failed to load icon %s: %w", iconName, err) 20 | } 21 | iconName = commontray.IconName + extension 22 | icon, err := assets.GetIcon(iconName) 23 | if err != nil { 24 | return nil, fmt.Errorf("failed to load icon %s: %w", iconName, err) 25 | } 26 | 27 | return InitPlatformTray(icon, updateIcon) 28 | } 29 | -------------------------------------------------------------------------------- /app/tray/tray_nonwindows.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package tray 4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/ollama/ollama/app/tray/commontray" 9 | ) 10 | 11 | func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { 12 | return nil, fmt.Errorf("NOT IMPLEMENTED YET") 13 | } 14 | -------------------------------------------------------------------------------- /app/tray/tray_windows.go: -------------------------------------------------------------------------------- 1 | package tray 2 | 3 | import ( 4 | "github.com/ollama/ollama/app/tray/commontray" 5 | "github.com/ollama/ollama/app/tray/wintray" 6 | ) 7 | 8 | 
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { 9 | return wintray.InitTray(icon, updateIcon) 10 | } 11 | -------------------------------------------------------------------------------- /app/tray/wintray/menus.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package wintray 4 | 5 | import ( 6 | "fmt" 7 | "log/slog" 8 | "unsafe" 9 | 10 | "golang.org/x/sys/windows" 11 | ) 12 | 13 | const ( 14 | updatAvailableMenuID = 1 15 | updateMenuID = updatAvailableMenuID + 1 16 | separatorMenuID = updateMenuID + 1 17 | diagLogsMenuID = separatorMenuID + 1 18 | diagSeparatorMenuID = diagLogsMenuID + 1 19 | quitMenuID = diagSeparatorMenuID + 1 20 | ) 21 | 22 | func (t *winTray) initMenus() error { 23 | if err := t.addOrUpdateMenuItem(diagLogsMenuID, 0, diagLogsMenuTitle, false); err != nil { 24 | return fmt.Errorf("unable to create menu entries %w\n", err) 25 | } 26 | if err := t.addSeparatorMenuItem(diagSeparatorMenuID, 0); err != nil { 27 | return fmt.Errorf("unable to create menu entries %w", err) 28 | } 29 | if err := t.addOrUpdateMenuItem(quitMenuID, 0, quitMenuTitle, false); err != nil { 30 | return fmt.Errorf("unable to create menu entries %w\n", err) 31 | } 32 | return nil 33 | } 34 | 35 | func (t *winTray) UpdateAvailable(ver string) error { 36 | if !t.updateNotified { 37 | slog.Debug("updating menu and sending notification for new update") 38 | if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil { 39 | return fmt.Errorf("unable to create menu entries %w", err) 40 | } 41 | if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil { 42 | return fmt.Errorf("unable to create menu entries %w", err) 43 | } 44 | if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil { 45 | return fmt.Errorf("unable to create menu entries %w", err) 46 | } 47 | iconFilePath, err := iconBytesToFilePath(wt.updateIcon) 48 | if err != nil { 49 | return fmt.Errorf("unable to write icon data to temp file: %w", err) 50 | } 51 | if err := wt.setIcon(iconFilePath); err != nil { 52 | return fmt.Errorf("unable to set icon: %w", err) 53 | } 54 | t.updateNotified = true 55 | 56 | t.pendingUpdate = true 57 | // Now pop up the notification 58 | t.muNID.Lock() 59 | defer t.muNID.Unlock() 60 | copy(t.nid.InfoTitle[:], windows.StringToUTF16(updateTitle)) 61 | copy(t.nid.Info[:], windows.StringToUTF16(fmt.Sprintf(updateMessage, ver))) 62 | t.nid.Flags |= NIF_INFO 63 | t.nid.Timeout = 10 64 | t.nid.Size = uint32(unsafe.Sizeof(*wt.nid)) 65 | err = t.nid.modify() 66 | if err != nil { 67 | return err 68 | } 69 | } 70 | return nil 71 | } 72 | -------------------------------------------------------------------------------- /app/tray/wintray/messages.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package wintray 4 | 5 | const ( 6 | firstTimeTitle = "Ollama is running" 7 | firstTimeMessage = "Click here to get started" 8 | updateTitle = "Update available" 9 | updateMessage = "Ollama version %s is ready to install" 10 | 11 | quitMenuTitle = "Quit Ollama" 12 | updateAvailableMenuTitle = "An update is available" 13 | updateMenutTitle = "Restart to update" 14 | diagLogsMenuTitle = "View logs" 15 | ) 16 | -------------------------------------------------------------------------------- /app/tray/wintray/notifyicon.go: -------------------------------------------------------------------------------- 1 | 
//go:build windows 2 | 3 | package wintray 4 | 5 | import ( 6 | "unsafe" 7 | 8 | "golang.org/x/sys/windows" 9 | ) 10 | 11 | // Contains information that the system needs to display notifications in the notification area. 12 | // Used by Shell_NotifyIcon. 13 | // https://msdn.microsoft.com/en-us/library/windows/desktop/bb773352(v=vs.85).aspx 14 | // https://msdn.microsoft.com/en-us/library/windows/desktop/bb762159 15 | type notifyIconData struct { 16 | Size uint32 17 | Wnd windows.Handle 18 | ID, Flags, CallbackMessage uint32 19 | Icon windows.Handle 20 | Tip [128]uint16 21 | State, StateMask uint32 22 | Info [256]uint16 23 | // Timeout, Version uint32 24 | Timeout uint32 25 | 26 | InfoTitle [64]uint16 27 | InfoFlags uint32 28 | GuidItem windows.GUID 29 | BalloonIcon windows.Handle 30 | } 31 | 32 | func (nid *notifyIconData) add() error { 33 | const NIM_ADD = 0x00000000 34 | res, _, err := pShellNotifyIcon.Call( 35 | uintptr(NIM_ADD), 36 | uintptr(unsafe.Pointer(nid)), 37 | ) 38 | if res == 0 { 39 | return err 40 | } 41 | return nil 42 | } 43 | 44 | func (nid *notifyIconData) modify() error { 45 | const NIM_MODIFY = 0x00000001 46 | res, _, err := pShellNotifyIcon.Call( 47 | uintptr(NIM_MODIFY), 48 | uintptr(unsafe.Pointer(nid)), 49 | ) 50 | if res == 0 { 51 | return err 52 | } 53 | return nil 54 | } 55 | 56 | func (nid *notifyIconData) delete() error { 57 | const NIM_DELETE = 0x00000002 58 | res, _, err := pShellNotifyIcon.Call( 59 | uintptr(NIM_DELETE), 60 | uintptr(unsafe.Pointer(nid)), 61 | ) 62 | if res == 0 { 63 | return err 64 | } 65 | return nil 66 | } 67 | -------------------------------------------------------------------------------- /app/tray/wintray/winclass.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package wintray 4 | 5 | import ( 6 | "unsafe" 7 | 8 | "golang.org/x/sys/windows" 9 | ) 10 | 11 | // Contains window class information. 12 | // It is used with the RegisterClassEx and GetClassInfoEx functions. 13 | // https://msdn.microsoft.com/en-us/library/ms633577.aspx 14 | type wndClassEx struct { 15 | Size, Style uint32 16 | WndProc uintptr 17 | ClsExtra, WndExtra int32 18 | Instance, Icon, Cursor, Background windows.Handle 19 | MenuName, ClassName *uint16 20 | IconSm windows.Handle 21 | } 22 | 23 | // Registers a window class for subsequent use in calls to the CreateWindow or CreateWindowEx function. 24 | // https://msdn.microsoft.com/en-us/library/ms633587.aspx 25 | func (w *wndClassEx) register() error { 26 | w.Size = uint32(unsafe.Sizeof(*w)) 27 | res, _, err := pRegisterClass.Call(uintptr(unsafe.Pointer(w))) 28 | if res == 0 { 29 | return err 30 | } 31 | return nil 32 | } 33 | 34 | // Unregisters a window class, freeing the memory required for the class. 
35 | // https://msdn.microsoft.com/en-us/library/ms644899.aspx 36 | func (w *wndClassEx) unregister() error { 37 | res, _, err := pUnregisterClass.Call( 38 | uintptr(unsafe.Pointer(w.ClassName)), 39 | uintptr(w.Instance), 40 | ) 41 | if res == 0 { 42 | return err 43 | } 44 | return nil 45 | } 46 | -------------------------------------------------------------------------------- /auth/auth.go: -------------------------------------------------------------------------------- 1 | package auth 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "crypto/rand" 7 | "encoding/base64" 8 | "fmt" 9 | "io" 10 | "log/slog" 11 | "os" 12 | "path/filepath" 13 | "strings" 14 | 15 | "golang.org/x/crypto/ssh" 16 | ) 17 | 18 | const defaultPrivateKey = "id_ed25519" 19 | 20 | func keyPath() (string, error) { 21 | home, err := os.UserHomeDir() 22 | if err != nil { 23 | return "", err 24 | } 25 | 26 | return filepath.Join(home, ".ollama", defaultPrivateKey), nil 27 | } 28 | 29 | func GetPublicKey() (string, error) { 30 | keyPath, err := keyPath() 31 | if err != nil { 32 | return "", err 33 | } 34 | 35 | privateKeyFile, err := os.ReadFile(keyPath) 36 | if err != nil { 37 | slog.Info(fmt.Sprintf("Failed to load private key: %v", err)) 38 | return "", err 39 | } 40 | 41 | privateKey, err := ssh.ParsePrivateKey(privateKeyFile) 42 | if err != nil { 43 | return "", err 44 | } 45 | 46 | publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey()) 47 | 48 | return strings.TrimSpace(string(publicKey)), nil 49 | } 50 | 51 | func NewNonce(r io.Reader, length int) (string, error) { 52 | nonce := make([]byte, length) 53 | if _, err := io.ReadFull(r, nonce); err != nil { 54 | return "", err 55 | } 56 | 57 | return base64.RawURLEncoding.EncodeToString(nonce), nil 58 | } 59 | 60 | func Sign(ctx context.Context, bts []byte) (string, error) { 61 | keyPath, err := keyPath() 62 | if err != nil { 63 | return "", err 64 | } 65 | 66 | privateKeyFile, err := os.ReadFile(keyPath) 67 | if err != nil { 68 | slog.Info(fmt.Sprintf("Failed to load private key: %v", err)) 69 | return "", err 70 | } 71 | 72 | privateKey, err := ssh.ParsePrivateKey(privateKeyFile) 73 | if err != nil { 74 | return "", err 75 | } 76 | 77 | // get the pubkey, but remove the type 78 | publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey()) 79 | parts := bytes.Split(publicKey, []byte(" ")) 80 | if len(parts) < 2 { 81 | return "", fmt.Errorf("malformed public key") 82 | } 83 | 84 | signedData, err := privateKey.Sign(rand.Reader, bts) 85 | if err != nil { 86 | return "", err 87 | } 88 | 89 | // signature is : 90 | return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil 91 | } 92 | -------------------------------------------------------------------------------- /cmd/start_darwin.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "os/exec" 8 | "strings" 9 | 10 | "github.com/ollama/ollama/api" 11 | ) 12 | 13 | func startApp(ctx context.Context, client *api.Client) error { 14 | exe, err := os.Executable() 15 | if err != nil { 16 | return err 17 | } 18 | link, err := os.Readlink(exe) 19 | if err != nil { 20 | return err 21 | } 22 | if !strings.Contains(link, "Ollama.app") { 23 | return fmt.Errorf("could not find ollama app") 24 | } 25 | path := strings.Split(link, "Ollama.app") 26 | if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil { 27 | return err 28 | } 29 | return 
waitForServer(ctx, client) 30 | } 31 | -------------------------------------------------------------------------------- /cmd/start_default.go: -------------------------------------------------------------------------------- 1 | //go:build !windows && !darwin 2 | 3 | package cmd 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | 9 | "github.com/ollama/ollama/api" 10 | ) 11 | 12 | func startApp(ctx context.Context, client *api.Client) error { 13 | return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it") 14 | } 15 | -------------------------------------------------------------------------------- /cmd/start_windows.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "os" 8 | "os/exec" 9 | "path/filepath" 10 | "strings" 11 | "syscall" 12 | 13 | "github.com/ollama/ollama/api" 14 | ) 15 | 16 | func startApp(ctx context.Context, client *api.Client) error { 17 | // log.Printf("XXX Attempting to find and start ollama app") 18 | AppName := "ollama app.exe" 19 | exe, err := os.Executable() 20 | if err != nil { 21 | return err 22 | } 23 | appExe := filepath.Join(filepath.Dir(exe), AppName) 24 | _, err = os.Stat(appExe) 25 | if errors.Is(err, os.ErrNotExist) { 26 | // Try the standard install location 27 | localAppData := os.Getenv("LOCALAPPDATA") 28 | appExe = filepath.Join(localAppData, "Ollama", AppName) 29 | _, err := os.Stat(appExe) 30 | if errors.Is(err, os.ErrNotExist) { 31 | // Finally look in the path 32 | appExe, err = exec.LookPath(AppName) 33 | if err != nil { 34 | return fmt.Errorf("could not locate ollama app") 35 | } 36 | } 37 | } 38 | // log.Printf("XXX attempting to start app %s", appExe) 39 | 40 | cmd_path := "c:\\Windows\\system32\\cmd.exe" 41 | cmd := exec.Command(cmd_path, "/c", appExe) 42 | // TODO - these hide flags aren't working - still pops up a command window for some reason 43 | cmd.SysProcAttr = &syscall.SysProcAttr{CreationFlags: 0x08000000, HideWindow: true} 44 | 45 | // TODO this didn't help either... 
46 | cmd.Stdin = strings.NewReader("") 47 | cmd.Stdout = os.Stdout 48 | cmd.Stderr = os.Stderr 49 | 50 | if err := cmd.Start(); err != nil { 51 | return fmt.Errorf("unable to start ollama app %w", err) 52 | } 53 | 54 | if cmd.Process != nil { 55 | defer cmd.Process.Release() //nolint:errcheck 56 | } 57 | return waitForServer(ctx, client) 58 | } 59 | -------------------------------------------------------------------------------- /convert/convert_test.go: -------------------------------------------------------------------------------- 1 | //go:build slow 2 | 3 | package convert 4 | 5 | import ( 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | 10 | "github.com/ollama/ollama/llm" 11 | ) 12 | 13 | func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) { 14 | t.Helper() 15 | 16 | mf, err := GetModelFormat(p) 17 | if err != nil { 18 | t.Fatal(err) 19 | } 20 | 21 | params, err := mf.GetParams(p) 22 | if err != nil { 23 | t.Fatal(err) 24 | } 25 | 26 | arch, err := mf.GetModelArch("", p, params) 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | 31 | if err := arch.LoadVocab(); err != nil { 32 | t.Fatal(err) 33 | } 34 | 35 | if err := arch.GetTensors(); err != nil { 36 | t.Fatal(err) 37 | } 38 | 39 | f, err := os.CreateTemp(t.TempDir(), "f16") 40 | if err != nil { 41 | t.Fatal(err) 42 | } 43 | defer f.Close() 44 | 45 | if err := arch.WriteGGUF(f); err != nil { 46 | t.Fatal(err) 47 | } 48 | 49 | r, err := os.Open(f.Name()) 50 | if err != nil { 51 | t.Fatal(err) 52 | } 53 | defer r.Close() 54 | 55 | m, _, err := llm.DecodeGGML(r) 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | 60 | return m.KV(), m.Tensors() 61 | } 62 | 63 | func TestConvertFull(t *testing.T) { 64 | cases := []struct { 65 | path string 66 | arch string 67 | tensors int 68 | layers int 69 | }{ 70 | {"Meta-Llama-3-8B-Instruct", "llama", 291, 35}, 71 | {"Mistral-7B-Instruct-v0.2", "llama", 291, 35}, 72 | {"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35}, 73 | {"gemma-2b-it", "gemma", 164, 20}, 74 | } 75 | 76 | for _, tt := range cases { 77 | t.Run(tt.path, func(t *testing.T) { 78 | p := filepath.Join("testdata", tt.path) 79 | if _, err := os.Stat(p); err != nil { 80 | t.Skipf("%s not found", p) 81 | } 82 | 83 | kv, tensors := convertFull(t, p) 84 | 85 | if kv.Architecture() != tt.arch { 86 | t.Fatalf("expected llama, got %s", kv.Architecture()) 87 | } 88 | 89 | if kv.FileType().String() != "F16" { 90 | t.Fatalf("expected F16, got %s", kv.FileType()) 91 | } 92 | 93 | if len(tensors) != tt.tensors { 94 | t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors)) 95 | } 96 | 97 | layers := tensors.Layers() 98 | if len(layers) != tt.layers { 99 | t.Fatalf("expected %d layers, got %d", tt.layers, len(layers)) 100 | } 101 | }) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /convert/mistral.go: -------------------------------------------------------------------------------- 1 | package convert 2 | 3 | import ( 4 | "io" 5 | "regexp" 6 | 7 | "github.com/ollama/ollama/llm" 8 | ) 9 | 10 | type MistralModel struct { 11 | ModelData 12 | } 13 | 14 | func (m *MistralModel) GetTensors() error { 15 | t, err := m.Format.GetTensors(m.Path, m.Params) 16 | if err != nil { 17 | return err 18 | } 19 | 20 | pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` 21 | re, err := regexp.Compile(pattern) 22 | if err != nil { 23 | return err 24 | } 25 | 26 | for _, l := range t { 27 | matches := re.FindAllStringSubmatch(l.Name, -1) 28 | if len(matches) > 0 { 29 | wt := l.WriterTo.(safetensorWriterTo) 
30 | wt.repacker = m.Repack 31 | l.WriterTo = wt 32 | } 33 | m.Tensors = append(m.Tensors, l) 34 | } 35 | 36 | return nil 37 | } 38 | 39 | func (m *MistralModel) LoadVocab() error { 40 | v, err := LoadSentencePieceTokens(m.Path, m.Params) 41 | if err != nil { 42 | return err 43 | } 44 | m.Vocab = v 45 | return nil 46 | } 47 | 48 | func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error { 49 | kv := llm.KV{ 50 | "general.architecture": "llama", 51 | "general.name": m.Name, 52 | "llama.context_length": uint32(m.Params.ContextSize), 53 | "llama.embedding_length": uint32(m.Params.HiddenSize), 54 | "llama.block_count": uint32(m.Params.HiddenLayers), 55 | "llama.feed_forward_length": uint32(m.Params.IntermediateSize), 56 | "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), 57 | "llama.attention.head_count": uint32(m.Params.AttentionHeads), 58 | "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), 59 | "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), 60 | "general.file_type": uint32(1), 61 | "tokenizer.ggml.model": "llama", 62 | 63 | "tokenizer.ggml.tokens": m.Vocab.Tokens, 64 | "tokenizer.ggml.scores": m.Vocab.Scores, 65 | "tokenizer.ggml.token_type": m.Vocab.Types, 66 | 67 | "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), 68 | "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), 69 | "tokenizer.ggml.add_bos_token": true, 70 | "tokenizer.ggml.add_eos_token": false, 71 | "tokenizer.ggml.unknown_token_id": uint32(0), 72 | } 73 | 74 | return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) 75 | } 76 | 77 | func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { 78 | return llamaRepack(name, m.Params, data, shape) 79 | } 80 | -------------------------------------------------------------------------------- /convert/mixtral.go: -------------------------------------------------------------------------------- 1 | package convert 2 | 3 | import ( 4 | "io" 5 | "regexp" 6 | 7 | "github.com/ollama/ollama/llm" 8 | ) 9 | 10 | type MixtralModel struct { 11 | ModelData 12 | } 13 | 14 | func (m *MixtralModel) GetTensors() error { 15 | t, err := m.Format.GetTensors(m.Path, m.Params) 16 | if err != nil { 17 | return err 18 | } 19 | 20 | pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` 21 | re, err := regexp.Compile(pattern) 22 | if err != nil { 23 | return err 24 | } 25 | 26 | for _, l := range t { 27 | matches := re.FindAllStringSubmatch(l.Name, -1) 28 | if len(matches) > 0 { 29 | wt := l.WriterTo.(safetensorWriterTo) 30 | wt.repacker = m.Repack 31 | l.WriterTo = wt 32 | } 33 | m.Tensors = append(m.Tensors, l) 34 | } 35 | 36 | return nil 37 | } 38 | 39 | func (m *MixtralModel) LoadVocab() error { 40 | v, err := LoadSentencePieceTokens(m.Path, m.Params) 41 | if err != nil { 42 | return err 43 | } 44 | m.Vocab = v 45 | return nil 46 | } 47 | 48 | func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error { 49 | kv := llm.KV{ 50 | "general.architecture": "llama", 51 | "general.name": m.Name, 52 | "llama.block_count": uint32(m.Params.HiddenLayers), 53 | "llama.context_length": uint32(m.Params.ContextSize), 54 | "llama.embedding_length": uint32(m.Params.HiddenSize), 55 | "llama.feed_forward_length": uint32(m.Params.IntermediateSize), 56 | "llama.attention.head_count": uint32(m.Params.AttentionHeads), 57 | "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), 58 | 59 | "llama.rope.freq_base": float32(m.Params.RopeFrequencyBase), 60 | "llama.attention.layer_norm_rms_epsilon": 
float32(m.Params.NormEPS), 61 | 62 | "llama.expert_count": uint32(m.Params.Experts), 63 | "llama.expert_used_count": uint32(m.Params.ExpertsUsed), 64 | 65 | "llama.vocab_size": uint32(len(m.Vocab.Tokens)), 66 | "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), 67 | 68 | "general.file_type": uint32(1), 69 | "tokenizer.ggml.model": "llama", 70 | 71 | "tokenizer.ggml.tokens": m.Vocab.Tokens, 72 | "tokenizer.ggml.scores": m.Vocab.Scores, 73 | "tokenizer.ggml.token_type": m.Vocab.Types, 74 | 75 | "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), 76 | "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), 77 | "tokenizer.ggml.unknown_token_id": uint32(0), 78 | "tokenizer.ggml.add_bos_token": true, 79 | "tokenizer.ggml.add_eos_token": false, 80 | } 81 | 82 | return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) 83 | } 84 | 85 | func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { 86 | return llamaRepack(name, m.Params, data, shape) 87 | } 88 | -------------------------------------------------------------------------------- /convert/tokenizer.go: -------------------------------------------------------------------------------- 1 | package convert 2 | 3 | import ( 4 | "cmp" 5 | "crypto/sha256" 6 | "encoding/json" 7 | "fmt" 8 | "log/slog" 9 | "os" 10 | "slices" 11 | 12 | "golang.org/x/exp/maps" 13 | ) 14 | 15 | type Tokenizer struct { 16 | Version string `json:"version"` 17 | AddedTokens []Token `json:"added_tokens"` 18 | Model TokenizerModel `json:"model"` 19 | 20 | PreTokenizer struct { 21 | PreTokenizers []struct { 22 | Type string `json:"type"` 23 | Pattern struct { 24 | Regex string `json:"Regex"` 25 | } `json:"pattern"` 26 | } `json:"pretokenizers"` 27 | } `json:"pre_tokenizer"` 28 | } 29 | 30 | type TokenizerModel struct { 31 | Type string `json:"type"` 32 | Vocab map[string]int `json:"vocab"` 33 | Merges []string `json:"merges"` 34 | Tokens []Token 35 | } 36 | 37 | type Token struct { 38 | ID int `json:"id"` 39 | Content string `json:"content"` 40 | Special bool `json:"special"` 41 | UserDefined bool 42 | } 43 | 44 | func (t *Token) Type() int32 { 45 | switch { 46 | case t.Special: 47 | return tokenTypeControl 48 | case t.UserDefined: 49 | return tokenTypeUserDefined 50 | default: 51 | return tokenTypeNormal 52 | } 53 | } 54 | 55 | func (t *Tokenizer) maxID() int { 56 | return max( 57 | slices.Max(maps.Values(t.Model.Vocab)), 58 | slices.MaxFunc(t.AddedTokens, func(a, b Token) int { 59 | return cmp.Compare(a.ID, b.ID) 60 | }).ID, 61 | ) 62 | } 63 | 64 | func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) { 65 | f, err := os.Open(dirpath) 66 | if err != nil { 67 | panic(err) 68 | } 69 | defer f.Close() 70 | 71 | var t Tokenizer 72 | if err := json.NewDecoder(f).Decode(&t); err != nil { 73 | return "", nil, nil, err 74 | } 75 | 76 | tokens = make([]Token, t.maxID()+1) 77 | for k, v := range t.Model.Vocab { 78 | tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false} 79 | } 80 | 81 | for _, v := range t.AddedTokens { 82 | v.UserDefined = true 83 | tokens[v.ID] = v 84 | } 85 | 86 | sha256sum := sha256.New() 87 | for _, pt := range t.PreTokenizer.PreTokenizers { 88 | switch pt.Type { 89 | case "Split": 90 | if pt.Pattern.Regex != "" { 91 | sha256sum.Write([]byte(pt.Pattern.Regex)) 92 | } 93 | } 94 | } 95 | 96 | switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest { 97 | case 
"d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f": 98 | pre = "llama-bpe" 99 | case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02": 100 | pre = "deepseek-llm" 101 | case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e": 102 | pre = "deepseek-coder" 103 | default: 104 | slog.Warn("unknown pretokenizer, using default", "digest", digest) 105 | pre = "default" 106 | } 107 | 108 | return pre, tokens, t.Model.Merges, nil 109 | } 110 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | ### Getting Started 4 | * [Quickstart](../README.md#quickstart) 5 | * [Examples](../examples) 6 | * [Importing models](./import.md) 7 | * [Linux Documentation](./linux.md) 8 | * [Windows Documentation](./windows.md) 9 | * [Docker Documentation](./docker.md) 10 | 11 | ### Reference 12 | 13 | * [API Reference](./api.md) 14 | * [Modelfile Reference](./modelfile.md) 15 | * [OpenAI Compatibility](./openai.md) 16 | 17 | ### Resources 18 | 19 | * [Troubleshooting Guide](./troubleshooting.md) 20 | * [FAQ](./faq.md) 21 | * [Development guide](./development.md) 22 | -------------------------------------------------------------------------------- /docs/docker.md: -------------------------------------------------------------------------------- 1 | # Ollama Docker image 2 | 3 | ### CPU only 4 | 5 | ```bash 6 | docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama 7 | ``` 8 | 9 | ### Nvidia GPU 10 | Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation). 11 | 12 | #### Install with Apt 13 | 1. Configure the repository 14 | ```bash 15 | curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ 16 | | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg 17 | curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ 18 | | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \ 19 | | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list 20 | sudo apt-get update 21 | ``` 22 | 2. Install the NVIDIA Container Toolkit packages 23 | ```bash 24 | sudo apt-get install -y nvidia-container-toolkit 25 | ``` 26 | 27 | #### Install with Yum or Dnf 28 | 1. Configure the repository 29 | 30 | ```bash 31 | curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \ 32 | | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo 33 | ``` 34 | 35 | 2. 
Install the NVIDIA Container Toolkit packages 36 | 37 | ```bash 38 | sudo yum install -y nvidia-container-toolkit 39 | ``` 40 | 41 | #### Configure Docker to use Nvidia driver 42 | ``` 43 | sudo nvidia-ctk runtime configure --runtime=docker 44 | sudo systemctl restart docker 45 | ``` 46 | 47 | #### Start the container 48 | 49 | ```bash 50 | docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama 51 | ``` 52 | 53 | ### AMD GPU 54 | 55 | To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command: 56 | 57 | ``` 58 | docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm 59 | ``` 60 | 61 | ### Run model locally 62 | 63 | Now you can run a model: 64 | 65 | ``` 66 | docker exec -it ollama ollama run llama3 67 | ``` 68 | 69 | ### Try different models 70 | 71 | More models can be found on the [Ollama library](https://ollama.com/library). 72 | -------------------------------------------------------------------------------- /docs/tutorials.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | Here is a list of ways you can use Ollama with other tools to build interesting applications. 4 | 5 | - [Using LangChain with Ollama in JavaScript](./tutorials/langchainjs.md) 6 | - [Using LangChain with Ollama in Python](./tutorials/langchainpy.md) 7 | - [Running Ollama on NVIDIA Jetson Devices](./tutorials/nvidia-jetson.md) 8 | 9 | Also be sure to check out the [examples](../examples) directory for more ways to use Ollama. 10 | -------------------------------------------------------------------------------- /docs/tutorials/nvidia-jetson.md: -------------------------------------------------------------------------------- 1 | # Running Ollama on NVIDIA Jetson Devices 2 | 3 | Ollama runs well on [NVIDIA Jetson Devices](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) and should run out of the box with the standard installation instructions. 4 | 5 | The following has been tested on [JetPack 5.1.2](https://developer.nvidia.com/embedded/jetpack), but should also work on JetPack 6.0. 6 | 7 | - Install Ollama via standard Linux command (ignore the 404 error): `curl https://ollama.com/install.sh | sh` 8 | - Pull the model you want to use (e.g. mistral): `ollama pull mistral` 9 | - Start an interactive session: `ollama run mistral` 10 | 11 | And that's it! 12 | 13 | # Running Ollama in Docker 14 | 15 | When running GPU accelerated applications in Docker, it is highly recommended to use [dusty-nv jetson-containers repo](https://github.com/dusty-nv/jetson-containers). -------------------------------------------------------------------------------- /docs/windows.md: -------------------------------------------------------------------------------- 1 | # Ollama Windows Preview 2 | 3 | Welcome to the Ollama Windows preview. 4 | 5 | No more WSL required! 6 | 7 | Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support. 8 | After installing Ollama Windows Preview, Ollama will run in the background and 9 | the `ollama` command line is available in `cmd`, `powershell` or your favorite 10 | terminal application. As usual the Ollama [api](./api.md) will be served on 11 | `http://localhost:11434`. 12 | 13 | As this is a preview release, you should expect a few bugs here and there. 
If 14 | you run into a problem you can reach out on 15 | [Discord](https://discord.gg/ollama), or file an 16 | [issue](https://github.com/ollama/ollama/issues). 17 | Logs will often be helpful in diagnosing the problem (see 18 | [Troubleshooting](#troubleshooting) below) 19 | 20 | ## System Requirements 21 | 22 | * Windows 10 or newer, Home or Pro 23 | * NVIDIA 452.39 or newer Drivers if you have an NVIDIA card 24 | * AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card 25 | 26 | ## API Access 27 | 28 | Here's a quick example showing API access from `powershell` 29 | ```powershell 30 | (Invoke-WebRequest -method POST -Body '{"model":"llama3", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json 31 | ``` 32 | 33 | ## Troubleshooting 34 | 35 | While we're in preview, `OLLAMA_DEBUG` is always enabled, which adds 36 | a "view logs" menu item to the app, and increases logging for the GUI app and 37 | server. 38 | 39 | Ollama on Windows stores files in a few different locations. You can view them in 40 | the explorer window by hitting `+R` and type in: 41 | - `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates 42 | - *app.log* contains logs from the GUI application 43 | - *server.log* contains the server logs 44 | - *upgrade.log* contains log output for upgrades 45 | - `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH) 46 | - `explorer %HOMEPATH%\.ollama` contains models and configuration 47 | - `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories 48 | 49 | 50 | ## Standalone CLI 51 | 52 | The easiest way to install Ollama on Windows is to use the `OllamaSetup.exe` 53 | installer. It installs in your account without requiring Administrator rights. 54 | We update Ollama regularly to support the latest models, and this installer will 55 | help you keep up to date. 56 | 57 | If you'd like to install or integrate Ollama as a service, a standalone 58 | `ollama-windows-amd64.zip` zip file is available containing only the Ollama CLI 59 | and GPU library dependencies for Nvidia and AMD. This allows for embedding 60 | Ollama in existing applications, or running it as a system service via `ollama 61 | serve` with tools such as [NSSM](https://nssm.cc/). 62 | -------------------------------------------------------------------------------- /envconfig/config_test.go: -------------------------------------------------------------------------------- 1 | package envconfig 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestConfig(t *testing.T) { 10 | Debug = false // Reset whatever was loaded in init() 11 | t.Setenv("OLLAMA_DEBUG", "") 12 | LoadConfig() 13 | require.False(t, Debug) 14 | t.Setenv("OLLAMA_DEBUG", "false") 15 | LoadConfig() 16 | require.False(t, Debug) 17 | t.Setenv("OLLAMA_DEBUG", "1") 18 | LoadConfig() 19 | require.True(t, Debug) 20 | t.Setenv("OLLAMA_FLASH_ATTENTION", "1") 21 | LoadConfig() 22 | require.True(t, FlashAttention) 23 | } 24 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This directory contains different examples of using Ollama. 
4 | -------------------------------------------------------------------------------- /examples/flyio/.gitignore: -------------------------------------------------------------------------------- 1 | fly.toml 2 | -------------------------------------------------------------------------------- /examples/flyio/README.md: -------------------------------------------------------------------------------- 1 | # Deploy Ollama to Fly.io 2 | 3 | > Note: this example exposes a public endpoint and does not configure authentication. Use with care. 4 | 5 | ## Prerequisites 6 | 7 | - Ollama: https://ollama.com/download 8 | - Fly.io account. Sign up for a free account: https://fly.io/app/sign-up 9 | 10 | ## Steps 11 | 12 | 1. Login to Fly.io 13 | 14 | ```bash 15 | fly auth login 16 | ``` 17 | 18 | 1. Create a new Fly app 19 | 20 | ```bash 21 | fly launch --name --image ollama/ollama --internal-port 11434 --vm-size shared-cpu-8x --now 22 | ``` 23 | 24 | 1. Pull and run `orca-mini:3b` 25 | 26 | ```bash 27 | OLLAMA_HOST=https://.fly.dev ollama run orca-mini:3b 28 | ``` 29 | 30 | `shared-cpu-8x` is a free-tier eligible machine type. For better performance, switch to a `performance` or `dedicated` machine type or attach a GPU for hardware acceleration (see below). 31 | 32 | ## (Optional) Persistent Volume 33 | 34 | By default Fly Machines use ephemeral storage which is problematic if you want to use the same model across restarts without pulling it again. Create and attach a persistent volume to store the downloaded models: 35 | 36 | 1. Create the Fly Volume 37 | 38 | ```bash 39 | fly volume create ollama 40 | ``` 41 | 42 | 1. Update `fly.toml` and add `[mounts]` 43 | 44 | ```toml 45 | [mounts] 46 | source = "ollama" 47 | destination = "/mnt/ollama/models" 48 | ``` 49 | 50 | 1. Update `fly.toml` and add `[env]` 51 | 52 | ```toml 53 | [env] 54 | OLLAMA_MODELS = "/mnt/ollama/models" 55 | ``` 56 | 57 | 1. Deploy your app 58 | 59 | ```bash 60 | fly deploy 61 | ``` 62 | 63 | ## (Optional) Hardware Acceleration 64 | 65 | Fly.io GPU is currently in waitlist. Sign up for the waitlist: https://fly.io/gpu 66 | 67 | Once you've been accepted, create the app with the additional flags `--vm-gpu-kind a100-pcie-40gb` or `--vm-gpu-kind a100-pcie-80gb`. 
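For illustration only, the GPU flag can be combined with the same `fly launch` invocation shown in the Steps section above (the app name is a placeholder you choose; check the Fly.io GPU documentation for which machine sizes can be paired with a GPU):

```bash
fly launch --name your-app-name --image ollama/ollama --internal-port 11434 --vm-gpu-kind a100-pcie-40gb --now
```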
68 | -------------------------------------------------------------------------------- /examples/go-chat/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | 8 | "github.com/ollama/ollama/api" 9 | ) 10 | 11 | func main() { 12 | client, err := api.ClientFromEnvironment() 13 | if err != nil { 14 | log.Fatal(err) 15 | } 16 | 17 | messages := []api.Message{ 18 | api.Message{ 19 | Role: "system", 20 | Content: "Provide very brief, concise responses", 21 | }, 22 | api.Message{ 23 | Role: "user", 24 | Content: "Name some unusual animals", 25 | }, 26 | api.Message{ 27 | Role: "assistant", 28 | Content: "Monotreme, platypus, echidna", 29 | }, 30 | api.Message{ 31 | Role: "user", 32 | Content: "which of these is the most dangerous?", 33 | }, 34 | } 35 | 36 | ctx := context.Background() 37 | req := &api.ChatRequest{ 38 | Model: "llama3", 39 | Messages: messages, 40 | } 41 | 42 | respFunc := func(resp api.ChatResponse) error { 43 | fmt.Print(resp.Message.Content) 44 | return nil 45 | } 46 | 47 | err = client.Chat(ctx, req, respFunc) 48 | if err != nil { 49 | log.Fatal(err) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /examples/go-generate-streaming/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | 8 | "github.com/ollama/ollama/api" 9 | ) 10 | 11 | func main() { 12 | client, err := api.ClientFromEnvironment() 13 | if err != nil { 14 | log.Fatal(err) 15 | } 16 | 17 | // By default, GenerateRequest is streaming. 18 | req := &api.GenerateRequest{ 19 | Model: "gemma", 20 | Prompt: "how many planets are there?", 21 | } 22 | 23 | ctx := context.Background() 24 | respFunc := func(resp api.GenerateResponse) error { 25 | // Only print the response here; GenerateResponse has a number of other 26 | // interesting fields you want to examine. 27 | 28 | // In streaming mode, responses are partial so we call fmt.Print (and not 29 | // Println) in order to avoid spurious newlines being introduced. The 30 | // model will insert its own newlines if it wants. 31 | fmt.Print(resp.Response) 32 | return nil 33 | } 34 | 35 | err = client.Generate(ctx, req, respFunc) 36 | if err != nil { 37 | log.Fatal(err) 38 | } 39 | fmt.Println() 40 | } 41 | -------------------------------------------------------------------------------- /examples/go-generate/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | 8 | "github.com/ollama/ollama/api" 9 | ) 10 | 11 | func main() { 12 | client, err := api.ClientFromEnvironment() 13 | if err != nil { 14 | log.Fatal(err) 15 | } 16 | 17 | req := &api.GenerateRequest{ 18 | Model: "gemma", 19 | Prompt: "how many planets are there?", 20 | 21 | // set streaming to false 22 | Stream: new(bool), 23 | } 24 | 25 | ctx := context.Background() 26 | respFunc := func(resp api.GenerateResponse) error { 27 | // Only print the response here; GenerateResponse has a number of other 28 | // interesting fields you want to examine. 
29 | fmt.Println(resp.Response) 30 | return nil 31 | } 32 | 33 | err = client.Generate(ctx, req, respFunc) 34 | if err != nil { 35 | log.Fatal(err) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/go-http-generate/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/examples/go-http-generate/README.md -------------------------------------------------------------------------------- /examples/go-http-generate/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "log" 8 | "net/http" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | body := []byte(`{"model":"mistral"}`) 14 | resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewBuffer(body)) 15 | 16 | if err != nil { 17 | fmt.Print(err.Error()) 18 | os.Exit(1) 19 | } 20 | 21 | defer resp.Body.Close() 22 | 23 | responseData, err := io.ReadAll(resp.Body) 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | fmt.Println(string(responseData)) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /examples/go-multimodal/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | 9 | "github.com/ollama/ollama/api" 10 | ) 11 | 12 | func main() { 13 | if len(os.Args) <= 1 { 14 | log.Fatal("usage: ") 15 | } 16 | 17 | imgData, err := os.ReadFile(os.Args[1]) 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | 22 | client, err := api.ClientFromEnvironment() 23 | if err != nil { 24 | log.Fatal(err) 25 | } 26 | 27 | req := &api.GenerateRequest{ 28 | Model: "llava", 29 | Prompt: "describe this image", 30 | Images: []api.ImageData{imgData}, 31 | } 32 | 33 | ctx := context.Background() 34 | respFunc := func(resp api.GenerateResponse) error { 35 | // In streaming mode, responses are partial so we call fmt.Print (and not 36 | // Println) in order to avoid spurious newlines being introduced. The 37 | // model will insert its own newlines if it wants. 
38 | fmt.Print(resp.Response) 39 | return nil 40 | } 41 | 42 | err = client.Generate(ctx, req, respFunc) 43 | if err != nil { 44 | log.Fatal(err) 45 | } 46 | fmt.Println() 47 | } 48 | -------------------------------------------------------------------------------- /examples/go-pull-progress/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | 8 | "github.com/ollama/ollama/api" 9 | ) 10 | 11 | func main() { 12 | client, err := api.ClientFromEnvironment() 13 | if err != nil { 14 | log.Fatal(err) 15 | } 16 | 17 | ctx := context.Background() 18 | 19 | req := &api.PullRequest{ 20 | Model: "mistral", 21 | } 22 | progressFunc := func(resp api.ProgressResponse) error { 23 | fmt.Printf("Progress: status=%v, total=%v, completed=%v\n", resp.Status, resp.Total, resp.Completed) 24 | return nil 25 | } 26 | 27 | err = client.Pull(ctx, req, progressFunc) 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /examples/jupyter-notebook/README.md: -------------------------------------------------------------------------------- 1 | # Ollama Jupyter Notebook 2 | 3 | This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely. 4 | 5 | For best results, use an instance with GPU accelerator. 6 | -------------------------------------------------------------------------------- /examples/kubernetes/README.md: -------------------------------------------------------------------------------- 1 | # Deploy Ollama to Kubernetes 2 | 3 | ## Prerequisites 4 | 5 | - Ollama: https://ollama.com/download 6 | - Kubernetes cluster. This example will use Google Kubernetes Engine. 7 | 8 | ## Steps 9 | 10 | 1. Create the Ollama namespace, deployment, and service 11 | 12 | ```bash 13 | kubectl apply -f cpu.yaml 14 | ``` 15 | 16 | ## (Optional) Hardware Acceleration 17 | 18 | Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin) which is deployed in Kubernetes in form of daemonset. Follow the link for more details. 19 | 20 | Once configured, create a GPU enabled Ollama deployment. 21 | 22 | ```bash 23 | kubectl apply -f gpu.yaml 24 | ``` 25 | 26 | ## Test 27 | 28 | 1. Port forward the Ollama service to connect and use it locally 29 | 30 | ```bash 31 | kubectl -n ollama port-forward service/ollama 11434:80 32 | ``` 33 | 34 | 1. 
Pull and run a model, for example `orca-mini:3b` 35 | 36 | ```bash 37 | ollama run orca-mini:3b 38 | ``` -------------------------------------------------------------------------------- /examples/kubernetes/cpu.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: ollama 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: ollama 11 | namespace: ollama 12 | spec: 13 | selector: 14 | matchLabels: 15 | name: ollama 16 | template: 17 | metadata: 18 | labels: 19 | name: ollama 20 | spec: 21 | containers: 22 | - name: ollama 23 | image: ollama/ollama:latest 24 | ports: 25 | - name: http 26 | containerPort: 11434 27 | protocol: TCP 28 | --- 29 | apiVersion: v1 30 | kind: Service 31 | metadata: 32 | name: ollama 33 | namespace: ollama 34 | spec: 35 | type: ClusterIP 36 | selector: 37 | name: ollama 38 | ports: 39 | - port: 80 40 | name: http 41 | targetPort: http 42 | protocol: TCP 43 | -------------------------------------------------------------------------------- /examples/kubernetes/gpu.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: ollama 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: ollama 11 | namespace: ollama 12 | spec: 13 | strategy: 14 | type: Recreate 15 | selector: 16 | matchLabels: 17 | name: ollama 18 | template: 19 | metadata: 20 | labels: 21 | name: ollama 22 | spec: 23 | containers: 24 | - name: ollama 25 | image: ollama/ollama:latest 26 | env: 27 | - name: PATH 28 | value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin 29 | - name: LD_LIBRARY_PATH 30 | value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64 31 | - name: NVIDIA_DRIVER_CAPABILITIES 32 | value: compute,utility 33 | ports: 34 | - name: http 35 | containerPort: 11434 36 | protocol: TCP 37 | resources: 38 | limits: 39 | nvidia.com/gpu: 1 40 | tolerations: 41 | - key: nvidia.com/gpu 42 | operator: Exists 43 | effect: NoSchedule 44 | --- 45 | apiVersion: v1 46 | kind: Service 47 | metadata: 48 | name: ollama 49 | namespace: ollama 50 | spec: 51 | type: ClusterIP 52 | selector: 53 | name: ollama 54 | ports: 55 | - port: 80 56 | name: http 57 | targetPort: http 58 | protocol: TCP 59 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-document/README.md: -------------------------------------------------------------------------------- 1 | # LangChain Document QA 2 | 3 | This example provides an interface for asking questions to a PDF document. 4 | 5 | ## Setup 6 | 7 | ``` 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Run 12 | 13 | ``` 14 | python main.py 15 | ``` 16 | 17 | A prompt will appear, where questions may be asked: 18 | 19 | ``` 20 | Query: How many locations does WeWork have? 
21 | ``` 22 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-document/main.py: -------------------------------------------------------------------------------- 1 | from langchain.document_loaders import OnlinePDFLoader 2 | from langchain.vectorstores import Chroma 3 | from langchain.embeddings import GPT4AllEmbeddings 4 | from langchain import PromptTemplate 5 | from langchain.llms import Ollama 6 | from langchain.callbacks.manager import CallbackManager 7 | from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 8 | from langchain.chains import RetrievalQA 9 | import sys 10 | import os 11 | 12 | class SuppressStdout: 13 | def __enter__(self): 14 | self._original_stdout = sys.stdout 15 | self._original_stderr = sys.stderr 16 | sys.stdout = open(os.devnull, 'w') 17 | sys.stderr = open(os.devnull, 'w') 18 | 19 | def __exit__(self, exc_type, exc_val, exc_tb): 20 | sys.stdout.close() 21 | sys.stdout = self._original_stdout 22 | sys.stderr = self._original_stderr 23 | 24 | # load the pdf and split it into chunks 25 | loader = OnlinePDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001813756/975b3e9b-268e-4798-a9e4-2a9a7c92dc10.pdf") 26 | data = loader.load() 27 | 28 | from langchain.text_splitter import RecursiveCharacterTextSplitter 29 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0) 30 | all_splits = text_splitter.split_documents(data) 31 | 32 | with SuppressStdout(): 33 | vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings()) 34 | 35 | while True: 36 | query = input("\nQuery: ") 37 | if query == "exit": 38 | break 39 | if query.strip() == "": 40 | continue 41 | 42 | # Prompt 43 | template = """Use the following pieces of context to answer the question at the end. 44 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 45 | Use three sentences maximum and keep the answer as concise as possible. 
46 | {context} 47 | Question: {question} 48 | Helpful Answer:""" 49 | QA_CHAIN_PROMPT = PromptTemplate( 50 | input_variables=["context", "question"], 51 | template=template, 52 | ) 53 | 54 | llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])) 55 | qa_chain = RetrievalQA.from_chain_type( 56 | llm, 57 | retriever=vectorstore.as_retriever(), 58 | chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}, 59 | ) 60 | 61 | result = qa_chain({"query": query}) 62 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-document/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | aiohttp==3.8.5 3 | aiosignal==1.3.1 4 | anyio==3.7.1 5 | astunparse==1.6.3 6 | async-timeout==4.0.3 7 | attrs==23.1.0 8 | backoff==2.2.1 9 | beautifulsoup4==4.12.2 10 | bs4==0.0.1 11 | cachetools==5.3.1 12 | certifi==2023.7.22 13 | cffi==1.15.1 14 | chardet==5.2.0 15 | charset-normalizer==3.2.0 16 | Chroma==0.2.0 17 | chroma-hnswlib==0.7.2 18 | chromadb==0.4.5 19 | click==8.1.6 20 | coloredlogs==15.0.1 21 | cryptography==41.0.3 22 | dataclasses-json==0.5.14 23 | fastapi==0.99.1 24 | filetype==1.2.0 25 | flatbuffers==23.5.26 26 | frozenlist==1.4.0 27 | gast==0.4.0 28 | google-auth==2.22.0 29 | google-auth-oauthlib==1.0.0 30 | google-pasta==0.2.0 31 | gpt4all==1.0.8 32 | grpcio==1.57.0 33 | h11==0.14.0 34 | h5py==3.9.0 35 | httptools==0.6.0 36 | humanfriendly==10.0 37 | idna==3.4 38 | importlib-resources==6.0.1 39 | joblib==1.3.2 40 | keras==2.13.1 41 | langchain==0.0.261 42 | langsmith==0.0.21 43 | libclang==16.0.6 44 | lxml==4.9.3 45 | Markdown==3.4.4 46 | MarkupSafe==2.1.3 47 | marshmallow==3.20.1 48 | monotonic==1.6 49 | mpmath==1.3.0 50 | multidict==6.0.4 51 | mypy-extensions==1.0.0 52 | nltk==3.8.1 53 | numexpr==2.8.5 54 | numpy==1.24.3 55 | oauthlib==3.2.2 56 | onnxruntime==1.15.1 57 | openapi-schema-pydantic==1.2.4 58 | opt-einsum==3.3.0 59 | overrides==7.4.0 60 | packaging==23.1 61 | pdf2image==1.16.3 62 | pdfminer==20191125 63 | pdfminer.six==20221105 64 | Pillow==10.0.0 65 | posthog==3.0.1 66 | protobuf==4.24.0 67 | pulsar-client==3.2.0 68 | pyasn1==0.5.0 69 | pyasn1-modules==0.3.0 70 | pycparser==2.21 71 | pycryptodome==3.18.0 72 | pydantic==1.10.12 73 | PyPika==0.48.9 74 | python-dateutil==2.8.2 75 | python-dotenv==1.0.0 76 | python-magic==0.4.27 77 | PyYAML==6.0.1 78 | regex==2023.8.8 79 | requests==2.31.0 80 | requests-oauthlib==1.3.1 81 | rsa==4.9 82 | six==1.16.0 83 | sniffio==1.3.0 84 | soupsieve==2.4.1 85 | SQLAlchemy==2.0.19 86 | starlette==0.27.0 87 | sympy==1.12 88 | tabulate==0.9.0 89 | tenacity==8.2.2 90 | tensorboard==2.13.0 91 | tensorboard-data-server==0.7.1 92 | tensorflow==2.13.0 93 | tensorflow-estimator==2.13.0 94 | tensorflow-hub==0.14.0 95 | tensorflow-macos==2.13.0 96 | termcolor==2.3.0 97 | tokenizers==0.13.3 98 | tqdm==4.66.1 99 | typing-inspect==0.9.0 100 | typing_extensions==4.5.0 101 | unstructured==0.9.2 102 | urllib3==1.26.16 103 | uvicorn==0.23.2 104 | uvloop==0.17.0 105 | watchfiles==0.19.0 106 | websockets==11.0.3 107 | Werkzeug==2.3.6 108 | wrapt==1.15.0 109 | yarl==1.9.2 110 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-privategpt/README.md: -------------------------------------------------------------------------------- 1 | # PrivateGPT with Llama 2 uncensored 2 | 3 | 
https://github.com/ollama/ollama/assets/3325447/20cf8ec6-ff25-42c6-bdd8-9be594e3ce1b 4 | 5 | > Note: this example is a slightly modified version of PrivateGPT using models such as Llama 2 Uncensored. All credit for PrivateGPT goes to Iván Martínez who is the creator of it, and you can find his GitHub repo [here](https://github.com/imartinez/privateGPT). 6 | 7 | ### Setup 8 | 9 | Set up a virtual environment (optional): 10 | 11 | ``` 12 | python3 -m venv .venv 13 | source .venv/bin/activate 14 | ``` 15 | 16 | Install the Python dependencies: 17 | 18 | ```shell 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | Pull the model you'd like to use: 23 | 24 | ``` 25 | ollama pull llama2-uncensored 26 | ``` 27 | 28 | ### Getting WeWork's latest quarterly earnings report (10-Q) 29 | 30 | ``` 31 | mkdir source_documents 32 | curl https://d18rn0p25nwr6d.cloudfront.net/CIK-0001813756/975b3e9b-268e-4798-a9e4-2a9a7c92dc10.pdf -o source_documents/wework.pdf 33 | ``` 34 | 35 | ### Ingesting files 36 | 37 | ```shell 38 | python ingest.py 39 | ``` 40 | 41 | Output should look like this: 42 | 43 | ```shell 44 | Creating new vectorstore 45 | Loading documents from source_documents 46 | Loading new documents: 100%|██████████████████████| 1/1 [00:01<00:00, 1.73s/it] 47 | Loaded 1 new documents from source_documents 48 | Split into 90 chunks of text (max. 500 tokens each) 49 | Creating embeddings. May take some minutes... 50 | Using embedded DuckDB with persistence: data will be stored in: db 51 | Ingestion complete! You can now run privateGPT.py to query your documents 52 | ``` 53 | 54 | ### Ask questions 55 | 56 | ```shell 57 | python privateGPT.py 58 | 59 | Enter a query: How many locations does WeWork have? 60 | 61 | > Answer (took 17.7 s.): 62 | As of June 2023, WeWork has 777 locations worldwide, including 610 Consolidated Locations (as defined in the section entitled Key Performance Indicators). 
63 | ``` 64 | 65 | ### Try a different model: 66 | 67 | ``` 68 | ollama pull llama2:13b 69 | MODEL=llama2:13b python privateGPT.py 70 | ``` 71 | 72 | ## Adding more files 73 | 74 | Put any and all your files into the `source_documents` directory 75 | 76 | The supported extensions are: 77 | 78 | - `.csv`: CSV, 79 | - `.docx`: Word Document, 80 | - `.doc`: Word Document, 81 | - `.enex`: EverNote, 82 | - `.eml`: Email, 83 | - `.epub`: EPub, 84 | - `.html`: HTML File, 85 | - `.md`: Markdown, 86 | - `.msg`: Outlook Message, 87 | - `.odt`: Open Document Text, 88 | - `.pdf`: Portable Document Format (PDF), 89 | - `.pptx` : PowerPoint Document, 90 | - `.ppt` : PowerPoint Document, 91 | - `.txt`: Text file (UTF-8), 92 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-privategpt/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | from chromadb.config import Settings 3 | 4 | # Define the folder for storing database 5 | PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY', 'db') 6 | 7 | # Define the Chroma settings 8 | CHROMA_SETTINGS = Settings( 9 | persist_directory=PERSIST_DIRECTORY, 10 | anonymized_telemetry=False 11 | ) 12 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-privategpt/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "privategpt" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Ivan Martinez "] 6 | license = "Apache Version 2.0" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.10" 11 | langchain = "0.0.261" 12 | gpt4all = "^1.0.3" 13 | chromadb = "^0.3.26" 14 | PyMuPDF = "^1.22.5" 15 | python-dotenv = "^1.0.0" 16 | unstructured = "^0.8.0" 17 | extract-msg = "^0.41.5" 18 | tabulate = "^0.9.0" 19 | pandoc = "^2.3" 20 | pypandoc = "^1.11" 21 | tqdm = "^4.65.0" 22 | sentence-transformers = "^2.2.2" 23 | 24 | [build-system] 25 | requires = ["poetry-core"] 26 | build-backend = "poetry.core.masonry.api" 27 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-privategpt/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.274 2 | gpt4all==1.0.8 3 | chromadb==0.4.7 4 | llama-cpp-python==0.1.81 5 | urllib3==2.0.4 6 | PyMuPDF==1.23.5 7 | python-dotenv==1.0.0 8 | unstructured==0.10.8 9 | extract-msg==0.45.0 10 | tabulate==0.9.0 11 | pandoc==2.3 12 | pypandoc==1.11 13 | tqdm==4.66.1 14 | sentence_transformers==2.2.2 -------------------------------------------------------------------------------- /examples/langchain-python-rag-websummary/README.md: -------------------------------------------------------------------------------- 1 | # LangChain Web Summarization 2 | 3 | This example summarizes the website, [https://ollama.com/blog/run-llama2-uncensored-locally](https://ollama.com/blog/run-llama2-uncensored-locally) 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `llama2` model installed: 8 | 9 | ```bash 10 | ollama pull llama2 11 | ``` 12 | 13 | 2. Install the Python Requirements. 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | 3. 
Run the example: 20 | 21 | ```bash 22 | python main.py 23 | ``` 24 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-websummary/main.py: -------------------------------------------------------------------------------- 1 | from langchain_community.llms import Ollama 2 | from langchain_community.document_loaders import WebBaseLoader 3 | from langchain.chains.summarize import load_summarize_chain 4 | 5 | loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally") 6 | docs = loader.load() 7 | 8 | llm = Ollama(model="llama3") 9 | chain = load_summarize_chain(llm, chain_type="stuff") 10 | 11 | result = chain.invoke(docs) 12 | print(result) 13 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-websummary/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.259 2 | -------------------------------------------------------------------------------- /examples/langchain-python-simple/README.md: -------------------------------------------------------------------------------- 1 | # LangChain 2 | 3 | This example is a basic "hello world" of using LangChain with Ollama. 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `llama3` model installed: 8 | 9 | ```bash 10 | ollama pull llama3 11 | ``` 12 | 13 | 2. Install the Python Requirements. 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | 3. Run the example: 20 | 21 | ```bash 22 | python main.py 23 | ``` 24 | -------------------------------------------------------------------------------- /examples/langchain-python-simple/main.py: -------------------------------------------------------------------------------- 1 | from langchain.llms import Ollama 2 | 3 | input = input("What is your question?") 4 | llm = Ollama(model="llama3") 5 | res = llm.predict(input) 6 | print (res) 7 | -------------------------------------------------------------------------------- /examples/langchain-python-simple/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.259 2 | -------------------------------------------------------------------------------- /examples/langchain-typescript-simple/README.md: -------------------------------------------------------------------------------- 1 | # LangChain 2 | 3 | This example is a basic "hello world" of using LangChain with Ollama using Node.js and Typescript. 4 | 5 | ## Running the Example 6 | 7 | 1. Install the prerequisites: 8 | 9 | ```bash 10 | npm install 11 | ``` 12 | 13 | 2. Ensure the `mistral` model is available: 14 | 15 | ```bash 16 | ollama pull mistral 17 | ``` 18 | 19 | 3. 
Run the example: 20 | 21 | ```bash 22 | npm start 23 | ``` 24 | -------------------------------------------------------------------------------- /examples/langchain-typescript-simple/main.ts: -------------------------------------------------------------------------------- 1 | import { Ollama } from 'langchain/llms/ollama'; 2 | import * as readline from "readline"; 3 | 4 | async function main() { 5 | const ollama = new Ollama({ 6 | model: 'mistral' 7 | // other parameters can be found at https://js.langchain.com/docs/api/llms_ollama/classes/Ollama 8 | }); 9 | 10 | const rl = readline.createInterface({ 11 | input: process.stdin, 12 | output: process.stdout, 13 | }); 14 | 15 | rl.question("What is your question: \n", async (user_input) => { 16 | const stream = await ollama.stream(user_input); 17 | 18 | for await (const chunk of stream) { 19 | process.stdout.write(chunk); 20 | } 21 | rl.close(); 22 | }) 23 | } 24 | 25 | main(); -------------------------------------------------------------------------------- /examples/langchain-typescript-simple/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "start": "tsx main.ts" 4 | }, 5 | "devDependencies": { 6 | "tsx": "^4.6.2", 7 | "typescript": "^5.3.3" 8 | }, 9 | "dependencies": { 10 | "langchain": "^0.0.165", 11 | "readline": "^1.3.0" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /examples/modelfile-mario/Modelfile: -------------------------------------------------------------------------------- 1 | FROM llama3 2 | PARAMETER temperature 1 3 | SYSTEM """ 4 | You are Mario from super mario bros, acting as an assistant. 5 | """ 6 | -------------------------------------------------------------------------------- /examples/modelfile-mario/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/examples/modelfile-mario/logo.png -------------------------------------------------------------------------------- /examples/modelfile-mario/readme.md: -------------------------------------------------------------------------------- 1 | image of Italian plumber 2 | 3 | # Example character: Mario 4 | 5 | This example shows how to create a basic character using Llama3 as the base model. 6 | 7 | To run this example: 8 | 9 | 1. Download the Modelfile 10 | 2. `ollama pull llama3` to get the base model used in the model file. 11 | 3. `ollama create NAME -f ./Modelfile` 12 | 4. `ollama run NAME` 13 | 14 | Ask it some questions like "Who are you?" or "Is Peach in trouble again?" 15 | 16 | ## Editing this file 17 | 18 | What the model file looks like: 19 | 20 | ``` 21 | FROM llama3 22 | PARAMETER temperature 1 23 | SYSTEM """ 24 | You are Mario from Super Mario Bros, acting as an assistant. 25 | """ 26 | ``` 27 | 28 | What if you want to change its behaviour? 29 | 30 | - Try changing the prompt 31 | - Try changing the parameters [Docs](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) 32 | - Try changing the model (e.g. An uncensored model by `FROM wizard-vicuna` this is the wizard-vicuna uncensored model ) 33 | 34 | Once the changes are made, 35 | 36 | 1. `ollama create NAME -f ./Modelfile` 37 | 2. `ollama run NAME` 38 | 3. Iterate until you are happy with the results. 39 | 40 | Notes: 41 | 42 | - This example is for research purposes only. There is no affiliation with any entity. 
43 | - When using an uncensored model, please be aware that it may generate offensive content. 44 | -------------------------------------------------------------------------------- /examples/python-dockerit/Modelfile: -------------------------------------------------------------------------------- 1 | FROM mistral 2 | SYSTEM """ 3 | You are an experienced Devops engineer focused on docker. When given specifications for a particular need or application you know the best way to host that within a docker container. For instance if someone tells you they want an nginx server to host files located at /web you will answer as follows 4 | 5 | ---start 6 | FROM nginx:alpine 7 | COPY /myweb /usr/share/nginx/html 8 | EXPOSE 80 9 | ---end 10 | 11 | Notice that the answer you should give is just the contents of the dockerfile with no explanation and there are three dashes and the word start at the beginning and 3 dashes and the word end. The full output can be piped into a file and run as is. Here is another example. The user will ask to launch a Postgres server with a password of abc123. And the response should be 12 | 13 | ---start 14 | FROM postgres:latest 15 | ENV POSTGRES_PASSWORD=abc123 16 | EXPOSE 5432 17 | ---end 18 | 19 | Again it's just the contents of the dockerfile and nothing else. 20 | """ 21 | -------------------------------------------------------------------------------- /examples/python-dockerit/README.md: -------------------------------------------------------------------------------- 1 | # DockerIt 2 | 3 | DockerIt is a tool to help you build and run your application in a Docker container. It consists of a model that defines the system prompt and model weights to use, along with a python script to then build the container and run the image automatically. 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `mattw/dockerit` model installed: 8 | 9 | ```bash 10 | ollama pull mattw/dockerit 11 | ``` 12 | 13 | 2. Make sure Docker is running on your machine. 14 | 15 | 3. Install the Python Requirements. 16 | 17 | ```bash 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | 4. Run the example: 22 | 23 | ```bash 24 | python dockerit.py "simple postgres server with admin password set to 123" 25 | ``` 26 | 27 | 5. Enter the name you would like to use for your container image. 28 | 29 | ## Caveats 30 | 31 | This is a simple example. It's assuming the Dockerfile content generated is going to work. In many cases, even with simple web servers, it fails when trying to copy files that don't exist. It's simply an example of what you could possibly do. 
32 | -------------------------------------------------------------------------------- /examples/python-dockerit/dockerit.py: -------------------------------------------------------------------------------- 1 | import requests, json, docker, io, sys 2 | inputDescription = " ".join(sys.argv[1:]) 3 | imageName = input("Enter the name of the image: ") 4 | client = docker.from_env() 5 | s = requests.Session() 6 | output="" 7 | with s.post('http://localhost:11434/api/generate', json={'model': 'dockerit', 'prompt': inputDescription}, stream=True) as r: 8 | for line in r.iter_lines(): 9 | if line: 10 | j = json.loads(line) 11 | if "response" in j: 12 | output = output +j["response"] 13 | output = output[output.find("---start")+9:output.find("---end")-1] 14 | f = io.BytesIO(bytes(output, 'utf-8')) 15 | client.images.build(fileobj=f, tag=imageName) 16 | container = client.containers.run(imageName, detach=True) 17 | print("Container named", container.name, " started with id: ",container.id) 18 | -------------------------------------------------------------------------------- /examples/python-dockerit/requirements.txt: -------------------------------------------------------------------------------- 1 | docker -------------------------------------------------------------------------------- /examples/python-json-datagenerator/predefinedschema.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import random 4 | 5 | model = "llama3" 6 | template = { 7 | "firstName": "", 8 | "lastName": "", 9 | "address": { 10 | "street": "", 11 | "city": "", 12 | "state": "", 13 | "zipCode": "" 14 | }, 15 | "phoneNumber": "" 16 | } 17 | 18 | prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in the US, and phone number. \nUse the following template: {json.dumps(template)}." 19 | 20 | data = { 21 | "prompt": prompt, 22 | "model": model, 23 | "format": "json", 24 | "stream": False, 25 | "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, 26 | } 27 | 28 | print(f"Generating a sample user") 29 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) 30 | json_data = json.loads(response.text) 31 | print(json.dumps(json.loads(json_data["response"]), indent=2)) 32 | -------------------------------------------------------------------------------- /examples/python-json-datagenerator/randomaddresses.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import random 4 | 5 | countries = [ 6 | "United States", 7 | "United Kingdom", 8 | "the Netherlands", 9 | "Germany", 10 | "Mexico", 11 | "Canada", 12 | "France", 13 | ] 14 | country = random.choice(countries) 15 | model = "llama3" 16 | 17 | prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters." 
18 | 19 | data = { 20 | "prompt": prompt, 21 | "model": model, 22 | "format": "json", 23 | "stream": False, 24 | "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, 25 | } 26 | 27 | print(f"Generating a sample user in {country}") 28 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) 29 | json_data = json.loads(response.text) 30 | 31 | print(json.dumps(json.loads(json_data["response"]), indent=2)) 32 | -------------------------------------------------------------------------------- /examples/python-json-datagenerator/readme.md: -------------------------------------------------------------------------------- 1 | # JSON Output Example 2 | 3 | ![llmjson 2023-11-10 15_31_31](https://github.com/ollama/ollama/assets/633681/e599d986-9b4a-4118-81a4-4cfe7e22da25) 4 | 5 | There are two python scripts in this example. `randomaddresses.py` generates random addresses from different countries. `predefinedschema.py` sets a template for the model to fill in. 6 | 7 | ## Running the Example 8 | 9 | 1. Ensure you have the `llama3` model installed: 10 | 11 | ```bash 12 | ollama pull llama3 13 | ``` 14 | 15 | 2. Install the Python Requirements. 16 | 17 | ```bash 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | 3. Run the Random Addresses example: 22 | 23 | ```bash 24 | python randomaddresses.py 25 | ``` 26 | 27 | 4. Run the Predefined Schema example: 28 | 29 | ```bash 30 | python predefinedschema.py 31 | ``` 32 | 33 | ## Review the Code 34 | 35 | Both programs are basically the same, with a different prompt for each, demonstrating two different ideas. The key part of getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and specifying the `format` as `json` in the data body. 36 | 37 | ```python 38 | prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters." 39 | 40 | data = { 41 | "prompt": prompt, 42 | "model": model, 43 | "format": "json", 44 | "stream": False, 45 | "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, 46 | } 47 | ``` 48 | 49 | When running `randomaddresses.py` you will see that the schema changes and adapts to the chosen country. 50 | 51 | In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with. 52 | 53 | Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output easy to read. 
54 | 55 | ```python 56 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) 57 | json_data = json.loads(response.text) 58 | 59 | print(json.dumps(json.loads(json_data["response"]), indent=2)) 60 | ``` 61 | -------------------------------------------------------------------------------- /examples/python-json-datagenerator/requirements.txt: -------------------------------------------------------------------------------- 1 | Requests==2.31.0 2 | -------------------------------------------------------------------------------- /examples/python-loganalysis/Modelfile: -------------------------------------------------------------------------------- 1 | FROM codebooga:latest 2 | 3 | SYSTEM """ 4 | You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer. 5 | """ 6 | 7 | PARAMETER TEMPERATURE 0.3 8 | 9 | -------------------------------------------------------------------------------- /examples/python-loganalysis/loganalysis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | import requests 4 | import json 5 | 6 | # prelines and postlines represent the number of lines of context to include in the output around the error 7 | prelines = 10 8 | postlines = 10 9 | 10 | def find_errors_in_log_file(): 11 | if len(sys.argv) < 2: 12 | print("Usage: python loganalysis.py ") 13 | return 14 | 15 | log_file_path = sys.argv[1] 16 | with open(log_file_path, 'r') as log_file: 17 | log_lines = log_file.readlines() 18 | 19 | error_logs = [] 20 | for i, line in enumerate(log_lines): 21 | if "error" in line.lower(): 22 | start_index = max(0, i - prelines) 23 | end_index = min(len(log_lines), i + postlines + 1) 24 | error_logs.extend(log_lines[start_index:end_index]) 25 | 26 | return error_logs 27 | 28 | error_logs = find_errors_in_log_file() 29 | 30 | data = { 31 | "prompt": "\n".join(error_logs), 32 | "model": "mattw/loganalyzer" 33 | } 34 | 35 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=True) 36 | for line in response.iter_lines(): 37 | if line: 38 | json_data = json.loads(line) 39 | if json_data['done'] == False: 40 | print(json_data['response'], end='', flush=True) 41 | 42 | -------------------------------------------------------------------------------- /examples/python-loganalysis/readme.md: -------------------------------------------------------------------------------- 1 | # Log Analysis example 2 | 3 | ![loganalyzer 2023-11-10 08_53_29](https://github.com/ollama/ollama/assets/633681/ad30f1fc-321f-4953-8914-e30e24db9921) 4 | 5 | This example shows one possible way to create a log file analyzer. It uses the model **mattw/loganalyzer** which is based on **codebooga**, a 34b parameter model. 6 | 7 | To use it, run: 8 | 9 | `python loganalysis.py ` 10 | 11 | You can try this with the `logtest.logfile` file included in this directory. 12 | 13 | ## Running the Example 14 | 15 | 1. Ensure you have the `mattw/loganalyzer` model installed: 16 | 17 | ```bash 18 | ollama pull mattw/loganalyzer 19 | ``` 20 | 21 | 2. Install the Python Requirements. 22 | 23 | ```bash 24 | pip install -r requirements.txt 25 | ``` 26 | 27 | 3. 
Run the example: 28 | 29 | ```bash 30 | python loganalysis.py logtest.logfile 31 | ``` 32 | 33 | ## Review the code 34 | 35 | The first part of this example is a Modelfile that takes `codebooga` and applies a new System Prompt: 36 | 37 | ```plaintext 38 | SYSTEM """ 39 | You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer. 40 | """ 41 | ``` 42 | 43 | This model is available at https://ollama.com/mattw/loganalyzer. You can customize it and add to your own namespace using the command `ollama create -f ` then `ollama push `. 44 | 45 | Then loganalysis.py scans all the lines in the given log file and searches for the word 'error'. When the word is found, the 10 lines before and after are set as the prompt for a call to the Generate API. 46 | 47 | ```python 48 | data = { 49 | "prompt": "\n".join(error_logs), 50 | "model": "mattw/loganalyzer" 51 | } 52 | ``` 53 | 54 | Finally, the streamed output is parsed and the response field in the output is printed to the line. 55 | 56 | ```python 57 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=True) 58 | for line in response.iter_lines(): 59 | if line: 60 | json_data = json.loads(line) 61 | if json_data['done'] == False: 62 | print(json_data['response'], end='') 63 | 64 | ``` 65 | 66 | ## Next Steps 67 | 68 | There is a lot more that can be done here. This is a simple way to detect errors, looking for the word error. Perhaps it would be interesting to find anomalous activity in the logs. It could be interesting to create embeddings for each line and compare them, looking for similar lines. Or look into applying Levenshtein Distance algorithms to find similar lines to help identify the anomalous lines. 69 | 70 | Try different models and different prompts to analyze the data. You could consider adding retrieval augmented generation (RAG) to this to help understand newer log formats. 71 | -------------------------------------------------------------------------------- /examples/python-loganalysis/requirements.txt: -------------------------------------------------------------------------------- 1 | Requests==2.31.0 2 | -------------------------------------------------------------------------------- /examples/python-rag-newssummary/README.md: -------------------------------------------------------------------------------- 1 | # News Summarizer 2 | 3 | This example goes through a series of steps: 4 | 5 | 1. You choose a topic area (e.g., "news", "NVidia", "music", etc.). 6 | 2. Gets the most recent articles on that topic from various sources. 7 | 3. Uses Ollama to summarize each article. 8 | 4. Creates chunks of sentences from each article. 9 | 5. Uses Sentence Transformers to generate embeddings for each of those chunks. 10 | 6. You enter a question regarding the summaries shown. 11 | 7. Uses Sentence Transformers to generate an embedding for that question. 12 | 8. Uses the embedded question to find the most similar chunks. 13 | 9. Feeds all that to Ollama to generate a good answer to your question based on these news articles. 14 | 15 | This example lets you pick from a few different topic areas, then summarize the most recent x articles for that topic. It then creates chunks of sentences from each article and then generates embeddings for each of those chunks. 
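The chunk search in steps 7–9 is handled by `knn_search`, imported from `utils.py`, which is not reproduced in this excerpt. As a rough sketch of the idea only (not the actual implementation), a cosine k-nearest-neighbour lookup over the stored chunk embeddings could be built with scikit-learn and NumPy, both already in this example's requirements; the flattening of `allEmbeddings` and the `(index, source text)` return shape below are assumptions based on how `summ.py` consumes the results.

```python
# Illustrative sketch only; the example's real search lives in utils.knn_search.
# Assumes the structure summ.py builds: a list of articles, each with an
# "embeddings" list of {"source": chunk_text, "embedding": [floats], ...} items.
import numpy as np
from sklearn.neighbors import NearestNeighbors

def knn_search_sketch(question_embedding, all_embeddings, k=10):
    chunks = [item for article in all_embeddings for item in article["embeddings"]]
    matrix = np.array([item["embedding"] for item in chunks])
    nn = NearestNeighbors(n_neighbors=min(k, len(chunks)), metric="cosine")
    nn.fit(matrix)
    _, indices = nn.kneighbors(np.array(question_embedding).reshape(1, -1))
    # Return (index, source text) pairs, the shape summ.py expects from knn_search
    return [(int(i), chunks[int(i)]["source"]) for i in indices[0]]
```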
16 | 17 | ## Running the Example 18 | 19 | 1. Ensure you have the `mistral-openorca` model installed: 20 | 21 | ```bash 22 | ollama pull mistral-openorca 23 | ``` 24 | 25 | 2. Install the Python Requirements. 26 | 27 | ```bash 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | 3. Run the example: 32 | 33 | ```bash 34 | python summ.py 35 | ``` 36 | -------------------------------------------------------------------------------- /examples/python-rag-newssummary/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.12.2 2 | feedparser==6.0.10 3 | mattsollamatools==0.0.8 4 | newspaper3k==0.2.8 5 | nltk==3.8.1 6 | numpy==1.24.3 7 | Requests==2.31.0 8 | scikit_learn==1.3.0 9 | sentence_transformers==2.2.2 10 | -------------------------------------------------------------------------------- /examples/python-rag-newssummary/summ.py: -------------------------------------------------------------------------------- 1 | import curses 2 | import json 3 | from utils import get_url_for_topic, topic_urls, menu, getUrls, get_summary, getArticleText, knn_search 4 | import requests 5 | from sentence_transformers import SentenceTransformer 6 | from mattsollamatools import chunker 7 | 8 | if __name__ == "__main__": 9 | chosen_topic = curses.wrapper(menu) 10 | print("Here is your news summary:\n") 11 | urls = getUrls(chosen_topic, n=5) 12 | model = SentenceTransformer('all-MiniLM-L6-v2') 13 | allEmbeddings = [] 14 | 15 | for url in urls: 16 | article={} 17 | article['embeddings'] = [] 18 | article['url'] = url 19 | text = getArticleText(url) 20 | summary = get_summary(text) 21 | chunks = chunker(text) # Use the chunk_text function from web_utils 22 | embeddings = model.encode(chunks) 23 | for (chunk, embedding) in zip(chunks, embeddings): 24 | item = {} 25 | item['source'] = chunk 26 | item['embedding'] = embedding.tolist() # Convert NumPy array to list 27 | item['sourcelength'] = len(chunk) 28 | article['embeddings'].append(item) 29 | 30 | allEmbeddings.append(article) 31 | 32 | print(f"{summary}\n") 33 | 34 | 35 | while True: 36 | context = [] 37 | # Input a question from the user 38 | question = input("Enter your question about the news, or type quit: ") 39 | 40 | if question.lower() == 'quit': 41 | break 42 | 43 | # Embed the user's question 44 | question_embedding = model.encode([question]) 45 | 46 | # Perform KNN search to find the best matches (indices and source text) 47 | best_matches = knn_search(question_embedding, allEmbeddings, k=10) 48 | 49 | 50 | sourcetext="" 51 | for i, (index, source_text) in enumerate(best_matches, start=1): 52 | sourcetext += f"{i}. Index: {index}, Source Text: {source_text}" 53 | 54 | systemPrompt = f"Only use the following information to answer the question. 
Do not use anything else: {sourcetext}" 55 | 56 | url = "http://localhost:11434/api/generate" 57 | 58 | payload = { 59 | "model": "mistral-openorca", 60 | "prompt": question, 61 | "system": systemPrompt, 62 | "stream": False, 63 | "context": context 64 | } 65 | 66 | # Convert the payload to a JSON string 67 | payload_json = json.dumps(payload) 68 | 69 | # Set the headers to specify JSON content 70 | headers = { 71 | "Content-Type": "application/json" 72 | } 73 | 74 | # Send the POST request 75 | response = requests.post(url, data=payload_json, headers=headers) 76 | 77 | # Check the response 78 | if response.status_code == 200: 79 | output = json.loads(response.text) 80 | context = output['context'] 81 | print(output['response']+ "\n") 82 | 83 | 84 | else: 85 | print(f"Request failed with status code {response.status_code}") 86 | 87 | -------------------------------------------------------------------------------- /examples/python-simplechat/client.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve` 5 | model = "llama3" # TODO: update this for whatever model you wish to use 6 | 7 | 8 | def chat(messages): 9 | r = requests.post( 10 | "http://0.0.0.0:11434/api/chat", 11 | json={"model": model, "messages": messages, "stream": True}, 12 | stream=True 13 | ) 14 | r.raise_for_status() 15 | output = "" 16 | 17 | for line in r.iter_lines(): 18 | body = json.loads(line) 19 | if "error" in body: 20 | raise Exception(body["error"]) 21 | if body.get("done") is False: 22 | message = body.get("message", "") 23 | content = message.get("content", "") 24 | output += content 25 | # the response streams one token at a time, print that as we receive it 26 | print(content, end="", flush=True) 27 | 28 | if body.get("done", False): 29 | message["content"] = output 30 | return message 31 | 32 | 33 | def main(): 34 | messages = [] 35 | 36 | while True: 37 | user_input = input("Enter a prompt: ") 38 | if not user_input: 39 | exit() 40 | print() 41 | messages.append({"role": "user", "content": user_input}) 42 | message = chat(messages) 43 | messages.append(message) 44 | print("\n\n") 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /examples/python-simplechat/readme.md: -------------------------------------------------------------------------------- 1 | # Simple Chat Example 2 | 3 | The **chat** endpoint is one of two ways to generate text from an LLM with Ollama, and is introduced in version 0.1.14. At a high level, you provide the endpoint an array of objects with a role and content specified. Then with each output and prompt, you add more of those role/content objects, which builds up the history. 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `llama3` model installed: 8 | 9 | ```bash 10 | ollama pull llama3 11 | ``` 12 | 13 | 2. Install the Python Requirements. 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | 3. 
Run the example: 20 | 21 | ```bash 22 | python client.py 23 | ``` 24 | 25 | ## Review the Code 26 | 27 | You can see in the **chat** function that actually calling the endpoint is done simply with: 28 | 29 | ```python 30 | r = requests.post( 31 | "http://0.0.0.0:11434/api/chat", 32 | json={"model": model, "messages": messages, "stream": True}, 33 | ) 34 | ``` 35 | 36 | With the **generate** endpoint, you need to provide a `prompt`. But with **chat**, you provide `messages`. And the resulting stream of responses includes a `message` object with a `content` field. 37 | 38 | The final JSON object doesn't provide the full content, so you will need to build the content yourself. 39 | 40 | In the **main** function, we collect `user_input` and add it as a message to our messages and that is passed to the chat function. When the LLM is done responding the output is added as another message. 41 | 42 | ## Next Steps 43 | 44 | In this example, all generations are kept. You might want to experiment with summarizing everything older than 10 conversations to enable longer history with less context being used. 45 | -------------------------------------------------------------------------------- /examples/python-simplechat/requirements.txt: -------------------------------------------------------------------------------- 1 | Requests==2.31.0 2 | -------------------------------------------------------------------------------- /examples/python-simplegenerate/README.md: -------------------------------------------------------------------------------- 1 | # Simple Generate Example 2 | 3 | This is a simple example using the **Generate** endpoint. 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `stablelm-zephyr` model installed: 8 | 9 | ```bash 10 | ollama pull stablelm-zephyr 11 | ``` 12 | 13 | 2. Install the Python Requirements. 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | 3. Run the example: 20 | 21 | ```bash 22 | python client.py 23 | ``` 24 | 25 | ## Review the Code 26 | 27 | The **main** function simply asks for input, then passes that to the generate function. The output from generate is then passed back to generate on the next run. 28 | 29 | The **generate** function uses `requests.post` to call `/api/generate`, passing the model, prompt, and context. The `generate` endpoint returns a stream of JSON blobs that are then iterated through, looking for the response values. That is then printed out. The final JSON object includes the full context of the conversation so far, and that is the return value from the function. 
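For contrast with the streaming loop in `client.py` below, the same endpoint can also be called with streaming turned off, in which case a single JSON object comes back carrying the full `response` plus the accumulated `context`. This is the same pattern the news summarizer and JSON data generator examples above use. A minimal sketch (the prompt text here is just a placeholder):

```python
import requests

# Non-streaming sketch of the /api/generate call that client.py makes below.
# With "stream": False the server replies with one JSON object instead of a stream.
data = {
    "model": "stablelm-zephyr",
    "prompt": "Why is the sky blue?",
    "context": [],  # pass the context from the previous reply to continue a conversation
    "stream": False,
}
body = requests.post("http://localhost:11434/api/generate", json=data).json()
print(body["response"])
context = body["context"]  # hand this back on the next call, as client.py does
```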
30 | -------------------------------------------------------------------------------- /examples/python-simplegenerate/client.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve` 5 | model = 'stablelm-zephyr' # TODO: update this for whatever model you wish to use 6 | 7 | def generate(prompt, context): 8 | r = requests.post('http://localhost:11434/api/generate', 9 | json={ 10 | 'model': model, 11 | 'prompt': prompt, 12 | 'context': context, 13 | }, 14 | stream=True) 15 | r.raise_for_status() 16 | 17 | for line in r.iter_lines(): 18 | body = json.loads(line) 19 | response_part = body.get('response', '') 20 | # the response streams one token at a time, print that as we receive it 21 | print(response_part, end='', flush=True) 22 | 23 | if 'error' in body: 24 | raise Exception(body['error']) 25 | 26 | if body.get('done', False): 27 | return body['context'] 28 | 29 | def main(): 30 | context = [] # the context stores a conversation history, you can use this to make the model more context aware 31 | while True: 32 | user_input = input("Enter a prompt: ") 33 | if not user_input: 34 | exit() 35 | print() 36 | context = generate(user_input, context) 37 | print() 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /examples/python-simplegenerate/requirements.txt: -------------------------------------------------------------------------------- 1 | Requests==2.31.0 2 | -------------------------------------------------------------------------------- /examples/typescript-functioncalling/extractwp.ts: -------------------------------------------------------------------------------- 1 | import { Ollama } from "ollama-node"; 2 | import { readFile } from "fs/promises"; 3 | 4 | async function main() { 5 | 6 | const ollama = new Ollama(); 7 | 8 | // Set the system prompt to prepare the model to receive a prompt and a schema and set some rules for the output. 9 | const systemprompt = `You will be given a text along with a prompt and a schema. You will have to extract the information requested in the prompt from the text and generate output in JSON observing the schema provided. If the schema shows a type of integer or number, you must only show a integer for that field. A string should always be a valid string. If a value is unknown, leave it empty. Output the JSON with extra spaces to ensure that it pretty prints.` 10 | 11 | const schema = { 12 | "people": [{ 13 | "name": { 14 | "type": "string", 15 | "description": "Name of the person" 16 | }, 17 | "title": { 18 | "type": "string", 19 | "description": "Title of the person" 20 | } 21 | }], 22 | } 23 | 24 | // Depending on the model chosen, you may be limited by the size of the context window, so limit the context to 2000 words. 25 | const textcontent = await readFile("./wp.txt", "utf-8").then((text) => text.split(" ").slice(0, 2000).join(" ")); 26 | 27 | // Specific instructions for this task 28 | const prompt = `Review the source text and determine the 10 most important people to focus on. Then extract the name and title for those people. 
Output should be in JSON.\n\nSchema: \n${JSON.stringify(schema, null, 2)}\n\nSource Text:\n${textcontent}` 29 | 30 | await ollama.setModel("neural-chat"); 31 | ollama.setSystemPrompt(systemprompt); 32 | 33 | // setJSONFormat is the equivalent of setting 'format: json' in the API 34 | ollama.setJSONFormat(true); 35 | await ollama.streamingGenerate(prompt, (word) => { process.stdout.write(word) }) 36 | } 37 | 38 | main(); -------------------------------------------------------------------------------- /examples/typescript-functioncalling/info.txt: -------------------------------------------------------------------------------- 1 | --- 2 | Hi matt, 3 | 4 | thanks for letting me know that you are going to come today, November 16, for my tea party. My address is 123 Falk St on Bainbridge Island. I live in the house with the red door. I will be home all day so just come by whenever you want. 5 | 6 | Fred 7 | 8 | --- 9 | Great, send the check to our office at 1917 1st St, Seattle, WA 98101. I will let you know when we receive it. 10 | 11 | Mark Richardson 12 | Big Corp 13 | --- 14 | We are looking forward to seeing you at our Local AI Meetup. It will be held on December 3. It will be at the offices of Enormous Co. Our address is 344 1st Ave, Seattle, WA 98101. We will be meeting in the conference room on the 3rd floor. 15 | 16 | Barbara Reilly 17 | Enormous Co. -------------------------------------------------------------------------------- /examples/typescript-functioncalling/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "ollama-node": "^0.1.27" 4 | }, 5 | "devDependencies": { 6 | "tsx": "^4.1.2", 7 | "typescript": "^5.2.2" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /examples/typescript-functioncalling/readme.md: -------------------------------------------------------------------------------- 1 | # Function calling 2 | 3 | ![function calling 2023-11-16 16_12_58](https://github.com/ollama/ollama/assets/633681/a0acc247-9746-45ab-b325-b65dfbbee4fb) 4 | 5 | One of the features added to some models is 'function calling'. It's a bit of a confusing name. It's understandable if you think that means the model can call functions, but that's not what it means. Function calling simply means that the output of the model is formatted in JSON, using a preconfigured schema, and uses the expected types. Then your code can use the output of the model and call functions with it. Using the JSON format in Ollama, you can use any model for function calling. 6 | 7 | The two examples provided can extract information out of the provided texts. The first example uses the first couple of chapters from War and Peace by Lev Nikolayevich Tolstoy, and extracts the names and titles of the characters introduced in the story. The second example uses a more complicated schema to pull out addresses and event information from a series of emails. 8 | 9 | ## Running the examples 10 | 11 | 1. Clone this repo and navigate to the `examples/typescript-functioncalling` directory. 12 | 2. Install the dependencies with `npm install`. 13 | 3. Review the `wp.txt` file. 14 | 4. Run `tsx extractwp.ts`. 15 | 5. Review the `info.txt` file. 16 | 6. Run `tsx extractemail.ts`. 17 | 18 | ## Review the Code 19 | 20 | Both examples do roughly the same thing with different source material. They both use the same system prompt, which tells the model to expect some instructions and a schema. 
Then we inject the schema into the prompt and generate an answer. 21 | 22 | The first example, `extractwp.ts`, outputs the resulting JSON to the console, listing the characters introduced at the start of War and Peace. The second example, `extractemail.ts`, is a bit more complicated, extracting two different types of information: addresses and events. It outputs the results to a JSON blob, then the addresses are handed off to one function called `reportAddresses` and the events are handed off to another function called `reportEvents`. 23 | 24 | Notice that both examples are using the model from Intel called `neural-chat`. This is not a model tuned for function calling, yet it performs very well at this task. 25 | 26 | ## Next Steps 27 | 28 | Try exporting some of your real emails to the input file and seeing how well the model does. Try pointing the first example at other books. You could even have it cycle through all the sections and maybe add up the number of times any character is seen throughout the book, determining the most important characters. You can also try out different models. 29 | -------------------------------------------------------------------------------- /examples/typescript-mentors/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | package-lock.json 3 | -------------------------------------------------------------------------------- /examples/typescript-mentors/character-generator.ts: -------------------------------------------------------------------------------- 1 | import { Ollama } from 'ollama-node' 2 | import fs from 'fs'; 3 | import path from 'path'; 4 | 5 | async function characterGenerator() { 6 | const character = process.argv[2]; 7 | console.log(`You are creating a character for ${character}.`); 8 | const foldername = character.replace(/\s/g, '').toLowerCase(); 9 | const directory = path.join(__dirname, foldername); 10 | if (!fs.existsSync(directory)) { 11 | fs.mkdirSync(directory, { recursive: true }); 12 | } 13 | 14 | const ollama = new Ollama(); 15 | ollama.setModel("stablebeluga2:70b-q4_K_M"); 16 | const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. 
`); 17 | 18 | const thecontents = `FROM llama3\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`; 19 | 20 | fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => { 21 | if (err) throw err; 22 | console.log('The file has been saved!'); 23 | }); 24 | } 25 | 26 | characterGenerator(); 27 | -------------------------------------------------------------------------------- /examples/typescript-mentors/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "charactergen": "tsx character-generator.ts", 4 | "start": "tsx mentors.ts" 5 | }, 6 | "dependencies": { 7 | "fs": "^0.0.1-security", 8 | "ollama-node": "^0.0.3", 9 | "path": "^0.12.7" 10 | }, 11 | "devDependencies": { 12 | "tsx": "^4.6.2", 13 | "typescript": "^5.3.3" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /examples/typescript-simplechat/client.ts: -------------------------------------------------------------------------------- 1 | import * as readline from "readline"; 2 | 3 | const model = "llama3"; 4 | type Message = { 5 | role: "assistant" | "user" | "system"; 6 | content: string; 7 | } 8 | const messages: Message[] = [{ 9 | role: "system", 10 | content: "You are a helpful AI agent." 11 | }] 12 | 13 | const rl = readline.createInterface({ 14 | input: process.stdin, 15 | output: process.stdout 16 | }) 17 | 18 | async function chat(messages: Message[]): Promise { 19 | const body = { 20 | model: model, 21 | messages: messages 22 | } 23 | 24 | const response = await fetch("http://localhost:11434/api/chat", { 25 | method: "POST", 26 | body: JSON.stringify(body) 27 | }) 28 | 29 | const reader = response.body?.getReader() 30 | if (!reader) { 31 | throw new Error("Failed to read response body") 32 | } 33 | let content = "" 34 | while (true) { 35 | const { done, value } = await reader.read() 36 | if (done) { 37 | break; 38 | } 39 | const rawjson = new TextDecoder().decode(value); 40 | const json = JSON.parse(rawjson) 41 | 42 | if (json.done === false) { 43 | process.stdout.write(json.message.content); 44 | content += json.message.content 45 | } 46 | 47 | } 48 | return { role: "assistant", content: content }; 49 | } 50 | 51 | async function askQuestion(): Promise { 52 | return new Promise((resolve) => { 53 | rl.question("\n\nAsk a question: (press enter alone to quit)\n\n", async (user_input) => { 54 | if (user_input.trim() === "") { 55 | rl.close(); 56 | console.log("Thankyou. 
Goodbye.\n") 57 | console.log("=======\nHere is the message history that was used in this conversation.\n=======\n") 58 | messages.forEach(message => { 59 | console.log(message) 60 | }) 61 | resolve(); 62 | } else { 63 | console.log(); 64 | messages.push({ role: "user", content: user_input }); 65 | messages.push(await chat(messages)); 66 | await askQuestion(); // Ask the next question 67 | } 68 | }); 69 | }); 70 | } 71 | 72 | async function main() { 73 | await askQuestion(); 74 | 75 | } 76 | 77 | main(); 78 | -------------------------------------------------------------------------------- /examples/typescript-simplechat/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "start": "tsx client.ts" 4 | }, 5 | "dependencies": { 6 | "@types/node": "^20.10.4", 7 | "prompt-sync": "^4.2.0", 8 | "readline": "^1.3.0", 9 | "tsx": "^4.6.2", 10 | "typescript": "^5.3.3" 11 | } 12 | } -------------------------------------------------------------------------------- /examples/typescript-simplechat/readme.md: -------------------------------------------------------------------------------- 1 | # Simple Chat Example 2 | 3 | The **chat** endpoint, available as of v0.1.14, is one of two ways to generate text from an LLM with Ollama. At a high level, you provide the endpoint an array of message objects with a role and content specified. Then with each output and prompt, you add more messages, which builds up the history. 4 | 5 | ## Run the Example 6 | 7 | `npm start` 8 | 9 | ## Review the Code 10 | 11 | You can see in the **chat** function that is actually calling the endpoint is simply done with: 12 | 13 | ```typescript 14 | const body = { 15 | model: model, 16 | messages: messages 17 | } 18 | 19 | const response = await fetch("http://localhost:11434/api/chat", { 20 | method: "POST", 21 | body: JSON.stringify(body) 22 | }) 23 | ``` 24 | 25 | With the **generate** endpoint, you need to provide a `prompt`. But with **chat**, you provide `messages`. And the resulting stream of responses includes a `message` object with a `content` field. 26 | 27 | The final JSON object doesn't provide the full content, so you will need to build the content yourself. In this example, **chat** takes the full array of messages and outputs the resulting message from this call of the chat endpoint. 28 | 29 | In the **askQuestion** function, we collect `user_input` and add it as a message to our messages, and that is passed to the chat function. When the LLM is done responding, the output is added as another message to the messages array. 30 | 31 | At the end, you will see a printout of all the messages. 32 | 33 | ## Next Steps 34 | 35 | In this example, all generations are kept. You might want to experiment with summarizing everything older than 10 conversations to enable longer history with less context being used. 
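The "summarize older history" idea suggested above (and in the Python chat example earlier) can be prototyped with a small helper. The sketch below is in Python against the same `/api/chat` endpoint; the function name, the ten-message cutoff, and the summarization prompt are illustrative choices, not part of this example.

```python
import requests

OLLAMA_CHAT = "http://localhost:11434/api/chat"  # same endpoint the examples above call

def compact_history(messages, model="llama3", keep_last=10):
    """Fold everything except the last keep_last messages into one summary message."""
    if len(messages) <= keep_last:
        return messages
    older, recent = messages[:-keep_last], messages[-keep_last:]
    transcript = "\n".join(f"{m['role']}: {m['content']}" for m in older)
    r = requests.post(OLLAMA_CHAT, json={
        "model": model,
        "messages": [{"role": "user",
                      "content": "Summarize this conversation in a few sentences:\n" + transcript}],
        "stream": False,
    })
    summary = r.json()["message"]["content"]
    return [{"role": "system", "content": "Earlier conversation summary: " + summary}] + recent
```

Calling `compact_history(messages)` right before each chat request would keep the request payload bounded while preserving the gist of older turns.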
36 | -------------------------------------------------------------------------------- /format/bytes.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | const ( 9 | Byte = 1 10 | 11 | KiloByte = Byte * 1000 12 | MegaByte = KiloByte * 1000 13 | GigaByte = MegaByte * 1000 14 | TeraByte = GigaByte * 1000 15 | 16 | KibiByte = Byte * 1024 17 | MebiByte = KibiByte * 1024 18 | GibiByte = MebiByte * 1024 19 | ) 20 | 21 | func HumanBytes(b int64) string { 22 | var value float64 23 | var unit string 24 | 25 | switch { 26 | case b >= TeraByte: 27 | value = float64(b) / TeraByte 28 | unit = "TB" 29 | case b >= GigaByte: 30 | value = float64(b) / GigaByte 31 | unit = "GB" 32 | case b >= MegaByte: 33 | value = float64(b) / MegaByte 34 | unit = "MB" 35 | case b >= KiloByte: 36 | value = float64(b) / KiloByte 37 | unit = "KB" 38 | default: 39 | return fmt.Sprintf("%d B", b) 40 | } 41 | 42 | switch { 43 | case value >= 100: 44 | return fmt.Sprintf("%d %s", int(value), unit) 45 | case value >= 10: 46 | return fmt.Sprintf("%d %s", int(value), unit) 47 | case value != math.Trunc(value): 48 | return fmt.Sprintf("%.1f %s", value, unit) 49 | default: 50 | return fmt.Sprintf("%d %s", int(value), unit) 51 | } 52 | } 53 | 54 | func HumanBytes2(b uint64) string { 55 | switch { 56 | case b >= GibiByte: 57 | return fmt.Sprintf("%.1f GiB", float64(b)/GibiByte) 58 | case b >= MebiByte: 59 | return fmt.Sprintf("%.1f MiB", float64(b)/MebiByte) 60 | case b >= KibiByte: 61 | return fmt.Sprintf("%.1f KiB", float64(b)/KibiByte) 62 | default: 63 | return fmt.Sprintf("%d B", b) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /format/format.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | const ( 9 | Thousand = 1000 10 | Million = Thousand * 1000 11 | Billion = Million * 1000 12 | ) 13 | 14 | func HumanNumber(b uint64) string { 15 | switch { 16 | case b >= Billion: 17 | number := float64(b) / Billion 18 | if number == math.Floor(number) { 19 | return fmt.Sprintf("%.0fB", number) // no decimals if whole number 20 | } 21 | return fmt.Sprintf("%.1fB", number) // one decimal if not a whole number 22 | case b >= Million: 23 | number := float64(b) / Million 24 | if number == math.Floor(number) { 25 | return fmt.Sprintf("%.0fM", number) // no decimals if whole number 26 | } 27 | return fmt.Sprintf("%.2fM", number) // two decimals if not a whole number 28 | case b >= Thousand: 29 | return fmt.Sprintf("%.0fK", float64(b)/Thousand) 30 | default: 31 | return fmt.Sprintf("%d", b) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /format/format_test.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestHumanNumber(t *testing.T) { 8 | 9 | type testCase struct { 10 | input uint64 11 | expected string 12 | } 13 | 14 | testCases := []testCase{ 15 | {0, "0"}, 16 | {1000000, "1M"}, 17 | {125000000, "125M"}, 18 | {500500000, "500.50M"}, 19 | {500550000, "500.55M"}, 20 | {1000000000, "1B"}, 21 | {2800000000, "2.8B"}, 22 | {2850000000, "2.9B"}, 23 | {1000000000000, "1000B"}, 24 | } 25 | 26 | for _, tc := range testCases { 27 | t.Run(tc.expected, func(t *testing.T) { 28 | result := HumanNumber(tc.input) 29 | if result != tc.expected { 30 | 
t.Errorf("Expected %s, got %s", tc.expected, result) 31 | } 32 | }) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /format/time.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strings" 7 | "time" 8 | ) 9 | 10 | // humanDuration returns a human-readable approximation of a 11 | // duration (eg. "About a minute", "4 hours ago", etc.). 12 | func humanDuration(d time.Duration) string { 13 | seconds := int(d.Seconds()) 14 | 15 | switch { 16 | case seconds < 1: 17 | return "Less than a second" 18 | case seconds == 1: 19 | return "1 second" 20 | case seconds < 60: 21 | return fmt.Sprintf("%d seconds", seconds) 22 | } 23 | 24 | minutes := int(d.Minutes()) 25 | switch { 26 | case minutes == 1: 27 | return "About a minute" 28 | case minutes < 60: 29 | return fmt.Sprintf("%d minutes", minutes) 30 | } 31 | 32 | hours := int(math.Round(d.Hours())) 33 | switch { 34 | case hours == 1: 35 | return "About an hour" 36 | case hours < 48: 37 | return fmt.Sprintf("%d hours", hours) 38 | case hours < 24*7*2: 39 | return fmt.Sprintf("%d days", hours/24) 40 | case hours < 24*30*2: 41 | return fmt.Sprintf("%d weeks", hours/24/7) 42 | case hours < 24*365*2: 43 | return fmt.Sprintf("%d months", hours/24/30) 44 | } 45 | 46 | return fmt.Sprintf("%d years", int(d.Hours())/24/365) 47 | } 48 | 49 | func HumanTime(t time.Time, zeroValue string) string { 50 | return humanTime(t, zeroValue) 51 | } 52 | 53 | func HumanTimeLower(t time.Time, zeroValue string) string { 54 | return strings.ToLower(humanTime(t, zeroValue)) 55 | } 56 | 57 | func humanTime(t time.Time, zeroValue string) string { 58 | if t.IsZero() { 59 | return zeroValue 60 | } 61 | 62 | delta := time.Since(t) 63 | if int(delta.Hours())/24/365 < -20 { 64 | return "Forever" 65 | } else if delta < 0 { 66 | return humanDuration(-delta) + " from now" 67 | } 68 | 69 | return humanDuration(delta) + " ago" 70 | } 71 | -------------------------------------------------------------------------------- /format/time_test.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func assertEqual(t *testing.T, a interface{}, b interface{}) { 9 | if a != b { 10 | t.Errorf("Assert failed, expected %v, got %v", b, a) 11 | } 12 | } 13 | 14 | func TestHumanTime(t *testing.T) { 15 | now := time.Now() 16 | 17 | t.Run("zero value", func(t *testing.T) { 18 | assertEqual(t, HumanTime(time.Time{}, "never"), "never") 19 | }) 20 | 21 | t.Run("time in the future", func(t *testing.T) { 22 | v := now.Add(48 * time.Hour) 23 | assertEqual(t, HumanTime(v, ""), "2 days from now") 24 | }) 25 | 26 | t.Run("time in the past", func(t *testing.T) { 27 | v := now.Add(-48 * time.Hour) 28 | assertEqual(t, HumanTime(v, ""), "2 days ago") 29 | }) 30 | 31 | t.Run("soon", func(t *testing.T) { 32 | v := now.Add(800 * time.Millisecond) 33 | assertEqual(t, HumanTime(v, ""), "Less than a second from now") 34 | }) 35 | 36 | t.Run("time way in the future", func(t *testing.T) { 37 | v := now.Add(24 * time.Hour * 365 * 200) 38 | assertEqual(t, HumanTime(v, ""), "Forever") 39 | }) 40 | 41 | t.Run("time way in the future lowercase", func(t *testing.T) { 42 | v := now.Add(24 * time.Hour * 365 * 200) 43 | assertEqual(t, HumanTimeLower(v, ""), "forever") 44 | }) 45 | } 46 | -------------------------------------------------------------------------------- /gpu/cpu_common.go: 
-------------------------------------------------------------------------------- 1 | package gpu 2 | 3 | import ( 4 | "log/slog" 5 | 6 | "golang.org/x/sys/cpu" 7 | ) 8 | 9 | func GetCPUVariant() string { 10 | if cpu.X86.HasAVX2 { 11 | slog.Debug("CPU has AVX2") 12 | return "avx2" 13 | } 14 | if cpu.X86.HasAVX { 15 | slog.Debug("CPU has AVX") 16 | return "avx" 17 | } 18 | slog.Debug("CPU does not have vector extensions") 19 | // else LCD 20 | return "" 21 | } 22 | -------------------------------------------------------------------------------- /gpu/cuda_common.go: -------------------------------------------------------------------------------- 1 | //go:build linux || windows 2 | 3 | package gpu 4 | 5 | import ( 6 | "log/slog" 7 | "strings" 8 | ) 9 | 10 | func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { 11 | ids := []string{} 12 | for _, info := range gpuInfo { 13 | if info.Library != "cuda" { 14 | // TODO shouldn't happen if things are wired correctly... 15 | slog.Debug("cudaGetVisibleDevicesEnv skipping over non-cuda device", "library", info.Library) 16 | continue 17 | } 18 | ids = append(ids, info.ID) 19 | } 20 | return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",") 21 | 22 | } 23 | -------------------------------------------------------------------------------- /gpu/gpu_darwin.go: -------------------------------------------------------------------------------- 1 | //go:build darwin 2 | 3 | package gpu 4 | 5 | /* 6 | #cgo CFLAGS: -x objective-c 7 | #cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal 8 | #include "gpu_info_darwin.h" 9 | */ 10 | import "C" 11 | import ( 12 | "runtime" 13 | 14 | "github.com/ollama/ollama/format" 15 | ) 16 | 17 | const ( 18 | metalMinimumMemory = 512 * format.MebiByte 19 | ) 20 | 21 | func GetGPUInfo() GpuInfoList { 22 | mem, _ := GetCPUMem() 23 | if runtime.GOARCH == "amd64" { 24 | return []GpuInfo{ 25 | { 26 | Library: "cpu", 27 | Variant: GetCPUVariant(), 28 | memInfo: mem, 29 | }, 30 | } 31 | } 32 | info := GpuInfo{ 33 | Library: "metal", 34 | ID: "0", 35 | } 36 | info.TotalMemory = uint64(C.getRecommendedMaxVRAM()) 37 | 38 | // TODO is there a way to gather actual allocated video memory? 
(currentAllocatedSize doesn't work) 39 | info.FreeMemory = info.TotalMemory 40 | 41 | info.MinimumMemory = metalMinimumMemory 42 | return []GpuInfo{info} 43 | } 44 | 45 | func GetCPUMem() (memInfo, error) { 46 | return memInfo{ 47 | TotalMemory: uint64(C.getPhysicalMemory()), 48 | FreeMemory: 0, 49 | }, nil 50 | } 51 | 52 | func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) { 53 | // No-op on darwin 54 | return "", "" 55 | } 56 | -------------------------------------------------------------------------------- /gpu/gpu_info.h: -------------------------------------------------------------------------------- 1 | #ifndef __APPLE__ 2 | #ifndef __GPU_INFO_H__ 3 | #define __GPU_INFO_H__ 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef _WIN32 9 | #include 10 | #define LOAD_LIBRARY(lib, flags) dlopen(lib, flags) 11 | #define LOAD_SYMBOL(handle, sym) dlsym(handle, sym) 12 | #define LOAD_ERR() strdup(dlerror()) 13 | #define UNLOAD_LIBRARY(handle) dlclose(handle) 14 | #else 15 | #include 16 | #define LOAD_LIBRARY(lib, flags) LoadLibrary(lib) 17 | #define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym) 18 | #define UNLOAD_LIBRARY(handle) FreeLibrary(handle) 19 | #define LOAD_ERR() ({\ 20 | LPSTR messageBuffer = NULL; \ 21 | size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, \ 22 | NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); \ 23 | char *resp = strdup(messageBuffer); \ 24 | LocalFree(messageBuffer); \ 25 | resp; \ 26 | }) 27 | 28 | #endif 29 | 30 | #define LOG(verbose, ...) \ 31 | do { \ 32 | if (verbose) { \ 33 | fprintf(stderr, __VA_ARGS__); \ 34 | } \ 35 | } while (0) 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | #define GPU_ID_LEN 64 42 | #define GPU_NAME_LEN 96 43 | 44 | typedef struct mem_info { 45 | char *err; // If non-nill, caller responsible for freeing 46 | char gpu_id[GPU_ID_LEN]; 47 | char gpu_name[GPU_NAME_LEN]; 48 | uint64_t total; 49 | uint64_t free; 50 | 51 | // Compute Capability 52 | int major; 53 | int minor; 54 | int patch; 55 | } mem_info_t; 56 | 57 | void cpu_check_ram(mem_info_t *resp); 58 | 59 | #ifdef __cplusplus 60 | } 61 | #endif 62 | 63 | #include "gpu_info_cudart.h" 64 | #include "gpu_info_nvcuda.h" 65 | #include "gpu_info_oneapi.h" 66 | 67 | #endif // __GPU_INFO_H__ 68 | #endif // __APPLE__ -------------------------------------------------------------------------------- /gpu/gpu_info_cpu.c: -------------------------------------------------------------------------------- 1 | #include "gpu_info.h" 2 | // Fallbacks for CPU mode 3 | 4 | #ifdef _WIN32 5 | #include 6 | void cpu_check_ram(mem_info_t *resp) { 7 | resp->err = NULL; 8 | MEMORYSTATUSEX info; 9 | info.dwLength = sizeof(info); 10 | if (GlobalMemoryStatusEx(&info) != 0) { 11 | resp->total = info.ullTotalPhys; 12 | resp->free = info.ullAvailPhys; 13 | snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0"); 14 | } else { 15 | resp->err = LOAD_ERR(); 16 | } 17 | return; 18 | } 19 | 20 | #elif __linux__ 21 | #include 22 | #include 23 | #include 24 | void cpu_check_ram(mem_info_t *resp) { 25 | struct sysinfo info; 26 | resp->err = NULL; 27 | if (sysinfo(&info) != 0) { 28 | resp->err = strdup(strerror(errno)); 29 | } else { 30 | resp->total = info.totalram * info.mem_unit; 31 | resp->free = info.freeram * info.mem_unit; 32 | snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0"); 33 | } 34 | return; 35 | } 36 | 37 | #elif __APPLE__ 38 | // TODO consider an Apple implementation that 
does something useful 39 | // mem_info_t cpu_check_ram() { 40 | // mem_info_t resp = {0, 0, NULL}; 41 | // return resp; 42 | // } 43 | #else 44 | #error "Unsupported platform" 45 | #endif 46 | -------------------------------------------------------------------------------- /gpu/gpu_info_darwin.h: -------------------------------------------------------------------------------- 1 | #import 2 | #include 3 | uint64_t getRecommendedMaxVRAM(); 4 | uint64_t getPhysicalMemory(); 5 | -------------------------------------------------------------------------------- /gpu/gpu_info_darwin.m: -------------------------------------------------------------------------------- 1 | // go:build darwin 2 | #include "gpu_info_darwin.h" 3 | 4 | uint64_t getRecommendedMaxVRAM() { 5 | id device = MTLCreateSystemDefaultDevice(); 6 | uint64_t result = device.recommendedMaxWorkingSetSize; 7 | CFRelease(device); 8 | return result; 9 | } 10 | 11 | uint64_t getPhysicalMemory() { 12 | return [[NSProcessInfo processInfo] physicalMemory]; 13 | } 14 | -------------------------------------------------------------------------------- /gpu/gpu_info_nvcuda.h: -------------------------------------------------------------------------------- 1 | #ifndef __APPLE__ 2 | #ifndef __GPU_INFO_NVCUDA_H__ 3 | #define __GPU_INFO_NVCUDA_H__ 4 | #include "gpu_info.h" 5 | 6 | // Just enough typedef's to dlopen/dlsym for memory information 7 | typedef enum cudaError_enum { 8 | CUDA_SUCCESS = 0, 9 | CUDA_ERROR_INVALID_VALUE = 1, 10 | CUDA_ERROR_MEMORY_ALLOCATION = 2, 11 | CUDA_ERROR_NOT_INITIALIZED = 3, 12 | CUDA_ERROR_INSUFFICIENT_DRIVER = 35, 13 | // Other values omitted for now... 14 | } CUresult; 15 | 16 | typedef enum CUdevice_attribute_enum { 17 | CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75, 18 | CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76, 19 | 20 | // TODO - not yet wired up but may be useful for Jetson or other 21 | // integrated GPU scenarios with shared memory 22 | CU_DEVICE_ATTRIBUTE_INTEGRATED = 18 23 | 24 | } CUdevice_attribute; 25 | 26 | typedef void *nvcudaDevice_t; // Opaque is sufficient 27 | typedef struct nvcudaMemory_st { 28 | uint64_t total; 29 | uint64_t free; 30 | } nvcudaMemory_t; 31 | 32 | typedef struct nvcudaDriverVersion { 33 | int major; 34 | int minor; 35 | } nvcudaDriverVersion_t; 36 | 37 | typedef struct CUuuid_st { 38 | unsigned char bytes[16]; 39 | } CUuuid; 40 | 41 | typedef int CUdevice; 42 | typedef void* CUcontext; 43 | 44 | typedef struct nvcuda_handle { 45 | void *handle; 46 | uint16_t verbose; 47 | int driver_major; 48 | int driver_minor; 49 | CUresult (*cuInit)(unsigned int Flags); 50 | CUresult (*cuDriverGetVersion)(int *driverVersion); 51 | CUresult (*cuDeviceGetCount)(int *); 52 | CUresult (*cuDeviceGet)(CUdevice* device, int ordinal); 53 | CUresult (*cuDeviceGetAttribute)(int* pi, CUdevice_attribute attrib, CUdevice dev); 54 | CUresult (*cuDeviceGetUuid)(CUuuid* uuid, CUdevice dev); // signature compatible with cuDeviceGetUuid_v2 55 | CUresult (*cuDeviceGetName)(char *name, int len, CUdevice dev); 56 | 57 | // Context specific aspects 58 | CUresult (*cuCtxCreate_v3)(CUcontext* pctx, void *params, int len, unsigned int flags, CUdevice dev); 59 | CUresult (*cuMemGetInfo_v2)(uint64_t* free, uint64_t* total); 60 | CUresult (*cuCtxDestroy)(CUcontext ctx); 61 | } nvcuda_handle_t; 62 | 63 | typedef struct nvcuda_init_resp { 64 | char *err; // If err is non-null handle is invalid 65 | nvcuda_handle_t ch; 66 | int num_devices; 67 | } nvcuda_init_resp_t; 68 | 69 | void nvcuda_init(char 
*nvcuda_lib_path, nvcuda_init_resp_t *resp); 70 | void nvcuda_check_vram(nvcuda_handle_t ch, int device_id, mem_info_t *resp); 71 | void nvcuda_release(nvcuda_handle_t ch); 72 | 73 | #endif // __GPU_INFO_NVCUDA_H__ 74 | #endif // __APPLE__ 75 | -------------------------------------------------------------------------------- /gpu/gpu_oneapi.go: -------------------------------------------------------------------------------- 1 | //go:build linux || windows 2 | 3 | package gpu 4 | 5 | import ( 6 | "log/slog" 7 | "strings" 8 | ) 9 | 10 | func oneapiGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { 11 | ids := []string{} 12 | for _, info := range gpuInfo { 13 | if info.Library != "oneapi" { 14 | // TODO shouldn't happen if things are wired correctly... 15 | slog.Debug("oneapiGetVisibleDevicesEnv skipping over non-sycl device", "library", info.Library) 16 | continue 17 | } 18 | ids = append(ids, info.ID) 19 | } 20 | return "ONEAPI_DEVICE_SELECTOR", "level_zero:" + strings.Join(ids, ",") 21 | } 22 | -------------------------------------------------------------------------------- /gpu/gpu_test.go: -------------------------------------------------------------------------------- 1 | package gpu 2 | 3 | import ( 4 | "runtime" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestBasicGetGPUInfo(t *testing.T) { 11 | info := GetGPUInfo() 12 | assert.Greater(t, len(info), 0) 13 | assert.Contains(t, "cuda rocm cpu metal", info[0].Library) 14 | if info[0].Library != "cpu" { 15 | assert.Greater(t, info[0].TotalMemory, uint64(0)) 16 | assert.Greater(t, info[0].FreeMemory, uint64(0)) 17 | } 18 | } 19 | 20 | func TestCPUMemInfo(t *testing.T) { 21 | info, err := GetCPUMem() 22 | assert.NoError(t, err) 23 | switch runtime.GOOS { 24 | case "darwin": 25 | t.Skip("CPU memory not populated on darwin") 26 | case "linux", "windows": 27 | assert.Greater(t, info.TotalMemory, uint64(0)) 28 | assert.Greater(t, info.FreeMemory, uint64(0)) 29 | default: 30 | return 31 | } 32 | } 33 | 34 | // TODO - add some logic to figure out card type through other means and actually verify we got back what we expected 35 | -------------------------------------------------------------------------------- /gpu/types.go: -------------------------------------------------------------------------------- 1 | package gpu 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | 7 | "github.com/ollama/ollama/format" 8 | ) 9 | 10 | type memInfo struct { 11 | TotalMemory uint64 `json:"total_memory,omitempty"` 12 | FreeMemory uint64 `json:"free_memory,omitempty"` 13 | } 14 | 15 | // Beginning of an `ollama info` command 16 | type GpuInfo struct { 17 | memInfo 18 | Library string `json:"library,omitempty"` 19 | 20 | // Optional variant to select (e.g. 
versions, cpu feature flags) 21 | Variant string `json:"variant,omitempty"` 22 | 23 | // MinimumMemory represents the minimum memory required to use the GPU 24 | MinimumMemory uint64 `json:"-"` 25 | 26 | // Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly 27 | DependencyPath string `json:"lib_path,omitempty"` 28 | 29 | // GPU information 30 | ID string `json:"gpu_id"` // string to use for selection of this specific GPU 31 | Name string `json:"name"` // user friendly name if available 32 | Compute string `json:"compute"` // Compute Capability or gfx 33 | 34 | // Driver Information - TODO no need to put this on each GPU 35 | DriverMajor int `json:"driver_major,omitempty"` 36 | DriverMinor int `json:"driver_minor,omitempty"` 37 | 38 | // TODO other performance capability info to help in scheduling decisions 39 | } 40 | 41 | type GpuInfoList []GpuInfo 42 | 43 | // Split up the set of gpu info's by Library and variant 44 | func (l GpuInfoList) ByLibrary() []GpuInfoList { 45 | resp := []GpuInfoList{} 46 | libs := []string{} 47 | for _, info := range l { 48 | found := false 49 | requested := info.Library 50 | if info.Variant != "" { 51 | requested += "_" + info.Variant 52 | } 53 | for i, lib := range libs { 54 | if lib == requested { 55 | resp[i] = append(resp[i], info) 56 | found = true 57 | break 58 | } 59 | } 60 | if !found { 61 | libs = append(libs, info.Library) 62 | resp = append(resp, []GpuInfo{info}) 63 | } 64 | } 65 | return resp 66 | } 67 | 68 | // Report the GPU information into the log an Info level 69 | func (l GpuInfoList) LogDetails() { 70 | for _, g := range l { 71 | slog.Info("inference compute", 72 | "id", g.ID, 73 | "library", g.Library, 74 | "compute", g.Compute, 75 | "driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor), 76 | "name", g.Name, 77 | "total", format.HumanBytes2(g.TotalMemory), 78 | "available", format.HumanBytes2(g.FreeMemory), 79 | ) 80 | } 81 | } 82 | 83 | // Sort by Free Space 84 | type ByFreeMemory []GpuInfo 85 | 86 | func (a ByFreeMemory) Len() int { return len(a) } 87 | func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 88 | func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory } 89 | -------------------------------------------------------------------------------- /integration/README.md: -------------------------------------------------------------------------------- 1 | # Integration Tests 2 | 3 | This directory contains integration tests to exercise Ollama end-to-end to verify behavior 4 | 5 | By default, these tests are disabled so `go test ./...` will exercise only unit tests. To run integration tests you must pass the integration tag. `go test -tags=integration ./...` 6 | 7 | 8 | The integration tests have 2 modes of operating. 9 | 10 | 1. By default, they will start the server on a random port, run the tests, and then shutdown the server. 11 | 2. 
If `OLLAMA_TEST_EXISTING` is set to a non-empty string, the tests will run against an existing running server, which can be remote 12 | -------------------------------------------------------------------------------- /integration/basic_test.go: -------------------------------------------------------------------------------- 1 | //go:build integration 2 | 3 | package integration 4 | 5 | import ( 6 | "context" 7 | "log/slog" 8 | "os" 9 | "runtime" 10 | "testing" 11 | "time" 12 | 13 | "github.com/ollama/ollama/api" 14 | "github.com/stretchr/testify/require" 15 | ) 16 | 17 | func TestOrcaMiniBlueSky(t *testing.T) { 18 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) 19 | defer cancel() 20 | // Set up the test data 21 | req := api.GenerateRequest{ 22 | Model: "orca-mini", 23 | Prompt: "why is the sky blue?", 24 | Stream: &stream, 25 | Options: map[string]interface{}{ 26 | "temperature": 0, 27 | "seed": 123, 28 | }, 29 | } 30 | GenerateTestHelper(ctx, t, req, []string{"rayleigh", "scattering"}) 31 | } 32 | 33 | func TestUnicodeModelDir(t *testing.T) { 34 | // This is only useful for Windows with utf-16 characters, so skip this test for other platforms 35 | if runtime.GOOS != "windows" { 36 | t.Skip("Unicode test only applicable to windows") 37 | } 38 | // Only works for local testing 39 | if os.Getenv("OLLAMA_TEST_EXISTING") != "" { 40 | t.Skip("TestUnicodeModelDir only works for local testing, skipping") 41 | } 42 | 43 | modelDir, err := os.MkdirTemp("", "ollama_埃") 44 | require.NoError(t, err) 45 | defer os.RemoveAll(modelDir) 46 | slog.Info("unicode", "OLLAMA_MODELS", modelDir) 47 | 48 | oldModelsDir := os.Getenv("OLLAMA_MODELS") 49 | if oldModelsDir == "" { 50 | defer os.Unsetenv("OLLAMA_MODELS") 51 | } else { 52 | defer os.Setenv("OLLAMA_MODELS", oldModelsDir) 53 | } 54 | err = os.Setenv("OLLAMA_MODELS", modelDir) 55 | require.NoError(t, err) 56 | 57 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) 58 | defer cancel() 59 | 60 | req := api.GenerateRequest{ 61 | Model: "orca-mini", 62 | Prompt: "why is the sky blue?", 63 | Stream: &stream, 64 | Options: map[string]interface{}{ 65 | "temperature": 0, 66 | "seed": 123, 67 | }, 68 | } 69 | GenerateTestHelper(ctx, t, req, []string{"rayleigh", "scattering"}) 70 | } 71 | -------------------------------------------------------------------------------- /integration/context_test.go: -------------------------------------------------------------------------------- 1 | //go:build integration 2 | 3 | package integration 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | "time" 9 | 10 | "github.com/ollama/ollama/api" 11 | ) 12 | 13 | func TestContextExhaustion(t *testing.T) { 14 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) // TODO maybe shorter? 
15 | defer cancel() 16 | // Set up the test data 17 | req := api.GenerateRequest{ 18 | Model: "llama2", 19 | Prompt: "Write me a story with a ton of emojis?", 20 | Stream: &stream, 21 | Options: map[string]interface{}{ 22 | "temperature": 0, 23 | "seed": 123, 24 | "num_ctx": 128, 25 | }, 26 | } 27 | GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"}) 28 | } 29 | -------------------------------------------------------------------------------- /integration/llm_test.go: -------------------------------------------------------------------------------- 1 | //go:build integration 2 | 3 | package integration 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | "time" 9 | 10 | "github.com/ollama/ollama/api" 11 | ) 12 | 13 | // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server 14 | // package to avoid circular dependencies 15 | 16 | var ( 17 | stream = false 18 | req = [2]api.GenerateRequest{ 19 | { 20 | Model: "orca-mini", 21 | Prompt: "why is the ocean blue?", 22 | Stream: &stream, 23 | Options: map[string]interface{}{ 24 | "seed": 42, 25 | "temperature": 0.0, 26 | }, 27 | }, { 28 | Model: "orca-mini", 29 | Prompt: "what is the origin of the us thanksgiving holiday?", 30 | Stream: &stream, 31 | Options: map[string]interface{}{ 32 | "seed": 42, 33 | "temperature": 0.0, 34 | }, 35 | }, 36 | } 37 | resp = [2][]string{ 38 | []string{"sunlight"}, 39 | []string{"england", "english", "massachusetts", "pilgrims"}, 40 | } 41 | ) 42 | 43 | func TestIntegrationSimpleOrcaMini(t *testing.T) { 44 | ctx, cancel := context.WithTimeout(context.Background(), time.Second*120) 45 | defer cancel() 46 | GenerateTestHelper(ctx, t, req[0], resp[0]) 47 | } 48 | -------------------------------------------------------------------------------- /llm/ext_server/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(TARGET ollama_llama_server) 3 | option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON) 4 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 5 | add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h) 6 | install(TARGETS ${TARGET} RUNTIME) 7 | target_compile_definitions(${TARGET} PRIVATE 8 | SERVER_VERBOSE=$ 9 | ) 10 | target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT}) 11 | if (WIN32) 12 | TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32) 13 | endif() 14 | target_compile_features(${TARGET} PRIVATE cxx_std_11) -------------------------------------------------------------------------------- /llm/generate/generate_darwin.go: -------------------------------------------------------------------------------- 1 | package generate 2 | 3 | //go:generate bash ./gen_darwin.sh 4 | -------------------------------------------------------------------------------- /llm/generate/generate_linux.go: -------------------------------------------------------------------------------- 1 | package generate 2 | 3 | //go:generate bash ./gen_linux.sh 4 | -------------------------------------------------------------------------------- /llm/generate/generate_windows.go: -------------------------------------------------------------------------------- 1 | package generate 2 | 3 | //go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1 4 | -------------------------------------------------------------------------------- /llm/llm.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | // #cgo CFLAGS: 
-Illama.cpp 4 | // #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++ 5 | // #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++ 6 | // #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++ 7 | // #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++ 8 | // #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++ 9 | // #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++ 10 | // #include 11 | // #include "llama.h" 12 | import "C" 13 | import ( 14 | "fmt" 15 | "unsafe" 16 | ) 17 | 18 | // SystemInfo is an unused example of calling llama.cpp functions using CGo 19 | func SystemInfo() string { 20 | return C.GoString(C.llama_print_system_info()) 21 | } 22 | 23 | func Quantize(infile, outfile string, ftype fileType) error { 24 | cinfile := C.CString(infile) 25 | defer C.free(unsafe.Pointer(cinfile)) 26 | 27 | coutfile := C.CString(outfile) 28 | defer C.free(unsafe.Pointer(coutfile)) 29 | 30 | params := C.llama_model_quantize_default_params() 31 | params.nthread = -1 32 | params.ftype = ftype.Value() 33 | 34 | if rc := C.llama_model_quantize(cinfile, coutfile, ¶ms); rc != 0 { 35 | return fmt.Errorf("llama_model_quantize: %d", rc) 36 | } 37 | 38 | return nil 39 | } 40 | -------------------------------------------------------------------------------- /llm/llm_darwin_amd64.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import ( 4 | "embed" 5 | ) 6 | 7 | //go:embed build/darwin/x86_64/*/bin/* 8 | var libEmbed embed.FS 9 | -------------------------------------------------------------------------------- /llm/llm_darwin_arm64.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import ( 4 | "embed" 5 | ) 6 | 7 | //go:embed build/darwin/arm64/*/bin/* 8 | var libEmbed embed.FS 9 | -------------------------------------------------------------------------------- /llm/llm_linux.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import "embed" 4 | 5 | //go:embed build/linux/*/*/bin/* 6 | var libEmbed embed.FS 7 | -------------------------------------------------------------------------------- /llm/llm_windows.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import "embed" 4 | 5 | // unused on windows 6 | var libEmbed embed.FS 7 | -------------------------------------------------------------------------------- /llm/patches/01-load-progress.diff: -------------------------------------------------------------------------------- 1 | diff --git a/common/common.cpp b/common/common.cpp 2 | index ba1ecf0e..cead57cc 100644 3 | --- a/common/common.cpp 4 | +++ b/common/common.cpp 5 | @@ -1836,6 +1836,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & 6 | mparams.use_mmap = params.use_mmap; 7 | mparams.use_mlock = params.use_mlock; 8 | mparams.check_tensors = params.check_tensors; 9 | + mparams.progress_callback = params.progress_callback; 10 | + mparams.progress_callback_user_data = params.progress_callback_user_data; 11 | if (params.kv_overrides.empty()) { 12 | mparams.kv_overrides = NULL; 13 | } else { 14 | diff --git a/common/common.h b/common/common.h 15 | index d80344f2..71e84834 100644 16 | --- a/common/common.h 17 | +++ b/common/common.h 18 | @@ -174,6 
+174,13 @@ struct gpt_params { 19 | // multimodal models (see examples/llava) 20 | std::string mmproj = ""; // path to multimodal projector 21 | std::vector image; // path to image file(s) 22 | + 23 | + // Called with a progress value between 0.0 and 1.0. Pass NULL to disable. 24 | + // If the provided progress_callback returns true, model loading continues. 25 | + // If it returns false, model loading is immediately aborted. 26 | + llama_progress_callback progress_callback = NULL; 27 | + // context pointer passed to the progress callback 28 | + void * progress_callback_user_data; 29 | }; 30 | 31 | void gpt_params_handle_model_default(gpt_params & params); 32 | -------------------------------------------------------------------------------- /llm/patches/02-clip-log.diff: -------------------------------------------------------------------------------- 1 | diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp 2 | index e431c7f7..f077e688 100644 3 | --- a/examples/llava/clip.cpp 4 | +++ b/examples/llava/clip.cpp 5 | @@ -3,6 +3,7 @@ 6 | // I'll gradually clean and extend it 7 | // Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch 8 | #include "clip.h" 9 | +#include "common.h" 10 | #include "log.h" 11 | #include "ggml.h" 12 | #include "ggml-alloc.h" 13 | -------------------------------------------------------------------------------- /llm/patches/03-load_exception.diff: -------------------------------------------------------------------------------- 1 | From 544a2d2e646d39e878d87dfbb3398a356bc560ab Mon Sep 17 00:00:00 2001 2 | From: Michael Yang 3 | Date: Thu, 23 May 2024 11:18:45 -0700 4 | Subject: [PATCH] throw exception on load errors 5 | 6 | --- 7 | llama.cpp | 25 ++++++++++++++++--------- 8 | 1 file changed, 16 insertions(+), 9 deletions(-) 9 | 10 | diff --git a/llama.cpp b/llama.cpp 11 | index 15c66077..8ba90b6a 100644 12 | --- a/llama.cpp 13 | +++ b/llama.cpp 14 | @@ -6346,7 +6346,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam 15 | } 16 | } catch (const std::exception & err) { 17 | LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what()); 18 | - return -1; 19 | + throw; 20 | } 21 | 22 | return 0; 23 | @@ -15600,16 +15600,23 @@ struct llama_model * llama_load_model_from_file( 24 | } 25 | model->rpc_servers.push_back(servers); 26 | } 27 | - int status = llama_model_load(path_model, *model, params); 28 | - GGML_ASSERT(status <= 0); 29 | - if (status < 0) { 30 | - if (status == -1) { 31 | - LLAMA_LOG_ERROR("%s: failed to load model\n", __func__); 32 | - } else if (status == -2) { 33 | - LLAMA_LOG_INFO("%s: cancelled model load\n", __func__); 34 | + 35 | + try { 36 | + int status = llama_model_load(path_model, *model, params); 37 | + GGML_ASSERT(status <= 0); 38 | + if (status < 0) { 39 | + if (status == -1) { 40 | + LLAMA_LOG_ERROR("%s: failed to load model\n", __func__); 41 | + } else if (status == -2) { 42 | + LLAMA_LOG_INFO("%s: cancelled model load\n", __func__); 43 | + } 44 | + delete model; 45 | + return nullptr; 46 | } 47 | + } catch (...) 
{ 48 | + LLAMA_LOG_ERROR("%s: exception loading model\n", __func__); 49 | delete model; 50 | - return nullptr; 51 | + throw; 52 | } 53 | 54 | return model; 55 | -- 56 | 2.45.1 57 | 58 | -------------------------------------------------------------------------------- /llm/patches/04-metal.diff: -------------------------------------------------------------------------------- 1 | diff --git a/ggml-metal.m b/ggml-metal.m 2 | index 0207b787..b5e9884b 100644 3 | --- a/ggml-metal.m 4 | +++ b/ggml-metal.m 5 | @@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute( 6 | // to the matrix-vector kernel 7 | int ne11_mm_min = 1; 8 | 9 | -#if 0 10 | // the numbers below are measured on M2 Ultra for 7B and 13B models 11 | // these numbers do not translate to other devices or model sizes 12 | // TODO: need to find a better approach 13 | - if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) { 14 | - switch (src0t) { 15 | - case GGML_TYPE_F16: ne11_mm_min = 2; break; 16 | - case GGML_TYPE_Q8_0: ne11_mm_min = 7; break; 17 | - case GGML_TYPE_Q2_K: ne11_mm_min = 15; break; 18 | - case GGML_TYPE_Q3_K: ne11_mm_min = 7; break; 19 | - case GGML_TYPE_Q4_0: 20 | - case GGML_TYPE_Q4_1: ne11_mm_min = 15; break; 21 | - case GGML_TYPE_Q4_K: ne11_mm_min = 11; break; 22 | - case GGML_TYPE_Q5_0: // not tested yet 23 | - case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet 24 | - case GGML_TYPE_Q5_K: ne11_mm_min = 7; break; 25 | - case GGML_TYPE_Q6_K: ne11_mm_min = 7; break; 26 | - default: ne11_mm_min = 1; break; 27 | - } 28 | + switch (src0t) { 29 | + case GGML_TYPE_F16: ne11_mm_min = 2; break; 30 | + case GGML_TYPE_Q8_0: ne11_mm_min = 7; break; 31 | + case GGML_TYPE_Q2_K: ne11_mm_min = 15; break; 32 | + case GGML_TYPE_Q3_K: ne11_mm_min = 7; break; 33 | + case GGML_TYPE_Q4_0: 34 | + case GGML_TYPE_Q4_1: ne11_mm_min = 15; break; 35 | + case GGML_TYPE_Q4_K: ne11_mm_min = 11; break; 36 | + case GGML_TYPE_Q5_0: // not tested yet 37 | + case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet 38 | + case GGML_TYPE_Q5_K: ne11_mm_min = 7; break; 39 | + case GGML_TYPE_Q6_K: ne11_mm_min = 7; break; 40 | + default: ne11_mm_min = 1; break; 41 | } 42 | -#endif 43 | 44 | // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs 45 | // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel 46 | -------------------------------------------------------------------------------- /llm/patches/05-default-pretokenizer.diff: -------------------------------------------------------------------------------- 1 | diff --git a/llama.cpp b/llama.cpp 2 | index 40d2ec2c..74f3ee9c 100644 3 | --- a/llama.cpp 4 | +++ b/llama.cpp 5 | @@ -4642,16 +4642,7 @@ static void llm_load_vocab( 6 | 7 | // for now, only BPE models have pre-tokenizers 8 | if (vocab.type == LLAMA_VOCAB_TYPE_BPE) { 9 | - if (tokenizer_pre.empty()) { 10 | - LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__); 11 | - LLAMA_LOG_WARN("%s: \n", __func__); 12 | - LLAMA_LOG_WARN("%s: ************************************ \n", __func__); 13 | - LLAMA_LOG_WARN("%s: GENERATION QUALITY WILL BE DEGRADED! 
\n", __func__); 14 | - LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__); 15 | - LLAMA_LOG_WARN("%s: ************************************ \n", __func__); 16 | - LLAMA_LOG_WARN("%s: \n", __func__); 17 | - vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; 18 | - } else if ( 19 | + if ( 20 | tokenizer_pre == "default") { 21 | vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; 22 | } else if ( 23 | @@ -4703,7 +4694,8 @@ static void llm_load_vocab( 24 | tokenizer_pre == "smaug-bpe") { 25 | vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMAUG; 26 | } else { 27 | - throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str())); 28 | + LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__); 29 | + vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; 30 | } 31 | } else { 32 | vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; 33 | -------------------------------------------------------------------------------- /llm/status.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | ) 7 | 8 | // StatusWriter is a writer that captures error messages from the llama runner process 9 | type StatusWriter struct { 10 | LastErrMsg string 11 | out *os.File 12 | } 13 | 14 | func NewStatusWriter(out *os.File) *StatusWriter { 15 | return &StatusWriter{ 16 | out: out, 17 | } 18 | } 19 | 20 | // TODO - regex matching to detect errors like 21 | // libcublasLt.so.11: cannot open shared object file: No such file or directory 22 | 23 | var errorPrefixes = []string{ 24 | "error:", 25 | "CUDA error", 26 | "cudaMalloc failed", 27 | "\"ERR\"", 28 | } 29 | 30 | func (w *StatusWriter) Write(b []byte) (int, error) { 31 | var errMsg string 32 | for _, prefix := range errorPrefixes { 33 | if _, after, ok := bytes.Cut(b, []byte(prefix)); ok { 34 | errMsg = prefix + string(bytes.TrimSpace(after)) 35 | } 36 | } 37 | if errMsg != "" { 38 | w.LastErrMsg = errMsg 39 | } 40 | 41 | return w.out.Write(b) 42 | } 43 | -------------------------------------------------------------------------------- /macapp/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "browser": true, 4 | "es6": true, 5 | "node": true 6 | }, 7 | "extends": [ 8 | "eslint:recommended", 9 | "plugin:@typescript-eslint/eslint-recommended", 10 | "plugin:@typescript-eslint/recommended", 11 | "plugin:import/recommended", 12 | "plugin:import/electron", 13 | "plugin:import/typescript" 14 | ], 15 | "parser": "@typescript-eslint/parser" 16 | } 17 | -------------------------------------------------------------------------------- /macapp/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | 9 | # Diagnostic reports (https://nodejs.org/api/report.html) 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | *.pid.lock 17 | .DS_Store 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 
| # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # TypeScript cache 43 | *.tsbuildinfo 44 | 45 | # Optional npm cache directory 46 | .npm 47 | 48 | # Optional eslint cache 49 | .eslintcache 50 | 51 | # Optional REPL history 52 | .node_repl_history 53 | 54 | # Output of 'npm pack' 55 | *.tgz 56 | 57 | # Yarn Integrity file 58 | .yarn-integrity 59 | 60 | # dotenv environment variables file 61 | .env 62 | .env.test 63 | 64 | # parcel-bundler cache (https://parceljs.org/) 65 | .cache 66 | 67 | # next.js build output 68 | .next 69 | 70 | # nuxt.js build output 71 | .nuxt 72 | 73 | # vuepress build output 74 | .vuepress/dist 75 | 76 | # Serverless directories 77 | .serverless/ 78 | 79 | # FuseBox cache 80 | .fusebox/ 81 | 82 | # DynamoDB Local files 83 | .dynamodb/ 84 | 85 | # Webpack 86 | .webpack/ 87 | 88 | # Vite 89 | .vite/ 90 | 91 | # Electron-Forge 92 | out/ 93 | -------------------------------------------------------------------------------- /macapp/README.md: -------------------------------------------------------------------------------- 1 | # Desktop 2 | 3 | This app builds upon Ollama to provide a desktop experience for running models. 4 | 5 | ## Developing 6 | 7 | First, build the `ollama` binary: 8 | 9 | ``` 10 | cd .. 11 | go build . 12 | ``` 13 | 14 | Then run the desktop app with `npm start`: 15 | 16 | ``` 17 | cd macapp 18 | npm install 19 | npm start 20 | ``` 21 | 22 | -------------------------------------------------------------------------------- /macapp/assets/icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/icon.icns -------------------------------------------------------------------------------- /macapp/assets/iconDarkTemplate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconDarkTemplate.png -------------------------------------------------------------------------------- /macapp/assets/iconDarkTemplate@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconDarkTemplate@2x.png -------------------------------------------------------------------------------- /macapp/assets/iconDarkUpdateTemplate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconDarkUpdateTemplate.png -------------------------------------------------------------------------------- /macapp/assets/iconDarkUpdateTemplate@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconDarkUpdateTemplate@2x.png -------------------------------------------------------------------------------- /macapp/assets/iconTemplate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconTemplate.png -------------------------------------------------------------------------------- /macapp/assets/iconTemplate@2x.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconTemplate@2x.png -------------------------------------------------------------------------------- /macapp/assets/iconUpdateTemplate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconUpdateTemplate.png -------------------------------------------------------------------------------- /macapp/assets/iconUpdateTemplate@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconUpdateTemplate@2x.png -------------------------------------------------------------------------------- /macapp/forge.config.ts: -------------------------------------------------------------------------------- 1 | import type { ForgeConfig } from '@electron-forge/shared-types' 2 | import { MakerSquirrel } from '@electron-forge/maker-squirrel' 3 | import { MakerZIP } from '@electron-forge/maker-zip' 4 | import { PublisherGithub } from '@electron-forge/publisher-github' 5 | import { AutoUnpackNativesPlugin } from '@electron-forge/plugin-auto-unpack-natives' 6 | import { WebpackPlugin } from '@electron-forge/plugin-webpack' 7 | import * as path from 'path' 8 | import * as fs from 'fs' 9 | 10 | import { mainConfig } from './webpack.main.config' 11 | import { rendererConfig } from './webpack.renderer.config' 12 | 13 | const packageJson = JSON.parse(fs.readFileSync(path.resolve(__dirname, './package.json'), 'utf8')) 14 | 15 | const config: ForgeConfig = { 16 | packagerConfig: { 17 | appVersion: process.env.VERSION || packageJson.version, 18 | asar: true, 19 | icon: './assets/icon.icns', 20 | extraResource: [ 21 | '../dist/ollama', 22 | path.join(__dirname, './assets/iconTemplate.png'), 23 | path.join(__dirname, './assets/iconTemplate@2x.png'), 24 | path.join(__dirname, './assets/iconUpdateTemplate.png'), 25 | path.join(__dirname, './assets/iconUpdateTemplate@2x.png'), 26 | path.join(__dirname, './assets/iconDarkTemplate.png'), 27 | path.join(__dirname, './assets/iconDarkTemplate@2x.png'), 28 | path.join(__dirname, './assets/iconDarkUpdateTemplate.png'), 29 | path.join(__dirname, './assets/iconDarkUpdateTemplate@2x.png'), 30 | ], 31 | ...(process.env.SIGN 32 | ? 
{ 33 | osxSign: { 34 | identity: process.env.APPLE_IDENTITY, 35 | }, 36 | osxNotarize: { 37 | tool: 'notarytool', 38 | appleId: process.env.APPLE_ID || '', 39 | appleIdPassword: process.env.APPLE_PASSWORD || '', 40 | teamId: process.env.APPLE_TEAM_ID || '', 41 | }, 42 | } 43 | : {}), 44 | osxUniversal: { 45 | x64ArchFiles: '**/ollama', 46 | }, 47 | }, 48 | rebuildConfig: {}, 49 | makers: [new MakerSquirrel({}), new MakerZIP({}, ['darwin'])], 50 | hooks: { 51 | readPackageJson: async (_, packageJson) => { 52 | return { ...packageJson, version: process.env.VERSION || packageJson.version } 53 | }, 54 | }, 55 | plugins: [ 56 | new AutoUnpackNativesPlugin({}), 57 | new WebpackPlugin({ 58 | mainConfig, 59 | devContentSecurityPolicy: `default-src * 'unsafe-eval' 'unsafe-inline'; img-src data: 'self'`, 60 | renderer: { 61 | config: rendererConfig, 62 | nodeIntegration: true, 63 | entryPoints: [ 64 | { 65 | html: './src/index.html', 66 | js: './src/renderer.tsx', 67 | name: 'main_window', 68 | preload: { 69 | js: './src/preload.ts', 70 | }, 71 | }, 72 | ], 73 | }, 74 | }), 75 | ], 76 | } 77 | 78 | export default config 79 | -------------------------------------------------------------------------------- /macapp/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | 'postcss-import': {}, 4 | tailwindcss: {}, 5 | autoprefixer: {}, 6 | }, 7 | } 8 | -------------------------------------------------------------------------------- /macapp/src/app.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | html, 6 | body { 7 | background: transparent; 8 | } 9 | 10 | .drag { 11 | -webkit-app-region: drag; 12 | } 13 | 14 | .no-drag { 15 | -webkit-app-region: no-drag; 16 | } 17 | 18 | .blink { 19 | -webkit-animation: 1s blink step-end infinite; 20 | -moz-animation: 1s blink step-end infinite; 21 | -ms-animation: 1s blink step-end infinite; 22 | -o-animation: 1s blink step-end infinite; 23 | animation: 1s blink step-end infinite; 24 | } 25 | 26 | @keyframes blink { 27 | from, 28 | to { 29 | color: transparent; 30 | } 31 | 50% { 32 | color: black; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /macapp/src/declarations.d.ts: -------------------------------------------------------------------------------- 1 | declare module '*.svg' { 2 | const content: string 3 | export default content 4 | } 5 | -------------------------------------------------------------------------------- /macapp/src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 | 9 | 10 | -------------------------------------------------------------------------------- /macapp/src/install.ts: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs' 2 | import { exec as cbExec } from 'child_process' 3 | import * as path from 'path' 4 | import { promisify } from 'util' 5 | 6 | const app = process && process.type === 'renderer' ? require('@electron/remote').app : require('electron').app 7 | const ollama = app.isPackaged ? path.join(process.resourcesPath, 'ollama') : path.resolve(process.cwd(), '..', 'ollama') 8 | const exec = promisify(cbExec) 9 | const symlinkPath = '/usr/local/bin/ollama' 10 | 11 | export function installed() { 12 | return fs.existsSync(symlinkPath) && fs.readlinkSync(symlinkPath) === ollama 13 | } 14 | 15 | export async function install() { 16 | const command = `do shell script "mkdir -p ${path.dirname( 17 | symlinkPath 18 | )} && ln -F -s \\"${ollama}\\" \\"${symlinkPath}\\"" with administrator privileges` 19 | 20 | await exec(`osascript -e '${command}'`) 21 | } 22 | -------------------------------------------------------------------------------- /macapp/src/preload.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/src/preload.ts -------------------------------------------------------------------------------- /macapp/src/renderer.tsx: -------------------------------------------------------------------------------- 1 | import App from './app' 2 | import './app.css' 3 | import { createRoot } from 'react-dom/client' 4 | 5 | const container = document.getElementById('app') 6 | const root = createRoot(container) 7 | root.render() 8 | -------------------------------------------------------------------------------- /macapp/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: ['./src/**/*.{js,ts,jsx,tsx,mdx}'], 4 | theme: {}, 5 | plugins: [], 6 | } 7 | -------------------------------------------------------------------------------- /macapp/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES6", 4 | "allowJs": true, 5 | "module": "commonjs", 6 | "skipLibCheck": true, 7 | "esModuleInterop": true, 8 | "noImplicitAny": true, 9 | "sourceMap": true, 10 | "baseUrl": ".", 11 | "outDir": "dist", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "paths": { 15 | "*": ["node_modules/*"] 16 | }, 17 | "jsx": "react-jsx" 18 | }, 19 | "include": ["src/**/*"] 20 | } 21 | -------------------------------------------------------------------------------- /macapp/webpack.main.config.ts: -------------------------------------------------------------------------------- 1 | import type { Configuration } from 'webpack' 2 | 3 | import { rules } from './webpack.rules' 4 | import { plugins } from './webpack.plugins' 5 | 6 | export const mainConfig: Configuration = { 7 | /** 8 | * This is the main entry point for your application, it's the first file 9 | * that runs in the main process. 
10 | */ 11 | entry: './src/index.ts', 12 | // Put your normal webpack config below here 13 | module: { 14 | rules, 15 | }, 16 | plugins, 17 | resolve: { 18 | extensions: ['.js', '.ts', '.jsx', '.tsx', '.css', '.json'], 19 | }, 20 | } 21 | -------------------------------------------------------------------------------- /macapp/webpack.plugins.ts: -------------------------------------------------------------------------------- 1 | import type IForkTsCheckerWebpackPlugin from 'fork-ts-checker-webpack-plugin' 2 | import { DefinePlugin } from 'webpack' 3 | 4 | // eslint-disable-next-line @typescript-eslint/no-var-requires 5 | const ForkTsCheckerWebpackPlugin: typeof IForkTsCheckerWebpackPlugin = require('fork-ts-checker-webpack-plugin') 6 | 7 | export const plugins = [ 8 | new ForkTsCheckerWebpackPlugin({ 9 | logger: 'webpack-infrastructure', 10 | }), 11 | new DefinePlugin({ 12 | 'process.env.TELEMETRY_WRITE_KEY': JSON.stringify(process.env.TELEMETRY_WRITE_KEY), 13 | }), 14 | ] 15 | -------------------------------------------------------------------------------- /macapp/webpack.renderer.config.ts: -------------------------------------------------------------------------------- 1 | import type { Configuration } from 'webpack' 2 | 3 | import { rules } from './webpack.rules' 4 | import { plugins } from './webpack.plugins' 5 | 6 | rules.push({ 7 | test: /\.css$/, 8 | use: [{ loader: 'style-loader' }, { loader: 'css-loader' }, { loader: 'postcss-loader' }], 9 | }) 10 | 11 | export const rendererConfig: Configuration = { 12 | module: { 13 | rules, 14 | }, 15 | plugins, 16 | resolve: { 17 | extensions: ['.js', '.ts', '.jsx', '.tsx', '.css'], 18 | }, 19 | } 20 | -------------------------------------------------------------------------------- /macapp/webpack.rules.ts: -------------------------------------------------------------------------------- 1 | import type { ModuleOptions } from 'webpack' 2 | 3 | export const rules: Required['rules'] = [ 4 | // Add support for native node modules 5 | { 6 | // We're specifying native_modules in the test because the asset relocator loader generates a 7 | // "fake" .node file which is really a cjs file. 
8 | test: /native_modules[/\\].+\.node$/, 9 | use: 'node-loader', 10 | }, 11 | { 12 | test: /[/\\]node_modules[/\\].+\.(m?js|node)$/, 13 | parser: { amd: false }, 14 | use: { 15 | loader: '@vercel/webpack-asset-relocator-loader', 16 | options: { 17 | outputAssetBase: 'native_modules', 18 | }, 19 | }, 20 | }, 21 | { 22 | test: /\.tsx?$/, 23 | exclude: /(node_modules|\.webpack)/, 24 | use: { 25 | loader: 'ts-loader', 26 | options: { 27 | transpileOnly: true, 28 | }, 29 | }, 30 | }, 31 | { 32 | test: /\.svg$/, 33 | use: ['@svgr/webpack'], 34 | }, 35 | ] 36 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/ollama/ollama/cmd" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | func main() { 11 | cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background())) 12 | } 13 | -------------------------------------------------------------------------------- /progress/progress.go: -------------------------------------------------------------------------------- 1 | package progress 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | type State interface { 11 | String() string 12 | } 13 | 14 | type Progress struct { 15 | mu sync.Mutex 16 | w io.Writer 17 | 18 | pos int 19 | 20 | ticker *time.Ticker 21 | states []State 22 | } 23 | 24 | func NewProgress(w io.Writer) *Progress { 25 | p := &Progress{w: w} 26 | go p.start() 27 | return p 28 | } 29 | 30 | func (p *Progress) stop() bool { 31 | for _, state := range p.states { 32 | if spinner, ok := state.(*Spinner); ok { 33 | spinner.Stop() 34 | } 35 | } 36 | 37 | if p.ticker != nil { 38 | p.ticker.Stop() 39 | p.ticker = nil 40 | p.render() 41 | return true 42 | } 43 | 44 | return false 45 | } 46 | 47 | func (p *Progress) Stop() bool { 48 | stopped := p.stop() 49 | if stopped { 50 | fmt.Fprint(p.w, "\n") 51 | } 52 | return stopped 53 | } 54 | 55 | func (p *Progress) StopAndClear() bool { 56 | fmt.Fprint(p.w, "\033[?25l") 57 | defer fmt.Fprint(p.w, "\033[?25h") 58 | 59 | stopped := p.stop() 60 | if stopped { 61 | // clear all progress lines 62 | for i := 0; i < p.pos; i++ { 63 | if i > 0 { 64 | fmt.Fprint(p.w, "\033[A") 65 | } 66 | fmt.Fprint(p.w, "\033[2K\033[1G") 67 | } 68 | } 69 | 70 | return stopped 71 | } 72 | 73 | func (p *Progress) Add(key string, state State) { 74 | p.mu.Lock() 75 | defer p.mu.Unlock() 76 | 77 | p.states = append(p.states, state) 78 | } 79 | 80 | func (p *Progress) render() { 81 | p.mu.Lock() 82 | defer p.mu.Unlock() 83 | 84 | fmt.Fprint(p.w, "\033[?25l") 85 | defer fmt.Fprint(p.w, "\033[?25h") 86 | 87 | // clear already rendered progress lines 88 | for i := 0; i < p.pos; i++ { 89 | if i > 0 { 90 | fmt.Fprint(p.w, "\033[A") 91 | } 92 | fmt.Fprint(p.w, "\033[2K\033[1G") 93 | } 94 | 95 | // render progress lines 96 | for i, state := range p.states { 97 | fmt.Fprint(p.w, state.String()) 98 | if i < len(p.states)-1 { 99 | fmt.Fprint(p.w, "\n") 100 | } 101 | } 102 | 103 | p.pos = len(p.states) 104 | } 105 | 106 | func (p *Progress) start() { 107 | p.ticker = time.NewTicker(100 * time.Millisecond) 108 | for range p.ticker.C { 109 | p.render() 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /progress/spinner.go: -------------------------------------------------------------------------------- 1 | package progress 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "time" 7 | ) 8 | 9 | type 
Spinner struct { 10 | message string 11 | messageWidth int 12 | 13 | parts []string 14 | 15 | value int 16 | 17 | ticker *time.Ticker 18 | started time.Time 19 | stopped time.Time 20 | } 21 | 22 | func NewSpinner(message string) *Spinner { 23 | s := &Spinner{ 24 | message: message, 25 | parts: []string{ 26 | "⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏", 27 | }, 28 | started: time.Now(), 29 | } 30 | go s.start() 31 | return s 32 | } 33 | 34 | func (s *Spinner) String() string { 35 | var sb strings.Builder 36 | if len(s.message) > 0 { 37 | message := strings.TrimSpace(s.message) 38 | if s.messageWidth > 0 && len(message) > s.messageWidth { 39 | message = message[:s.messageWidth] 40 | } 41 | 42 | fmt.Fprintf(&sb, "%s", message) 43 | if padding := s.messageWidth - sb.Len(); padding > 0 { 44 | sb.WriteString(strings.Repeat(" ", padding)) 45 | } 46 | 47 | sb.WriteString(" ") 48 | } 49 | 50 | if s.stopped.IsZero() { 51 | spinner := s.parts[s.value] 52 | sb.WriteString(spinner) 53 | sb.WriteString(" ") 54 | } 55 | 56 | return sb.String() 57 | } 58 | 59 | func (s *Spinner) start() { 60 | s.ticker = time.NewTicker(100 * time.Millisecond) 61 | for range s.ticker.C { 62 | s.value = (s.value + 1) % len(s.parts) 63 | if !s.stopped.IsZero() { 64 | return 65 | } 66 | } 67 | } 68 | 69 | func (s *Spinner) Stop() { 70 | if s.stopped.IsZero() { 71 | s.stopped = time.Now() 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /readline/errors.go: -------------------------------------------------------------------------------- 1 | package readline 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | var ( 8 | ErrInterrupt = errors.New("Interrupt") 9 | ) 10 | 11 | type InterruptError struct { 12 | Line []rune 13 | } 14 | 15 | func (*InterruptError) Error() string { 16 | return "Interrupted" 17 | } 18 | -------------------------------------------------------------------------------- /readline/readline_unix.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package readline 4 | 5 | import ( 6 | "syscall" 7 | ) 8 | 9 | func handleCharCtrlZ(fd int, termios any) (string, error) { 10 | t := termios.(*Termios) 11 | if err := UnsetRawMode(fd, t); err != nil { 12 | return "", err 13 | } 14 | 15 | _ = syscall.Kill(0, syscall.SIGSTOP) 16 | 17 | // on resume... 
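// Raw mode was unset above so the controlling shell gets a usable terminal
// while this process is stopped. Once the job is resumed, return an empty
// string and no error; nothing was read while suspended.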
18 | return "", nil 19 | } 20 | -------------------------------------------------------------------------------- /readline/readline_windows.go: -------------------------------------------------------------------------------- 1 | package readline 2 | 3 | func handleCharCtrlZ(fd int, state any) (string, error) { 4 | // not supported 5 | return "", nil 6 | } 7 | -------------------------------------------------------------------------------- /readline/term.go: -------------------------------------------------------------------------------- 1 | //go:build aix || darwin || dragonfly || freebsd || (linux && !appengine) || netbsd || openbsd || os400 || solaris 2 | 3 | package readline 4 | 5 | import ( 6 | "syscall" 7 | ) 8 | 9 | type Termios syscall.Termios 10 | 11 | func SetRawMode(fd int) (*Termios, error) { 12 | termios, err := getTermios(fd) 13 | if err != nil { 14 | return nil, err 15 | } 16 | 17 | newTermios := *termios 18 | newTermios.Iflag &^= syscall.IGNBRK | syscall.BRKINT | syscall.PARMRK | syscall.ISTRIP | syscall.INLCR | syscall.IGNCR | syscall.ICRNL | syscall.IXON 19 | newTermios.Lflag &^= syscall.ECHO | syscall.ECHONL | syscall.ICANON | syscall.ISIG | syscall.IEXTEN 20 | newTermios.Cflag &^= syscall.CSIZE | syscall.PARENB 21 | newTermios.Cflag |= syscall.CS8 22 | newTermios.Cc[syscall.VMIN] = 1 23 | newTermios.Cc[syscall.VTIME] = 0 24 | 25 | return termios, setTermios(fd, &newTermios) 26 | } 27 | 28 | func UnsetRawMode(fd int, termios any) error { 29 | t := termios.(*Termios) 30 | return setTermios(fd, t) 31 | } 32 | 33 | // IsTerminal returns true if the given file descriptor is a terminal. 34 | func IsTerminal(fd int) bool { 35 | _, err := getTermios(fd) 36 | return err == nil 37 | } 38 | -------------------------------------------------------------------------------- /readline/term_bsd.go: -------------------------------------------------------------------------------- 1 | //go:build darwin || freebsd || netbsd || openbsd 2 | 3 | package readline 4 | 5 | import ( 6 | "syscall" 7 | "unsafe" 8 | ) 9 | 10 | func getTermios(fd int) (*Termios, error) { 11 | termios := new(Termios) 12 | _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCGETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0) 13 | if err != 0 { 14 | return nil, err 15 | } 16 | return termios, nil 17 | } 18 | 19 | func setTermios(fd int, termios *Termios) error { 20 | _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCSETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0) 21 | if err != 0 { 22 | return err 23 | } 24 | return nil 25 | } 26 | -------------------------------------------------------------------------------- /readline/term_linux.go: -------------------------------------------------------------------------------- 1 | //go:build linux || solaris 2 | 3 | package readline 4 | 5 | import ( 6 | "syscall" 7 | "unsafe" 8 | ) 9 | 10 | const tcgets = 0x5401 11 | const tcsets = 0x5402 12 | 13 | func getTermios(fd int) (*Termios, error) { 14 | termios := new(Termios) 15 | _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcgets, uintptr(unsafe.Pointer(termios)), 0, 0, 0) 16 | if err != 0 { 17 | return nil, err 18 | } 19 | return termios, nil 20 | } 21 | 22 | func setTermios(fd int, termios *Termios) error { 23 | _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcsets, uintptr(unsafe.Pointer(termios)), 0, 0, 0) 24 | if err != 0 { 25 | return err 26 | } 27 | return nil 28 | } 29 | -------------------------------------------------------------------------------- 
/readline/term_windows.go: -------------------------------------------------------------------------------- 1 | package readline 2 | 3 | import ( 4 | "golang.org/x/sys/windows" 5 | ) 6 | 7 | type State struct { 8 | mode uint32 9 | } 10 | 11 | // IsTerminal checks if the given file descriptor is associated with a terminal 12 | func IsTerminal(fd int) bool { 13 | var st uint32 14 | err := windows.GetConsoleMode(windows.Handle(fd), &st) 15 | return err == nil 16 | } 17 | 18 | func SetRawMode(fd int) (*State, error) { 19 | var st uint32 20 | if err := windows.GetConsoleMode(windows.Handle(fd), &st); err != nil { 21 | return nil, err 22 | } 23 | 24 | // this enables raw mode by turning off various flags in the console mode: https://pkg.go.dev/golang.org/x/sys/windows#pkg-constants 25 | raw := st &^ (windows.ENABLE_ECHO_INPUT | windows.ENABLE_PROCESSED_INPUT | windows.ENABLE_LINE_INPUT | windows.ENABLE_PROCESSED_OUTPUT) 26 | 27 | // turn on ENABLE_VIRTUAL_TERMINAL_INPUT to enable escape sequences 28 | raw |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT 29 | if err := windows.SetConsoleMode(windows.Handle(fd), raw); err != nil { 30 | return nil, err 31 | } 32 | return &State{st}, nil 33 | } 34 | 35 | func UnsetRawMode(fd int, state any) error { 36 | s := state.(*State) 37 | return windows.SetConsoleMode(windows.Handle(fd), s.mode) 38 | } 39 | -------------------------------------------------------------------------------- /readline/types.go: -------------------------------------------------------------------------------- 1 | package readline 2 | 3 | const ( 4 | CharNull = 0 5 | CharLineStart = 1 6 | CharBackward = 2 7 | CharInterrupt = 3 8 | CharDelete = 4 9 | CharLineEnd = 5 10 | CharForward = 6 11 | CharBell = 7 12 | CharCtrlH = 8 13 | CharTab = 9 14 | CharCtrlJ = 10 15 | CharKill = 11 16 | CharCtrlL = 12 17 | CharEnter = 13 18 | CharNext = 14 19 | CharPrev = 16 20 | CharBckSearch = 18 21 | CharFwdSearch = 19 22 | CharTranspose = 20 23 | CharCtrlU = 21 24 | CharCtrlW = 23 25 | CharCtrlY = 25 26 | CharCtrlZ = 26 27 | CharEsc = 27 28 | CharSpace = 32 29 | CharEscapeEx = 91 30 | CharBackspace = 127 31 | ) 32 | 33 | const ( 34 | KeyDel = 51 35 | KeyUp = 65 36 | KeyDown = 66 37 | KeyRight = 67 38 | KeyLeft = 68 39 | MetaEnd = 70 40 | MetaStart = 72 41 | ) 42 | 43 | const ( 44 | CursorUp = "\033[1A" 45 | CursorDown = "\033[1B" 46 | CursorRight = "\033[1C" 47 | CursorLeft = "\033[1D" 48 | 49 | CursorSave = "\033[s" 50 | CursorRestore = "\033[u" 51 | 52 | CursorUpN = "\033[%dA" 53 | CursorDownN = "\033[%dB" 54 | CursorRightN = "\033[%dC" 55 | CursorLeftN = "\033[%dD" 56 | 57 | CursorEOL = "\033[E" 58 | CursorBOL = "\033[1G" 59 | CursorHide = "\033[?25l" 60 | CursorShow = "\033[?25h" 61 | 62 | ClearToEOL = "\033[K" 63 | ClearLine = "\033[2K" 64 | ClearScreen = "\033[2J" 65 | CursorReset = "\033[0;0f" 66 | 67 | ColorGrey = "\033[38;5;245m" 68 | ColorDefault = "\033[0m" 69 | 70 | StartBracketedPaste = "\033[?2004h" 71 | EndBracketedPaste = "\033[?2004l" 72 | ) 73 | 74 | const ( 75 | CharBracketedPaste = 50 76 | CharBracketedPasteStart = "00~" 77 | CharBracketedPasteEnd = "01~" 78 | ) 79 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | usage() { 6 | echo "usage: $(basename $0) VERSION" 7 | exit 1 8 | } 9 | 10 | [ "$#" -eq 1 ] || usage 11 | 12 | export VERSION="$1" 13 | 14 | # build universal MacOS binary 15 | sh $(dirname $0)/build_darwin.sh 16 
| 17 | # # build arm64 and amd64 Linux binaries 18 | sh $(dirname $0)/build_linux.sh 19 | 20 | # # build arm64 and amd64 Docker images 21 | sh $(dirname $0)/build_docker.sh 22 | -------------------------------------------------------------------------------- /scripts/build_darwin.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} 6 | export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" 7 | 8 | mkdir -p dist 9 | 10 | for TARGETARCH in arm64 amd64; do 11 | rm -rf llm/llama.cpp/build 12 | GOOS=darwin GOARCH=$TARGETARCH go generate ./... 13 | CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -o dist/ollama-darwin-$TARGETARCH 14 | CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -cover -o dist/ollama-darwin-$TARGETARCH-cov 15 | done 16 | 17 | lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64 18 | rm -f dist/ollama-darwin-arm64 dist/ollama-darwin-amd64 19 | if [ -n "$APPLE_IDENTITY" ]; then 20 | codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama 21 | else 22 | echo "Skipping code signing - set APPLE_IDENTITY" 23 | fi 24 | chmod +x dist/ollama 25 | 26 | # build and optionally sign the mac app 27 | npm install --prefix macapp 28 | if [ -n "$APPLE_IDENTITY" ]; then 29 | npm run --prefix macapp make:sign 30 | else 31 | npm run --prefix macapp make 32 | fi 33 | cp macapp/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip 34 | 35 | # sign the binary and rename it 36 | if [ -n "$APPLE_IDENTITY" ]; then 37 | codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama 38 | else 39 | echo "WARNING: Skipping code signing - set APPLE_IDENTITY" 40 | fi 41 | ditto -c -k --keepParent dist/ollama dist/temp.zip 42 | if [ -n "$APPLE_IDENTITY" ]; then 43 | xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID 44 | fi 45 | mv dist/ollama dist/ollama-darwin 46 | rm -f dist/temp.zip 47 | -------------------------------------------------------------------------------- /scripts/build_linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} 6 | export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" 7 | 8 | BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} 9 | export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""} 10 | mkdir -p dist 11 | 12 | for TARGETARCH in ${BUILD_ARCH}; do 13 | docker build \ 14 | --platform=linux/$TARGETARCH \ 15 | --build-arg=GOFLAGS \ 16 | --build-arg=CGO_CFLAGS \ 17 | --build-arg=OLLAMA_CUSTOM_CPU_DEFS \ 18 | --build-arg=AMDGPU_TARGETS \ 19 | --target build-$TARGETARCH \ 20 | -f Dockerfile \ 21 | -t builder:$TARGETARCH \ 22 | . 
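# The compiled binary lives inside the builder image; create a stopped
# container from it so `docker cp` can copy the artifact out into ./dist,
# then remove the container below.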
23 | docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH 24 | docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/ollama ./dist/ollama-linux-$TARGETARCH 25 | 26 | if [ "$TARGETARCH" = "amd64" ]; then 27 | docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/deps/ ./dist/ 28 | fi 29 | 30 | docker rm builder-$TARGETARCH 31 | done 32 | -------------------------------------------------------------------------------- /scripts/build_remote.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import subprocess 3 | import sys 4 | from urllib.parse import urlparse 5 | from git import Repo 6 | 7 | # Helper script to be able to build on remote repos using git to push local changes 8 | # (e.g. particularly helpful to target a remote windows build system) 9 | # 10 | # Typical windows remote git config looks like this: 11 | # 12 | #[remote "windows-pa"] 13 | # url = jdoe@desktop-foo:C:/Users/Jdoe/code/ollama 14 | # fetch = +refs/heads/*:refs/remotes/windows-pa/* 15 | # uploadpack = powershell git upload-pack 16 | # receivepack = powershell git receive-pack 17 | # 18 | 19 | # TODO - add argpare and make this more configurable 20 | # - force flag becomes optional 21 | # - generate, build or test ... 22 | 23 | # Note: remote repo will need this run once: 24 | # git config --local receive.denyCurrentBranch updateInstead 25 | repo = Repo(".") 26 | 27 | # On linux, add links in /usr/local/bin to the go binaries to avoid needing this 28 | # GoCmd = "/usr/local/go/bin/go" 29 | GoCmd = "go" 30 | 31 | if repo.is_dirty(): 32 | print("Tree is dirty. Commit your changes before running this script") 33 | sys.exit(1) 34 | 35 | if len(sys.argv) != 2: 36 | print("Please specify the remote name: " + ', '.join([r.name for r in repo.remotes])) 37 | sys.exit(1) 38 | remote_name = sys.argv[1] 39 | 40 | remote = {r.name: r for r in repo.remotes}[remote_name] 41 | raw_url = list(remote.urls)[0] 42 | url = urlparse(raw_url) 43 | # Windows urls don't quite parse properly 44 | if url.scheme == "" and url.netloc == "": 45 | url = urlparse("ssh://" + raw_url) 46 | print("URL: " + str(url)) 47 | netloc = url.netloc.split(":")[0] 48 | path = url.path 49 | branch_name = repo.active_branch.name 50 | 51 | print("Force pushing content to remote...") 52 | # Use with care given the force push 53 | remote.push(force=True).raise_if_error() 54 | 55 | print("Ensuring correct branch checked out on remote via ssh...") 56 | subprocess.check_call(['ssh', netloc, 'cd', path, ';', 'git', 'checkout', branch_name]) 57 | 58 | 59 | # TODO - add some hardening to try to figure out how to set up the path properly 60 | # subprocess.check_call(['ssh', netloc, 'cd', path, ';', 'env']) 61 | # TODO - or consider paramiko maybe 62 | 63 | print("Running Windows Build Script") 64 | subprocess.check_call(['ssh', netloc, 'cd', path, ';', "powershell", "-ExecutionPolicy", "Bypass", "-File", "./scripts/build_windows.ps1"]) 65 | 66 | # print("Building") 67 | # subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.']) 68 | 69 | print("Copying built result") 70 | subprocess.check_call(['scp', netloc +":"+ path + "/ollama.exe", './dist/']) 71 | 72 | print("Copying installer") 73 | subprocess.check_call(['scp', netloc +":"+ path + "/dist/Ollama Setup.exe", './dist/']) 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /scripts/publish.sh: 
-------------------------------------------------------------------------------- 1 | # Set your variables here. 2 | REPO="jmorganca/ollama" 3 | 4 | # Check if VERSION is set 5 | if [[ -z "${VERSION}" ]]; then 6 | echo "VERSION is not set. Please set the VERSION environment variable." 7 | exit 1 8 | fi 9 | 10 | OS=$(go env GOOS) 11 | 12 | ./script/build_${OS}.sh 13 | 14 | # Create a new tag if it doesn't exist. 15 | if ! git rev-parse v$VERSION >/dev/null 2>&1; then 16 | git tag v$VERSION 17 | fi 18 | 19 | git push origin v$VERSION 20 | 21 | # Create a new release. 22 | gh release create -p v$VERSION -t v$VERSION 23 | 24 | # Upload the zip file. 25 | gh release upload v$VERSION ./dist/* --clobber 26 | -------------------------------------------------------------------------------- /scripts/push_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | export VERSION=${VERSION:-0.0.0} 6 | export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" 7 | 8 | docker build \ 9 | --push \ 10 | --platform=linux/arm64,linux/amd64 \ 11 | --build-arg=VERSION \ 12 | --build-arg=GOFLAGS \ 13 | -f Dockerfile \ 14 | -t ollama/ollama -t ollama/ollama:$VERSION \ 15 | . 16 | -------------------------------------------------------------------------------- /scripts/rh_linux_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Script for common Dockerfile dependency installation in redhat linux based images 4 | 5 | set -ex 6 | MACHINE=$(uname -m) 7 | 8 | if grep -i "centos" /etc/system-release >/dev/null; then 9 | # Centos 7 derivatives have too old of a git version to run our generate script 10 | # uninstall and ignore failures 11 | yum remove -y git 12 | yum -y install epel-release centos-release-scl 13 | yum -y install dnf 14 | if [ "${MACHINE}" = "x86_64" ]; then 15 | yum -y install https://repo.ius.io/ius-release-el7.rpm 16 | dnf install -y git236 17 | else 18 | dnf install -y rh-git227-git 19 | ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git 20 | fi 21 | dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ 22 | elif grep -i "rocky" /etc/system-release >/dev/null; then 23 | # Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC) 24 | cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo 25 | [vault] 26 | name=Rocky Vault 27 | baseurl=https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ 28 | gpgcheck=1 29 | enabled=1 30 | countme=1 31 | gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-rockyofficial 32 | EOF 33 | dnf install -y git \ 34 | gcc-toolset-10-gcc-10.2.1-8.2.el8 \ 35 | gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 36 | else 37 | echo "ERROR Unexpected distro" 38 | exit 1 39 | fi 40 | 41 | if [ -n "${CMAKE_VERSION}" ]; then 42 | curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 43 | fi 44 | 45 | if [ -n "${GOLANG_VERSION}" ]; then 46 | if [ "${MACHINE}" = "x86_64" ]; then 47 | GO_ARCH="amd64" 48 | else 49 | GO_ARCH="arm64" 50 | fi 51 | mkdir -p /usr/local 52 | curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-${GO_ARCH}.tar.gz | tar xz -C /usr/local 53 | ln -s /usr/local/go/bin/go /usr/local/bin/go 54 | ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt 55 | fi 56 | 
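# Illustrative invocation only (an assumption, not taken verbatim from this
# repo's Dockerfile): export the desired toolchain versions before running
# the script, e.g.
#   CMAKE_VERSION=3.27.6 GOLANG_VERSION=1.22.1 sh ./scripts/rh_linux_deps.sh
# CMAKE_VERSION and GOLANG_VERSION are optional; when either is unset the
# corresponding install step above is skipped.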
-------------------------------------------------------------------------------- /scripts/tag_latest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | # We use 2 different image repositories to handle combining architecture images into multiarch manifest 6 | # (The ROCm image is x86 only and is not a multiarch manifest) 7 | # For developers, you can override the DOCKER_ORG to generate multiarch manifests 8 | # DOCKER_ORG=jdoe VERSION=0.1.30 PUSH=1 ./scripts/tag_latest.sh 9 | DOCKER_ORG=${DOCKER_ORG:-"ollama"} 10 | RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"} 11 | FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"} 12 | 13 | # Set PUSH to a non-empty string to trigger push instead of load 14 | PUSH=${PUSH:-""} 15 | 16 | echo "Assembling manifest and tagging latest" 17 | docker manifest rm ${FINAL_IMAGE_REPO}:latest || true 18 | docker manifest create ${FINAL_IMAGE_REPO}:latest \ 19 | ${RELEASE_IMAGE_REPO}:$VERSION-amd64 \ 20 | ${RELEASE_IMAGE_REPO}:$VERSION-arm64 21 | 22 | docker pull ${RELEASE_IMAGE_REPO}:$VERSION-rocm 23 | docker tag ${RELEASE_IMAGE_REPO}:$VERSION-rocm ${FINAL_IMAGE_REPO}:rocm 24 | 25 | if [ -n "${PUSH}" ]; then 26 | echo "Pushing latest tags up..." 27 | docker manifest push ${FINAL_IMAGE_REPO}:latest 28 | docker push ${FINAL_IMAGE_REPO}:rocm 29 | else 30 | echo "Not pushing ${FINAL_IMAGE_REPO}:latest and ${FINAL_IMAGE_REPO}:rocm" 31 | fi 32 | 33 | 34 | -------------------------------------------------------------------------------- /server/auth.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | "crypto/rand" 6 | "crypto/sha256" 7 | "encoding/base64" 8 | "encoding/hex" 9 | "encoding/json" 10 | "fmt" 11 | "io" 12 | "net/http" 13 | "net/url" 14 | "strconv" 15 | "strings" 16 | "time" 17 | 18 | "github.com/ollama/ollama/api" 19 | "github.com/ollama/ollama/auth" 20 | ) 21 | 22 | type registryChallenge struct { 23 | Realm string 24 | Service string 25 | Scope string 26 | } 27 | 28 | func (r registryChallenge) URL() (*url.URL, error) { 29 | redirectURL, err := url.Parse(r.Realm) 30 | if err != nil { 31 | return nil, err 32 | } 33 | 34 | values := redirectURL.Query() 35 | values.Add("service", r.Service) 36 | for _, s := range strings.Split(r.Scope, " ") { 37 | values.Add("scope", s) 38 | } 39 | 40 | values.Add("ts", strconv.FormatInt(time.Now().Unix(), 10)) 41 | 42 | nonce, err := auth.NewNonce(rand.Reader, 16) 43 | if err != nil { 44 | return nil, err 45 | } 46 | 47 | values.Add("nonce", nonce) 48 | 49 | redirectURL.RawQuery = values.Encode() 50 | return redirectURL, nil 51 | } 52 | 53 | func getAuthorizationToken(ctx context.Context, challenge registryChallenge) (string, error) { 54 | redirectURL, err := challenge.URL() 55 | if err != nil { 56 | return "", err 57 | } 58 | 59 | sha256sum := sha256.Sum256(nil) 60 | data := []byte(fmt.Sprintf("%s,%s,%s", http.MethodGet, redirectURL.String(), base64.StdEncoding.EncodeToString([]byte(hex.EncodeToString(sha256sum[:]))))) 61 | 62 | headers := make(http.Header) 63 | signature, err := auth.Sign(ctx, data) 64 | if err != nil { 65 | return "", err 66 | } 67 | 68 | headers.Add("Authorization", signature) 69 | 70 | response, err := makeRequest(ctx, http.MethodGet, redirectURL, headers, nil, nil) 71 | if err != nil { 72 | return "", err 73 | } 74 | defer response.Body.Close() 75 | 76 | body, err := io.ReadAll(response.Body) 77 | if err != nil { 78 | 
return "", fmt.Errorf("%d: %v", response.StatusCode, err) 79 | } 80 | 81 | if response.StatusCode >= http.StatusBadRequest { 82 | if len(body) > 0 { 83 | return "", fmt.Errorf("%d: %s", response.StatusCode, body) 84 | } else { 85 | return "", fmt.Errorf("%d", response.StatusCode) 86 | } 87 | } 88 | 89 | var token api.TokenResponse 90 | if err := json.Unmarshal(body, &token); err != nil { 91 | return "", err 92 | } 93 | 94 | return token.Token, nil 95 | } 96 | -------------------------------------------------------------------------------- /server/fixblobs.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "strings" 7 | ) 8 | 9 | // fixBlobs walks the provided dir and replaces (":") to ("-") in the file 10 | // prefix. (e.g. sha256:1234 -> sha256-1234) 11 | func fixBlobs(dir string) error { 12 | return filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { 13 | if err != nil { 14 | return err 15 | } 16 | baseName := filepath.Base(path) 17 | typ, sha, ok := strings.Cut(baseName, ":") 18 | if ok && typ == "sha256" { 19 | newPath := filepath.Join(filepath.Dir(path), typ+"-"+sha) 20 | if err := os.Rename(path, newPath); err != nil { 21 | return err 22 | } 23 | } 24 | return nil 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /server/fixblobs_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "io/fs" 5 | "os" 6 | "path/filepath" 7 | "runtime" 8 | "slices" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | func TestFixBlobs(t *testing.T) { 14 | cases := []struct { 15 | path []string 16 | want []string 17 | }{ 18 | {path: []string{"sha256-1234"}, want: []string{"sha256-1234"}}, 19 | {path: []string{"sha256:1234"}, want: []string{"sha256-1234"}}, 20 | {path: []string{"sha259:5678"}, want: []string{"sha259:5678"}}, 21 | {path: []string{"sha256:abcd"}, want: []string{"sha256-abcd"}}, 22 | {path: []string{"x/y/sha256:abcd"}, want: []string{"x/y/sha256-abcd"}}, 23 | {path: []string{"x:y/sha256:abcd"}, want: []string{"x:y/sha256-abcd"}}, 24 | {path: []string{"x:y/sha256:abcd"}, want: []string{"x:y/sha256-abcd"}}, 25 | {path: []string{"x:y/sha256:abcd", "sha256:1234"}, want: []string{"x:y/sha256-abcd", "sha256-1234"}}, 26 | {path: []string{"x:y/sha256:abcd", "sha256-1234"}, want: []string{"x:y/sha256-abcd", "sha256-1234"}}, 27 | } 28 | 29 | for _, tt := range cases { 30 | t.Run(strings.Join(tt.path, "|"), func(t *testing.T) { 31 | hasColon := slices.ContainsFunc(tt.path, func(s string) bool { return strings.Contains(s, ":") }) 32 | if hasColon && runtime.GOOS == "windows" { 33 | t.Skip("skipping test on windows") 34 | } 35 | 36 | rootDir := t.TempDir() 37 | for _, path := range tt.path { 38 | fullPath := filepath.Join(rootDir, path) 39 | fullDir, _ := filepath.Split(fullPath) 40 | 41 | t.Logf("creating dir %s", fullDir) 42 | if err := os.MkdirAll(fullDir, 0o755); err != nil { 43 | t.Fatal(err) 44 | } 45 | 46 | t.Logf("writing file %s", fullPath) 47 | if err := os.WriteFile(fullPath, nil, 0o644); err != nil { 48 | t.Fatal(err) 49 | } 50 | } 51 | 52 | if err := fixBlobs(rootDir); err != nil { 53 | t.Fatal(err) 54 | } 55 | 56 | got := slurpFiles(os.DirFS(rootDir)) 57 | 58 | slices.Sort(tt.want) 59 | slices.Sort(got) 60 | if !slices.Equal(got, tt.want) { 61 | t.Fatalf("got = %v, want %v", got, tt.want) 62 | } 63 | }) 64 | } 65 | } 66 | 67 | func slurpFiles(fsys 
fs.FS) []string { 68 | var sfs []string 69 | fn := func(path string, d fs.DirEntry, err error) error { 70 | if err != nil { 71 | return err 72 | } 73 | if d.IsDir() { 74 | return nil 75 | } 76 | sfs = append(sfs, path) 77 | return nil 78 | } 79 | if err := fs.WalkDir(fsys, ".", fn); err != nil { 80 | panic(err) 81 | } 82 | return sfs 83 | } 84 | -------------------------------------------------------------------------------- /server/layer.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "crypto/sha256" 5 | "fmt" 6 | "io" 7 | "os" 8 | ) 9 | 10 | type Layer struct { 11 | MediaType string `json:"mediaType"` 12 | Digest string `json:"digest"` 13 | Size int64 `json:"size"` 14 | From string `json:"from,omitempty"` 15 | status string 16 | } 17 | 18 | func NewLayer(r io.Reader, mediatype string) (*Layer, error) { 19 | blobs, err := GetBlobsPath("") 20 | if err != nil { 21 | return nil, err 22 | } 23 | 24 | temp, err := os.CreateTemp(blobs, "sha256-") 25 | if err != nil { 26 | return nil, err 27 | } 28 | defer temp.Close() 29 | defer os.Remove(temp.Name()) 30 | 31 | sha256sum := sha256.New() 32 | n, err := io.Copy(io.MultiWriter(temp, sha256sum), r) 33 | if err != nil { 34 | return nil, err 35 | } 36 | 37 | if err := temp.Close(); err != nil { 38 | return nil, err 39 | } 40 | 41 | digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil)) 42 | blob, err := GetBlobsPath(digest) 43 | if err != nil { 44 | return nil, err 45 | } 46 | 47 | status := "using existing layer" 48 | if _, err := os.Stat(blob); err != nil { 49 | status = "creating new layer" 50 | if err := os.Rename(temp.Name(), blob); err != nil { 51 | return nil, err 52 | } 53 | } 54 | 55 | return &Layer{ 56 | MediaType: mediatype, 57 | Digest: digest, 58 | Size: n, 59 | status: fmt.Sprintf("%s %s", status, digest), 60 | }, nil 61 | } 62 | 63 | func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) { 64 | blob, err := GetBlobsPath(digest) 65 | if err != nil { 66 | return nil, err 67 | } 68 | 69 | fi, err := os.Stat(blob) 70 | if err != nil { 71 | return nil, err 72 | } 73 | 74 | return &Layer{ 75 | MediaType: mediatype, 76 | Digest: digest, 77 | Size: fi.Size(), 78 | From: from, 79 | status: fmt.Sprintf("using existing layer %s", digest), 80 | }, nil 81 | } 82 | 83 | func (l *Layer) Open() (io.ReadSeekCloser, error) { 84 | blob, err := GetBlobsPath(l.Digest) 85 | if err != nil { 86 | return nil, err 87 | } 88 | 89 | return os.Open(blob) 90 | } 91 | 92 | func (l *Layer) Remove() error { 93 | ms, err := Manifests() 94 | if err != nil { 95 | return err 96 | } 97 | 98 | for _, m := range ms { 99 | for _, layer := range append(m.Layers, m.Config) { 100 | if layer.Digest == l.Digest { 101 | // something is using this layer 102 | return nil 103 | } 104 | } 105 | } 106 | 107 | blob, err := GetBlobsPath(l.Digest) 108 | if err != nil { 109 | return err 110 | } 111 | 112 | return os.Remove(blob) 113 | } 114 | -------------------------------------------------------------------------------- /server/routes_delete_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/ollama/ollama/api" 10 | ) 11 | 12 | func TestDelete(t *testing.T) { 13 | p := t.TempDir() 14 | t.Setenv("OLLAMA_MODELS", p) 15 | var s Server 16 | 17 | w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ 18 | Name: "test", 19 | Modelfile: 
fmt.Sprintf("FROM %s", createBinFile(t)), 20 | }) 21 | 22 | if w.Code != http.StatusOK { 23 | t.Fatalf("expected status code 200, actual %d", w.Code) 24 | } 25 | 26 | w = createRequest(t, s.CreateModelHandler, api.CreateRequest{ 27 | Name: "test2", 28 | Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t)), 29 | }) 30 | 31 | if w.Code != http.StatusOK { 32 | t.Fatalf("expected status code 200, actual %d", w.Code) 33 | } 34 | 35 | checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ 36 | filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"), 37 | filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"), 38 | }) 39 | 40 | checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ 41 | filepath.Join(p, "blobs", "sha256-8f2c2167d789c6b2302dff965160fa5029f6a24096d262c1cbb469f21a045382"), 42 | filepath.Join(p, "blobs", "sha256-a4e5e156ddec27e286f75328784d7106b60a4eb1d246e950a001a3f944fbda99"), 43 | filepath.Join(p, "blobs", "sha256-ca239d7bd8ea90e4a5d2e6bf88f8d74a47b14336e73eb4e18bed4dd325018116"), 44 | filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"), 45 | }) 46 | 47 | w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"}) 48 | 49 | if w.Code != http.StatusOK { 50 | t.Fatalf("expected status code 200, actual %d", w.Code) 51 | } 52 | 53 | checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ 54 | filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"), 55 | }) 56 | 57 | checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ 58 | filepath.Join(p, "blobs", "sha256-8f2c2167d789c6b2302dff965160fa5029f6a24096d262c1cbb469f21a045382"), 59 | filepath.Join(p, "blobs", "sha256-a4e5e156ddec27e286f75328784d7106b60a4eb1d246e950a001a3f944fbda99"), 60 | filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"), 61 | }) 62 | 63 | w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test2"}) 64 | 65 | if w.Code != http.StatusOK { 66 | t.Fatalf("expected status code 200, actual %d", w.Code) 67 | } 68 | 69 | checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{}) 70 | checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{}) 71 | } 72 | -------------------------------------------------------------------------------- /server/routes_list_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "slices" 8 | "testing" 9 | 10 | "github.com/ollama/ollama/api" 11 | ) 12 | 13 | func TestList(t *testing.T) { 14 | t.Setenv("OLLAMA_MODELS", t.TempDir()) 15 | 16 | expectNames := []string{ 17 | "mistral:7b-instruct-q4_0", 18 | "zephyr:7b-beta-q5_K_M", 19 | "apple/OpenELM:latest", 20 | "boreas:2b-code-v1.5-q6_K", 21 | "notus:7b-v1-IQ2_S", 22 | // TODO: host:port currently fails on windows (#4107) 23 | // "localhost:5000/library/eurus:700b-v0.5-iq3_XXS", 24 | "mynamespace/apeliotes:latest", 25 | "myhost/mynamespace/lips:code", 26 | } 27 | 28 | var s Server 29 | for _, n := range expectNames { 30 | createRequest(t, s.CreateModelHandler, api.CreateRequest{ 31 | Name: n, 32 | Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)), 33 | }) 34 | } 35 | 36 | w := createRequest(t, s.ListModelsHandler, nil) 37 | if w.Code != http.StatusOK { 38 | t.Fatalf("expected status code 200, actual %d", 
--------------------------------------------------------------------------------
/server/routes_list_test.go:
--------------------------------------------------------------------------------
package server

import (
	"encoding/json"
	"fmt"
	"net/http"
	"slices"
	"testing"

	"github.com/ollama/ollama/api"
)

func TestList(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	expectNames := []string{
		"mistral:7b-instruct-q4_0",
		"zephyr:7b-beta-q5_K_M",
		"apple/OpenELM:latest",
		"boreas:2b-code-v1.5-q6_K",
		"notus:7b-v1-IQ2_S",
		// TODO: host:port currently fails on windows (#4107)
		// "localhost:5000/library/eurus:700b-v0.5-iq3_XXS",
		"mynamespace/apeliotes:latest",
		"myhost/mynamespace/lips:code",
	}

	var s Server
	for _, n := range expectNames {
		createRequest(t, s.CreateModelHandler, api.CreateRequest{
			Name:      n,
			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
		})
	}

	w := createRequest(t, s.ListModelsHandler, nil)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d", w.Code)
	}

	var resp api.ListResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}

	if len(resp.Models) != len(expectNames) {
		t.Fatalf("expected %d models, actual %d", len(expectNames), len(resp.Models))
	}

	actualNames := make([]string, len(resp.Models))
	for i, m := range resp.Models {
		actualNames[i] = m.Name
	}

	slices.Sort(actualNames)
	slices.Sort(expectNames)

	if !slices.Equal(actualNames, expectNames) {
		t.Fatalf("expected slices to be equal %v", actualNames)
	}
}
--------------------------------------------------------------------------------
/types/errtypes/errtypes.go:
--------------------------------------------------------------------------------
// Package errtypes contains custom error types
package errtypes

import (
	"fmt"
	"strings"
)

const UnknownOllamaKeyErrMsg = "unknown ollama key"
const InvalidModelNameErrMsg = "invalid model name"

// TODO: This should have a structured response from the API
type UnknownOllamaKey struct {
	Key string
}

func (e *UnknownOllamaKey) Error() string {
	return fmt.Sprintf("unauthorized: %s %q", UnknownOllamaKeyErrMsg, strings.TrimSpace(e.Key))
}
--------------------------------------------------------------------------------
/types/model/testdata/fuzz/FuzzName/d37463aa416f6bab:
--------------------------------------------------------------------------------
go test fuzz v1
string("00@")
--------------------------------------------------------------------------------
/version/version.go:
--------------------------------------------------------------------------------
package version

var Version string = "0.0.0"
--------------------------------------------------------------------------------
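To close, an illustrative sketch (not part of the repository) showing how the UnknownOllamaKey error defined in types/errtypes/errtypes.go above might be constructed and detected with errors.As; the program and the key string are placeholders.

// Sketch only: build and inspect the custom error type from types/errtypes.
package main

import (
	"errors"
	"fmt"

	"github.com/ollama/ollama/types/errtypes"
)

func main() {
	var err error = &errtypes.UnknownOllamaKey{Key: "ssh-ed25519 AAAA... placeholder"}

	var uke *errtypes.UnknownOllamaKey
	if errors.As(err, &uke) {
		fmt.Println("rejected key:", uke.Key)
	}
	fmt.Println(err) // unauthorized: unknown ollama key "ssh-ed25519 AAAA... placeholder"
}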