├── .dockerignore
├── .gitattributes
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── 10_bug_report.yml
│   │   ├── 20_feature_request.md
│   │   ├── 30_model_request.md
│   │   └── config.yml
│   └── workflows
│       ├── latest.yaml
│       ├── release.yaml
│       └── test.yaml
├── .gitignore
├── .gitmodules
├── .golangci.yaml
├── .prettierrc.json
├── Dockerfile
├── LICENSE
├── README.md
├── api
│   ├── client.go
│   ├── client_test.go
│   ├── types.go
│   └── types_test.go
├── app
│   ├── .gitignore
│   ├── README.md
│   ├── assets
│   │   ├── app.ico
│   │   ├── assets.go
│   │   ├── setup.bmp
│   │   ├── tray.ico
│   │   └── tray_upgrade.ico
│   ├── lifecycle
│   │   ├── getstarted_nonwindows.go
│   │   ├── getstarted_windows.go
│   │   ├── lifecycle.go
│   │   ├── logging.go
│   │   ├── logging_nonwindows.go
│   │   ├── logging_windows.go
│   │   ├── paths.go
│   │   ├── server.go
│   │   ├── server_unix.go
│   │   ├── server_windows.go
│   │   ├── updater.go
│   │   ├── updater_nonwindows.go
│   │   └── updater_windows.go
│   ├── main.go
│   ├── ollama.iss
│   ├── ollama.rc
│   ├── ollama_welcome.ps1
│   ├── store
│   │   ├── store.go
│   │   ├── store_darwin.go
│   │   ├── store_linux.go
│   │   └── store_windows.go
│   └── tray
│       ├── commontray
│       │   └── types.go
│       ├── tray.go
│       ├── tray_nonwindows.go
│       ├── tray_windows.go
│       └── wintray
│           ├── eventloop.go
│           ├── menus.go
│           ├── messages.go
│           ├── notifyicon.go
│           ├── tray.go
│           ├── w32api.go
│           └── winclass.go
├── auth
│   └── auth.go
├── cmd
│   ├── cmd.go
│   ├── interactive.go
│   ├── interactive_test.go
│   ├── start_darwin.go
│   ├── start_default.go
│   └── start_windows.go
├── convert
│   ├── convert.go
│   ├── convert_test.go
│   ├── gemma.go
│   ├── llama.go
│   ├── mistral.go
│   ├── mixtral.go
│   ├── safetensors.go
│   ├── sentencepiece
│   │   └── sentencepiece_model.pb.go
│   ├── sentencepiece_model.proto
│   ├── tokenizer.go
│   └── torch.go
├── docs
│   ├── README.md
│   ├── api.md
│   ├── development.md
│   ├── docker.md
│   ├── faq.md
│   ├── gpu.md
│   ├── import.md
│   ├── linux.md
│   ├── modelfile.md
│   ├── openai.md
│   ├── troubleshooting.md
│   ├── tutorials.md
│   ├── tutorials
│   │   ├── fly-gpu.md
│   │   ├── langchainjs.md
│   │   ├── langchainpy.md
│   │   └── nvidia-jetson.md
│   └── windows.md
├── envconfig
│   ├── config.go
│   └── config_test.go
├── examples
│   ├── .gitignore
│   ├── README.md
│   ├── flyio
│   │   ├── .gitignore
│   │   └── README.md
│   ├── go-chat
│   │   └── main.go
│   ├── go-generate-streaming
│   │   └── main.go
│   ├── go-generate
│   │   └── main.go
│   ├── go-http-generate
│   │   ├── README.md
│   │   └── main.go
│   ├── go-multimodal
│   │   └── main.go
│   ├── go-pull-progress
│   │   └── main.go
│   ├── jupyter-notebook
│   │   ├── README.md
│   │   └── ollama.ipynb
│   ├── kubernetes
│   │   ├── README.md
│   │   ├── cpu.yaml
│   │   └── gpu.yaml
│   ├── langchain-python-rag-document
│   │   ├── README.md
│   │   ├── main.py
│   │   └── requirements.txt
│   ├── langchain-python-rag-privategpt
│   │   ├── .gitignore
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── constants.py
│   │   ├── ingest.py
│   │   ├── poetry.lock
│   │   ├── privateGPT.py
│   │   ├── pyproject.toml
│   │   └── requirements.txt
│   ├── langchain-python-rag-websummary
│   │   ├── README.md
│   │   ├── main.py
│   │   └── requirements.txt
│   ├── langchain-python-simple
│   │   ├── README.md
│   │   ├── main.py
│   │   └── requirements.txt
│   ├── langchain-typescript-simple
│   │   ├── README.md
│   │   ├── main.ts
│   │   ├── package-lock.json
│   │   └── package.json
│   ├── modelfile-mario
│   │   ├── Modelfile
│   │   ├── logo.png
│   │   └── readme.md
│   ├── python-dockerit
│   │   ├── Modelfile
│   │   ├── README.md
│   │   ├── dockerit.py
│   │   └── requirements.txt
│   ├── python-json-datagenerator
│   │   ├── predefinedschema.py
│   │   ├── randomaddresses.py
│   │   ├── readme.md
│   │   └── requirements.txt
│   ├── python-loganalysis
│   │   ├── Modelfile
│   │   ├── loganalysis.py
│   │   ├── logtest.logfile
│   │   ├── readme.md
│   │   └── requirements.txt
│   ├── python-rag-newssummary
│   │   ├── README.md
│   │   ├── requirements.txt
│   │   ├── summ.py
│   │   └── utils.py
│   ├── python-simplechat
│   │   ├── client.py
│   │   ├── readme.md
│   │   └── requirements.txt
│   ├── python-simplegenerate
│   │   ├── README.md
│   │   ├── client.py
│   │   └── requirements.txt
│   ├── typescript-functioncalling
│   │   ├── extractemail.ts
│   │   ├── extractwp.ts
│   │   ├── info.txt
│   │   ├── package-lock.json
│   │   ├── package.json
│   │   ├── readme.md
│   │   └── wp.txt
│   ├── typescript-mentors
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── character-generator.ts
│   │   ├── mentors.ts
│   │   └── package.json
│   └── typescript-simplechat
│       ├── client.ts
│       ├── package.json
│       └── readme.md
├── format
│   ├── bytes.go
│   ├── format.go
│   ├── format_test.go
│   ├── time.go
│   └── time_test.go
├── go.mod
├── go.sum
├── gpu
│   ├── amd_common.go
│   ├── amd_hip_windows.go
│   ├── amd_linux.go
│   ├── amd_windows.go
│   ├── assets.go
│   ├── cpu_common.go
│   ├── cuda_common.go
│   ├── gpu.go
│   ├── gpu_darwin.go
│   ├── gpu_info.h
│   ├── gpu_info_cpu.c
│   ├── gpu_info_cudart.c
│   ├── gpu_info_cudart.h
│   ├── gpu_info_darwin.h
│   ├── gpu_info_darwin.m
│   ├── gpu_info_nvcuda.c
│   ├── gpu_info_nvcuda.h
│   ├── gpu_info_oneapi.c
│   ├── gpu_info_oneapi.h
│   ├── gpu_oneapi.go
│   ├── gpu_test.go
│   └── types.go
├── integration
│   ├── README.md
│   ├── basic_test.go
│   ├── concurrency_test.go
│   ├── context_test.go
│   ├── llm_image_test.go
│   ├── llm_test.go
│   ├── max_queue_test.go
│   └── utils_test.go
├── llm
│   ├── ext_server
│   │   ├── CMakeLists.txt
│   │   ├── httplib.h
│   │   ├── json.hpp
│   │   ├── server.cpp
│   │   └── utils.hpp
│   ├── filetype.go
│   ├── generate
│   │   ├── gen_common.sh
│   │   ├── gen_darwin.sh
│   │   ├── gen_linux.sh
│   │   ├── gen_windows.ps1
│   │   ├── generate_darwin.go
│   │   ├── generate_linux.go
│   │   └── generate_windows.go
│   ├── ggla.go
│   ├── ggml.go
│   ├── gguf.go
│   ├── llm.go
│   ├── llm_darwin_amd64.go
│   ├── llm_darwin_arm64.go
│   ├── llm_linux.go
│   ├── llm_windows.go
│   ├── memory.go
│   ├── patches
│   │   ├── 01-load-progress.diff
│   │   ├── 02-clip-log.diff
│   │   ├── 03-load_exception.diff
│   │   ├── 04-metal.diff
│   │   └── 05-default-pretokenizer.diff
│   ├── payload.go
│   ├── server.go
│   └── status.go
├── macapp
│   ├── .eslintrc.json
│   ├── .gitignore
│   ├── README.md
│   ├── assets
│   │   ├── icon.icns
│   │   ├── iconDarkTemplate.png
│   │   ├── iconDarkTemplate@2x.png
│   │   ├── iconDarkUpdateTemplate.png
│   │   ├── iconDarkUpdateTemplate@2x.png
│   │   ├── iconTemplate.png
│   │   ├── iconTemplate@2x.png
│   │   ├── iconUpdateTemplate.png
│   │   └── iconUpdateTemplate@2x.png
│   ├── forge.config.ts
│   ├── package-lock.json
│   ├── package.json
│   ├── postcss.config.js
│   ├── src
│   │   ├── app.css
│   │   ├── app.tsx
│   │   ├── declarations.d.ts
│   │   ├── index.html
│   │   ├── index.ts
│   │   ├── install.ts
│   │   ├── ollama.svg
│   │   ├── preload.ts
│   │   └── renderer.tsx
│   ├── tailwind.config.js
│   ├── tsconfig.json
│   ├── webpack.main.config.ts
│   ├── webpack.plugins.ts
│   ├── webpack.renderer.config.ts
│   └── webpack.rules.ts
├── main.go
├── openai
│   └── openai.go
├── parser
│   ├── parser.go
│   └── parser_test.go
├── progress
│   ├── bar.go
│   ├── progress.go
│   └── spinner.go
├── readline
│   ├── buffer.go
│   ├── errors.go
│   ├── history.go
│   ├── readline.go
│   ├── readline_unix.go
│   ├── readline_windows.go
│   ├── term.go
│   ├── term_bsd.go
│   ├── term_linux.go
│   ├── term_windows.go
│   └── types.go
├── scripts
│   ├── build.sh
│   ├── build_darwin.sh
│   ├── build_docker.sh
│   ├── build_linux.sh
│   ├── build_remote.py
│   ├── build_windows.ps1
│   ├── install.sh
│   ├── publish.sh
│   ├── push_docker.sh
│   ├── rh_linux_deps.sh
│   └── tag_latest.sh
├── server
│   ├── auth.go
│   ├── download.go
│   ├── fixblobs.go
│   ├── fixblobs_test.go
│   ├── images.go
│   ├── layer.go
│   ├── manifest.go
│   ├── manifest_test.go
│   ├── model.go
│   ├── modelpath.go
│   ├── modelpath_test.go
│   ├── prompt.go
│   ├── prompt_test.go
│   ├── routes.go
│   ├── routes_create_test.go
│   ├── routes_delete_test.go
│   ├── routes_list_test.go
│   ├── routes_test.go
│   ├── sched.go
│   ├── sched_test.go
│   └── upload.go
├── types
│   ├── errtypes
│   │   └── errtypes.go
│   └── model
│       ├── name.go
│       ├── name_test.go
│       └── testdata
│           └── fuzz
│               └── FuzzName
│                   └── d37463aa416f6bab
└── version
    └── version.go
/.dockerignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | ollama 3 | app 4 | macapp 5 | dist 6 | llm/llama.cpp 7 | .env 8 | .cache 9 | test_data 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | llm/ext_server/* linguist-vendored 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/10_bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | labels: [bug] 3 | description: Something isn't working right. 4 | body: 5 | - type: textarea 6 | id: description 7 | attributes: 8 | label: What is the issue? 9 | description: What happened? What did you expect to happen? 10 | validations: 11 | required: true 12 | - type: dropdown 13 | id: os 14 | attributes: 15 | label: OS 16 | description: Which operating system are you using? 17 | multiple: true 18 | options: 19 | - Linux 20 | - macOS 21 | - Windows 22 | - Docker 23 | - WSL2 24 | validations: 25 | required: false 26 | - type: dropdown 27 | id: gpu 28 | attributes: 29 | label: GPU 30 | description: Which GPU are you using? 31 | multiple: true 32 | options: 33 | - Nvidia 34 | - AMD 35 | - Intel 36 | - Apple 37 | - Other 38 | validations: 39 | required: false 40 | - type: dropdown 41 | id: cpu 42 | attributes: 43 | label: CPU 44 | description: Which CPU are you using? 45 | multiple: true 46 | options: 47 | - Intel 48 | - AMD 49 | - Apple 50 | - Other 51 | validations: 52 | required: false 53 | - type: input 54 | id: version 55 | attributes: 56 | label: Ollama version 57 | description: What version of Ollama are you using? 
(`ollama --version`) 58 | placeholder: e.g., 0.1.32 59 | validations: 60 | required: false 61 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/20_feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Request a new feature 4 | labels: feature request 5 | --- 6 | 7 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/30_model_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Model request 3 | about: Request support for a new model to be added to Ollama 4 | labels: model request 5 | --- -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Help 4 | url: https://discord.com/invite/ollama 5 | about: Please join our Discord server for help using Ollama 6 | - name: Troubleshooting 7 | url: https://github.com/ollama/ollama/blob/main/docs/faq.md#faq 8 | about: See the FAQ for common issues and solutions 9 | -------------------------------------------------------------------------------- /.github/workflows/latest.yaml: -------------------------------------------------------------------------------- 1 | name: latest 2 | 3 | on: 4 | release: 5 | types: [released] 6 | 7 | jobs: 8 | update-latest: 9 | environment: release 10 | runs-on: linux 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Login to Docker Hub 14 | uses: docker/login-action@v3 15 | with: 16 | username: ${{ vars.DOCKER_USER }} 17 | password: ${{ secrets.DOCKER_ACCESS_TOKEN }} 18 | - name: Tag images as latest 19 | env: 20 | PUSH: "1" 21 | shell: bash 22 | run: | 23 | export "VERSION=${GITHUB_REF_NAME#v}" 24 | ./scripts/tag_latest.sh 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .vscode 3 | .env 4 | .venv 5 | .swp 6 | dist 7 | ollama 8 | ggml-metal.metal 9 | .cache 10 | *.exe 11 | .idea 12 | test_data 13 | *.crt 14 | llm/build 15 | __debug_bin* -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "llama.cpp"] 2 | path = llm/llama.cpp 3 | url = https://github.com/ggerganov/llama.cpp.git 4 | shallow = true -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | run: 2 | timeout: 5m 3 | linters: 4 | enable: 5 | - asasalint 6 | - bidichk 7 | - bodyclose 8 | - containedctx 9 | - contextcheck 10 | - exportloopref 11 | - gocheckcompilerdirectives 12 | # FIXME: for some reason this errors on windows 13 | # - gofmt 14 | # - goimports 15 | - misspell 16 | - nilerr 17 | - unused 18 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "es5", 3 | "tabWidth": 2, 4 | "useTabs": false, 5 | "semi": false, 6 | "singleQuote": true, 7 | "jsxSingleQuote": true, 8 | "printWidth": 120, 9 | "arrowParens": "avoid" 10 | } 11 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Ollama 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /api/types_test.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "encoding/json" 5 | "math" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestKeepAliveParsingFromJSON(t *testing.T) { 14 | tests := []struct { 15 | name string 16 | req string 17 | exp *Duration 18 | }{ 19 | { 20 | name: "Positive Integer", 21 | req: `{ "keep_alive": 42 }`, 22 | exp: &Duration{42 * time.Second}, 23 | }, 24 | { 25 | name: "Positive Float", 26 | req: `{ "keep_alive": 42.5 }`, 27 | exp: &Duration{42 * time.Second}, 28 | }, 29 | { 30 | name: "Positive Integer String", 31 | req: `{ "keep_alive": "42m" }`, 32 | exp: &Duration{42 * time.Minute}, 33 | }, 34 | { 35 | name: "Negative Integer", 36 | req: `{ "keep_alive": -1 }`, 37 | exp: &Duration{math.MaxInt64}, 38 | }, 39 | { 40 | name: "Negative Float", 41 | req: `{ "keep_alive": -3.14 }`, 42 | exp: &Duration{math.MaxInt64}, 43 | }, 44 | { 45 | name: "Negative Integer String", 46 | req: `{ "keep_alive": "-1m" }`, 47 | exp: &Duration{math.MaxInt64}, 48 | }, 49 | } 50 | 51 | for _, test := range tests { 52 | t.Run(test.name, func(t *testing.T) { 53 | var dec ChatRequest 54 | err := json.Unmarshal([]byte(test.req), &dec) 55 | require.NoError(t, err) 56 | 57 | assert.Equal(t, test.exp, dec.KeepAlive) 58 | }) 59 | } 60 | } 61 | 62 | func TestDurationMarshalUnmarshal(t *testing.T) { 63 | tests := []struct { 64 | name string 65 | input time.Duration 66 | expected time.Duration 67 | }{ 68 | { 69 | "negative duration", 70 | time.Duration(-1), 71 | time.Duration(math.MaxInt64), 72 | }, 73 | { 74 | "positive duration", 75 | time.Duration(42 * time.Second), 76 | time.Duration(42 * time.Second), 77 | }, 78 | { 79 | "another positive duration", 80 | time.Duration(42 * time.Minute), 81 | time.Duration(42 * time.Minute), 82 | }, 83 | { 84 | "zero duration", 85 | time.Duration(0), 86 | time.Duration(0), 87 | }, 88 | { 89 | "max duration", 90 | time.Duration(math.MaxInt64), 91 | time.Duration(math.MaxInt64), 92 | }, 93 | } 94 | 95 | for _, test 
:= range tests { 96 | t.Run(test.name, func(t *testing.T) { 97 | b, err := json.Marshal(Duration{test.input}) 98 | require.NoError(t, err) 99 | 100 | var d Duration 101 | err = json.Unmarshal(b, &d) 102 | require.NoError(t, err) 103 | 104 | assert.Equal(t, test.expected, d.Duration, "input %v, marshalled %v, got %v", test.input, string(b), d.Duration) 105 | }) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /app/.gitignore: -------------------------------------------------------------------------------- 1 | ollama.syso 2 | -------------------------------------------------------------------------------- /app/README.md: -------------------------------------------------------------------------------- 1 | # Ollama App 2 | 3 | ## Linux 4 | 5 | TODO 6 | 7 | ## macOS 8 | 9 | TODO 10 | 11 | ## Windows 12 | 13 | If you want to build the installer, you'll need to install 14 | - https://jrsoftware.org/isinfo.php 15 | 16 | 17 | In the top directory of this repo, run the following PowerShell script 18 | to build the ollama CLI, ollama app, and ollama installer. 19 | 20 | ``` 21 | powershell -ExecutionPolicy Bypass -File .\scripts\build_windows.ps1 22 | ``` 23 | -------------------------------------------------------------------------------- /app/assets/app.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/app/assets/app.ico -------------------------------------------------------------------------------- /app/assets/assets.go: -------------------------------------------------------------------------------- 1 | package assets 2 | 3 | import ( 4 | "embed" 5 | "io/fs" 6 | ) 7 | 8 | //go:embed *.ico 9 | var icons embed.FS 10 | 11 | func ListIcons() ([]string, error) { 12 | return fs.Glob(icons, "*") 13 | } 14 | 15 | func GetIcon(filename string) ([]byte, error) { 16 | return icons.ReadFile(filename) 17 | } 18 | -------------------------------------------------------------------------------- /app/assets/setup.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/app/assets/setup.bmp -------------------------------------------------------------------------------- /app/assets/tray.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/app/assets/tray.ico -------------------------------------------------------------------------------- /app/assets/tray_upgrade.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/app/assets/tray_upgrade.ico -------------------------------------------------------------------------------- /app/lifecycle/getstarted_nonwindows.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package lifecycle 4 | 5 | import "fmt" 6 | 7 | func GetStarted() error { 8 | return fmt.Errorf("GetStarted not implemented") 9 | } 10 | -------------------------------------------------------------------------------- /app/lifecycle/getstarted_windows.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "os" 7 | 
"os/exec" 8 | "path/filepath" 9 | "syscall" 10 | ) 11 | 12 | func GetStarted() error { 13 | const CREATE_NEW_CONSOLE = 0x00000010 14 | var err error 15 | bannerScript := filepath.Join(AppDir, "ollama_welcome.ps1") 16 | args := []string{ 17 | // TODO once we're signed, the execution policy bypass should be removed 18 | "powershell", "-noexit", "-ExecutionPolicy", "Bypass", "-nologo", "-file", bannerScript, 19 | } 20 | args[0], err = exec.LookPath(args[0]) 21 | if err != nil { 22 | return err 23 | } 24 | 25 | // Make sure the script actually exists 26 | _, err = os.Stat(bannerScript) 27 | if err != nil { 28 | return fmt.Errorf("getting started banner script error %s", err) 29 | } 30 | 31 | slog.Info(fmt.Sprintf("opening getting started terminal with %v", args)) 32 | attrs := &os.ProcAttr{ 33 | Files: []*os.File{os.Stdin, os.Stdout, os.Stderr}, 34 | Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false}, 35 | } 36 | proc, err := os.StartProcess(args[0], args, attrs) 37 | 38 | if err != nil { 39 | return fmt.Errorf("unable to start getting started shell %w", err) 40 | } 41 | 42 | slog.Debug(fmt.Sprintf("getting started terminal PID: %d", proc.Pid)) 43 | return proc.Release() 44 | } 45 | -------------------------------------------------------------------------------- /app/lifecycle/lifecycle.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "log/slog" 8 | "os" 9 | "os/signal" 10 | "syscall" 11 | 12 | "github.com/ollama/ollama/app/store" 13 | "github.com/ollama/ollama/app/tray" 14 | ) 15 | 16 | func Run() { 17 | InitLogging() 18 | 19 | ctx, cancel := context.WithCancel(context.Background()) 20 | var done chan int 21 | 22 | t, err := tray.NewTray() 23 | if err != nil { 24 | log.Fatalf("Failed to start: %s", err) 25 | } 26 | callbacks := t.GetCallbacks() 27 | 28 | signals := make(chan os.Signal, 1) 29 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 30 | 31 | go func() { 32 | slog.Debug("starting callback loop") 33 | for { 34 | select { 35 | case <-callbacks.Quit: 36 | slog.Debug("quit called") 37 | t.Quit() 38 | case <-signals: 39 | slog.Debug("shutting down due to signal") 40 | t.Quit() 41 | case <-callbacks.Update: 42 | err := DoUpgrade(cancel, done) 43 | if err != nil { 44 | slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err)) 45 | } 46 | case <-callbacks.ShowLogs: 47 | ShowLogs() 48 | case <-callbacks.DoFirstUse: 49 | err := GetStarted() 50 | if err != nil { 51 | slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err)) 52 | } 53 | } 54 | } 55 | }() 56 | 57 | // Are we first use? 58 | if !store.GetFirstTimeRun() { 59 | slog.Debug("First time run") 60 | err = t.DisplayFirstUseNotification() 61 | if err != nil { 62 | slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err)) 63 | } 64 | store.SetFirstTimeRun(true) 65 | } else { 66 | slog.Debug("Not first time, skipping first run notification") 67 | } 68 | 69 | if IsServerRunning(ctx) { 70 | slog.Info("Detected another instance of ollama running, exiting") 71 | os.Exit(1) 72 | } else { 73 | done, err = SpawnServer(ctx, CLIName) 74 | if err != nil { 75 | // TODO - should we retry in a backoff loop? 76 | // TODO - should we pop up a warning and maybe add a menu item to view application logs? 
77 | slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err)) 78 | done = make(chan int, 1) 79 | done <- 1 80 | } 81 | } 82 | 83 | StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable) 84 | 85 | t.Run() 86 | cancel() 87 | slog.Info("Waiting for ollama server to shutdown...") 88 | if done != nil { 89 | <-done 90 | } 91 | slog.Info("Ollama app exiting") 92 | } 93 | -------------------------------------------------------------------------------- /app/lifecycle/logging.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "os" 7 | "path/filepath" 8 | 9 | "github.com/ollama/ollama/envconfig" 10 | ) 11 | 12 | func InitLogging() { 13 | level := slog.LevelInfo 14 | 15 | if envconfig.Debug { 16 | level = slog.LevelDebug 17 | } 18 | 19 | var logFile *os.File 20 | var err error 21 | // Detect if we're a GUI app on windows, and if not, send logs to console 22 | if os.Stderr.Fd() != 0 { 23 | // Console app detected 24 | logFile = os.Stderr 25 | // TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion 26 | } else { 27 | logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755) 28 | if err != nil { 29 | slog.Error(fmt.Sprintf("failed to create server log %v", err)) 30 | return 31 | } 32 | } 33 | handler := slog.NewTextHandler(logFile, &slog.HandlerOptions{ 34 | Level: level, 35 | AddSource: true, 36 | ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr { 37 | if attr.Key == slog.SourceKey { 38 | source := attr.Value.Any().(*slog.Source) 39 | source.File = filepath.Base(source.File) 40 | } 41 | return attr 42 | }, 43 | }) 44 | 45 | slog.SetDefault(slog.New(handler)) 46 | 47 | slog.Info("ollama app started") 48 | } 49 | -------------------------------------------------------------------------------- /app/lifecycle/logging_nonwindows.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package lifecycle 4 | 5 | import "log/slog" 6 | 7 | func ShowLogs() { 8 | slog.Warn("ShowLogs not yet implemented") 9 | } 10 | -------------------------------------------------------------------------------- /app/lifecycle/logging_windows.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "os/exec" 7 | "syscall" 8 | ) 9 | 10 | func ShowLogs() { 11 | cmd_path := "c:\\Windows\\system32\\cmd.exe" 12 | slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir)) 13 | cmd := exec.Command(cmd_path, "/c", "start", AppDataDir) 14 | cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000} 15 | err := cmd.Start() 16 | if err != nil { 17 | slog.Error(fmt.Sprintf("Failed to open log dir: %s", err)) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /app/lifecycle/paths.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "log/slog" 7 | "os" 8 | "path/filepath" 9 | "runtime" 10 | "strings" 11 | ) 12 | 13 | var ( 14 | AppName = "ollama app" 15 | CLIName = "ollama" 16 | AppDir = "/opt/Ollama" 17 | AppDataDir = "/opt/Ollama" 18 | // TODO - should there be a distinct log dir? 
19 | UpdateStageDir = "/tmp" 20 | AppLogFile = "/tmp/ollama_app.log" 21 | ServerLogFile = "/tmp/ollama.log" 22 | UpgradeLogFile = "/tmp/ollama_update.log" 23 | Installer = "OllamaSetup.exe" 24 | ) 25 | 26 | func init() { 27 | if runtime.GOOS == "windows" { 28 | AppName += ".exe" 29 | CLIName += ".exe" 30 | // Logs, configs, downloads go to LOCALAPPDATA 31 | localAppData := os.Getenv("LOCALAPPDATA") 32 | AppDataDir = filepath.Join(localAppData, "Ollama") 33 | UpdateStageDir = filepath.Join(AppDataDir, "updates") 34 | AppLogFile = filepath.Join(AppDataDir, "app.log") 35 | ServerLogFile = filepath.Join(AppDataDir, "server.log") 36 | UpgradeLogFile = filepath.Join(AppDataDir, "upgrade.log") 37 | 38 | // Executables are stored in APPDATA 39 | AppDir = filepath.Join(localAppData, "Programs", "Ollama") 40 | 41 | // Make sure we have PATH set correctly for any spawned children 42 | paths := strings.Split(os.Getenv("PATH"), ";") 43 | // Start with whatever we find in the PATH/LD_LIBRARY_PATH 44 | found := false 45 | for _, path := range paths { 46 | d, err := filepath.Abs(path) 47 | if err != nil { 48 | continue 49 | } 50 | if strings.EqualFold(AppDir, d) { 51 | found = true 52 | } 53 | } 54 | if !found { 55 | paths = append(paths, AppDir) 56 | 57 | pathVal := strings.Join(paths, ";") 58 | slog.Debug("setting PATH=" + pathVal) 59 | err := os.Setenv("PATH", pathVal) 60 | if err != nil { 61 | slog.Error(fmt.Sprintf("failed to update PATH: %s", err)) 62 | } 63 | } 64 | 65 | // Make sure our logging dir exists 66 | _, err := os.Stat(AppDataDir) 67 | if errors.Is(err, os.ErrNotExist) { 68 | if err := os.MkdirAll(AppDataDir, 0o755); err != nil { 69 | slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err)) 70 | } 71 | } 72 | 73 | } else if runtime.GOOS == "darwin" { 74 | // TODO 75 | AppName += ".app" 76 | // } else if runtime.GOOS == "linux" { 77 | // TODO 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /app/lifecycle/server_unix.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package lifecycle 4 | 5 | import ( 6 | "context" 7 | "errors" 8 | "fmt" 9 | "os" 10 | "os/exec" 11 | "syscall" 12 | ) 13 | 14 | func getCmd(ctx context.Context, cmd string) *exec.Cmd { 15 | return exec.CommandContext(ctx, cmd, "serve") 16 | } 17 | 18 | func terminate(cmd *exec.Cmd) error { 19 | return cmd.Process.Signal(os.Interrupt) 20 | } 21 | 22 | func isProcessExited(pid int) (bool, error) { 23 | proc, err := os.FindProcess(pid) 24 | if err != nil { 25 | return false, fmt.Errorf("failed to find process: %v", err) 26 | } 27 | 28 | err = proc.Signal(syscall.Signal(0)) 29 | if err != nil { 30 | if errors.Is(err, os.ErrProcessDone) || errors.Is(err, syscall.ESRCH) { 31 | return true, nil 32 | } 33 | 34 | return false, fmt.Errorf("error signaling process: %v", err) 35 | } 36 | 37 | return false, nil 38 | } 39 | -------------------------------------------------------------------------------- /app/lifecycle/server_windows.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os/exec" 7 | "syscall" 8 | 9 | "golang.org/x/sys/windows" 10 | ) 11 | 12 | func getCmd(ctx context.Context, exePath string) *exec.Cmd { 13 | cmd := exec.CommandContext(ctx, exePath, "serve") 14 | cmd.SysProcAttr = &syscall.SysProcAttr{ 15 | HideWindow: true, 16 | CreationFlags: windows.CREATE_NEW_PROCESS_GROUP, 17 | } 18 | 19 | return 
cmd 20 | } 21 | 22 | func terminate(cmd *exec.Cmd) error { 23 | dll, err := windows.LoadDLL("kernel32.dll") 24 | if err != nil { 25 | return err 26 | } 27 | defer dll.Release() // nolint: errcheck 28 | 29 | pid := cmd.Process.Pid 30 | 31 | f, err := dll.FindProc("AttachConsole") 32 | if err != nil { 33 | return err 34 | } 35 | 36 | r1, _, err := f.Call(uintptr(pid)) 37 | if r1 == 0 && err != syscall.ERROR_ACCESS_DENIED { 38 | return err 39 | } 40 | 41 | f, err = dll.FindProc("SetConsoleCtrlHandler") 42 | if err != nil { 43 | return err 44 | } 45 | 46 | r1, _, err = f.Call(0, 1) 47 | if r1 == 0 { 48 | return err 49 | } 50 | 51 | f, err = dll.FindProc("GenerateConsoleCtrlEvent") 52 | if err != nil { 53 | return err 54 | } 55 | 56 | r1, _, err = f.Call(windows.CTRL_BREAK_EVENT, uintptr(pid)) 57 | if r1 == 0 { 58 | return err 59 | } 60 | 61 | r1, _, err = f.Call(windows.CTRL_C_EVENT, uintptr(pid)) 62 | if r1 == 0 { 63 | return err 64 | } 65 | 66 | return nil 67 | } 68 | 69 | const STILL_ACTIVE = 259 70 | 71 | func isProcessExited(pid int) (bool, error) { 72 | hProcess, err := windows.OpenProcess(windows.PROCESS_QUERY_INFORMATION, false, uint32(pid)) 73 | if err != nil { 74 | return false, fmt.Errorf("failed to open process: %v", err) 75 | } 76 | defer windows.CloseHandle(hProcess) // nolint: errcheck 77 | 78 | var exitCode uint32 79 | err = windows.GetExitCodeProcess(hProcess, &exitCode) 80 | if err != nil { 81 | return false, fmt.Errorf("failed to get exit code: %v", err) 82 | } 83 | 84 | if exitCode == STILL_ACTIVE { 85 | return false, nil 86 | } 87 | 88 | return true, nil 89 | } 90 | -------------------------------------------------------------------------------- /app/lifecycle/updater_nonwindows.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package lifecycle 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | ) 9 | 10 | func DoUpgrade(cancel context.CancelFunc, done chan int) error { 11 | return fmt.Errorf("DoUpgrade not yet implemented") 12 | } 13 | -------------------------------------------------------------------------------- /app/lifecycle/updater_windows.go: -------------------------------------------------------------------------------- 1 | package lifecycle 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log/slog" 7 | "os" 8 | "os/exec" 9 | "path/filepath" 10 | ) 11 | 12 | func DoUpgrade(cancel context.CancelFunc, done chan int) error { 13 | files, err := filepath.Glob(filepath.Join(UpdateStageDir, "*", "*.exe")) // TODO generalize for multiplatform 14 | if err != nil { 15 | return fmt.Errorf("failed to lookup downloads: %s", err) 16 | } 17 | if len(files) == 0 { 18 | return fmt.Errorf("no update downloads found") 19 | } else if len(files) > 1 { 20 | // Shouldn't happen 21 | slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files)) 22 | } 23 | installerExe := files[0] 24 | 25 | slog.Info("starting upgrade with " + installerExe) 26 | slog.Info("upgrade log file " + UpgradeLogFile) 27 | 28 | // When running in debug mode, we'll be "verbose" and let the installer pop up and prompt 29 | installArgs := []string{ 30 | "/CLOSEAPPLICATIONS", // Quit the tray app if it's still running 31 | "/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd 32 | "/FORCECLOSEAPPLICATIONS", // Force close the tray app - might be needed 33 | } 34 | // make the upgrade as quiet as possible (no GUI, no prompts) 35 | installArgs = append(installArgs, 36 | "/SP", // Skip the "This will install... 
Do you wish to continue" prompt 37 | "/SUPPRESSMSGBOXES", 38 | "/SILENT", 39 | "/VERYSILENT", 40 | ) 41 | 42 | // Safeguard in case we have requests in flight that need to drain... 43 | slog.Info("Waiting for server to shutdown") 44 | cancel() 45 | if done != nil { 46 | <-done 47 | } else { 48 | // Shouldn't happen 49 | slog.Warn("done chan was nil, not actually waiting") 50 | } 51 | 52 | slog.Debug(fmt.Sprintf("starting installer: %s %v", installerExe, installArgs)) 53 | os.Chdir(filepath.Dir(UpgradeLogFile)) //nolint:errcheck 54 | cmd := exec.Command(installerExe, installArgs...) 55 | 56 | if err := cmd.Start(); err != nil { 57 | return fmt.Errorf("unable to start ollama app %w", err) 58 | } 59 | 60 | if cmd.Process != nil { 61 | err = cmd.Process.Release() 62 | if err != nil { 63 | slog.Error(fmt.Sprintf("failed to release server process: %s", err)) 64 | } 65 | } else { 66 | // TODO - some details about why it didn't start, or is this a pedantic error case? 67 | return fmt.Errorf("installer process did not start") 68 | } 69 | 70 | // TODO should we linger for a moment and check to make sure it's actually running by checking the pid? 71 | 72 | slog.Info("Installer started in background, exiting") 73 | 74 | os.Exit(0) 75 | // Not reached 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /app/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // Compile with the following to get rid of the cmd pop up on windows 4 | // go build -ldflags="-H windowsgui" . 5 | 6 | import ( 7 | "github.com/ollama/ollama/app/lifecycle" 8 | ) 9 | 10 | func main() { 11 | lifecycle.Run() 12 | } 13 | -------------------------------------------------------------------------------- /app/ollama.rc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | VS_VERSION_INFO VERSIONINFO 4 | FILEFLAGSMASK 0x3fL 5 | #ifdef _DEBUG 6 | FILEFLAGS 0x1L 7 | #else 8 | FILEFLAGS 0x0L 9 | #endif 10 | FILEOS 0x40004L 11 | FILETYPE 0x1L 12 | FILESUBTYPE 0x0L 13 | BEGIN 14 | BLOCK "StringFileInfo" 15 | BEGIN 16 | BLOCK "040904b0" 17 | BEGIN 18 | VALUE "FileDescription", "Ollama" 19 | VALUE "InternalName", "Ollama" 20 | VALUE "OriginalFilename", "ollama app.exe" 21 | VALUE "ProductName", "Ollama" 22 | END 23 | END 24 | 25 | BLOCK "VarFileInfo" 26 | BEGIN 27 | VALUE "Translation", 0x409, 1200 28 | END 29 | END 30 | -------------------------------------------------------------------------------- /app/ollama_welcome.ps1: -------------------------------------------------------------------------------- 1 | # TODO - consider ANSI colors and maybe ASCII art... 2 | write-host "" 3 | write-host "Welcome to Ollama!" 
4 | write-host "" 5 | write-host "Run your first model:" 6 | write-host "" 7 | write-host "`tollama run llama3" 8 | write-host "" -------------------------------------------------------------------------------- /app/store/store.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "log/slog" 8 | "os" 9 | "path/filepath" 10 | "sync" 11 | 12 | "github.com/google/uuid" 13 | ) 14 | 15 | type Store struct { 16 | ID string `json:"id"` 17 | FirstTimeRun bool `json:"first-time-run"` 18 | } 19 | 20 | var ( 21 | lock sync.Mutex 22 | store Store 23 | ) 24 | 25 | func GetID() string { 26 | lock.Lock() 27 | defer lock.Unlock() 28 | if store.ID == "" { 29 | initStore() 30 | } 31 | return store.ID 32 | 33 | } 34 | 35 | func GetFirstTimeRun() bool { 36 | lock.Lock() 37 | defer lock.Unlock() 38 | if store.ID == "" { 39 | initStore() 40 | } 41 | return store.FirstTimeRun 42 | } 43 | 44 | func SetFirstTimeRun(val bool) { 45 | lock.Lock() 46 | defer lock.Unlock() 47 | if store.FirstTimeRun == val { 48 | return 49 | } 50 | store.FirstTimeRun = val 51 | writeStore(getStorePath()) 52 | } 53 | 54 | // lock must be held 55 | func initStore() { 56 | storeFile, err := os.Open(getStorePath()) 57 | if err == nil { 58 | defer storeFile.Close() 59 | err = json.NewDecoder(storeFile).Decode(&store) 60 | if err == nil { 61 | slog.Debug(fmt.Sprintf("loaded existing store %s - ID: %s", getStorePath(), store.ID)) 62 | return 63 | } 64 | } else if !errors.Is(err, os.ErrNotExist) { 65 | slog.Debug(fmt.Sprintf("unexpected error searching for store: %s", err)) 66 | } 67 | slog.Debug("initializing new store") 68 | store.ID = uuid.New().String() 69 | writeStore(getStorePath()) 70 | } 71 | 72 | func writeStore(storeFilename string) { 73 | ollamaDir := filepath.Dir(storeFilename) 74 | _, err := os.Stat(ollamaDir) 75 | if errors.Is(err, os.ErrNotExist) { 76 | if err := os.MkdirAll(ollamaDir, 0o755); err != nil { 77 | slog.Error(fmt.Sprintf("create ollama dir %s: %v", ollamaDir, err)) 78 | return 79 | } 80 | } 81 | payload, err := json.Marshal(store) 82 | if err != nil { 83 | slog.Error(fmt.Sprintf("failed to marshal store: %s", err)) 84 | return 85 | } 86 | fp, err := os.OpenFile(storeFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) 87 | if err != nil { 88 | slog.Error(fmt.Sprintf("write store payload %s: %v", storeFilename, err)) 89 | return 90 | } 91 | defer fp.Close() 92 | if n, err := fp.Write(payload); err != nil || n != len(payload) { 93 | slog.Error(fmt.Sprintf("write store payload %s: %d vs %d -- %v", storeFilename, n, len(payload), err)) 94 | return 95 | } 96 | slog.Debug("Store contents: " + string(payload)) 97 | slog.Info(fmt.Sprintf("wrote store: %s", storeFilename)) 98 | } 99 | -------------------------------------------------------------------------------- /app/store/store_darwin.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | ) 7 | 8 | func getStorePath() string { 9 | // TODO - system wide location? 
10 | 11 | home := os.Getenv("HOME") 12 | return filepath.Join(home, "Library", "Application Support", "Ollama", "config.json") 13 | } 14 | -------------------------------------------------------------------------------- /app/store/store_linux.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | ) 7 | 8 | func getStorePath() string { 9 | if os.Geteuid() == 0 { 10 | // TODO where should we store this on linux for system-wide operation? 11 | return "/etc/ollama/config.json" 12 | } 13 | 14 | home := os.Getenv("HOME") 15 | return filepath.Join(home, ".ollama", "config.json") 16 | } 17 | -------------------------------------------------------------------------------- /app/store/store_windows.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | ) 7 | 8 | func getStorePath() string { 9 | localAppData := os.Getenv("LOCALAPPDATA") 10 | return filepath.Join(localAppData, "Ollama", "config.json") 11 | } 12 | -------------------------------------------------------------------------------- /app/tray/commontray/types.go: -------------------------------------------------------------------------------- 1 | package commontray 2 | 3 | var ( 4 | Title = "Ollama" 5 | ToolTip = "Ollama" 6 | 7 | UpdateIconName = "tray_upgrade" 8 | IconName = "tray" 9 | ) 10 | 11 | type Callbacks struct { 12 | Quit chan struct{} 13 | Update chan struct{} 14 | DoFirstUse chan struct{} 15 | ShowLogs chan struct{} 16 | } 17 | 18 | type OllamaTray interface { 19 | GetCallbacks() Callbacks 20 | Run() 21 | UpdateAvailable(ver string) error 22 | DisplayFirstUseNotification() error 23 | Quit() 24 | } 25 | -------------------------------------------------------------------------------- /app/tray/tray.go: -------------------------------------------------------------------------------- 1 | package tray 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | 7 | "github.com/ollama/ollama/app/assets" 8 | "github.com/ollama/ollama/app/tray/commontray" 9 | ) 10 | 11 | func NewTray() (commontray.OllamaTray, error) { 12 | extension := ".png" 13 | if runtime.GOOS == "windows" { 14 | extension = ".ico" 15 | } 16 | iconName := commontray.UpdateIconName + extension 17 | updateIcon, err := assets.GetIcon(iconName) 18 | if err != nil { 19 | return nil, fmt.Errorf("failed to load icon %s: %w", iconName, err) 20 | } 21 | iconName = commontray.IconName + extension 22 | icon, err := assets.GetIcon(iconName) 23 | if err != nil { 24 | return nil, fmt.Errorf("failed to load icon %s: %w", iconName, err) 25 | } 26 | 27 | return InitPlatformTray(icon, updateIcon) 28 | } 29 | -------------------------------------------------------------------------------- /app/tray/tray_nonwindows.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package tray 4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/ollama/ollama/app/tray/commontray" 9 | ) 10 | 11 | func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { 12 | return nil, fmt.Errorf("NOT IMPLEMENTED YET") 13 | } 14 | -------------------------------------------------------------------------------- /app/tray/tray_windows.go: -------------------------------------------------------------------------------- 1 | package tray 2 | 3 | import ( 4 | "github.com/ollama/ollama/app/tray/commontray" 5 | "github.com/ollama/ollama/app/tray/wintray" 6 | ) 7 | 8 | 
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) { 9 | return wintray.InitTray(icon, updateIcon) 10 | } 11 | -------------------------------------------------------------------------------- /app/tray/wintray/menus.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package wintray 4 | 5 | import ( 6 | "fmt" 7 | "log/slog" 8 | "unsafe" 9 | 10 | "golang.org/x/sys/windows" 11 | ) 12 | 13 | const ( 14 | updatAvailableMenuID = 1 15 | updateMenuID = updatAvailableMenuID + 1 16 | separatorMenuID = updateMenuID + 1 17 | diagLogsMenuID = separatorMenuID + 1 18 | diagSeparatorMenuID = diagLogsMenuID + 1 19 | quitMenuID = diagSeparatorMenuID + 1 20 | ) 21 | 22 | func (t *winTray) initMenus() error { 23 | if err := t.addOrUpdateMenuItem(diagLogsMenuID, 0, diagLogsMenuTitle, false); err != nil { 24 | return fmt.Errorf("unable to create menu entries %w\n", err) 25 | } 26 | if err := t.addSeparatorMenuItem(diagSeparatorMenuID, 0); err != nil { 27 | return fmt.Errorf("unable to create menu entries %w", err) 28 | } 29 | if err := t.addOrUpdateMenuItem(quitMenuID, 0, quitMenuTitle, false); err != nil { 30 | return fmt.Errorf("unable to create menu entries %w\n", err) 31 | } 32 | return nil 33 | } 34 | 35 | func (t *winTray) UpdateAvailable(ver string) error { 36 | if !t.updateNotified { 37 | slog.Debug("updating menu and sending notification for new update") 38 | if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil { 39 | return fmt.Errorf("unable to create menu entries %w", err) 40 | } 41 | if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil { 42 | return fmt.Errorf("unable to create menu entries %w", err) 43 | } 44 | if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil { 45 | return fmt.Errorf("unable to create menu entries %w", err) 46 | } 47 | iconFilePath, err := iconBytesToFilePath(wt.updateIcon) 48 | if err != nil { 49 | return fmt.Errorf("unable to write icon data to temp file: %w", err) 50 | } 51 | if err := wt.setIcon(iconFilePath); err != nil { 52 | return fmt.Errorf("unable to set icon: %w", err) 53 | } 54 | t.updateNotified = true 55 | 56 | t.pendingUpdate = true 57 | // Now pop up the notification 58 | t.muNID.Lock() 59 | defer t.muNID.Unlock() 60 | copy(t.nid.InfoTitle[:], windows.StringToUTF16(updateTitle)) 61 | copy(t.nid.Info[:], windows.StringToUTF16(fmt.Sprintf(updateMessage, ver))) 62 | t.nid.Flags |= NIF_INFO 63 | t.nid.Timeout = 10 64 | t.nid.Size = uint32(unsafe.Sizeof(*wt.nid)) 65 | err = t.nid.modify() 66 | if err != nil { 67 | return err 68 | } 69 | } 70 | return nil 71 | } 72 | -------------------------------------------------------------------------------- /app/tray/wintray/messages.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package wintray 4 | 5 | const ( 6 | firstTimeTitle = "Ollama is running" 7 | firstTimeMessage = "Click here to get started" 8 | updateTitle = "Update available" 9 | updateMessage = "Ollama version %s is ready to install" 10 | 11 | quitMenuTitle = "Quit Ollama" 12 | updateAvailableMenuTitle = "An update is available" 13 | updateMenutTitle = "Restart to update" 14 | diagLogsMenuTitle = "View logs" 15 | ) 16 | -------------------------------------------------------------------------------- /app/tray/wintray/notifyicon.go: -------------------------------------------------------------------------------- 1 | 
//go:build windows 2 | 3 | package wintray 4 | 5 | import ( 6 | "unsafe" 7 | 8 | "golang.org/x/sys/windows" 9 | ) 10 | 11 | // Contains information that the system needs to display notifications in the notification area. 12 | // Used by Shell_NotifyIcon. 13 | // https://msdn.microsoft.com/en-us/library/windows/desktop/bb773352(v=vs.85).aspx 14 | // https://msdn.microsoft.com/en-us/library/windows/desktop/bb762159 15 | type notifyIconData struct { 16 | Size uint32 17 | Wnd windows.Handle 18 | ID, Flags, CallbackMessage uint32 19 | Icon windows.Handle 20 | Tip [128]uint16 21 | State, StateMask uint32 22 | Info [256]uint16 23 | // Timeout, Version uint32 24 | Timeout uint32 25 | 26 | InfoTitle [64]uint16 27 | InfoFlags uint32 28 | GuidItem windows.GUID 29 | BalloonIcon windows.Handle 30 | } 31 | 32 | func (nid *notifyIconData) add() error { 33 | const NIM_ADD = 0x00000000 34 | res, _, err := pShellNotifyIcon.Call( 35 | uintptr(NIM_ADD), 36 | uintptr(unsafe.Pointer(nid)), 37 | ) 38 | if res == 0 { 39 | return err 40 | } 41 | return nil 42 | } 43 | 44 | func (nid *notifyIconData) modify() error { 45 | const NIM_MODIFY = 0x00000001 46 | res, _, err := pShellNotifyIcon.Call( 47 | uintptr(NIM_MODIFY), 48 | uintptr(unsafe.Pointer(nid)), 49 | ) 50 | if res == 0 { 51 | return err 52 | } 53 | return nil 54 | } 55 | 56 | func (nid *notifyIconData) delete() error { 57 | const NIM_DELETE = 0x00000002 58 | res, _, err := pShellNotifyIcon.Call( 59 | uintptr(NIM_DELETE), 60 | uintptr(unsafe.Pointer(nid)), 61 | ) 62 | if res == 0 { 63 | return err 64 | } 65 | return nil 66 | } 67 | -------------------------------------------------------------------------------- /app/tray/wintray/winclass.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package wintray 4 | 5 | import ( 6 | "unsafe" 7 | 8 | "golang.org/x/sys/windows" 9 | ) 10 | 11 | // Contains window class information. 12 | // It is used with the RegisterClassEx and GetClassInfoEx functions. 13 | // https://msdn.microsoft.com/en-us/library/ms633577.aspx 14 | type wndClassEx struct { 15 | Size, Style uint32 16 | WndProc uintptr 17 | ClsExtra, WndExtra int32 18 | Instance, Icon, Cursor, Background windows.Handle 19 | MenuName, ClassName *uint16 20 | IconSm windows.Handle 21 | } 22 | 23 | // Registers a window class for subsequent use in calls to the CreateWindow or CreateWindowEx function. 24 | // https://msdn.microsoft.com/en-us/library/ms633587.aspx 25 | func (w *wndClassEx) register() error { 26 | w.Size = uint32(unsafe.Sizeof(*w)) 27 | res, _, err := pRegisterClass.Call(uintptr(unsafe.Pointer(w))) 28 | if res == 0 { 29 | return err 30 | } 31 | return nil 32 | } 33 | 34 | // Unregisters a window class, freeing the memory required for the class. 
35 | // https://msdn.microsoft.com/en-us/library/ms644899.aspx 36 | func (w *wndClassEx) unregister() error { 37 | res, _, err := pUnregisterClass.Call( 38 | uintptr(unsafe.Pointer(w.ClassName)), 39 | uintptr(w.Instance), 40 | ) 41 | if res == 0 { 42 | return err 43 | } 44 | return nil 45 | } 46 | -------------------------------------------------------------------------------- /auth/auth.go: -------------------------------------------------------------------------------- 1 | package auth 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "crypto/rand" 7 | "encoding/base64" 8 | "fmt" 9 | "io" 10 | "log/slog" 11 | "os" 12 | "path/filepath" 13 | "strings" 14 | 15 | "golang.org/x/crypto/ssh" 16 | ) 17 | 18 | const defaultPrivateKey = "id_ed25519" 19 | 20 | func keyPath() (string, error) { 21 | home, err := os.UserHomeDir() 22 | if err != nil { 23 | return "", err 24 | } 25 | 26 | return filepath.Join(home, ".ollama", defaultPrivateKey), nil 27 | } 28 | 29 | func GetPublicKey() (string, error) { 30 | keyPath, err := keyPath() 31 | if err != nil { 32 | return "", err 33 | } 34 | 35 | privateKeyFile, err := os.ReadFile(keyPath) 36 | if err != nil { 37 | slog.Info(fmt.Sprintf("Failed to load private key: %v", err)) 38 | return "", err 39 | } 40 | 41 | privateKey, err := ssh.ParsePrivateKey(privateKeyFile) 42 | if err != nil { 43 | return "", err 44 | } 45 | 46 | publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey()) 47 | 48 | return strings.TrimSpace(string(publicKey)), nil 49 | } 50 | 51 | func NewNonce(r io.Reader, length int) (string, error) { 52 | nonce := make([]byte, length) 53 | if _, err := io.ReadFull(r, nonce); err != nil { 54 | return "", err 55 | } 56 | 57 | return base64.RawURLEncoding.EncodeToString(nonce), nil 58 | } 59 | 60 | func Sign(ctx context.Context, bts []byte) (string, error) { 61 | keyPath, err := keyPath() 62 | if err != nil { 63 | return "", err 64 | } 65 | 66 | privateKeyFile, err := os.ReadFile(keyPath) 67 | if err != nil { 68 | slog.Info(fmt.Sprintf("Failed to load private key: %v", err)) 69 | return "", err 70 | } 71 | 72 | privateKey, err := ssh.ParsePrivateKey(privateKeyFile) 73 | if err != nil { 74 | return "", err 75 | } 76 | 77 | // get the pubkey, but remove the type 78 | publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey()) 79 | parts := bytes.Split(publicKey, []byte(" ")) 80 | if len(parts) < 2 { 81 | return "", fmt.Errorf("malformed public key") 82 | } 83 | 84 | signedData, err := privateKey.Sign(rand.Reader, bts) 85 | if err != nil { 86 | return "", err 87 | } 88 | 89 | // signature is : 90 | return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil 91 | } 92 | -------------------------------------------------------------------------------- /cmd/start_darwin.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "os/exec" 8 | "strings" 9 | 10 | "github.com/ollama/ollama/api" 11 | ) 12 | 13 | func startApp(ctx context.Context, client *api.Client) error { 14 | exe, err := os.Executable() 15 | if err != nil { 16 | return err 17 | } 18 | link, err := os.Readlink(exe) 19 | if err != nil { 20 | return err 21 | } 22 | if !strings.Contains(link, "Ollama.app") { 23 | return fmt.Errorf("could not find ollama app") 24 | } 25 | path := strings.Split(link, "Ollama.app") 26 | if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil { 27 | return err 28 | } 29 | return 
waitForServer(ctx, client) 30 | } 31 | -------------------------------------------------------------------------------- /cmd/start_default.go: -------------------------------------------------------------------------------- 1 | //go:build !windows && !darwin 2 | 3 | package cmd 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | 9 | "github.com/ollama/ollama/api" 10 | ) 11 | 12 | func startApp(ctx context.Context, client *api.Client) error { 13 | return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it") 14 | } 15 | -------------------------------------------------------------------------------- /cmd/start_windows.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "os" 8 | "os/exec" 9 | "path/filepath" 10 | "strings" 11 | "syscall" 12 | 13 | "github.com/ollama/ollama/api" 14 | ) 15 | 16 | func startApp(ctx context.Context, client *api.Client) error { 17 | // log.Printf("XXX Attempting to find and start ollama app") 18 | AppName := "ollama app.exe" 19 | exe, err := os.Executable() 20 | if err != nil { 21 | return err 22 | } 23 | appExe := filepath.Join(filepath.Dir(exe), AppName) 24 | _, err = os.Stat(appExe) 25 | if errors.Is(err, os.ErrNotExist) { 26 | // Try the standard install location 27 | localAppData := os.Getenv("LOCALAPPDATA") 28 | appExe = filepath.Join(localAppData, "Ollama", AppName) 29 | _, err := os.Stat(appExe) 30 | if errors.Is(err, os.ErrNotExist) { 31 | // Finally look in the path 32 | appExe, err = exec.LookPath(AppName) 33 | if err != nil { 34 | return fmt.Errorf("could not locate ollama app") 35 | } 36 | } 37 | } 38 | // log.Printf("XXX attempting to start app %s", appExe) 39 | 40 | cmd_path := "c:\\Windows\\system32\\cmd.exe" 41 | cmd := exec.Command(cmd_path, "/c", appExe) 42 | // TODO - these hide flags aren't working - still pops up a command window for some reason 43 | cmd.SysProcAttr = &syscall.SysProcAttr{CreationFlags: 0x08000000, HideWindow: true} 44 | 45 | // TODO this didn't help either... 
46 | cmd.Stdin = strings.NewReader("") 47 | cmd.Stdout = os.Stdout 48 | cmd.Stderr = os.Stderr 49 | 50 | if err := cmd.Start(); err != nil { 51 | return fmt.Errorf("unable to start ollama app %w", err) 52 | } 53 | 54 | if cmd.Process != nil { 55 | defer cmd.Process.Release() //nolint:errcheck 56 | } 57 | return waitForServer(ctx, client) 58 | } 59 | -------------------------------------------------------------------------------- /convert/convert_test.go: -------------------------------------------------------------------------------- 1 | //go:build slow 2 | 3 | package convert 4 | 5 | import ( 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | 10 | "github.com/ollama/ollama/llm" 11 | ) 12 | 13 | func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) { 14 | t.Helper() 15 | 16 | mf, err := GetModelFormat(p) 17 | if err != nil { 18 | t.Fatal(err) 19 | } 20 | 21 | params, err := mf.GetParams(p) 22 | if err != nil { 23 | t.Fatal(err) 24 | } 25 | 26 | arch, err := mf.GetModelArch("", p, params) 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | 31 | if err := arch.LoadVocab(); err != nil { 32 | t.Fatal(err) 33 | } 34 | 35 | if err := arch.GetTensors(); err != nil { 36 | t.Fatal(err) 37 | } 38 | 39 | f, err := os.CreateTemp(t.TempDir(), "f16") 40 | if err != nil { 41 | t.Fatal(err) 42 | } 43 | defer f.Close() 44 | 45 | if err := arch.WriteGGUF(f); err != nil { 46 | t.Fatal(err) 47 | } 48 | 49 | r, err := os.Open(f.Name()) 50 | if err != nil { 51 | t.Fatal(err) 52 | } 53 | defer r.Close() 54 | 55 | m, _, err := llm.DecodeGGML(r) 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | 60 | return m.KV(), m.Tensors() 61 | } 62 | 63 | func TestConvertFull(t *testing.T) { 64 | cases := []struct { 65 | path string 66 | arch string 67 | tensors int 68 | layers int 69 | }{ 70 | {"Meta-Llama-3-8B-Instruct", "llama", 291, 35}, 71 | {"Mistral-7B-Instruct-v0.2", "llama", 291, 35}, 72 | {"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35}, 73 | {"gemma-2b-it", "gemma", 164, 20}, 74 | } 75 | 76 | for _, tt := range cases { 77 | t.Run(tt.path, func(t *testing.T) { 78 | p := filepath.Join("testdata", tt.path) 79 | if _, err := os.Stat(p); err != nil { 80 | t.Skipf("%s not found", p) 81 | } 82 | 83 | kv, tensors := convertFull(t, p) 84 | 85 | if kv.Architecture() != tt.arch { 86 | t.Fatalf("expected llama, got %s", kv.Architecture()) 87 | } 88 | 89 | if kv.FileType().String() != "F16" { 90 | t.Fatalf("expected F16, got %s", kv.FileType()) 91 | } 92 | 93 | if len(tensors) != tt.tensors { 94 | t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors)) 95 | } 96 | 97 | layers := tensors.Layers() 98 | if len(layers) != tt.layers { 99 | t.Fatalf("expected %d layers, got %d", tt.layers, len(layers)) 100 | } 101 | }) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /convert/mistral.go: -------------------------------------------------------------------------------- 1 | package convert 2 | 3 | import ( 4 | "io" 5 | "regexp" 6 | 7 | "github.com/ollama/ollama/llm" 8 | ) 9 | 10 | type MistralModel struct { 11 | ModelData 12 | } 13 | 14 | func (m *MistralModel) GetTensors() error { 15 | t, err := m.Format.GetTensors(m.Path, m.Params) 16 | if err != nil { 17 | return err 18 | } 19 | 20 | pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` 21 | re, err := regexp.Compile(pattern) 22 | if err != nil { 23 | return err 24 | } 25 | 26 | for _, l := range t { 27 | matches := re.FindAllStringSubmatch(l.Name, -1) 28 | if len(matches) > 0 { 29 | wt := l.WriterTo.(safetensorWriterTo) 
30 | wt.repacker = m.Repack 31 | l.WriterTo = wt 32 | } 33 | m.Tensors = append(m.Tensors, l) 34 | } 35 | 36 | return nil 37 | } 38 | 39 | func (m *MistralModel) LoadVocab() error { 40 | v, err := LoadSentencePieceTokens(m.Path, m.Params) 41 | if err != nil { 42 | return err 43 | } 44 | m.Vocab = v 45 | return nil 46 | } 47 | 48 | func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error { 49 | kv := llm.KV{ 50 | "general.architecture": "llama", 51 | "general.name": m.Name, 52 | "llama.context_length": uint32(m.Params.ContextSize), 53 | "llama.embedding_length": uint32(m.Params.HiddenSize), 54 | "llama.block_count": uint32(m.Params.HiddenLayers), 55 | "llama.feed_forward_length": uint32(m.Params.IntermediateSize), 56 | "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), 57 | "llama.attention.head_count": uint32(m.Params.AttentionHeads), 58 | "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), 59 | "llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS), 60 | "general.file_type": uint32(1), 61 | "tokenizer.ggml.model": "llama", 62 | 63 | "tokenizer.ggml.tokens": m.Vocab.Tokens, 64 | "tokenizer.ggml.scores": m.Vocab.Scores, 65 | "tokenizer.ggml.token_type": m.Vocab.Types, 66 | 67 | "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), 68 | "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), 69 | "tokenizer.ggml.add_bos_token": true, 70 | "tokenizer.ggml.add_eos_token": false, 71 | "tokenizer.ggml.unknown_token_id": uint32(0), 72 | } 73 | 74 | return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) 75 | } 76 | 77 | func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { 78 | return llamaRepack(name, m.Params, data, shape) 79 | } 80 | -------------------------------------------------------------------------------- /convert/mixtral.go: -------------------------------------------------------------------------------- 1 | package convert 2 | 3 | import ( 4 | "io" 5 | "regexp" 6 | 7 | "github.com/ollama/ollama/llm" 8 | ) 9 | 10 | type MixtralModel struct { 11 | ModelData 12 | } 13 | 14 | func (m *MixtralModel) GetTensors() error { 15 | t, err := m.Format.GetTensors(m.Path, m.Params) 16 | if err != nil { 17 | return err 18 | } 19 | 20 | pattern := `^blk\.[0-9]+\.attn_(?Pq|k)\.weight$` 21 | re, err := regexp.Compile(pattern) 22 | if err != nil { 23 | return err 24 | } 25 | 26 | for _, l := range t { 27 | matches := re.FindAllStringSubmatch(l.Name, -1) 28 | if len(matches) > 0 { 29 | wt := l.WriterTo.(safetensorWriterTo) 30 | wt.repacker = m.Repack 31 | l.WriterTo = wt 32 | } 33 | m.Tensors = append(m.Tensors, l) 34 | } 35 | 36 | return nil 37 | } 38 | 39 | func (m *MixtralModel) LoadVocab() error { 40 | v, err := LoadSentencePieceTokens(m.Path, m.Params) 41 | if err != nil { 42 | return err 43 | } 44 | m.Vocab = v 45 | return nil 46 | } 47 | 48 | func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error { 49 | kv := llm.KV{ 50 | "general.architecture": "llama", 51 | "general.name": m.Name, 52 | "llama.block_count": uint32(m.Params.HiddenLayers), 53 | "llama.context_length": uint32(m.Params.ContextSize), 54 | "llama.embedding_length": uint32(m.Params.HiddenSize), 55 | "llama.feed_forward_length": uint32(m.Params.IntermediateSize), 56 | "llama.attention.head_count": uint32(m.Params.AttentionHeads), 57 | "llama.attention.head_count_kv": uint32(m.Params.KeyValHeads), 58 | 59 | "llama.rope.freq_base": float32(m.Params.RopeFrequencyBase), 60 | "llama.attention.layer_norm_rms_epsilon": 
float32(m.Params.NormEPS), 61 | 62 | "llama.expert_count": uint32(m.Params.Experts), 63 | "llama.expert_used_count": uint32(m.Params.ExpertsUsed), 64 | 65 | "llama.vocab_size": uint32(len(m.Vocab.Tokens)), 66 | "llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads), 67 | 68 | "general.file_type": uint32(1), 69 | "tokenizer.ggml.model": "llama", 70 | 71 | "tokenizer.ggml.tokens": m.Vocab.Tokens, 72 | "tokenizer.ggml.scores": m.Vocab.Scores, 73 | "tokenizer.ggml.token_type": m.Vocab.Types, 74 | 75 | "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), 76 | "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), 77 | "tokenizer.ggml.unknown_token_id": uint32(0), 78 | "tokenizer.ggml.add_bos_token": true, 79 | "tokenizer.ggml.add_eos_token": false, 80 | } 81 | 82 | return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors) 83 | } 84 | 85 | func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) { 86 | return llamaRepack(name, m.Params, data, shape) 87 | } 88 | -------------------------------------------------------------------------------- /convert/tokenizer.go: -------------------------------------------------------------------------------- 1 | package convert 2 | 3 | import ( 4 | "cmp" 5 | "crypto/sha256" 6 | "encoding/json" 7 | "fmt" 8 | "log/slog" 9 | "os" 10 | "slices" 11 | 12 | "golang.org/x/exp/maps" 13 | ) 14 | 15 | type Tokenizer struct { 16 | Version string `json:"version"` 17 | AddedTokens []Token `json:"added_tokens"` 18 | Model TokenizerModel `json:"model"` 19 | 20 | PreTokenizer struct { 21 | PreTokenizers []struct { 22 | Type string `json:"type"` 23 | Pattern struct { 24 | Regex string `json:"Regex"` 25 | } `json:"pattern"` 26 | } `json:"pretokenizers"` 27 | } `json:"pre_tokenizer"` 28 | } 29 | 30 | type TokenizerModel struct { 31 | Type string `json:"type"` 32 | Vocab map[string]int `json:"vocab"` 33 | Merges []string `json:"merges"` 34 | Tokens []Token 35 | } 36 | 37 | type Token struct { 38 | ID int `json:"id"` 39 | Content string `json:"content"` 40 | Special bool `json:"special"` 41 | UserDefined bool 42 | } 43 | 44 | func (t *Token) Type() int32 { 45 | switch { 46 | case t.Special: 47 | return tokenTypeControl 48 | case t.UserDefined: 49 | return tokenTypeUserDefined 50 | default: 51 | return tokenTypeNormal 52 | } 53 | } 54 | 55 | func (t *Tokenizer) maxID() int { 56 | return max( 57 | slices.Max(maps.Values(t.Model.Vocab)), 58 | slices.MaxFunc(t.AddedTokens, func(a, b Token) int { 59 | return cmp.Compare(a.ID, b.ID) 60 | }).ID, 61 | ) 62 | } 63 | 64 | func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) { 65 | f, err := os.Open(dirpath) 66 | if err != nil { 67 | panic(err) 68 | } 69 | defer f.Close() 70 | 71 | var t Tokenizer 72 | if err := json.NewDecoder(f).Decode(&t); err != nil { 73 | return "", nil, nil, err 74 | } 75 | 76 | tokens = make([]Token, t.maxID()+1) 77 | for k, v := range t.Model.Vocab { 78 | tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false} 79 | } 80 | 81 | for _, v := range t.AddedTokens { 82 | v.UserDefined = true 83 | tokens[v.ID] = v 84 | } 85 | 86 | sha256sum := sha256.New() 87 | for _, pt := range t.PreTokenizer.PreTokenizers { 88 | switch pt.Type { 89 | case "Split": 90 | if pt.Pattern.Regex != "" { 91 | sha256sum.Write([]byte(pt.Pattern.Regex)) 92 | } 93 | } 94 | } 95 | 96 | switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest { 97 | case 
"d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f": 98 | pre = "llama-bpe" 99 | case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02": 100 | pre = "deepseek-llm" 101 | case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e": 102 | pre = "deepseek-coder" 103 | default: 104 | slog.Warn("unknown pretokenizer, using default", "digest", digest) 105 | pre = "default" 106 | } 107 | 108 | return pre, tokens, t.Model.Merges, nil 109 | } 110 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | ### Getting Started 4 | * [Quickstart](../README.md#quickstart) 5 | * [Examples](../examples) 6 | * [Importing models](./import.md) 7 | * [Linux Documentation](./linux.md) 8 | * [Windows Documentation](./windows.md) 9 | * [Docker Documentation](./docker.md) 10 | 11 | ### Reference 12 | 13 | * [API Reference](./api.md) 14 | * [Modelfile Reference](./modelfile.md) 15 | * [OpenAI Compatibility](./openai.md) 16 | 17 | ### Resources 18 | 19 | * [Troubleshooting Guide](./troubleshooting.md) 20 | * [FAQ](./faq.md) 21 | * [Development guide](./development.md) 22 | -------------------------------------------------------------------------------- /docs/docker.md: -------------------------------------------------------------------------------- 1 | # Ollama Docker image 2 | 3 | ### CPU only 4 | 5 | ```bash 6 | docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama 7 | ``` 8 | 9 | ### Nvidia GPU 10 | Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation). 11 | 12 | #### Install with Apt 13 | 1. Configure the repository 14 | ```bash 15 | curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ 16 | | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg 17 | curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ 18 | | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \ 19 | | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list 20 | sudo apt-get update 21 | ``` 22 | 2. Install the NVIDIA Container Toolkit packages 23 | ```bash 24 | sudo apt-get install -y nvidia-container-toolkit 25 | ``` 26 | 27 | #### Install with Yum or Dnf 28 | 1. Configure the repository 29 | 30 | ```bash 31 | curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \ 32 | | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo 33 | ``` 34 | 35 | 2. 
Install the NVIDIA Container Toolkit packages 36 | 37 | ```bash 38 | sudo yum install -y nvidia-container-toolkit 39 | ``` 40 | 41 | #### Configure Docker to use Nvidia driver 42 | ``` 43 | sudo nvidia-ctk runtime configure --runtime=docker 44 | sudo systemctl restart docker 45 | ``` 46 | 47 | #### Start the container 48 | 49 | ```bash 50 | docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama 51 | ``` 52 | 53 | ### AMD GPU 54 | 55 | To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command: 56 | 57 | ``` 58 | docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm 59 | ``` 60 | 61 | ### Run model locally 62 | 63 | Now you can run a model: 64 | 65 | ``` 66 | docker exec -it ollama ollama run llama3 67 | ``` 68 | 69 | ### Try different models 70 | 71 | More models can be found on the [Ollama library](https://ollama.com/library). 72 | -------------------------------------------------------------------------------- /docs/tutorials.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | Here is a list of ways you can use Ollama with other tools to build interesting applications. 4 | 5 | - [Using LangChain with Ollama in JavaScript](./tutorials/langchainjs.md) 6 | - [Using LangChain with Ollama in Python](./tutorials/langchainpy.md) 7 | - [Running Ollama on NVIDIA Jetson Devices](./tutorials/nvidia-jetson.md) 8 | 9 | Also be sure to check out the [examples](../examples) directory for more ways to use Ollama. 10 | -------------------------------------------------------------------------------- /docs/tutorials/nvidia-jetson.md: -------------------------------------------------------------------------------- 1 | # Running Ollama on NVIDIA Jetson Devices 2 | 3 | Ollama runs well on [NVIDIA Jetson Devices](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) and should run out of the box with the standard installation instructions. 4 | 5 | The following has been tested on [JetPack 5.1.2](https://developer.nvidia.com/embedded/jetpack), but should also work on JetPack 6.0. 6 | 7 | - Install Ollama via standard Linux command (ignore the 404 error): `curl https://ollama.com/install.sh | sh` 8 | - Pull the model you want to use (e.g. mistral): `ollama pull mistral` 9 | - Start an interactive session: `ollama run mistral` 10 | 11 | And that's it! 12 | 13 | # Running Ollama in Docker 14 | 15 | When running GPU accelerated applications in Docker, it is highly recommended to use [dusty-nv jetson-containers repo](https://github.com/dusty-nv/jetson-containers). -------------------------------------------------------------------------------- /docs/windows.md: -------------------------------------------------------------------------------- 1 | # Ollama Windows Preview 2 | 3 | Welcome to the Ollama Windows preview. 4 | 5 | No more WSL required! 6 | 7 | Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support. 8 | After installing Ollama Windows Preview, Ollama will run in the background and 9 | the `ollama` command line is available in `cmd`, `powershell` or your favorite 10 | terminal application. As usual the Ollama [api](./api.md) will be served on 11 | `http://localhost:11434`. 12 | 13 | As this is a preview release, you should expect a few bugs here and there. 
If 14 | you run into a problem you can reach out on 15 | [Discord](https://discord.gg/ollama), or file an 16 | [issue](https://github.com/ollama/ollama/issues). 17 | Logs will often be helpful in diagnosing the problem (see 18 | [Troubleshooting](#troubleshooting) below) 19 | 20 | ## System Requirements 21 | 22 | * Windows 10 or newer, Home or Pro 23 | * NVIDIA 452.39 or newer Drivers if you have an NVIDIA card 24 | * AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card 25 | 26 | ## API Access 27 | 28 | Here's a quick example showing API access from `powershell` 29 | ```powershell 30 | (Invoke-WebRequest -method POST -Body '{"model":"llama3", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json 31 | ``` 32 | 33 | ## Troubleshooting 34 | 35 | While we're in preview, `OLLAMA_DEBUG` is always enabled, which adds 36 | a "view logs" menu item to the app, and increases logging for the GUI app and 37 | server. 38 | 39 | Ollama on Windows stores files in a few different locations. You can view them in 40 | the explorer window by hitting `+R` and type in: 41 | - `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates 42 | - *app.log* contains logs from the GUI application 43 | - *server.log* contains the server logs 44 | - *upgrade.log* contains log output for upgrades 45 | - `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH) 46 | - `explorer %HOMEPATH%\.ollama` contains models and configuration 47 | - `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories 48 | 49 | 50 | ## Standalone CLI 51 | 52 | The easiest way to install Ollama on Windows is to use the `OllamaSetup.exe` 53 | installer. It installs in your account without requiring Administrator rights. 54 | We update Ollama regularly to support the latest models, and this installer will 55 | help you keep up to date. 56 | 57 | If you'd like to install or integrate Ollama as a service, a standalone 58 | `ollama-windows-amd64.zip` zip file is available containing only the Ollama CLI 59 | and GPU library dependencies for Nvidia and AMD. This allows for embedding 60 | Ollama in existing applications, or running it as a system service via `ollama 61 | serve` with tools such as [NSSM](https://nssm.cc/). 62 | -------------------------------------------------------------------------------- /envconfig/config_test.go: -------------------------------------------------------------------------------- 1 | package envconfig 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestConfig(t *testing.T) { 10 | Debug = false // Reset whatever was loaded in init() 11 | t.Setenv("OLLAMA_DEBUG", "") 12 | LoadConfig() 13 | require.False(t, Debug) 14 | t.Setenv("OLLAMA_DEBUG", "false") 15 | LoadConfig() 16 | require.False(t, Debug) 17 | t.Setenv("OLLAMA_DEBUG", "1") 18 | LoadConfig() 19 | require.True(t, Debug) 20 | t.Setenv("OLLAMA_FLASH_ATTENTION", "1") 21 | LoadConfig() 22 | require.True(t, FlashAttention) 23 | } 24 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This directory contains different examples of using Ollama. 
4 | -------------------------------------------------------------------------------- /examples/flyio/.gitignore: -------------------------------------------------------------------------------- 1 | fly.toml 2 | -------------------------------------------------------------------------------- /examples/flyio/README.md: -------------------------------------------------------------------------------- 1 | # Deploy Ollama to Fly.io 2 | 3 | > Note: this example exposes a public endpoint and does not configure authentication. Use with care. 4 | 5 | ## Prerequisites 6 | 7 | - Ollama: https://ollama.com/download 8 | - Fly.io account. Sign up for a free account: https://fly.io/app/sign-up 9 | 10 | ## Steps 11 | 12 | 1. Login to Fly.io 13 | 14 | ```bash 15 | fly auth login 16 | ``` 17 | 18 | 1. Create a new Fly app 19 | 20 | ```bash 21 | fly launch --name --image ollama/ollama --internal-port 11434 --vm-size shared-cpu-8x --now 22 | ``` 23 | 24 | 1. Pull and run `orca-mini:3b` 25 | 26 | ```bash 27 | OLLAMA_HOST=https://.fly.dev ollama run orca-mini:3b 28 | ``` 29 | 30 | `shared-cpu-8x` is a free-tier eligible machine type. For better performance, switch to a `performance` or `dedicated` machine type or attach a GPU for hardware acceleration (see below). 31 | 32 | ## (Optional) Persistent Volume 33 | 34 | By default Fly Machines use ephemeral storage which is problematic if you want to use the same model across restarts without pulling it again. Create and attach a persistent volume to store the downloaded models: 35 | 36 | 1. Create the Fly Volume 37 | 38 | ```bash 39 | fly volume create ollama 40 | ``` 41 | 42 | 1. Update `fly.toml` and add `[mounts]` 43 | 44 | ```toml 45 | [mounts] 46 | source = "ollama" 47 | destination = "/mnt/ollama/models" 48 | ``` 49 | 50 | 1. Update `fly.toml` and add `[env]` 51 | 52 | ```toml 53 | [env] 54 | OLLAMA_MODELS = "/mnt/ollama/models" 55 | ``` 56 | 57 | 1. Deploy your app 58 | 59 | ```bash 60 | fly deploy 61 | ``` 62 | 63 | ## (Optional) Hardware Acceleration 64 | 65 | Fly.io GPU is currently in waitlist. Sign up for the waitlist: https://fly.io/gpu 66 | 67 | Once you've been accepted, create the app with the additional flags `--vm-gpu-kind a100-pcie-40gb` or `--vm-gpu-kind a100-pcie-80gb`. 
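For illustration only, the GPU flag can be combined with the same `fly launch` invocation shown in the Steps section above (the app name is a placeholder you choose; check the Fly.io GPU documentation for which machine sizes can be paired with a GPU):

```bash
fly launch --name your-app-name --image ollama/ollama --internal-port 11434 --vm-gpu-kind a100-pcie-40gb --now
```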
68 | -------------------------------------------------------------------------------- /examples/go-chat/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | 8 | "github.com/ollama/ollama/api" 9 | ) 10 | 11 | func main() { 12 | client, err := api.ClientFromEnvironment() 13 | if err != nil { 14 | log.Fatal(err) 15 | } 16 | 17 | messages := []api.Message{ 18 | api.Message{ 19 | Role: "system", 20 | Content: "Provide very brief, concise responses", 21 | }, 22 | api.Message{ 23 | Role: "user", 24 | Content: "Name some unusual animals", 25 | }, 26 | api.Message{ 27 | Role: "assistant", 28 | Content: "Monotreme, platypus, echidna", 29 | }, 30 | api.Message{ 31 | Role: "user", 32 | Content: "which of these is the most dangerous?", 33 | }, 34 | } 35 | 36 | ctx := context.Background() 37 | req := &api.ChatRequest{ 38 | Model: "llama3", 39 | Messages: messages, 40 | } 41 | 42 | respFunc := func(resp api.ChatResponse) error { 43 | fmt.Print(resp.Message.Content) 44 | return nil 45 | } 46 | 47 | err = client.Chat(ctx, req, respFunc) 48 | if err != nil { 49 | log.Fatal(err) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /examples/go-generate-streaming/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | 8 | "github.com/ollama/ollama/api" 9 | ) 10 | 11 | func main() { 12 | client, err := api.ClientFromEnvironment() 13 | if err != nil { 14 | log.Fatal(err) 15 | } 16 | 17 | // By default, GenerateRequest is streaming. 18 | req := &api.GenerateRequest{ 19 | Model: "gemma", 20 | Prompt: "how many planets are there?", 21 | } 22 | 23 | ctx := context.Background() 24 | respFunc := func(resp api.GenerateResponse) error { 25 | // Only print the response here; GenerateResponse has a number of other 26 | // interesting fields you want to examine. 27 | 28 | // In streaming mode, responses are partial so we call fmt.Print (and not 29 | // Println) in order to avoid spurious newlines being introduced. The 30 | // model will insert its own newlines if it wants. 31 | fmt.Print(resp.Response) 32 | return nil 33 | } 34 | 35 | err = client.Generate(ctx, req, respFunc) 36 | if err != nil { 37 | log.Fatal(err) 38 | } 39 | fmt.Println() 40 | } 41 | -------------------------------------------------------------------------------- /examples/go-generate/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | 8 | "github.com/ollama/ollama/api" 9 | ) 10 | 11 | func main() { 12 | client, err := api.ClientFromEnvironment() 13 | if err != nil { 14 | log.Fatal(err) 15 | } 16 | 17 | req := &api.GenerateRequest{ 18 | Model: "gemma", 19 | Prompt: "how many planets are there?", 20 | 21 | // set streaming to false 22 | Stream: new(bool), 23 | } 24 | 25 | ctx := context.Background() 26 | respFunc := func(resp api.GenerateResponse) error { 27 | // Only print the response here; GenerateResponse has a number of other 28 | // interesting fields you want to examine. 
29 | fmt.Println(resp.Response) 30 | return nil 31 | } 32 | 33 | err = client.Generate(ctx, req, respFunc) 34 | if err != nil { 35 | log.Fatal(err) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/go-http-generate/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/examples/go-http-generate/README.md -------------------------------------------------------------------------------- /examples/go-http-generate/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "log" 8 | "net/http" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | body := []byte(`{"model":"mistral"}`) 14 | resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewBuffer(body)) 15 | 16 | if err != nil { 17 | fmt.Print(err.Error()) 18 | os.Exit(1) 19 | } 20 | 21 | defer resp.Body.Close() 22 | 23 | responseData, err := io.ReadAll(resp.Body) 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | fmt.Println(string(responseData)) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /examples/go-multimodal/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | 9 | "github.com/ollama/ollama/api" 10 | ) 11 | 12 | func main() { 13 | if len(os.Args) <= 1 { 14 | log.Fatal("usage: ") 15 | } 16 | 17 | imgData, err := os.ReadFile(os.Args[1]) 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | 22 | client, err := api.ClientFromEnvironment() 23 | if err != nil { 24 | log.Fatal(err) 25 | } 26 | 27 | req := &api.GenerateRequest{ 28 | Model: "llava", 29 | Prompt: "describe this image", 30 | Images: []api.ImageData{imgData}, 31 | } 32 | 33 | ctx := context.Background() 34 | respFunc := func(resp api.GenerateResponse) error { 35 | // In streaming mode, responses are partial so we call fmt.Print (and not 36 | // Println) in order to avoid spurious newlines being introduced. The 37 | // model will insert its own newlines if it wants. 
38 | fmt.Print(resp.Response) 39 | return nil 40 | } 41 | 42 | err = client.Generate(ctx, req, respFunc) 43 | if err != nil { 44 | log.Fatal(err) 45 | } 46 | fmt.Println() 47 | } 48 | -------------------------------------------------------------------------------- /examples/go-pull-progress/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | 8 | "github.com/ollama/ollama/api" 9 | ) 10 | 11 | func main() { 12 | client, err := api.ClientFromEnvironment() 13 | if err != nil { 14 | log.Fatal(err) 15 | } 16 | 17 | ctx := context.Background() 18 | 19 | req := &api.PullRequest{ 20 | Model: "mistral", 21 | } 22 | progressFunc := func(resp api.ProgressResponse) error { 23 | fmt.Printf("Progress: status=%v, total=%v, completed=%v\n", resp.Status, resp.Total, resp.Completed) 24 | return nil 25 | } 26 | 27 | err = client.Pull(ctx, req, progressFunc) 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /examples/jupyter-notebook/README.md: -------------------------------------------------------------------------------- 1 | # Ollama Jupyter Notebook 2 | 3 | This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely. 4 | 5 | For best results, use an instance with GPU accelerator. 6 | -------------------------------------------------------------------------------- /examples/kubernetes/README.md: -------------------------------------------------------------------------------- 1 | # Deploy Ollama to Kubernetes 2 | 3 | ## Prerequisites 4 | 5 | - Ollama: https://ollama.com/download 6 | - Kubernetes cluster. This example will use Google Kubernetes Engine. 7 | 8 | ## Steps 9 | 10 | 1. Create the Ollama namespace, deployment, and service 11 | 12 | ```bash 13 | kubectl apply -f cpu.yaml 14 | ``` 15 | 16 | ## (Optional) Hardware Acceleration 17 | 18 | Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin) which is deployed in Kubernetes in form of daemonset. Follow the link for more details. 19 | 20 | Once configured, create a GPU enabled Ollama deployment. 21 | 22 | ```bash 23 | kubectl apply -f gpu.yaml 24 | ``` 25 | 26 | ## Test 27 | 28 | 1. Port forward the Ollama service to connect and use it locally 29 | 30 | ```bash 31 | kubectl -n ollama port-forward service/ollama 11434:80 32 | ``` 33 | 34 | 1. 
Pull and run a model, for example `orca-mini:3b` 35 | 36 | ```bash 37 | ollama run orca-mini:3b 38 | ``` -------------------------------------------------------------------------------- /examples/kubernetes/cpu.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: ollama 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: ollama 11 | namespace: ollama 12 | spec: 13 | selector: 14 | matchLabels: 15 | name: ollama 16 | template: 17 | metadata: 18 | labels: 19 | name: ollama 20 | spec: 21 | containers: 22 | - name: ollama 23 | image: ollama/ollama:latest 24 | ports: 25 | - name: http 26 | containerPort: 11434 27 | protocol: TCP 28 | --- 29 | apiVersion: v1 30 | kind: Service 31 | metadata: 32 | name: ollama 33 | namespace: ollama 34 | spec: 35 | type: ClusterIP 36 | selector: 37 | name: ollama 38 | ports: 39 | - port: 80 40 | name: http 41 | targetPort: http 42 | protocol: TCP 43 | -------------------------------------------------------------------------------- /examples/kubernetes/gpu.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: ollama 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: ollama 11 | namespace: ollama 12 | spec: 13 | strategy: 14 | type: Recreate 15 | selector: 16 | matchLabels: 17 | name: ollama 18 | template: 19 | metadata: 20 | labels: 21 | name: ollama 22 | spec: 23 | containers: 24 | - name: ollama 25 | image: ollama/ollama:latest 26 | env: 27 | - name: PATH 28 | value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin 29 | - name: LD_LIBRARY_PATH 30 | value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64 31 | - name: NVIDIA_DRIVER_CAPABILITIES 32 | value: compute,utility 33 | ports: 34 | - name: http 35 | containerPort: 11434 36 | protocol: TCP 37 | resources: 38 | limits: 39 | nvidia.com/gpu: 1 40 | tolerations: 41 | - key: nvidia.com/gpu 42 | operator: Exists 43 | effect: NoSchedule 44 | --- 45 | apiVersion: v1 46 | kind: Service 47 | metadata: 48 | name: ollama 49 | namespace: ollama 50 | spec: 51 | type: ClusterIP 52 | selector: 53 | name: ollama 54 | ports: 55 | - port: 80 56 | name: http 57 | targetPort: http 58 | protocol: TCP 59 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-document/README.md: -------------------------------------------------------------------------------- 1 | # LangChain Document QA 2 | 3 | This example provides an interface for asking questions to a PDF document. 4 | 5 | ## Setup 6 | 7 | ``` 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | ## Run 12 | 13 | ``` 14 | python main.py 15 | ``` 16 | 17 | A prompt will appear, where questions may be asked: 18 | 19 | ``` 20 | Query: How many locations does WeWork have? 
21 | ``` 22 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-document/main.py: -------------------------------------------------------------------------------- 1 | from langchain.document_loaders import OnlinePDFLoader 2 | from langchain.vectorstores import Chroma 3 | from langchain.embeddings import GPT4AllEmbeddings 4 | from langchain import PromptTemplate 5 | from langchain.llms import Ollama 6 | from langchain.callbacks.manager import CallbackManager 7 | from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 8 | from langchain.chains import RetrievalQA 9 | import sys 10 | import os 11 | 12 | class SuppressStdout: 13 | def __enter__(self): 14 | self._original_stdout = sys.stdout 15 | self._original_stderr = sys.stderr 16 | sys.stdout = open(os.devnull, 'w') 17 | sys.stderr = open(os.devnull, 'w') 18 | 19 | def __exit__(self, exc_type, exc_val, exc_tb): 20 | sys.stdout.close() 21 | sys.stdout = self._original_stdout 22 | sys.stderr = self._original_stderr 23 | 24 | # load the pdf and split it into chunks 25 | loader = OnlinePDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001813756/975b3e9b-268e-4798-a9e4-2a9a7c92dc10.pdf") 26 | data = loader.load() 27 | 28 | from langchain.text_splitter import RecursiveCharacterTextSplitter 29 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0) 30 | all_splits = text_splitter.split_documents(data) 31 | 32 | with SuppressStdout(): 33 | vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings()) 34 | 35 | while True: 36 | query = input("\nQuery: ") 37 | if query == "exit": 38 | break 39 | if query.strip() == "": 40 | continue 41 | 42 | # Prompt 43 | template = """Use the following pieces of context to answer the question at the end. 44 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 45 | Use three sentences maximum and keep the answer as concise as possible. 
46 | {context} 47 | Question: {question} 48 | Helpful Answer:""" 49 | QA_CHAIN_PROMPT = PromptTemplate( 50 | input_variables=["context", "question"], 51 | template=template, 52 | ) 53 | 54 | llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])) 55 | qa_chain = RetrievalQA.from_chain_type( 56 | llm, 57 | retriever=vectorstore.as_retriever(), 58 | chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}, 59 | ) 60 | 61 | result = qa_chain({"query": query}) 62 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-document/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | aiohttp==3.8.5 3 | aiosignal==1.3.1 4 | anyio==3.7.1 5 | astunparse==1.6.3 6 | async-timeout==4.0.3 7 | attrs==23.1.0 8 | backoff==2.2.1 9 | beautifulsoup4==4.12.2 10 | bs4==0.0.1 11 | cachetools==5.3.1 12 | certifi==2023.7.22 13 | cffi==1.15.1 14 | chardet==5.2.0 15 | charset-normalizer==3.2.0 16 | Chroma==0.2.0 17 | chroma-hnswlib==0.7.2 18 | chromadb==0.4.5 19 | click==8.1.6 20 | coloredlogs==15.0.1 21 | cryptography==41.0.3 22 | dataclasses-json==0.5.14 23 | fastapi==0.99.1 24 | filetype==1.2.0 25 | flatbuffers==23.5.26 26 | frozenlist==1.4.0 27 | gast==0.4.0 28 | google-auth==2.22.0 29 | google-auth-oauthlib==1.0.0 30 | google-pasta==0.2.0 31 | gpt4all==1.0.8 32 | grpcio==1.57.0 33 | h11==0.14.0 34 | h5py==3.9.0 35 | httptools==0.6.0 36 | humanfriendly==10.0 37 | idna==3.4 38 | importlib-resources==6.0.1 39 | joblib==1.3.2 40 | keras==2.13.1 41 | langchain==0.0.261 42 | langsmith==0.0.21 43 | libclang==16.0.6 44 | lxml==4.9.3 45 | Markdown==3.4.4 46 | MarkupSafe==2.1.3 47 | marshmallow==3.20.1 48 | monotonic==1.6 49 | mpmath==1.3.0 50 | multidict==6.0.4 51 | mypy-extensions==1.0.0 52 | nltk==3.8.1 53 | numexpr==2.8.5 54 | numpy==1.24.3 55 | oauthlib==3.2.2 56 | onnxruntime==1.15.1 57 | openapi-schema-pydantic==1.2.4 58 | opt-einsum==3.3.0 59 | overrides==7.4.0 60 | packaging==23.1 61 | pdf2image==1.16.3 62 | pdfminer==20191125 63 | pdfminer.six==20221105 64 | Pillow==10.0.0 65 | posthog==3.0.1 66 | protobuf==4.24.0 67 | pulsar-client==3.2.0 68 | pyasn1==0.5.0 69 | pyasn1-modules==0.3.0 70 | pycparser==2.21 71 | pycryptodome==3.18.0 72 | pydantic==1.10.12 73 | PyPika==0.48.9 74 | python-dateutil==2.8.2 75 | python-dotenv==1.0.0 76 | python-magic==0.4.27 77 | PyYAML==6.0.1 78 | regex==2023.8.8 79 | requests==2.31.0 80 | requests-oauthlib==1.3.1 81 | rsa==4.9 82 | six==1.16.0 83 | sniffio==1.3.0 84 | soupsieve==2.4.1 85 | SQLAlchemy==2.0.19 86 | starlette==0.27.0 87 | sympy==1.12 88 | tabulate==0.9.0 89 | tenacity==8.2.2 90 | tensorboard==2.13.0 91 | tensorboard-data-server==0.7.1 92 | tensorflow==2.13.0 93 | tensorflow-estimator==2.13.0 94 | tensorflow-hub==0.14.0 95 | tensorflow-macos==2.13.0 96 | termcolor==2.3.0 97 | tokenizers==0.13.3 98 | tqdm==4.66.1 99 | typing-inspect==0.9.0 100 | typing_extensions==4.5.0 101 | unstructured==0.9.2 102 | urllib3==1.26.16 103 | uvicorn==0.23.2 104 | uvloop==0.17.0 105 | watchfiles==0.19.0 106 | websockets==11.0.3 107 | Werkzeug==2.3.6 108 | wrapt==1.15.0 109 | yarl==1.9.2 110 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-privategpt/README.md: -------------------------------------------------------------------------------- 1 | # PrivateGPT with Llama 2 uncensored 2 | 3 | 
https://github.com/ollama/ollama/assets/3325447/20cf8ec6-ff25-42c6-bdd8-9be594e3ce1b 4 | 5 | > Note: this example is a slightly modified version of PrivateGPT using models such as Llama 2 Uncensored. All credit for PrivateGPT goes to Iván Martínez who is the creator of it, and you can find his GitHub repo [here](https://github.com/imartinez/privateGPT). 6 | 7 | ### Setup 8 | 9 | Set up a virtual environment (optional): 10 | 11 | ``` 12 | python3 -m venv .venv 13 | source .venv/bin/activate 14 | ``` 15 | 16 | Install the Python dependencies: 17 | 18 | ```shell 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | Pull the model you'd like to use: 23 | 24 | ``` 25 | ollama pull llama2-uncensored 26 | ``` 27 | 28 | ### Getting WeWork's latest quarterly earnings report (10-Q) 29 | 30 | ``` 31 | mkdir source_documents 32 | curl https://d18rn0p25nwr6d.cloudfront.net/CIK-0001813756/975b3e9b-268e-4798-a9e4-2a9a7c92dc10.pdf -o source_documents/wework.pdf 33 | ``` 34 | 35 | ### Ingesting files 36 | 37 | ```shell 38 | python ingest.py 39 | ``` 40 | 41 | Output should look like this: 42 | 43 | ```shell 44 | Creating new vectorstore 45 | Loading documents from source_documents 46 | Loading new documents: 100%|██████████████████████| 1/1 [00:01<00:00, 1.73s/it] 47 | Loaded 1 new documents from source_documents 48 | Split into 90 chunks of text (max. 500 tokens each) 49 | Creating embeddings. May take some minutes... 50 | Using embedded DuckDB with persistence: data will be stored in: db 51 | Ingestion complete! You can now run privateGPT.py to query your documents 52 | ``` 53 | 54 | ### Ask questions 55 | 56 | ```shell 57 | python privateGPT.py 58 | 59 | Enter a query: How many locations does WeWork have? 60 | 61 | > Answer (took 17.7 s.): 62 | As of June 2023, WeWork has 777 locations worldwide, including 610 Consolidated Locations (as defined in the section entitled Key Performance Indicators). 
63 | ``` 64 | 65 | ### Try a different model: 66 | 67 | ``` 68 | ollama pull llama2:13b 69 | MODEL=llama2:13b python privateGPT.py 70 | ``` 71 | 72 | ## Adding more files 73 | 74 | Put any and all your files into the `source_documents` directory 75 | 76 | The supported extensions are: 77 | 78 | - `.csv`: CSV, 79 | - `.docx`: Word Document, 80 | - `.doc`: Word Document, 81 | - `.enex`: EverNote, 82 | - `.eml`: Email, 83 | - `.epub`: EPub, 84 | - `.html`: HTML File, 85 | - `.md`: Markdown, 86 | - `.msg`: Outlook Message, 87 | - `.odt`: Open Document Text, 88 | - `.pdf`: Portable Document Format (PDF), 89 | - `.pptx` : PowerPoint Document, 90 | - `.ppt` : PowerPoint Document, 91 | - `.txt`: Text file (UTF-8), 92 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-privategpt/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | from chromadb.config import Settings 3 | 4 | # Define the folder for storing database 5 | PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY', 'db') 6 | 7 | # Define the Chroma settings 8 | CHROMA_SETTINGS = Settings( 9 | persist_directory=PERSIST_DIRECTORY, 10 | anonymized_telemetry=False 11 | ) 12 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-privategpt/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "privategpt" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Ivan Martinez "] 6 | license = "Apache Version 2.0" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.10" 11 | langchain = "0.0.261" 12 | gpt4all = "^1.0.3" 13 | chromadb = "^0.3.26" 14 | PyMuPDF = "^1.22.5" 15 | python-dotenv = "^1.0.0" 16 | unstructured = "^0.8.0" 17 | extract-msg = "^0.41.5" 18 | tabulate = "^0.9.0" 19 | pandoc = "^2.3" 20 | pypandoc = "^1.11" 21 | tqdm = "^4.65.0" 22 | sentence-transformers = "^2.2.2" 23 | 24 | [build-system] 25 | requires = ["poetry-core"] 26 | build-backend = "poetry.core.masonry.api" 27 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-privategpt/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.274 2 | gpt4all==1.0.8 3 | chromadb==0.4.7 4 | llama-cpp-python==0.1.81 5 | urllib3==2.0.4 6 | PyMuPDF==1.23.5 7 | python-dotenv==1.0.0 8 | unstructured==0.10.8 9 | extract-msg==0.45.0 10 | tabulate==0.9.0 11 | pandoc==2.3 12 | pypandoc==1.11 13 | tqdm==4.66.1 14 | sentence_transformers==2.2.2 -------------------------------------------------------------------------------- /examples/langchain-python-rag-websummary/README.md: -------------------------------------------------------------------------------- 1 | # LangChain Web Summarization 2 | 3 | This example summarizes the website, [https://ollama.com/blog/run-llama2-uncensored-locally](https://ollama.com/blog/run-llama2-uncensored-locally) 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `llama2` model installed: 8 | 9 | ```bash 10 | ollama pull llama2 11 | ``` 12 | 13 | 2. Install the Python Requirements. 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | 3. 
Run the example: 20 | 21 | ```bash 22 | python main.py 23 | ``` 24 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-websummary/main.py: -------------------------------------------------------------------------------- 1 | from langchain_community.llms import Ollama 2 | from langchain_community.document_loaders import WebBaseLoader 3 | from langchain.chains.summarize import load_summarize_chain 4 | 5 | loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally") 6 | docs = loader.load() 7 | 8 | llm = Ollama(model="llama3") 9 | chain = load_summarize_chain(llm, chain_type="stuff") 10 | 11 | result = chain.invoke(docs) 12 | print(result) 13 | -------------------------------------------------------------------------------- /examples/langchain-python-rag-websummary/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.259 2 | -------------------------------------------------------------------------------- /examples/langchain-python-simple/README.md: -------------------------------------------------------------------------------- 1 | # LangChain 2 | 3 | This example is a basic "hello world" of using LangChain with Ollama. 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `llama3` model installed: 8 | 9 | ```bash 10 | ollama pull llama3 11 | ``` 12 | 13 | 2. Install the Python Requirements. 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | 3. Run the example: 20 | 21 | ```bash 22 | python main.py 23 | ``` 24 | -------------------------------------------------------------------------------- /examples/langchain-python-simple/main.py: -------------------------------------------------------------------------------- 1 | from langchain.llms import Ollama 2 | 3 | input = input("What is your question?") 4 | llm = Ollama(model="llama3") 5 | res = llm.predict(input) 6 | print (res) 7 | -------------------------------------------------------------------------------- /examples/langchain-python-simple/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.259 2 | -------------------------------------------------------------------------------- /examples/langchain-typescript-simple/README.md: -------------------------------------------------------------------------------- 1 | # LangChain 2 | 3 | This example is a basic "hello world" of using LangChain with Ollama using Node.js and Typescript. 4 | 5 | ## Running the Example 6 | 7 | 1. Install the prerequisites: 8 | 9 | ```bash 10 | npm install 11 | ``` 12 | 13 | 2. Ensure the `mistral` model is available: 14 | 15 | ```bash 16 | ollama pull mistral 17 | ``` 18 | 19 | 3. 
Run the example: 20 | 21 | ```bash 22 | npm start 23 | ``` 24 | -------------------------------------------------------------------------------- /examples/langchain-typescript-simple/main.ts: -------------------------------------------------------------------------------- 1 | import { Ollama } from 'langchain/llms/ollama'; 2 | import * as readline from "readline"; 3 | 4 | async function main() { 5 | const ollama = new Ollama({ 6 | model: 'mistral' 7 | // other parameters can be found at https://js.langchain.com/docs/api/llms_ollama/classes/Ollama 8 | }); 9 | 10 | const rl = readline.createInterface({ 11 | input: process.stdin, 12 | output: process.stdout, 13 | }); 14 | 15 | rl.question("What is your question: \n", async (user_input) => { 16 | const stream = await ollama.stream(user_input); 17 | 18 | for await (const chunk of stream) { 19 | process.stdout.write(chunk); 20 | } 21 | rl.close(); 22 | }) 23 | } 24 | 25 | main(); -------------------------------------------------------------------------------- /examples/langchain-typescript-simple/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "start": "tsx main.ts" 4 | }, 5 | "devDependencies": { 6 | "tsx": "^4.6.2", 7 | "typescript": "^5.3.3" 8 | }, 9 | "dependencies": { 10 | "langchain": "^0.0.165", 11 | "readline": "^1.3.0" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /examples/modelfile-mario/Modelfile: -------------------------------------------------------------------------------- 1 | FROM llama3 2 | PARAMETER temperature 1 3 | SYSTEM """ 4 | You are Mario from super mario bros, acting as an assistant. 5 | """ 6 | -------------------------------------------------------------------------------- /examples/modelfile-mario/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/examples/modelfile-mario/logo.png -------------------------------------------------------------------------------- /examples/modelfile-mario/readme.md: -------------------------------------------------------------------------------- 1 | image of Italian plumber 2 | 3 | # Example character: Mario 4 | 5 | This example shows how to create a basic character using Llama3 as the base model. 6 | 7 | To run this example: 8 | 9 | 1. Download the Modelfile 10 | 2. `ollama pull llama3` to get the base model used in the model file. 11 | 3. `ollama create NAME -f ./Modelfile` 12 | 4. `ollama run NAME` 13 | 14 | Ask it some questions like "Who are you?" or "Is Peach in trouble again?" 15 | 16 | ## Editing this file 17 | 18 | What the model file looks like: 19 | 20 | ``` 21 | FROM llama3 22 | PARAMETER temperature 1 23 | SYSTEM """ 24 | You are Mario from Super Mario Bros, acting as an assistant. 25 | """ 26 | ``` 27 | 28 | What if you want to change its behaviour? 29 | 30 | - Try changing the prompt 31 | - Try changing the parameters [Docs](https://github.com/ollama/ollama/blob/main/docs/modelfile.md) 32 | - Try changing the model (e.g. An uncensored model by `FROM wizard-vicuna` this is the wizard-vicuna uncensored model ) 33 | 34 | Once the changes are made, 35 | 36 | 1. `ollama create NAME -f ./Modelfile` 37 | 2. `ollama run NAME` 38 | 3. Iterate until you are happy with the results. 39 | 40 | Notes: 41 | 42 | - This example is for research purposes only. There is no affiliation with any entity. 
43 | - When using an uncensored model, please be aware that it may generate offensive content. 44 | -------------------------------------------------------------------------------- /examples/python-dockerit/Modelfile: -------------------------------------------------------------------------------- 1 | FROM mistral 2 | SYSTEM """ 3 | You are an experienced Devops engineer focused on docker. When given specifications for a particular need or application you know the best way to host that within a docker container. For instance if someone tells you they want an nginx server to host files located at /web you will answer as follows 4 | 5 | ---start 6 | FROM nginx:alpine 7 | COPY /myweb /usr/share/nginx/html 8 | EXPOSE 80 9 | ---end 10 | 11 | Notice that the answer you should give is just the contents of the dockerfile with no explanation and there are three dashes and the word start at the beginning and 3 dashes and the word end. The full output can be piped into a file and run as is. Here is another example. The user will ask to launch a Postgres server with a password of abc123. And the response should be 12 | 13 | ---start 14 | FROM postgres:latest 15 | ENV POSTGRES_PASSWORD=abc123 16 | EXPOSE 5432 17 | ---end 18 | 19 | Again it's just the contents of the dockerfile and nothing else. 20 | """ 21 | -------------------------------------------------------------------------------- /examples/python-dockerit/README.md: -------------------------------------------------------------------------------- 1 | # DockerIt 2 | 3 | DockerIt is a tool to help you build and run your application in a Docker container. It consists of a model that defines the system prompt and model weights to use, along with a python script to then build the container and run the image automatically. 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `mattw/dockerit` model installed: 8 | 9 | ```bash 10 | ollama pull mattw/dockerit 11 | ``` 12 | 13 | 2. Make sure Docker is running on your machine. 14 | 15 | 3. Install the Python Requirements. 16 | 17 | ```bash 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | 4. Run the example: 22 | 23 | ```bash 24 | python dockerit.py "simple postgres server with admin password set to 123" 25 | ``` 26 | 27 | 5. Enter the name you would like to use for your container image. 28 | 29 | ## Caveats 30 | 31 | This is a simple example. It's assuming the Dockerfile content generated is going to work. In many cases, even with simple web servers, it fails when trying to copy files that don't exist. It's simply an example of what you could possibly do. 
32 | -------------------------------------------------------------------------------- /examples/python-dockerit/dockerit.py: -------------------------------------------------------------------------------- 1 | import requests, json, docker, io, sys 2 | inputDescription = " ".join(sys.argv[1:]) 3 | imageName = input("Enter the name of the image: ") 4 | client = docker.from_env() 5 | s = requests.Session() 6 | output="" 7 | with s.post('http://localhost:11434/api/generate', json={'model': 'dockerit', 'prompt': inputDescription}, stream=True) as r: 8 | for line in r.iter_lines(): 9 | if line: 10 | j = json.loads(line) 11 | if "response" in j: 12 | output = output +j["response"] 13 | output = output[output.find("---start")+9:output.find("---end")-1] 14 | f = io.BytesIO(bytes(output, 'utf-8')) 15 | client.images.build(fileobj=f, tag=imageName) 16 | container = client.containers.run(imageName, detach=True) 17 | print("Container named", container.name, " started with id: ",container.id) 18 | -------------------------------------------------------------------------------- /examples/python-dockerit/requirements.txt: -------------------------------------------------------------------------------- 1 | docker -------------------------------------------------------------------------------- /examples/python-json-datagenerator/predefinedschema.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import random 4 | 5 | model = "llama3" 6 | template = { 7 | "firstName": "", 8 | "lastName": "", 9 | "address": { 10 | "street": "", 11 | "city": "", 12 | "state": "", 13 | "zipCode": "" 14 | }, 15 | "phoneNumber": "" 16 | } 17 | 18 | prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in the US, and phone number. \nUse the following template: {json.dumps(template)}." 19 | 20 | data = { 21 | "prompt": prompt, 22 | "model": model, 23 | "format": "json", 24 | "stream": False, 25 | "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, 26 | } 27 | 28 | print(f"Generating a sample user") 29 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) 30 | json_data = json.loads(response.text) 31 | print(json.dumps(json.loads(json_data["response"]), indent=2)) 32 | -------------------------------------------------------------------------------- /examples/python-json-datagenerator/randomaddresses.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import random 4 | 5 | countries = [ 6 | "United States", 7 | "United Kingdom", 8 | "the Netherlands", 9 | "Germany", 10 | "Mexico", 11 | "Canada", 12 | "France", 13 | ] 14 | country = random.choice(countries) 15 | model = "llama3" 16 | 17 | prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters." 
18 | 19 | data = { 20 | "prompt": prompt, 21 | "model": model, 22 | "format": "json", 23 | "stream": False, 24 | "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, 25 | } 26 | 27 | print(f"Generating a sample user in {country}") 28 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) 29 | json_data = json.loads(response.text) 30 | 31 | print(json.dumps(json.loads(json_data["response"]), indent=2)) 32 | -------------------------------------------------------------------------------- /examples/python-json-datagenerator/readme.md: -------------------------------------------------------------------------------- 1 | # JSON Output Example 2 | 3 | ![llmjson 2023-11-10 15_31_31](https://github.com/ollama/ollama/assets/633681/e599d986-9b4a-4118-81a4-4cfe7e22da25) 4 | 5 | There are two python scripts in this example. `randomaddresses.py` generates random addresses from different countries. `predefinedschema.py` sets a template for the model to fill in. 6 | 7 | ## Running the Example 8 | 9 | 1. Ensure you have the `llama3` model installed: 10 | 11 | ```bash 12 | ollama pull llama3 13 | ``` 14 | 15 | 2. Install the Python Requirements. 16 | 17 | ```bash 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | 3. Run the Random Addresses example: 22 | 23 | ```bash 24 | python randomaddresses.py 25 | ``` 26 | 27 | 4. Run the Predefined Schema example: 28 | 29 | ```bash 30 | python predefinedschema.py 31 | ``` 32 | 33 | ## Review the Code 34 | 35 | Both programs are basically the same, with a different prompt for each, demonstrating two different ideas. The key part of getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and specifying the `format` as `json` in the data body. 36 | 37 | ```python 38 | prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters." 39 | 40 | data = { 41 | "prompt": prompt, 42 | "model": model, 43 | "format": "json", 44 | "stream": False, 45 | "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100}, 46 | } 47 | ``` 48 | 49 | When running `randomaddresses.py` you will see that the schema changes and adapts to the chosen country. 50 | 51 | In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with. 52 | 53 | Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output easy to read. 
54 | 55 | ```python 56 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=False) 57 | json_data = json.loads(response.text) 58 | 59 | print(json.dumps(json.loads(json_data["response"]), indent=2)) 60 | ``` 61 | -------------------------------------------------------------------------------- /examples/python-json-datagenerator/requirements.txt: -------------------------------------------------------------------------------- 1 | Requests==2.31.0 2 | -------------------------------------------------------------------------------- /examples/python-loganalysis/Modelfile: -------------------------------------------------------------------------------- 1 | FROM codebooga:latest 2 | 3 | SYSTEM """ 4 | You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer. 5 | """ 6 | 7 | PARAMETER TEMPERATURE 0.3 8 | 9 | -------------------------------------------------------------------------------- /examples/python-loganalysis/loganalysis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | import requests 4 | import json 5 | 6 | # prelines and postlines represent the number of lines of context to include in the output around the error 7 | prelines = 10 8 | postlines = 10 9 | 10 | def find_errors_in_log_file(): 11 | if len(sys.argv) < 2: 12 | print("Usage: python loganalysis.py ") 13 | return 14 | 15 | log_file_path = sys.argv[1] 16 | with open(log_file_path, 'r') as log_file: 17 | log_lines = log_file.readlines() 18 | 19 | error_logs = [] 20 | for i, line in enumerate(log_lines): 21 | if "error" in line.lower(): 22 | start_index = max(0, i - prelines) 23 | end_index = min(len(log_lines), i + postlines + 1) 24 | error_logs.extend(log_lines[start_index:end_index]) 25 | 26 | return error_logs 27 | 28 | error_logs = find_errors_in_log_file() 29 | 30 | data = { 31 | "prompt": "\n".join(error_logs), 32 | "model": "mattw/loganalyzer" 33 | } 34 | 35 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=True) 36 | for line in response.iter_lines(): 37 | if line: 38 | json_data = json.loads(line) 39 | if json_data['done'] == False: 40 | print(json_data['response'], end='', flush=True) 41 | 42 | -------------------------------------------------------------------------------- /examples/python-loganalysis/readme.md: -------------------------------------------------------------------------------- 1 | # Log Analysis example 2 | 3 | ![loganalyzer 2023-11-10 08_53_29](https://github.com/ollama/ollama/assets/633681/ad30f1fc-321f-4953-8914-e30e24db9921) 4 | 5 | This example shows one possible way to create a log file analyzer. It uses the model **mattw/loganalyzer** which is based on **codebooga**, a 34b parameter model. 6 | 7 | To use it, run: 8 | 9 | `python loganalysis.py ` 10 | 11 | You can try this with the `logtest.logfile` file included in this directory. 12 | 13 | ## Running the Example 14 | 15 | 1. Ensure you have the `mattw/loganalyzer` model installed: 16 | 17 | ```bash 18 | ollama pull mattw/loganalyzer 19 | ``` 20 | 21 | 2. Install the Python Requirements. 22 | 23 | ```bash 24 | pip install -r requirements.txt 25 | ``` 26 | 27 | 3. 
Run the example: 28 | 29 | ```bash 30 | python loganalysis.py logtest.logfile 31 | ``` 32 | 33 | ## Review the code 34 | 35 | The first part of this example is a Modelfile that takes `codebooga` and applies a new System Prompt: 36 | 37 | ```plaintext 38 | SYSTEM """ 39 | You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer. 40 | """ 41 | ``` 42 | 43 | This model is available at https://ollama.com/mattw/loganalyzer. You can customize it and add to your own namespace using the command `ollama create -f ` then `ollama push `. 44 | 45 | Then loganalysis.py scans all the lines in the given log file and searches for the word 'error'. When the word is found, the 10 lines before and after are set as the prompt for a call to the Generate API. 46 | 47 | ```python 48 | data = { 49 | "prompt": "\n".join(error_logs), 50 | "model": "mattw/loganalyzer" 51 | } 52 | ``` 53 | 54 | Finally, the streamed output is parsed and the response field in the output is printed to the line. 55 | 56 | ```python 57 | response = requests.post("http://localhost:11434/api/generate", json=data, stream=True) 58 | for line in response.iter_lines(): 59 | if line: 60 | json_data = json.loads(line) 61 | if json_data['done'] == False: 62 | print(json_data['response'], end='') 63 | 64 | ``` 65 | 66 | ## Next Steps 67 | 68 | There is a lot more that can be done here. This is a simple way to detect errors, looking for the word error. Perhaps it would be interesting to find anomalous activity in the logs. It could be interesting to create embeddings for each line and compare them, looking for similar lines. Or look into applying Levenshtein Distance algorithms to find similar lines to help identify the anomalous lines. 69 | 70 | Try different models and different prompts to analyze the data. You could consider adding retrieval augmented generation (RAG) to this to help understand newer log formats. 71 | -------------------------------------------------------------------------------- /examples/python-loganalysis/requirements.txt: -------------------------------------------------------------------------------- 1 | Requests==2.31.0 2 | -------------------------------------------------------------------------------- /examples/python-rag-newssummary/README.md: -------------------------------------------------------------------------------- 1 | # News Summarizer 2 | 3 | This example goes through a series of steps: 4 | 5 | 1. You choose a topic area (e.g., "news", "NVidia", "music", etc.). 6 | 2. Gets the most recent articles on that topic from various sources. 7 | 3. Uses Ollama to summarize each article. 8 | 4. Creates chunks of sentences from each article. 9 | 5. Uses Sentence Transformers to generate embeddings for each of those chunks. 10 | 6. You enter a question regarding the summaries shown. 11 | 7. Uses Sentence Transformers to generate an embedding for that question. 12 | 8. Uses the embedded question to find the most similar chunks. 13 | 9. Feeds all that to Ollama to generate a good answer to your question based on these news articles. 14 | 15 | This example lets you pick from a few different topic areas, then summarize the most recent x articles for that topic. It then creates chunks of sentences from each article and then generates embeddings for each of those chunks. 
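The chunk search in steps 7–9 is handled by `knn_search`, imported from `utils.py`, which is not reproduced in this excerpt. As a rough sketch of the idea only (not the actual implementation), a cosine k-nearest-neighbour lookup over the stored chunk embeddings could be built with scikit-learn and NumPy, both already in this example's requirements; the flattening of `allEmbeddings` and the `(index, source text)` return shape below are assumptions based on how `summ.py` consumes the results.

```python
# Illustrative sketch only; the example's real search lives in utils.knn_search.
# Assumes the structure summ.py builds: a list of articles, each with an
# "embeddings" list of {"source": chunk_text, "embedding": [floats], ...} items.
import numpy as np
from sklearn.neighbors import NearestNeighbors

def knn_search_sketch(question_embedding, all_embeddings, k=10):
    chunks = [item for article in all_embeddings for item in article["embeddings"]]
    matrix = np.array([item["embedding"] for item in chunks])
    nn = NearestNeighbors(n_neighbors=min(k, len(chunks)), metric="cosine")
    nn.fit(matrix)
    _, indices = nn.kneighbors(np.array(question_embedding).reshape(1, -1))
    # Return (index, source text) pairs, the shape summ.py expects from knn_search
    return [(int(i), chunks[int(i)]["source"]) for i in indices[0]]
```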
16 | 17 | ## Running the Example 18 | 19 | 1. Ensure you have the `mistral-openorca` model installed: 20 | 21 | ```bash 22 | ollama pull mistral-openorca 23 | ``` 24 | 25 | 2. Install the Python Requirements. 26 | 27 | ```bash 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | 3. Run the example: 32 | 33 | ```bash 34 | python summ.py 35 | ``` 36 | -------------------------------------------------------------------------------- /examples/python-rag-newssummary/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.12.2 2 | feedparser==6.0.10 3 | mattsollamatools==0.0.8 4 | newspaper3k==0.2.8 5 | nltk==3.8.1 6 | numpy==1.24.3 7 | Requests==2.31.0 8 | scikit_learn==1.3.0 9 | sentence_transformers==2.2.2 10 | -------------------------------------------------------------------------------- /examples/python-rag-newssummary/summ.py: -------------------------------------------------------------------------------- 1 | import curses 2 | import json 3 | from utils import get_url_for_topic, topic_urls, menu, getUrls, get_summary, getArticleText, knn_search 4 | import requests 5 | from sentence_transformers import SentenceTransformer 6 | from mattsollamatools import chunker 7 | 8 | if __name__ == "__main__": 9 | chosen_topic = curses.wrapper(menu) 10 | print("Here is your news summary:\n") 11 | urls = getUrls(chosen_topic, n=5) 12 | model = SentenceTransformer('all-MiniLM-L6-v2') 13 | allEmbeddings = [] 14 | 15 | for url in urls: 16 | article={} 17 | article['embeddings'] = [] 18 | article['url'] = url 19 | text = getArticleText(url) 20 | summary = get_summary(text) 21 | chunks = chunker(text) # Use the chunk_text function from web_utils 22 | embeddings = model.encode(chunks) 23 | for (chunk, embedding) in zip(chunks, embeddings): 24 | item = {} 25 | item['source'] = chunk 26 | item['embedding'] = embedding.tolist() # Convert NumPy array to list 27 | item['sourcelength'] = len(chunk) 28 | article['embeddings'].append(item) 29 | 30 | allEmbeddings.append(article) 31 | 32 | print(f"{summary}\n") 33 | 34 | 35 | while True: 36 | context = [] 37 | # Input a question from the user 38 | question = input("Enter your question about the news, or type quit: ") 39 | 40 | if question.lower() == 'quit': 41 | break 42 | 43 | # Embed the user's question 44 | question_embedding = model.encode([question]) 45 | 46 | # Perform KNN search to find the best matches (indices and source text) 47 | best_matches = knn_search(question_embedding, allEmbeddings, k=10) 48 | 49 | 50 | sourcetext="" 51 | for i, (index, source_text) in enumerate(best_matches, start=1): 52 | sourcetext += f"{i}. Index: {index}, Source Text: {source_text}" 53 | 54 | systemPrompt = f"Only use the following information to answer the question. 
Do not use anything else: {sourcetext}" 55 | 56 | url = "http://localhost:11434/api/generate" 57 | 58 | payload = { 59 | "model": "mistral-openorca", 60 | "prompt": question, 61 | "system": systemPrompt, 62 | "stream": False, 63 | "context": context 64 | } 65 | 66 | # Convert the payload to a JSON string 67 | payload_json = json.dumps(payload) 68 | 69 | # Set the headers to specify JSON content 70 | headers = { 71 | "Content-Type": "application/json" 72 | } 73 | 74 | # Send the POST request 75 | response = requests.post(url, data=payload_json, headers=headers) 76 | 77 | # Check the response 78 | if response.status_code == 200: 79 | output = json.loads(response.text) 80 | context = output['context'] 81 | print(output['response']+ "\n") 82 | 83 | 84 | else: 85 | print(f"Request failed with status code {response.status_code}") 86 | 87 | -------------------------------------------------------------------------------- /examples/python-simplechat/client.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve` 5 | model = "llama3" # TODO: update this for whatever model you wish to use 6 | 7 | 8 | def chat(messages): 9 | r = requests.post( 10 | "http://0.0.0.0:11434/api/chat", 11 | json={"model": model, "messages": messages, "stream": True}, 12 | stream=True 13 | ) 14 | r.raise_for_status() 15 | output = "" 16 | 17 | for line in r.iter_lines(): 18 | body = json.loads(line) 19 | if "error" in body: 20 | raise Exception(body["error"]) 21 | if body.get("done") is False: 22 | message = body.get("message", "") 23 | content = message.get("content", "") 24 | output += content 25 | # the response streams one token at a time, print that as we receive it 26 | print(content, end="", flush=True) 27 | 28 | if body.get("done", False): 29 | message["content"] = output 30 | return message 31 | 32 | 33 | def main(): 34 | messages = [] 35 | 36 | while True: 37 | user_input = input("Enter a prompt: ") 38 | if not user_input: 39 | exit() 40 | print() 41 | messages.append({"role": "user", "content": user_input}) 42 | message = chat(messages) 43 | messages.append(message) 44 | print("\n\n") 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /examples/python-simplechat/readme.md: -------------------------------------------------------------------------------- 1 | # Simple Chat Example 2 | 3 | The **chat** endpoint is one of two ways to generate text from an LLM with Ollama, and is introduced in version 0.1.14. At a high level, you provide the endpoint an array of objects with a role and content specified. Then with each output and prompt, you add more of those role/content objects, which builds up the history. 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `llama3` model installed: 8 | 9 | ```bash 10 | ollama pull llama3 11 | ``` 12 | 13 | 2. Install the Python Requirements. 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | 3. 
Run the example: 20 | 21 | ```bash 22 | python client.py 23 | ``` 24 | 25 | ## Review the Code 26 | 27 | You can see in the **chat** function that actually calling the endpoint is done simply with: 28 | 29 | ```python 30 | r = requests.post( 31 | "http://0.0.0.0:11434/api/chat", 32 | json={"model": model, "messages": messages, "stream": True}, 33 | ) 34 | ``` 35 | 36 | With the **generate** endpoint, you need to provide a `prompt`. But with **chat**, you provide `messages`. And the resulting stream of responses includes a `message` object with a `content` field. 37 | 38 | The final JSON object doesn't provide the full content, so you will need to build the content yourself. 39 | 40 | In the **main** function, we collect `user_input` and add it as a message to our messages and that is passed to the chat function. When the LLM is done responding the output is added as another message. 41 | 42 | ## Next Steps 43 | 44 | In this example, all generations are kept. You might want to experiment with summarizing everything older than 10 conversations to enable longer history with less context being used. 45 | -------------------------------------------------------------------------------- /examples/python-simplechat/requirements.txt: -------------------------------------------------------------------------------- 1 | Requests==2.31.0 2 | -------------------------------------------------------------------------------- /examples/python-simplegenerate/README.md: -------------------------------------------------------------------------------- 1 | # Simple Generate Example 2 | 3 | This is a simple example using the **Generate** endpoint. 4 | 5 | ## Running the Example 6 | 7 | 1. Ensure you have the `stablelm-zephyr` model installed: 8 | 9 | ```bash 10 | ollama pull stablelm-zephyr 11 | ``` 12 | 13 | 2. Install the Python Requirements. 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | 3. Run the example: 20 | 21 | ```bash 22 | python client.py 23 | ``` 24 | 25 | ## Review the Code 26 | 27 | The **main** function simply asks for input, then passes that to the generate function. The output from generate is then passed back to generate on the next run. 28 | 29 | The **generate** function uses `requests.post` to call `/api/generate`, passing the model, prompt, and context. The `generate` endpoint returns a stream of JSON blobs that are then iterated through, looking for the response values. That is then printed out. The final JSON object includes the full context of the conversation so far, and that is the return value from the function. 
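For contrast with the streaming loop in `client.py` below, the same endpoint can also be called with streaming turned off, in which case a single JSON object comes back carrying the full `response` plus the accumulated `context`. This is the same pattern the news summarizer and JSON data generator examples above use. A minimal sketch (the prompt text here is just a placeholder):

```python
import requests

# Non-streaming sketch of the /api/generate call that client.py makes below.
# With "stream": False the server replies with one JSON object instead of a stream.
data = {
    "model": "stablelm-zephyr",
    "prompt": "Why is the sky blue?",
    "context": [],  # pass the context from the previous reply to continue a conversation
    "stream": False,
}
body = requests.post("http://localhost:11434/api/generate", json=data).json()
print(body["response"])
context = body["context"]  # hand this back on the next call, as client.py does
```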
30 | -------------------------------------------------------------------------------- /examples/python-simplegenerate/client.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve` 5 | model = 'stablelm-zephyr' # TODO: update this for whatever model you wish to use 6 | 7 | def generate(prompt, context): 8 | r = requests.post('http://localhost:11434/api/generate', 9 | json={ 10 | 'model': model, 11 | 'prompt': prompt, 12 | 'context': context, 13 | }, 14 | stream=True) 15 | r.raise_for_status() 16 | 17 | for line in r.iter_lines(): 18 | body = json.loads(line) 19 | response_part = body.get('response', '') 20 | # the response streams one token at a time, print that as we receive it 21 | print(response_part, end='', flush=True) 22 | 23 | if 'error' in body: 24 | raise Exception(body['error']) 25 | 26 | if body.get('done', False): 27 | return body['context'] 28 | 29 | def main(): 30 | context = [] # the context stores a conversation history, you can use this to make the model more context aware 31 | while True: 32 | user_input = input("Enter a prompt: ") 33 | if not user_input: 34 | exit() 35 | print() 36 | context = generate(user_input, context) 37 | print() 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /examples/python-simplegenerate/requirements.txt: -------------------------------------------------------------------------------- 1 | Requests==2.31.0 2 | -------------------------------------------------------------------------------- /examples/typescript-functioncalling/extractwp.ts: -------------------------------------------------------------------------------- 1 | import { Ollama } from "ollama-node"; 2 | import { readFile } from "fs/promises"; 3 | 4 | async function main() { 5 | 6 | const ollama = new Ollama(); 7 | 8 | // Set the system prompt to prepare the model to receive a prompt and a schema and set some rules for the output. 9 | const systemprompt = `You will be given a text along with a prompt and a schema. You will have to extract the information requested in the prompt from the text and generate output in JSON observing the schema provided. If the schema shows a type of integer or number, you must only show a integer for that field. A string should always be a valid string. If a value is unknown, leave it empty. Output the JSON with extra spaces to ensure that it pretty prints.` 10 | 11 | const schema = { 12 | "people": [{ 13 | "name": { 14 | "type": "string", 15 | "description": "Name of the person" 16 | }, 17 | "title": { 18 | "type": "string", 19 | "description": "Title of the person" 20 | } 21 | }], 22 | } 23 | 24 | // Depending on the model chosen, you may be limited by the size of the context window, so limit the context to 2000 words. 25 | const textcontent = await readFile("./wp.txt", "utf-8").then((text) => text.split(" ").slice(0, 2000).join(" ")); 26 | 27 | // Specific instructions for this task 28 | const prompt = `Review the source text and determine the 10 most important people to focus on. Then extract the name and title for those people. 
Output should be in JSON.\n\nSchema: \n${JSON.stringify(schema, null, 2)}\n\nSource Text:\n${textcontent}` 29 | 30 | await ollama.setModel("neural-chat"); 31 | ollama.setSystemPrompt(systemprompt); 32 | 33 | // setJSONFormat is the equivalent of setting 'format: json' in the API 34 | ollama.setJSONFormat(true); 35 | await ollama.streamingGenerate(prompt, (word) => { process.stdout.write(word) }) 36 | } 37 | 38 | main(); -------------------------------------------------------------------------------- /examples/typescript-functioncalling/info.txt: -------------------------------------------------------------------------------- 1 | --- 2 | Hi matt, 3 | 4 | thanks for letting me know that you are going to come today, November 16, for my tea party. My address is 123 Falk St on Bainbridge Island. I live in the house with the red door. I will be home all day so just come by whenever you want. 5 | 6 | Fred 7 | 8 | --- 9 | Great, send the check to our office at 1917 1st St, Seattle, WA 98101. I will let you know when we receive it. 10 | 11 | Mark Richardson 12 | Big Corp 13 | --- 14 | We are looking forward to seeing you at our Local AI Meetup. It will be held on December 3. It will be at the offices of Enormous Co. Our address is 344 1st Ave, Seattle, WA 98101. We will be meeting in the conference room on the 3rd floor. 15 | 16 | Barbara Reilly 17 | Enormous Co. -------------------------------------------------------------------------------- /examples/typescript-functioncalling/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "ollama-node": "^0.1.27" 4 | }, 5 | "devDependencies": { 6 | "tsx": "^4.1.2", 7 | "typescript": "^5.2.2" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /examples/typescript-functioncalling/readme.md: -------------------------------------------------------------------------------- 1 | # Function calling 2 | 3 | ![function calling 2023-11-16 16_12_58](https://github.com/ollama/ollama/assets/633681/a0acc247-9746-45ab-b325-b65dfbbee4fb) 4 | 5 | One of the features added to some models is 'function calling'. It's a bit of a confusing name. It's understandable if you think that means the model can call functions, but that's not what it means. Function calling simply means that the output of the model is formatted in JSON, using a preconfigured schema, and uses the expected types. Then your code can use the output of the model and call functions with it. Using the JSON format in Ollama, you can use any model for function calling. 6 | 7 | The two examples provided can extract information out of the provided texts. The first example uses the first couple of chapters from War and Peace by Lev Nikolayevich Tolstoy, and extracts the names and titles of the characters introduced in the story. The second example uses a more complicated schema to pull out addresses and event information from a series of emails. 8 | 9 | ## Running the examples 10 | 11 | 1. Clone this repo and navigate to the `examples/typescript-functioncalling` directory. 12 | 2. Install the dependencies with `npm install`. 13 | 3. Review the `wp.txt` file. 14 | 4. Run `tsx extractwp.ts`. 15 | 5. Review the `info.txt` file. 16 | 6. Run `tsx extractemail.ts`. 17 | 18 | ## Review the Code 19 | 20 | Both examples do roughly the same thing with different source material. They both use the same system prompt, which tells the model to expect some instructions and a schema. 
Then we inject the schema into the prompt and generate an answer. 21 | 22 | The first example, `extractwp.ts`, outputs the resulting JSON to the console, listing the characters introduced at the start of War and Peace. The second example, `extractemail.ts`, is a bit more complicated, extracting two different types of information: addresses and events. It outputs the results to a JSON blob, then the addresses are handed off to one function called `reportAddresses` and the events are handed off to another function called `reportEvents`. 23 | 24 | Notice that both examples are using the model from Intel called `neural-chat`. This is not a model tuned for function calling, yet it performs very well at this task. 25 | 26 | ## Next Steps 27 | 28 | Try exporting some of your real emails to the input file and seeing how well the model does. Try pointing the first example at other books. You could even have it cycle through all the sections and maybe add up the number of times any character is seen throughout the book, determining the most important characters. You can also try out different models. 29 | -------------------------------------------------------------------------------- /examples/typescript-mentors/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | package-lock.json 3 | -------------------------------------------------------------------------------- /examples/typescript-mentors/character-generator.ts: -------------------------------------------------------------------------------- 1 | import { Ollama } from 'ollama-node' 2 | import fs from 'fs'; 3 | import path from 'path'; 4 | 5 | async function characterGenerator() { 6 | const character = process.argv[2]; 7 | console.log(`You are creating a character for ${character}.`); 8 | const foldername = character.replace(/\s/g, '').toLowerCase(); 9 | const directory = path.join(__dirname, foldername); 10 | if (!fs.existsSync(directory)) { 11 | fs.mkdirSync(directory, { recursive: true }); 12 | } 13 | 14 | const ollama = new Ollama(); 15 | ollama.setModel("stablebeluga2:70b-q4_K_M"); 16 | const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. 
`); 17 | 18 | const thecontents = `FROM llama3\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`; 19 | 20 | fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => { 21 | if (err) throw err; 22 | console.log('The file has been saved!'); 23 | }); 24 | } 25 | 26 | characterGenerator(); 27 | -------------------------------------------------------------------------------- /examples/typescript-mentors/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "charactergen": "tsx character-generator.ts", 4 | "start": "tsx mentors.ts" 5 | }, 6 | "dependencies": { 7 | "fs": "^0.0.1-security", 8 | "ollama-node": "^0.0.3", 9 | "path": "^0.12.7" 10 | }, 11 | "devDependencies": { 12 | "tsx": "^4.6.2", 13 | "typescript": "^5.3.3" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /examples/typescript-simplechat/client.ts: -------------------------------------------------------------------------------- 1 | import * as readline from "readline"; 2 | 3 | const model = "llama3"; 4 | type Message = { 5 | role: "assistant" | "user" | "system"; 6 | content: string; 7 | } 8 | const messages: Message[] = [{ 9 | role: "system", 10 | content: "You are a helpful AI agent." 11 | }] 12 | 13 | const rl = readline.createInterface({ 14 | input: process.stdin, 15 | output: process.stdout 16 | }) 17 | 18 | async function chat(messages: Message[]): Promise { 19 | const body = { 20 | model: model, 21 | messages: messages 22 | } 23 | 24 | const response = await fetch("http://localhost:11434/api/chat", { 25 | method: "POST", 26 | body: JSON.stringify(body) 27 | }) 28 | 29 | const reader = response.body?.getReader() 30 | if (!reader) { 31 | throw new Error("Failed to read response body") 32 | } 33 | let content = "" 34 | while (true) { 35 | const { done, value } = await reader.read() 36 | if (done) { 37 | break; 38 | } 39 | const rawjson = new TextDecoder().decode(value); 40 | const json = JSON.parse(rawjson) 41 | 42 | if (json.done === false) { 43 | process.stdout.write(json.message.content); 44 | content += json.message.content 45 | } 46 | 47 | } 48 | return { role: "assistant", content: content }; 49 | } 50 | 51 | async function askQuestion(): Promise { 52 | return new Promise((resolve) => { 53 | rl.question("\n\nAsk a question: (press enter alone to quit)\n\n", async (user_input) => { 54 | if (user_input.trim() === "") { 55 | rl.close(); 56 | console.log("Thankyou. 
Goodbye.\n") 57 | console.log("=======\nHere is the message history that was used in this conversation.\n=======\n") 58 | messages.forEach(message => { 59 | console.log(message) 60 | }) 61 | resolve(); 62 | } else { 63 | console.log(); 64 | messages.push({ role: "user", content: user_input }); 65 | messages.push(await chat(messages)); 66 | await askQuestion(); // Ask the next question 67 | } 68 | }); 69 | }); 70 | } 71 | 72 | async function main() { 73 | await askQuestion(); 74 | 75 | } 76 | 77 | main(); 78 | -------------------------------------------------------------------------------- /examples/typescript-simplechat/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "start": "tsx client.ts" 4 | }, 5 | "dependencies": { 6 | "@types/node": "^20.10.4", 7 | "prompt-sync": "^4.2.0", 8 | "readline": "^1.3.0", 9 | "tsx": "^4.6.2", 10 | "typescript": "^5.3.3" 11 | } 12 | } -------------------------------------------------------------------------------- /examples/typescript-simplechat/readme.md: -------------------------------------------------------------------------------- 1 | # Simple Chat Example 2 | 3 | The **chat** endpoint, available as of v0.1.14, is one of two ways to generate text from an LLM with Ollama. At a high level, you provide the endpoint an array of message objects with a role and content specified. Then with each output and prompt, you add more messages, which builds up the history. 4 | 5 | ## Run the Example 6 | 7 | `npm start` 8 | 9 | ## Review the Code 10 | 11 | You can see in the **chat** function that is actually calling the endpoint is simply done with: 12 | 13 | ```typescript 14 | const body = { 15 | model: model, 16 | messages: messages 17 | } 18 | 19 | const response = await fetch("http://localhost:11434/api/chat", { 20 | method: "POST", 21 | body: JSON.stringify(body) 22 | }) 23 | ``` 24 | 25 | With the **generate** endpoint, you need to provide a `prompt`. But with **chat**, you provide `messages`. And the resulting stream of responses includes a `message` object with a `content` field. 26 | 27 | The final JSON object doesn't provide the full content, so you will need to build the content yourself. In this example, **chat** takes the full array of messages and outputs the resulting message from this call of the chat endpoint. 28 | 29 | In the **askQuestion** function, we collect `user_input` and add it as a message to our messages, and that is passed to the chat function. When the LLM is done responding, the output is added as another message to the messages array. 30 | 31 | At the end, you will see a printout of all the messages. 32 | 33 | ## Next Steps 34 | 35 | In this example, all generations are kept. You might want to experiment with summarizing everything older than 10 conversations to enable longer history with less context being used. 
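The "summarize older history" idea suggested above (and in the Python chat example earlier) can be prototyped with a small helper. The sketch below is in Python against the same `/api/chat` endpoint; the function name, the ten-message cutoff, and the summarization prompt are illustrative choices, not part of this example.

```python
import requests

OLLAMA_CHAT = "http://localhost:11434/api/chat"  # same endpoint the examples above call

def compact_history(messages, model="llama3", keep_last=10):
    """Fold everything except the last keep_last messages into one summary message."""
    if len(messages) <= keep_last:
        return messages
    older, recent = messages[:-keep_last], messages[-keep_last:]
    transcript = "\n".join(f"{m['role']}: {m['content']}" for m in older)
    r = requests.post(OLLAMA_CHAT, json={
        "model": model,
        "messages": [{"role": "user",
                      "content": "Summarize this conversation in a few sentences:\n" + transcript}],
        "stream": False,
    })
    summary = r.json()["message"]["content"]
    return [{"role": "system", "content": "Earlier conversation summary: " + summary}] + recent
```

Calling `compact_history(messages)` right before each chat request would keep the request payload bounded while preserving the gist of older turns.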
36 | -------------------------------------------------------------------------------- /format/bytes.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | const ( 9 | Byte = 1 10 | 11 | KiloByte = Byte * 1000 12 | MegaByte = KiloByte * 1000 13 | GigaByte = MegaByte * 1000 14 | TeraByte = GigaByte * 1000 15 | 16 | KibiByte = Byte * 1024 17 | MebiByte = KibiByte * 1024 18 | GibiByte = MebiByte * 1024 19 | ) 20 | 21 | func HumanBytes(b int64) string { 22 | var value float64 23 | var unit string 24 | 25 | switch { 26 | case b >= TeraByte: 27 | value = float64(b) / TeraByte 28 | unit = "TB" 29 | case b >= GigaByte: 30 | value = float64(b) / GigaByte 31 | unit = "GB" 32 | case b >= MegaByte: 33 | value = float64(b) / MegaByte 34 | unit = "MB" 35 | case b >= KiloByte: 36 | value = float64(b) / KiloByte 37 | unit = "KB" 38 | default: 39 | return fmt.Sprintf("%d B", b) 40 | } 41 | 42 | switch { 43 | case value >= 100: 44 | return fmt.Sprintf("%d %s", int(value), unit) 45 | case value >= 10: 46 | return fmt.Sprintf("%d %s", int(value), unit) 47 | case value != math.Trunc(value): 48 | return fmt.Sprintf("%.1f %s", value, unit) 49 | default: 50 | return fmt.Sprintf("%d %s", int(value), unit) 51 | } 52 | } 53 | 54 | func HumanBytes2(b uint64) string { 55 | switch { 56 | case b >= GibiByte: 57 | return fmt.Sprintf("%.1f GiB", float64(b)/GibiByte) 58 | case b >= MebiByte: 59 | return fmt.Sprintf("%.1f MiB", float64(b)/MebiByte) 60 | case b >= KibiByte: 61 | return fmt.Sprintf("%.1f KiB", float64(b)/KibiByte) 62 | default: 63 | return fmt.Sprintf("%d B", b) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /format/format.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | const ( 9 | Thousand = 1000 10 | Million = Thousand * 1000 11 | Billion = Million * 1000 12 | ) 13 | 14 | func HumanNumber(b uint64) string { 15 | switch { 16 | case b >= Billion: 17 | number := float64(b) / Billion 18 | if number == math.Floor(number) { 19 | return fmt.Sprintf("%.0fB", number) // no decimals if whole number 20 | } 21 | return fmt.Sprintf("%.1fB", number) // one decimal if not a whole number 22 | case b >= Million: 23 | number := float64(b) / Million 24 | if number == math.Floor(number) { 25 | return fmt.Sprintf("%.0fM", number) // no decimals if whole number 26 | } 27 | return fmt.Sprintf("%.2fM", number) // two decimals if not a whole number 28 | case b >= Thousand: 29 | return fmt.Sprintf("%.0fK", float64(b)/Thousand) 30 | default: 31 | return fmt.Sprintf("%d", b) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /format/format_test.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestHumanNumber(t *testing.T) { 8 | 9 | type testCase struct { 10 | input uint64 11 | expected string 12 | } 13 | 14 | testCases := []testCase{ 15 | {0, "0"}, 16 | {1000000, "1M"}, 17 | {125000000, "125M"}, 18 | {500500000, "500.50M"}, 19 | {500550000, "500.55M"}, 20 | {1000000000, "1B"}, 21 | {2800000000, "2.8B"}, 22 | {2850000000, "2.9B"}, 23 | {1000000000000, "1000B"}, 24 | } 25 | 26 | for _, tc := range testCases { 27 | t.Run(tc.expected, func(t *testing.T) { 28 | result := HumanNumber(tc.input) 29 | if result != tc.expected { 30 | 
t.Errorf("Expected %s, got %s", tc.expected, result) 31 | } 32 | }) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /format/time.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strings" 7 | "time" 8 | ) 9 | 10 | // humanDuration returns a human-readable approximation of a 11 | // duration (eg. "About a minute", "4 hours ago", etc.). 12 | func humanDuration(d time.Duration) string { 13 | seconds := int(d.Seconds()) 14 | 15 | switch { 16 | case seconds < 1: 17 | return "Less than a second" 18 | case seconds == 1: 19 | return "1 second" 20 | case seconds < 60: 21 | return fmt.Sprintf("%d seconds", seconds) 22 | } 23 | 24 | minutes := int(d.Minutes()) 25 | switch { 26 | case minutes == 1: 27 | return "About a minute" 28 | case minutes < 60: 29 | return fmt.Sprintf("%d minutes", minutes) 30 | } 31 | 32 | hours := int(math.Round(d.Hours())) 33 | switch { 34 | case hours == 1: 35 | return "About an hour" 36 | case hours < 48: 37 | return fmt.Sprintf("%d hours", hours) 38 | case hours < 24*7*2: 39 | return fmt.Sprintf("%d days", hours/24) 40 | case hours < 24*30*2: 41 | return fmt.Sprintf("%d weeks", hours/24/7) 42 | case hours < 24*365*2: 43 | return fmt.Sprintf("%d months", hours/24/30) 44 | } 45 | 46 | return fmt.Sprintf("%d years", int(d.Hours())/24/365) 47 | } 48 | 49 | func HumanTime(t time.Time, zeroValue string) string { 50 | return humanTime(t, zeroValue) 51 | } 52 | 53 | func HumanTimeLower(t time.Time, zeroValue string) string { 54 | return strings.ToLower(humanTime(t, zeroValue)) 55 | } 56 | 57 | func humanTime(t time.Time, zeroValue string) string { 58 | if t.IsZero() { 59 | return zeroValue 60 | } 61 | 62 | delta := time.Since(t) 63 | if int(delta.Hours())/24/365 < -20 { 64 | return "Forever" 65 | } else if delta < 0 { 66 | return humanDuration(-delta) + " from now" 67 | } 68 | 69 | return humanDuration(delta) + " ago" 70 | } 71 | -------------------------------------------------------------------------------- /format/time_test.go: -------------------------------------------------------------------------------- 1 | package format 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func assertEqual(t *testing.T, a interface{}, b interface{}) { 9 | if a != b { 10 | t.Errorf("Assert failed, expected %v, got %v", b, a) 11 | } 12 | } 13 | 14 | func TestHumanTime(t *testing.T) { 15 | now := time.Now() 16 | 17 | t.Run("zero value", func(t *testing.T) { 18 | assertEqual(t, HumanTime(time.Time{}, "never"), "never") 19 | }) 20 | 21 | t.Run("time in the future", func(t *testing.T) { 22 | v := now.Add(48 * time.Hour) 23 | assertEqual(t, HumanTime(v, ""), "2 days from now") 24 | }) 25 | 26 | t.Run("time in the past", func(t *testing.T) { 27 | v := now.Add(-48 * time.Hour) 28 | assertEqual(t, HumanTime(v, ""), "2 days ago") 29 | }) 30 | 31 | t.Run("soon", func(t *testing.T) { 32 | v := now.Add(800 * time.Millisecond) 33 | assertEqual(t, HumanTime(v, ""), "Less than a second from now") 34 | }) 35 | 36 | t.Run("time way in the future", func(t *testing.T) { 37 | v := now.Add(24 * time.Hour * 365 * 200) 38 | assertEqual(t, HumanTime(v, ""), "Forever") 39 | }) 40 | 41 | t.Run("time way in the future lowercase", func(t *testing.T) { 42 | v := now.Add(24 * time.Hour * 365 * 200) 43 | assertEqual(t, HumanTimeLower(v, ""), "forever") 44 | }) 45 | } 46 | -------------------------------------------------------------------------------- /gpu/cpu_common.go: 
-------------------------------------------------------------------------------- 1 | package gpu 2 | 3 | import ( 4 | "log/slog" 5 | 6 | "golang.org/x/sys/cpu" 7 | ) 8 | 9 | func GetCPUVariant() string { 10 | if cpu.X86.HasAVX2 { 11 | slog.Debug("CPU has AVX2") 12 | return "avx2" 13 | } 14 | if cpu.X86.HasAVX { 15 | slog.Debug("CPU has AVX") 16 | return "avx" 17 | } 18 | slog.Debug("CPU does not have vector extensions") 19 | // else LCD 20 | return "" 21 | } 22 | -------------------------------------------------------------------------------- /gpu/cuda_common.go: -------------------------------------------------------------------------------- 1 | //go:build linux || windows 2 | 3 | package gpu 4 | 5 | import ( 6 | "log/slog" 7 | "strings" 8 | ) 9 | 10 | func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { 11 | ids := []string{} 12 | for _, info := range gpuInfo { 13 | if info.Library != "cuda" { 14 | // TODO shouldn't happen if things are wired correctly... 15 | slog.Debug("cudaGetVisibleDevicesEnv skipping over non-cuda device", "library", info.Library) 16 | continue 17 | } 18 | ids = append(ids, info.ID) 19 | } 20 | return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",") 21 | 22 | } 23 | -------------------------------------------------------------------------------- /gpu/gpu_darwin.go: -------------------------------------------------------------------------------- 1 | //go:build darwin 2 | 3 | package gpu 4 | 5 | /* 6 | #cgo CFLAGS: -x objective-c 7 | #cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal 8 | #include "gpu_info_darwin.h" 9 | */ 10 | import "C" 11 | import ( 12 | "runtime" 13 | 14 | "github.com/ollama/ollama/format" 15 | ) 16 | 17 | const ( 18 | metalMinimumMemory = 512 * format.MebiByte 19 | ) 20 | 21 | func GetGPUInfo() GpuInfoList { 22 | mem, _ := GetCPUMem() 23 | if runtime.GOARCH == "amd64" { 24 | return []GpuInfo{ 25 | { 26 | Library: "cpu", 27 | Variant: GetCPUVariant(), 28 | memInfo: mem, 29 | }, 30 | } 31 | } 32 | info := GpuInfo{ 33 | Library: "metal", 34 | ID: "0", 35 | } 36 | info.TotalMemory = uint64(C.getRecommendedMaxVRAM()) 37 | 38 | // TODO is there a way to gather actual allocated video memory? 
(currentAllocatedSize doesn't work) 39 | info.FreeMemory = info.TotalMemory 40 | 41 | info.MinimumMemory = metalMinimumMemory 42 | return []GpuInfo{info} 43 | } 44 | 45 | func GetCPUMem() (memInfo, error) { 46 | return memInfo{ 47 | TotalMemory: uint64(C.getPhysicalMemory()), 48 | FreeMemory: 0, 49 | }, nil 50 | } 51 | 52 | func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) { 53 | // No-op on darwin 54 | return "", "" 55 | } 56 | -------------------------------------------------------------------------------- /gpu/gpu_info.h: -------------------------------------------------------------------------------- 1 | #ifndef __APPLE__ 2 | #ifndef __GPU_INFO_H__ 3 | #define __GPU_INFO_H__ 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef _WIN32 9 | #include 10 | #define LOAD_LIBRARY(lib, flags) dlopen(lib, flags) 11 | #define LOAD_SYMBOL(handle, sym) dlsym(handle, sym) 12 | #define LOAD_ERR() strdup(dlerror()) 13 | #define UNLOAD_LIBRARY(handle) dlclose(handle) 14 | #else 15 | #include 16 | #define LOAD_LIBRARY(lib, flags) LoadLibrary(lib) 17 | #define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym) 18 | #define UNLOAD_LIBRARY(handle) FreeLibrary(handle) 19 | #define LOAD_ERR() ({\ 20 | LPSTR messageBuffer = NULL; \ 21 | size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, \ 22 | NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); \ 23 | char *resp = strdup(messageBuffer); \ 24 | LocalFree(messageBuffer); \ 25 | resp; \ 26 | }) 27 | 28 | #endif 29 | 30 | #define LOG(verbose, ...) \ 31 | do { \ 32 | if (verbose) { \ 33 | fprintf(stderr, __VA_ARGS__); \ 34 | } \ 35 | } while (0) 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | #define GPU_ID_LEN 64 42 | #define GPU_NAME_LEN 96 43 | 44 | typedef struct mem_info { 45 | char *err; // If non-nill, caller responsible for freeing 46 | char gpu_id[GPU_ID_LEN]; 47 | char gpu_name[GPU_NAME_LEN]; 48 | uint64_t total; 49 | uint64_t free; 50 | 51 | // Compute Capability 52 | int major; 53 | int minor; 54 | int patch; 55 | } mem_info_t; 56 | 57 | void cpu_check_ram(mem_info_t *resp); 58 | 59 | #ifdef __cplusplus 60 | } 61 | #endif 62 | 63 | #include "gpu_info_cudart.h" 64 | #include "gpu_info_nvcuda.h" 65 | #include "gpu_info_oneapi.h" 66 | 67 | #endif // __GPU_INFO_H__ 68 | #endif // __APPLE__ -------------------------------------------------------------------------------- /gpu/gpu_info_cpu.c: -------------------------------------------------------------------------------- 1 | #include "gpu_info.h" 2 | // Fallbacks for CPU mode 3 | 4 | #ifdef _WIN32 5 | #include 6 | void cpu_check_ram(mem_info_t *resp) { 7 | resp->err = NULL; 8 | MEMORYSTATUSEX info; 9 | info.dwLength = sizeof(info); 10 | if (GlobalMemoryStatusEx(&info) != 0) { 11 | resp->total = info.ullTotalPhys; 12 | resp->free = info.ullAvailPhys; 13 | snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0"); 14 | } else { 15 | resp->err = LOAD_ERR(); 16 | } 17 | return; 18 | } 19 | 20 | #elif __linux__ 21 | #include 22 | #include 23 | #include 24 | void cpu_check_ram(mem_info_t *resp) { 25 | struct sysinfo info; 26 | resp->err = NULL; 27 | if (sysinfo(&info) != 0) { 28 | resp->err = strdup(strerror(errno)); 29 | } else { 30 | resp->total = info.totalram * info.mem_unit; 31 | resp->free = info.freeram * info.mem_unit; 32 | snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0"); 33 | } 34 | return; 35 | } 36 | 37 | #elif __APPLE__ 38 | // TODO consider an Apple implementation that 
does something useful 39 | // mem_info_t cpu_check_ram() { 40 | // mem_info_t resp = {0, 0, NULL}; 41 | // return resp; 42 | // } 43 | #else 44 | #error "Unsupported platform" 45 | #endif 46 | -------------------------------------------------------------------------------- /gpu/gpu_info_darwin.h: -------------------------------------------------------------------------------- 1 | #import 2 | #include 3 | uint64_t getRecommendedMaxVRAM(); 4 | uint64_t getPhysicalMemory(); 5 | -------------------------------------------------------------------------------- /gpu/gpu_info_darwin.m: -------------------------------------------------------------------------------- 1 | // go:build darwin 2 | #include "gpu_info_darwin.h" 3 | 4 | uint64_t getRecommendedMaxVRAM() { 5 | id device = MTLCreateSystemDefaultDevice(); 6 | uint64_t result = device.recommendedMaxWorkingSetSize; 7 | CFRelease(device); 8 | return result; 9 | } 10 | 11 | uint64_t getPhysicalMemory() { 12 | return [[NSProcessInfo processInfo] physicalMemory]; 13 | } 14 | -------------------------------------------------------------------------------- /gpu/gpu_info_nvcuda.h: -------------------------------------------------------------------------------- 1 | #ifndef __APPLE__ 2 | #ifndef __GPU_INFO_NVCUDA_H__ 3 | #define __GPU_INFO_NVCUDA_H__ 4 | #include "gpu_info.h" 5 | 6 | // Just enough typedef's to dlopen/dlsym for memory information 7 | typedef enum cudaError_enum { 8 | CUDA_SUCCESS = 0, 9 | CUDA_ERROR_INVALID_VALUE = 1, 10 | CUDA_ERROR_MEMORY_ALLOCATION = 2, 11 | CUDA_ERROR_NOT_INITIALIZED = 3, 12 | CUDA_ERROR_INSUFFICIENT_DRIVER = 35, 13 | // Other values omitted for now... 14 | } CUresult; 15 | 16 | typedef enum CUdevice_attribute_enum { 17 | CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75, 18 | CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76, 19 | 20 | // TODO - not yet wired up but may be useful for Jetson or other 21 | // integrated GPU scenarios with shared memory 22 | CU_DEVICE_ATTRIBUTE_INTEGRATED = 18 23 | 24 | } CUdevice_attribute; 25 | 26 | typedef void *nvcudaDevice_t; // Opaque is sufficient 27 | typedef struct nvcudaMemory_st { 28 | uint64_t total; 29 | uint64_t free; 30 | } nvcudaMemory_t; 31 | 32 | typedef struct nvcudaDriverVersion { 33 | int major; 34 | int minor; 35 | } nvcudaDriverVersion_t; 36 | 37 | typedef struct CUuuid_st { 38 | unsigned char bytes[16]; 39 | } CUuuid; 40 | 41 | typedef int CUdevice; 42 | typedef void* CUcontext; 43 | 44 | typedef struct nvcuda_handle { 45 | void *handle; 46 | uint16_t verbose; 47 | int driver_major; 48 | int driver_minor; 49 | CUresult (*cuInit)(unsigned int Flags); 50 | CUresult (*cuDriverGetVersion)(int *driverVersion); 51 | CUresult (*cuDeviceGetCount)(int *); 52 | CUresult (*cuDeviceGet)(CUdevice* device, int ordinal); 53 | CUresult (*cuDeviceGetAttribute)(int* pi, CUdevice_attribute attrib, CUdevice dev); 54 | CUresult (*cuDeviceGetUuid)(CUuuid* uuid, CUdevice dev); // signature compatible with cuDeviceGetUuid_v2 55 | CUresult (*cuDeviceGetName)(char *name, int len, CUdevice dev); 56 | 57 | // Context specific aspects 58 | CUresult (*cuCtxCreate_v3)(CUcontext* pctx, void *params, int len, unsigned int flags, CUdevice dev); 59 | CUresult (*cuMemGetInfo_v2)(uint64_t* free, uint64_t* total); 60 | CUresult (*cuCtxDestroy)(CUcontext ctx); 61 | } nvcuda_handle_t; 62 | 63 | typedef struct nvcuda_init_resp { 64 | char *err; // If err is non-null handle is invalid 65 | nvcuda_handle_t ch; 66 | int num_devices; 67 | } nvcuda_init_resp_t; 68 | 69 | void nvcuda_init(char 
*nvcuda_lib_path, nvcuda_init_resp_t *resp); 70 | void nvcuda_check_vram(nvcuda_handle_t ch, int device_id, mem_info_t *resp); 71 | void nvcuda_release(nvcuda_handle_t ch); 72 | 73 | #endif // __GPU_INFO_NVCUDA_H__ 74 | #endif // __APPLE__ 75 | -------------------------------------------------------------------------------- /gpu/gpu_oneapi.go: -------------------------------------------------------------------------------- 1 | //go:build linux || windows 2 | 3 | package gpu 4 | 5 | import ( 6 | "log/slog" 7 | "strings" 8 | ) 9 | 10 | func oneapiGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { 11 | ids := []string{} 12 | for _, info := range gpuInfo { 13 | if info.Library != "oneapi" { 14 | // TODO shouldn't happen if things are wired correctly... 15 | slog.Debug("oneapiGetVisibleDevicesEnv skipping over non-sycl device", "library", info.Library) 16 | continue 17 | } 18 | ids = append(ids, info.ID) 19 | } 20 | return "ONEAPI_DEVICE_SELECTOR", "level_zero:" + strings.Join(ids, ",") 21 | } 22 | -------------------------------------------------------------------------------- /gpu/gpu_test.go: -------------------------------------------------------------------------------- 1 | package gpu 2 | 3 | import ( 4 | "runtime" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestBasicGetGPUInfo(t *testing.T) { 11 | info := GetGPUInfo() 12 | assert.Greater(t, len(info), 0) 13 | assert.Contains(t, "cuda rocm cpu metal", info[0].Library) 14 | if info[0].Library != "cpu" { 15 | assert.Greater(t, info[0].TotalMemory, uint64(0)) 16 | assert.Greater(t, info[0].FreeMemory, uint64(0)) 17 | } 18 | } 19 | 20 | func TestCPUMemInfo(t *testing.T) { 21 | info, err := GetCPUMem() 22 | assert.NoError(t, err) 23 | switch runtime.GOOS { 24 | case "darwin": 25 | t.Skip("CPU memory not populated on darwin") 26 | case "linux", "windows": 27 | assert.Greater(t, info.TotalMemory, uint64(0)) 28 | assert.Greater(t, info.FreeMemory, uint64(0)) 29 | default: 30 | return 31 | } 32 | } 33 | 34 | // TODO - add some logic to figure out card type through other means and actually verify we got back what we expected 35 | -------------------------------------------------------------------------------- /gpu/types.go: -------------------------------------------------------------------------------- 1 | package gpu 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | 7 | "github.com/ollama/ollama/format" 8 | ) 9 | 10 | type memInfo struct { 11 | TotalMemory uint64 `json:"total_memory,omitempty"` 12 | FreeMemory uint64 `json:"free_memory,omitempty"` 13 | } 14 | 15 | // Beginning of an `ollama info` command 16 | type GpuInfo struct { 17 | memInfo 18 | Library string `json:"library,omitempty"` 19 | 20 | // Optional variant to select (e.g. 
versions, cpu feature flags) 21 | Variant string `json:"variant,omitempty"` 22 | 23 | // MinimumMemory represents the minimum memory required to use the GPU 24 | MinimumMemory uint64 `json:"-"` 25 | 26 | // Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly 27 | DependencyPath string `json:"lib_path,omitempty"` 28 | 29 | // GPU information 30 | ID string `json:"gpu_id"` // string to use for selection of this specific GPU 31 | Name string `json:"name"` // user friendly name if available 32 | Compute string `json:"compute"` // Compute Capability or gfx 33 | 34 | // Driver Information - TODO no need to put this on each GPU 35 | DriverMajor int `json:"driver_major,omitempty"` 36 | DriverMinor int `json:"driver_minor,omitempty"` 37 | 38 | // TODO other performance capability info to help in scheduling decisions 39 | } 40 | 41 | type GpuInfoList []GpuInfo 42 | 43 | // Split up the set of gpu info's by Library and variant 44 | func (l GpuInfoList) ByLibrary() []GpuInfoList { 45 | resp := []GpuInfoList{} 46 | libs := []string{} 47 | for _, info := range l { 48 | found := false 49 | requested := info.Library 50 | if info.Variant != "" { 51 | requested += "_" + info.Variant 52 | } 53 | for i, lib := range libs { 54 | if lib == requested { 55 | resp[i] = append(resp[i], info) 56 | found = true 57 | break 58 | } 59 | } 60 | if !found { 61 | libs = append(libs, info.Library) 62 | resp = append(resp, []GpuInfo{info}) 63 | } 64 | } 65 | return resp 66 | } 67 | 68 | // Report the GPU information into the log an Info level 69 | func (l GpuInfoList) LogDetails() { 70 | for _, g := range l { 71 | slog.Info("inference compute", 72 | "id", g.ID, 73 | "library", g.Library, 74 | "compute", g.Compute, 75 | "driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor), 76 | "name", g.Name, 77 | "total", format.HumanBytes2(g.TotalMemory), 78 | "available", format.HumanBytes2(g.FreeMemory), 79 | ) 80 | } 81 | } 82 | 83 | // Sort by Free Space 84 | type ByFreeMemory []GpuInfo 85 | 86 | func (a ByFreeMemory) Len() int { return len(a) } 87 | func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 88 | func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory } 89 | -------------------------------------------------------------------------------- /integration/README.md: -------------------------------------------------------------------------------- 1 | # Integration Tests 2 | 3 | This directory contains integration tests to exercise Ollama end-to-end to verify behavior 4 | 5 | By default, these tests are disabled so `go test ./...` will exercise only unit tests. To run integration tests you must pass the integration tag. `go test -tags=integration ./...` 6 | 7 | 8 | The integration tests have 2 modes of operating. 9 | 10 | 1. By default, they will start the server on a random port, run the tests, and then shutdown the server. 11 | 2. 
If `OLLAMA_TEST_EXISTING` is set to a non-empty string, the tests will run against an existing running server, which can be remote 12 | -------------------------------------------------------------------------------- /integration/basic_test.go: -------------------------------------------------------------------------------- 1 | //go:build integration 2 | 3 | package integration 4 | 5 | import ( 6 | "context" 7 | "log/slog" 8 | "os" 9 | "runtime" 10 | "testing" 11 | "time" 12 | 13 | "github.com/ollama/ollama/api" 14 | "github.com/stretchr/testify/require" 15 | ) 16 | 17 | func TestOrcaMiniBlueSky(t *testing.T) { 18 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) 19 | defer cancel() 20 | // Set up the test data 21 | req := api.GenerateRequest{ 22 | Model: "orca-mini", 23 | Prompt: "why is the sky blue?", 24 | Stream: &stream, 25 | Options: map[string]interface{}{ 26 | "temperature": 0, 27 | "seed": 123, 28 | }, 29 | } 30 | GenerateTestHelper(ctx, t, req, []string{"rayleigh", "scattering"}) 31 | } 32 | 33 | func TestUnicodeModelDir(t *testing.T) { 34 | // This is only useful for Windows with utf-16 characters, so skip this test for other platforms 35 | if runtime.GOOS != "windows" { 36 | t.Skip("Unicode test only applicable to windows") 37 | } 38 | // Only works for local testing 39 | if os.Getenv("OLLAMA_TEST_EXISTING") != "" { 40 | t.Skip("TestUnicodeModelDir only works for local testing, skipping") 41 | } 42 | 43 | modelDir, err := os.MkdirTemp("", "ollama_埃") 44 | require.NoError(t, err) 45 | defer os.RemoveAll(modelDir) 46 | slog.Info("unicode", "OLLAMA_MODELS", modelDir) 47 | 48 | oldModelsDir := os.Getenv("OLLAMA_MODELS") 49 | if oldModelsDir == "" { 50 | defer os.Unsetenv("OLLAMA_MODELS") 51 | } else { 52 | defer os.Setenv("OLLAMA_MODELS", oldModelsDir) 53 | } 54 | err = os.Setenv("OLLAMA_MODELS", modelDir) 55 | require.NoError(t, err) 56 | 57 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) 58 | defer cancel() 59 | 60 | req := api.GenerateRequest{ 61 | Model: "orca-mini", 62 | Prompt: "why is the sky blue?", 63 | Stream: &stream, 64 | Options: map[string]interface{}{ 65 | "temperature": 0, 66 | "seed": 123, 67 | }, 68 | } 69 | GenerateTestHelper(ctx, t, req, []string{"rayleigh", "scattering"}) 70 | } 71 | -------------------------------------------------------------------------------- /integration/context_test.go: -------------------------------------------------------------------------------- 1 | //go:build integration 2 | 3 | package integration 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | "time" 9 | 10 | "github.com/ollama/ollama/api" 11 | ) 12 | 13 | func TestContextExhaustion(t *testing.T) { 14 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) // TODO maybe shorter? 
15 | defer cancel() 16 | // Set up the test data 17 | req := api.GenerateRequest{ 18 | Model: "llama2", 19 | Prompt: "Write me a story with a ton of emojis?", 20 | Stream: &stream, 21 | Options: map[string]interface{}{ 22 | "temperature": 0, 23 | "seed": 123, 24 | "num_ctx": 128, 25 | }, 26 | } 27 | GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"}) 28 | } 29 | -------------------------------------------------------------------------------- /integration/llm_test.go: -------------------------------------------------------------------------------- 1 | //go:build integration 2 | 3 | package integration 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | "time" 9 | 10 | "github.com/ollama/ollama/api" 11 | ) 12 | 13 | // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server 14 | // package to avoid circular dependencies 15 | 16 | var ( 17 | stream = false 18 | req = [2]api.GenerateRequest{ 19 | { 20 | Model: "orca-mini", 21 | Prompt: "why is the ocean blue?", 22 | Stream: &stream, 23 | Options: map[string]interface{}{ 24 | "seed": 42, 25 | "temperature": 0.0, 26 | }, 27 | }, { 28 | Model: "orca-mini", 29 | Prompt: "what is the origin of the us thanksgiving holiday?", 30 | Stream: &stream, 31 | Options: map[string]interface{}{ 32 | "seed": 42, 33 | "temperature": 0.0, 34 | }, 35 | }, 36 | } 37 | resp = [2][]string{ 38 | []string{"sunlight"}, 39 | []string{"england", "english", "massachusetts", "pilgrims"}, 40 | } 41 | ) 42 | 43 | func TestIntegrationSimpleOrcaMini(t *testing.T) { 44 | ctx, cancel := context.WithTimeout(context.Background(), time.Second*120) 45 | defer cancel() 46 | GenerateTestHelper(ctx, t, req[0], resp[0]) 47 | } 48 | -------------------------------------------------------------------------------- /llm/ext_server/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(TARGET ollama_llama_server) 3 | option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON) 4 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 5 | add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h) 6 | install(TARGETS ${TARGET} RUNTIME) 7 | target_compile_definitions(${TARGET} PRIVATE 8 | SERVER_VERBOSE=$ 9 | ) 10 | target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT}) 11 | if (WIN32) 12 | TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32) 13 | endif() 14 | target_compile_features(${TARGET} PRIVATE cxx_std_11) -------------------------------------------------------------------------------- /llm/generate/generate_darwin.go: -------------------------------------------------------------------------------- 1 | package generate 2 | 3 | //go:generate bash ./gen_darwin.sh 4 | -------------------------------------------------------------------------------- /llm/generate/generate_linux.go: -------------------------------------------------------------------------------- 1 | package generate 2 | 3 | //go:generate bash ./gen_linux.sh 4 | -------------------------------------------------------------------------------- /llm/generate/generate_windows.go: -------------------------------------------------------------------------------- 1 | package generate 2 | 3 | //go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1 4 | -------------------------------------------------------------------------------- /llm/llm.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | // #cgo CFLAGS: 
-Illama.cpp 4 | // #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++ 5 | // #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++ 6 | // #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++ 7 | // #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++ 8 | // #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++ 9 | // #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++ 10 | // #include 11 | // #include "llama.h" 12 | import "C" 13 | import ( 14 | "fmt" 15 | "unsafe" 16 | ) 17 | 18 | // SystemInfo is an unused example of calling llama.cpp functions using CGo 19 | func SystemInfo() string { 20 | return C.GoString(C.llama_print_system_info()) 21 | } 22 | 23 | func Quantize(infile, outfile string, ftype fileType) error { 24 | cinfile := C.CString(infile) 25 | defer C.free(unsafe.Pointer(cinfile)) 26 | 27 | coutfile := C.CString(outfile) 28 | defer C.free(unsafe.Pointer(coutfile)) 29 | 30 | params := C.llama_model_quantize_default_params() 31 | params.nthread = -1 32 | params.ftype = ftype.Value() 33 | 34 | if rc := C.llama_model_quantize(cinfile, coutfile, ¶ms); rc != 0 { 35 | return fmt.Errorf("llama_model_quantize: %d", rc) 36 | } 37 | 38 | return nil 39 | } 40 | -------------------------------------------------------------------------------- /llm/llm_darwin_amd64.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import ( 4 | "embed" 5 | ) 6 | 7 | //go:embed build/darwin/x86_64/*/bin/* 8 | var libEmbed embed.FS 9 | -------------------------------------------------------------------------------- /llm/llm_darwin_arm64.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import ( 4 | "embed" 5 | ) 6 | 7 | //go:embed build/darwin/arm64/*/bin/* 8 | var libEmbed embed.FS 9 | -------------------------------------------------------------------------------- /llm/llm_linux.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import "embed" 4 | 5 | //go:embed build/linux/*/*/bin/* 6 | var libEmbed embed.FS 7 | -------------------------------------------------------------------------------- /llm/llm_windows.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import "embed" 4 | 5 | // unused on windows 6 | var libEmbed embed.FS 7 | -------------------------------------------------------------------------------- /llm/patches/01-load-progress.diff: -------------------------------------------------------------------------------- 1 | diff --git a/common/common.cpp b/common/common.cpp 2 | index ba1ecf0e..cead57cc 100644 3 | --- a/common/common.cpp 4 | +++ b/common/common.cpp 5 | @@ -1836,6 +1836,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & 6 | mparams.use_mmap = params.use_mmap; 7 | mparams.use_mlock = params.use_mlock; 8 | mparams.check_tensors = params.check_tensors; 9 | + mparams.progress_callback = params.progress_callback; 10 | + mparams.progress_callback_user_data = params.progress_callback_user_data; 11 | if (params.kv_overrides.empty()) { 12 | mparams.kv_overrides = NULL; 13 | } else { 14 | diff --git a/common/common.h b/common/common.h 15 | index d80344f2..71e84834 100644 16 | --- a/common/common.h 17 | +++ b/common/common.h 18 | @@ -174,6 
+174,13 @@ struct gpt_params { 19 | // multimodal models (see examples/llava) 20 | std::string mmproj = ""; // path to multimodal projector 21 | std::vector image; // path to image file(s) 22 | + 23 | + // Called with a progress value between 0.0 and 1.0. Pass NULL to disable. 24 | + // If the provided progress_callback returns true, model loading continues. 25 | + // If it returns false, model loading is immediately aborted. 26 | + llama_progress_callback progress_callback = NULL; 27 | + // context pointer passed to the progress callback 28 | + void * progress_callback_user_data; 29 | }; 30 | 31 | void gpt_params_handle_model_default(gpt_params & params); 32 | -------------------------------------------------------------------------------- /llm/patches/02-clip-log.diff: -------------------------------------------------------------------------------- 1 | diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp 2 | index e431c7f7..f077e688 100644 3 | --- a/examples/llava/clip.cpp 4 | +++ b/examples/llava/clip.cpp 5 | @@ -3,6 +3,7 @@ 6 | // I'll gradually clean and extend it 7 | // Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch 8 | #include "clip.h" 9 | +#include "common.h" 10 | #include "log.h" 11 | #include "ggml.h" 12 | #include "ggml-alloc.h" 13 | -------------------------------------------------------------------------------- /llm/patches/03-load_exception.diff: -------------------------------------------------------------------------------- 1 | From 544a2d2e646d39e878d87dfbb3398a356bc560ab Mon Sep 17 00:00:00 2001 2 | From: Michael Yang 3 | Date: Thu, 23 May 2024 11:18:45 -0700 4 | Subject: [PATCH] throw exception on load errors 5 | 6 | --- 7 | llama.cpp | 25 ++++++++++++++++--------- 8 | 1 file changed, 16 insertions(+), 9 deletions(-) 9 | 10 | diff --git a/llama.cpp b/llama.cpp 11 | index 15c66077..8ba90b6a 100644 12 | --- a/llama.cpp 13 | +++ b/llama.cpp 14 | @@ -6346,7 +6346,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam 15 | } 16 | } catch (const std::exception & err) { 17 | LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what()); 18 | - return -1; 19 | + throw; 20 | } 21 | 22 | return 0; 23 | @@ -15600,16 +15600,23 @@ struct llama_model * llama_load_model_from_file( 24 | } 25 | model->rpc_servers.push_back(servers); 26 | } 27 | - int status = llama_model_load(path_model, *model, params); 28 | - GGML_ASSERT(status <= 0); 29 | - if (status < 0) { 30 | - if (status == -1) { 31 | - LLAMA_LOG_ERROR("%s: failed to load model\n", __func__); 32 | - } else if (status == -2) { 33 | - LLAMA_LOG_INFO("%s: cancelled model load\n", __func__); 34 | + 35 | + try { 36 | + int status = llama_model_load(path_model, *model, params); 37 | + GGML_ASSERT(status <= 0); 38 | + if (status < 0) { 39 | + if (status == -1) { 40 | + LLAMA_LOG_ERROR("%s: failed to load model\n", __func__); 41 | + } else if (status == -2) { 42 | + LLAMA_LOG_INFO("%s: cancelled model load\n", __func__); 43 | + } 44 | + delete model; 45 | + return nullptr; 46 | } 47 | + } catch (...) 
{ 48 | + LLAMA_LOG_ERROR("%s: exception loading model\n", __func__); 49 | delete model; 50 | - return nullptr; 51 | + throw; 52 | } 53 | 54 | return model; 55 | -- 56 | 2.45.1 57 | 58 | -------------------------------------------------------------------------------- /llm/patches/04-metal.diff: -------------------------------------------------------------------------------- 1 | diff --git a/ggml-metal.m b/ggml-metal.m 2 | index 0207b787..b5e9884b 100644 3 | --- a/ggml-metal.m 4 | +++ b/ggml-metal.m 5 | @@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute( 6 | // to the matrix-vector kernel 7 | int ne11_mm_min = 1; 8 | 9 | -#if 0 10 | // the numbers below are measured on M2 Ultra for 7B and 13B models 11 | // these numbers do not translate to other devices or model sizes 12 | // TODO: need to find a better approach 13 | - if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) { 14 | - switch (src0t) { 15 | - case GGML_TYPE_F16: ne11_mm_min = 2; break; 16 | - case GGML_TYPE_Q8_0: ne11_mm_min = 7; break; 17 | - case GGML_TYPE_Q2_K: ne11_mm_min = 15; break; 18 | - case GGML_TYPE_Q3_K: ne11_mm_min = 7; break; 19 | - case GGML_TYPE_Q4_0: 20 | - case GGML_TYPE_Q4_1: ne11_mm_min = 15; break; 21 | - case GGML_TYPE_Q4_K: ne11_mm_min = 11; break; 22 | - case GGML_TYPE_Q5_0: // not tested yet 23 | - case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet 24 | - case GGML_TYPE_Q5_K: ne11_mm_min = 7; break; 25 | - case GGML_TYPE_Q6_K: ne11_mm_min = 7; break; 26 | - default: ne11_mm_min = 1; break; 27 | - } 28 | + switch (src0t) { 29 | + case GGML_TYPE_F16: ne11_mm_min = 2; break; 30 | + case GGML_TYPE_Q8_0: ne11_mm_min = 7; break; 31 | + case GGML_TYPE_Q2_K: ne11_mm_min = 15; break; 32 | + case GGML_TYPE_Q3_K: ne11_mm_min = 7; break; 33 | + case GGML_TYPE_Q4_0: 34 | + case GGML_TYPE_Q4_1: ne11_mm_min = 15; break; 35 | + case GGML_TYPE_Q4_K: ne11_mm_min = 11; break; 36 | + case GGML_TYPE_Q5_0: // not tested yet 37 | + case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet 38 | + case GGML_TYPE_Q5_K: ne11_mm_min = 7; break; 39 | + case GGML_TYPE_Q6_K: ne11_mm_min = 7; break; 40 | + default: ne11_mm_min = 1; break; 41 | } 42 | -#endif 43 | 44 | // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs 45 | // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel 46 | -------------------------------------------------------------------------------- /llm/patches/05-default-pretokenizer.diff: -------------------------------------------------------------------------------- 1 | diff --git a/llama.cpp b/llama.cpp 2 | index 40d2ec2c..74f3ee9c 100644 3 | --- a/llama.cpp 4 | +++ b/llama.cpp 5 | @@ -4642,16 +4642,7 @@ static void llm_load_vocab( 6 | 7 | // for now, only BPE models have pre-tokenizers 8 | if (vocab.type == LLAMA_VOCAB_TYPE_BPE) { 9 | - if (tokenizer_pre.empty()) { 10 | - LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__); 11 | - LLAMA_LOG_WARN("%s: \n", __func__); 12 | - LLAMA_LOG_WARN("%s: ************************************ \n", __func__); 13 | - LLAMA_LOG_WARN("%s: GENERATION QUALITY WILL BE DEGRADED! 
\n", __func__); 14 | - LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__); 15 | - LLAMA_LOG_WARN("%s: ************************************ \n", __func__); 16 | - LLAMA_LOG_WARN("%s: \n", __func__); 17 | - vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; 18 | - } else if ( 19 | + if ( 20 | tokenizer_pre == "default") { 21 | vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; 22 | } else if ( 23 | @@ -4703,7 +4694,8 @@ static void llm_load_vocab( 24 | tokenizer_pre == "smaug-bpe") { 25 | vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMAUG; 26 | } else { 27 | - throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str())); 28 | + LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__); 29 | + vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; 30 | } 31 | } else { 32 | vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; 33 | -------------------------------------------------------------------------------- /llm/status.go: -------------------------------------------------------------------------------- 1 | package llm 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | ) 7 | 8 | // StatusWriter is a writer that captures error messages from the llama runner process 9 | type StatusWriter struct { 10 | LastErrMsg string 11 | out *os.File 12 | } 13 | 14 | func NewStatusWriter(out *os.File) *StatusWriter { 15 | return &StatusWriter{ 16 | out: out, 17 | } 18 | } 19 | 20 | // TODO - regex matching to detect errors like 21 | // libcublasLt.so.11: cannot open shared object file: No such file or directory 22 | 23 | var errorPrefixes = []string{ 24 | "error:", 25 | "CUDA error", 26 | "cudaMalloc failed", 27 | "\"ERR\"", 28 | } 29 | 30 | func (w *StatusWriter) Write(b []byte) (int, error) { 31 | var errMsg string 32 | for _, prefix := range errorPrefixes { 33 | if _, after, ok := bytes.Cut(b, []byte(prefix)); ok { 34 | errMsg = prefix + string(bytes.TrimSpace(after)) 35 | } 36 | } 37 | if errMsg != "" { 38 | w.LastErrMsg = errMsg 39 | } 40 | 41 | return w.out.Write(b) 42 | } 43 | -------------------------------------------------------------------------------- /macapp/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "browser": true, 4 | "es6": true, 5 | "node": true 6 | }, 7 | "extends": [ 8 | "eslint:recommended", 9 | "plugin:@typescript-eslint/eslint-recommended", 10 | "plugin:@typescript-eslint/recommended", 11 | "plugin:import/recommended", 12 | "plugin:import/electron", 13 | "plugin:import/typescript" 14 | ], 15 | "parser": "@typescript-eslint/parser" 16 | } 17 | -------------------------------------------------------------------------------- /macapp/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | 9 | # Diagnostic reports (https://nodejs.org/api/report.html) 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | *.pid.lock 17 | .DS_Store 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 
| # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # TypeScript cache 43 | *.tsbuildinfo 44 | 45 | # Optional npm cache directory 46 | .npm 47 | 48 | # Optional eslint cache 49 | .eslintcache 50 | 51 | # Optional REPL history 52 | .node_repl_history 53 | 54 | # Output of 'npm pack' 55 | *.tgz 56 | 57 | # Yarn Integrity file 58 | .yarn-integrity 59 | 60 | # dotenv environment variables file 61 | .env 62 | .env.test 63 | 64 | # parcel-bundler cache (https://parceljs.org/) 65 | .cache 66 | 67 | # next.js build output 68 | .next 69 | 70 | # nuxt.js build output 71 | .nuxt 72 | 73 | # vuepress build output 74 | .vuepress/dist 75 | 76 | # Serverless directories 77 | .serverless/ 78 | 79 | # FuseBox cache 80 | .fusebox/ 81 | 82 | # DynamoDB Local files 83 | .dynamodb/ 84 | 85 | # Webpack 86 | .webpack/ 87 | 88 | # Vite 89 | .vite/ 90 | 91 | # Electron-Forge 92 | out/ 93 | -------------------------------------------------------------------------------- /macapp/README.md: -------------------------------------------------------------------------------- 1 | # Desktop 2 | 3 | This app builds upon Ollama to provide a desktop experience for running models. 4 | 5 | ## Developing 6 | 7 | First, build the `ollama` binary: 8 | 9 | ``` 10 | cd .. 11 | go build . 12 | ``` 13 | 14 | Then run the desktop app with `npm start`: 15 | 16 | ``` 17 | cd macapp 18 | npm install 19 | npm start 20 | ``` 21 | 22 | -------------------------------------------------------------------------------- /macapp/assets/icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/icon.icns -------------------------------------------------------------------------------- /macapp/assets/iconDarkTemplate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconDarkTemplate.png -------------------------------------------------------------------------------- /macapp/assets/iconDarkTemplate@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconDarkTemplate@2x.png -------------------------------------------------------------------------------- /macapp/assets/iconDarkUpdateTemplate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconDarkUpdateTemplate.png -------------------------------------------------------------------------------- /macapp/assets/iconDarkUpdateTemplate@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconDarkUpdateTemplate@2x.png -------------------------------------------------------------------------------- /macapp/assets/iconTemplate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconTemplate.png -------------------------------------------------------------------------------- /macapp/assets/iconTemplate@2x.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconTemplate@2x.png -------------------------------------------------------------------------------- /macapp/assets/iconUpdateTemplate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconUpdateTemplate.png -------------------------------------------------------------------------------- /macapp/assets/iconUpdateTemplate@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/assets/iconUpdateTemplate@2x.png -------------------------------------------------------------------------------- /macapp/forge.config.ts: -------------------------------------------------------------------------------- 1 | import type { ForgeConfig } from '@electron-forge/shared-types' 2 | import { MakerSquirrel } from '@electron-forge/maker-squirrel' 3 | import { MakerZIP } from '@electron-forge/maker-zip' 4 | import { PublisherGithub } from '@electron-forge/publisher-github' 5 | import { AutoUnpackNativesPlugin } from '@electron-forge/plugin-auto-unpack-natives' 6 | import { WebpackPlugin } from '@electron-forge/plugin-webpack' 7 | import * as path from 'path' 8 | import * as fs from 'fs' 9 | 10 | import { mainConfig } from './webpack.main.config' 11 | import { rendererConfig } from './webpack.renderer.config' 12 | 13 | const packageJson = JSON.parse(fs.readFileSync(path.resolve(__dirname, './package.json'), 'utf8')) 14 | 15 | const config: ForgeConfig = { 16 | packagerConfig: { 17 | appVersion: process.env.VERSION || packageJson.version, 18 | asar: true, 19 | icon: './assets/icon.icns', 20 | extraResource: [ 21 | '../dist/ollama', 22 | path.join(__dirname, './assets/iconTemplate.png'), 23 | path.join(__dirname, './assets/iconTemplate@2x.png'), 24 | path.join(__dirname, './assets/iconUpdateTemplate.png'), 25 | path.join(__dirname, './assets/iconUpdateTemplate@2x.png'), 26 | path.join(__dirname, './assets/iconDarkTemplate.png'), 27 | path.join(__dirname, './assets/iconDarkTemplate@2x.png'), 28 | path.join(__dirname, './assets/iconDarkUpdateTemplate.png'), 29 | path.join(__dirname, './assets/iconDarkUpdateTemplate@2x.png'), 30 | ], 31 | ...(process.env.SIGN 32 | ? 
{ 33 | osxSign: { 34 | identity: process.env.APPLE_IDENTITY, 35 | }, 36 | osxNotarize: { 37 | tool: 'notarytool', 38 | appleId: process.env.APPLE_ID || '', 39 | appleIdPassword: process.env.APPLE_PASSWORD || '', 40 | teamId: process.env.APPLE_TEAM_ID || '', 41 | }, 42 | } 43 | : {}), 44 | osxUniversal: { 45 | x64ArchFiles: '**/ollama', 46 | }, 47 | }, 48 | rebuildConfig: {}, 49 | makers: [new MakerSquirrel({}), new MakerZIP({}, ['darwin'])], 50 | hooks: { 51 | readPackageJson: async (_, packageJson) => { 52 | return { ...packageJson, version: process.env.VERSION || packageJson.version } 53 | }, 54 | }, 55 | plugins: [ 56 | new AutoUnpackNativesPlugin({}), 57 | new WebpackPlugin({ 58 | mainConfig, 59 | devContentSecurityPolicy: `default-src * 'unsafe-eval' 'unsafe-inline'; img-src data: 'self'`, 60 | renderer: { 61 | config: rendererConfig, 62 | nodeIntegration: true, 63 | entryPoints: [ 64 | { 65 | html: './src/index.html', 66 | js: './src/renderer.tsx', 67 | name: 'main_window', 68 | preload: { 69 | js: './src/preload.ts', 70 | }, 71 | }, 72 | ], 73 | }, 74 | }), 75 | ], 76 | } 77 | 78 | export default config 79 | -------------------------------------------------------------------------------- /macapp/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | 'postcss-import': {}, 4 | tailwindcss: {}, 5 | autoprefixer: {}, 6 | }, 7 | } 8 | -------------------------------------------------------------------------------- /macapp/src/app.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | html, 6 | body { 7 | background: transparent; 8 | } 9 | 10 | .drag { 11 | -webkit-app-region: drag; 12 | } 13 | 14 | .no-drag { 15 | -webkit-app-region: no-drag; 16 | } 17 | 18 | .blink { 19 | -webkit-animation: 1s blink step-end infinite; 20 | -moz-animation: 1s blink step-end infinite; 21 | -ms-animation: 1s blink step-end infinite; 22 | -o-animation: 1s blink step-end infinite; 23 | animation: 1s blink step-end infinite; 24 | } 25 | 26 | @keyframes blink { 27 | from, 28 | to { 29 | color: transparent; 30 | } 31 | 50% { 32 | color: black; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /macapp/src/declarations.d.ts: -------------------------------------------------------------------------------- 1 | declare module '*.svg' { 2 | const content: string 3 | export default content 4 | } 5 | -------------------------------------------------------------------------------- /macapp/src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 | 9 | 10 | -------------------------------------------------------------------------------- /macapp/src/install.ts: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs' 2 | import { exec as cbExec } from 'child_process' 3 | import * as path from 'path' 4 | import { promisify } from 'util' 5 | 6 | const app = process && process.type === 'renderer' ? require('@electron/remote').app : require('electron').app 7 | const ollama = app.isPackaged ? path.join(process.resourcesPath, 'ollama') : path.resolve(process.cwd(), '..', 'ollama') 8 | const exec = promisify(cbExec) 9 | const symlinkPath = '/usr/local/bin/ollama' 10 | 11 | export function installed() { 12 | return fs.existsSync(symlinkPath) && fs.readlinkSync(symlinkPath) === ollama 13 | } 14 | 15 | export async function install() { 16 | const command = `do shell script "mkdir -p ${path.dirname( 17 | symlinkPath 18 | )} && ln -F -s \\"${ollama}\\" \\"${symlinkPath}\\"" with administrator privileges` 19 | 20 | await exec(`osascript -e '${command}'`) 21 | } 22 | -------------------------------------------------------------------------------- /macapp/src/preload.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TfxLabs/ollama/60323e08057d36b617f11d3c4958d342a44d0342/macapp/src/preload.ts -------------------------------------------------------------------------------- /macapp/src/renderer.tsx: -------------------------------------------------------------------------------- 1 | import App from './app' 2 | import './app.css' 3 | import { createRoot } from 'react-dom/client' 4 | 5 | const container = document.getElementById('app') 6 | const root = createRoot(container) 7 | root.render() 8 | -------------------------------------------------------------------------------- /macapp/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: ['./src/**/*.{js,ts,jsx,tsx,mdx}'], 4 | theme: {}, 5 | plugins: [], 6 | } 7 | -------------------------------------------------------------------------------- /macapp/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES6", 4 | "allowJs": true, 5 | "module": "commonjs", 6 | "skipLibCheck": true, 7 | "esModuleInterop": true, 8 | "noImplicitAny": true, 9 | "sourceMap": true, 10 | "baseUrl": ".", 11 | "outDir": "dist", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "paths": { 15 | "*": ["node_modules/*"] 16 | }, 17 | "jsx": "react-jsx" 18 | }, 19 | "include": ["src/**/*"] 20 | } 21 | -------------------------------------------------------------------------------- /macapp/webpack.main.config.ts: -------------------------------------------------------------------------------- 1 | import type { Configuration } from 'webpack' 2 | 3 | import { rules } from './webpack.rules' 4 | import { plugins } from './webpack.plugins' 5 | 6 | export const mainConfig: Configuration = { 7 | /** 8 | * This is the main entry point for your application, it's the first file 9 | * that runs in the main process. 
10 | */ 11 | entry: './src/index.ts', 12 | // Put your normal webpack config below here 13 | module: { 14 | rules, 15 | }, 16 | plugins, 17 | resolve: { 18 | extensions: ['.js', '.ts', '.jsx', '.tsx', '.css', '.json'], 19 | }, 20 | } 21 | -------------------------------------------------------------------------------- /macapp/webpack.plugins.ts: -------------------------------------------------------------------------------- 1 | import type IForkTsCheckerWebpackPlugin from 'fork-ts-checker-webpack-plugin' 2 | import { DefinePlugin } from 'webpack' 3 | 4 | // eslint-disable-next-line @typescript-eslint/no-var-requires 5 | const ForkTsCheckerWebpackPlugin: typeof IForkTsCheckerWebpackPlugin = require('fork-ts-checker-webpack-plugin') 6 | 7 | export const plugins = [ 8 | new ForkTsCheckerWebpackPlugin({ 9 | logger: 'webpack-infrastructure', 10 | }), 11 | new DefinePlugin({ 12 | 'process.env.TELEMETRY_WRITE_KEY': JSON.stringify(process.env.TELEMETRY_WRITE_KEY), 13 | }), 14 | ] 15 | -------------------------------------------------------------------------------- /macapp/webpack.renderer.config.ts: -------------------------------------------------------------------------------- 1 | import type { Configuration } from 'webpack' 2 | 3 | import { rules } from './webpack.rules' 4 | import { plugins } from './webpack.plugins' 5 | 6 | rules.push({ 7 | test: /\.css$/, 8 | use: [{ loader: 'style-loader' }, { loader: 'css-loader' }, { loader: 'postcss-loader' }], 9 | }) 10 | 11 | export const rendererConfig: Configuration = { 12 | module: { 13 | rules, 14 | }, 15 | plugins, 16 | resolve: { 17 | extensions: ['.js', '.ts', '.jsx', '.tsx', '.css'], 18 | }, 19 | } 20 | -------------------------------------------------------------------------------- /macapp/webpack.rules.ts: -------------------------------------------------------------------------------- 1 | import type { ModuleOptions } from 'webpack' 2 | 3 | export const rules: Required['rules'] = [ 4 | // Add support for native node modules 5 | { 6 | // We're specifying native_modules in the test because the asset relocator loader generates a 7 | // "fake" .node file which is really a cjs file. 
8 | test: /native_modules[/\\].+\.node$/, 9 | use: 'node-loader', 10 | }, 11 | { 12 | test: /[/\\]node_modules[/\\].+\.(m?js|node)$/, 13 | parser: { amd: false }, 14 | use: { 15 | loader: '@vercel/webpack-asset-relocator-loader', 16 | options: { 17 | outputAssetBase: 'native_modules', 18 | }, 19 | }, 20 | }, 21 | { 22 | test: /\.tsx?$/, 23 | exclude: /(node_modules|\.webpack)/, 24 | use: { 25 | loader: 'ts-loader', 26 | options: { 27 | transpileOnly: true, 28 | }, 29 | }, 30 | }, 31 | { 32 | test: /\.svg$/, 33 | use: ['@svgr/webpack'], 34 | }, 35 | ] 36 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/ollama/ollama/cmd" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | func main() { 11 | cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background())) 12 | } 13 | -------------------------------------------------------------------------------- /progress/progress.go: -------------------------------------------------------------------------------- 1 | package progress 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | type State interface { 11 | String() string 12 | } 13 | 14 | type Progress struct { 15 | mu sync.Mutex 16 | w io.Writer 17 | 18 | pos int 19 | 20 | ticker *time.Ticker 21 | states []State 22 | } 23 | 24 | func NewProgress(w io.Writer) *Progress { 25 | p := &Progress{w: w} 26 | go p.start() 27 | return p 28 | } 29 | 30 | func (p *Progress) stop() bool { 31 | for _, state := range p.states { 32 | if spinner, ok := state.(*Spinner); ok { 33 | spinner.Stop() 34 | } 35 | } 36 | 37 | if p.ticker != nil { 38 | p.ticker.Stop() 39 | p.ticker = nil 40 | p.render() 41 | return true 42 | } 43 | 44 | return false 45 | } 46 | 47 | func (p *Progress) Stop() bool { 48 | stopped := p.stop() 49 | if stopped { 50 | fmt.Fprint(p.w, "\n") 51 | } 52 | return stopped 53 | } 54 | 55 | func (p *Progress) StopAndClear() bool { 56 | fmt.Fprint(p.w, "\033[?25l") 57 | defer fmt.Fprint(p.w, "\033[?25h") 58 | 59 | stopped := p.stop() 60 | if stopped { 61 | // clear all progress lines 62 | for i := 0; i < p.pos; i++ { 63 | if i > 0 { 64 | fmt.Fprint(p.w, "\033[A") 65 | } 66 | fmt.Fprint(p.w, "\033[2K\033[1G") 67 | } 68 | } 69 | 70 | return stopped 71 | } 72 | 73 | func (p *Progress) Add(key string, state State) { 74 | p.mu.Lock() 75 | defer p.mu.Unlock() 76 | 77 | p.states = append(p.states, state) 78 | } 79 | 80 | func (p *Progress) render() { 81 | p.mu.Lock() 82 | defer p.mu.Unlock() 83 | 84 | fmt.Fprint(p.w, "\033[?25l") 85 | defer fmt.Fprint(p.w, "\033[?25h") 86 | 87 | // clear already rendered progress lines 88 | for i := 0; i < p.pos; i++ { 89 | if i > 0 { 90 | fmt.Fprint(p.w, "\033[A") 91 | } 92 | fmt.Fprint(p.w, "\033[2K\033[1G") 93 | } 94 | 95 | // render progress lines 96 | for i, state := range p.states { 97 | fmt.Fprint(p.w, state.String()) 98 | if i < len(p.states)-1 { 99 | fmt.Fprint(p.w, "\n") 100 | } 101 | } 102 | 103 | p.pos = len(p.states) 104 | } 105 | 106 | func (p *Progress) start() { 107 | p.ticker = time.NewTicker(100 * time.Millisecond) 108 | for range p.ticker.C { 109 | p.render() 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /progress/spinner.go: -------------------------------------------------------------------------------- 1 | package progress 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "time" 7 | ) 8 | 9 | type 
Spinner struct { 10 | message string 11 | messageWidth int 12 | 13 | parts []string 14 | 15 | value int 16 | 17 | ticker *time.Ticker 18 | started time.Time 19 | stopped time.Time 20 | } 21 | 22 | func NewSpinner(message string) *Spinner { 23 | s := &Spinner{ 24 | message: message, 25 | parts: []string{ 26 | "⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏", 27 | }, 28 | started: time.Now(), 29 | } 30 | go s.start() 31 | return s 32 | } 33 | 34 | func (s *Spinner) String() string { 35 | var sb strings.Builder 36 | if len(s.message) > 0 { 37 | message := strings.TrimSpace(s.message) 38 | if s.messageWidth > 0 && len(message) > s.messageWidth { 39 | message = message[:s.messageWidth] 40 | } 41 | 42 | fmt.Fprintf(&sb, "%s", message) 43 | if padding := s.messageWidth - sb.Len(); padding > 0 { 44 | sb.WriteString(strings.Repeat(" ", padding)) 45 | } 46 | 47 | sb.WriteString(" ") 48 | } 49 | 50 | if s.stopped.IsZero() { 51 | spinner := s.parts[s.value] 52 | sb.WriteString(spinner) 53 | sb.WriteString(" ") 54 | } 55 | 56 | return sb.String() 57 | } 58 | 59 | func (s *Spinner) start() { 60 | s.ticker = time.NewTicker(100 * time.Millisecond) 61 | for range s.ticker.C { 62 | s.value = (s.value + 1) % len(s.parts) 63 | if !s.stopped.IsZero() { 64 | return 65 | } 66 | } 67 | } 68 | 69 | func (s *Spinner) Stop() { 70 | if s.stopped.IsZero() { 71 | s.stopped = time.Now() 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /readline/errors.go: -------------------------------------------------------------------------------- 1 | package readline 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | var ( 8 | ErrInterrupt = errors.New("Interrupt") 9 | ) 10 | 11 | type InterruptError struct { 12 | Line []rune 13 | } 14 | 15 | func (*InterruptError) Error() string { 16 | return "Interrupted" 17 | } 18 | -------------------------------------------------------------------------------- /readline/readline_unix.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | 3 | package readline 4 | 5 | import ( 6 | "syscall" 7 | ) 8 | 9 | func handleCharCtrlZ(fd int, termios any) (string, error) { 10 | t := termios.(*Termios) 11 | if err := UnsetRawMode(fd, t); err != nil { 12 | return "", err 13 | } 14 | 15 | _ = syscall.Kill(0, syscall.SIGSTOP) 16 | 17 | // on resume... 
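// Raw mode was unset above so the controlling shell gets a usable terminal
// while this process is stopped. Once the job is resumed, return an empty
// string and no error; nothing was read while suspended.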
18 | return "", nil 19 | } 20 | -------------------------------------------------------------------------------- /readline/readline_windows.go: -------------------------------------------------------------------------------- 1 | package readline 2 | 3 | func handleCharCtrlZ(fd int, state any) (string, error) { 4 | // not supported 5 | return "", nil 6 | } 7 | -------------------------------------------------------------------------------- /readline/term.go: -------------------------------------------------------------------------------- 1 | //go:build aix || darwin || dragonfly || freebsd || (linux && !appengine) || netbsd || openbsd || os400 || solaris 2 | 3 | package readline 4 | 5 | import ( 6 | "syscall" 7 | ) 8 | 9 | type Termios syscall.Termios 10 | 11 | func SetRawMode(fd int) (*Termios, error) { 12 | termios, err := getTermios(fd) 13 | if err != nil { 14 | return nil, err 15 | } 16 | 17 | newTermios := *termios 18 | newTermios.Iflag &^= syscall.IGNBRK | syscall.BRKINT | syscall.PARMRK | syscall.ISTRIP | syscall.INLCR | syscall.IGNCR | syscall.ICRNL | syscall.IXON 19 | newTermios.Lflag &^= syscall.ECHO | syscall.ECHONL | syscall.ICANON | syscall.ISIG | syscall.IEXTEN 20 | newTermios.Cflag &^= syscall.CSIZE | syscall.PARENB 21 | newTermios.Cflag |= syscall.CS8 22 | newTermios.Cc[syscall.VMIN] = 1 23 | newTermios.Cc[syscall.VTIME] = 0 24 | 25 | return termios, setTermios(fd, &newTermios) 26 | } 27 | 28 | func UnsetRawMode(fd int, termios any) error { 29 | t := termios.(*Termios) 30 | return setTermios(fd, t) 31 | } 32 | 33 | // IsTerminal returns true if the given file descriptor is a terminal. 34 | func IsTerminal(fd int) bool { 35 | _, err := getTermios(fd) 36 | return err == nil 37 | } 38 | -------------------------------------------------------------------------------- /readline/term_bsd.go: -------------------------------------------------------------------------------- 1 | //go:build darwin || freebsd || netbsd || openbsd 2 | 3 | package readline 4 | 5 | import ( 6 | "syscall" 7 | "unsafe" 8 | ) 9 | 10 | func getTermios(fd int) (*Termios, error) { 11 | termios := new(Termios) 12 | _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCGETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0) 13 | if err != 0 { 14 | return nil, err 15 | } 16 | return termios, nil 17 | } 18 | 19 | func setTermios(fd int, termios *Termios) error { 20 | _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCSETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0) 21 | if err != 0 { 22 | return err 23 | } 24 | return nil 25 | } 26 | -------------------------------------------------------------------------------- /readline/term_linux.go: -------------------------------------------------------------------------------- 1 | //go:build linux || solaris 2 | 3 | package readline 4 | 5 | import ( 6 | "syscall" 7 | "unsafe" 8 | ) 9 | 10 | const tcgets = 0x5401 11 | const tcsets = 0x5402 12 | 13 | func getTermios(fd int) (*Termios, error) { 14 | termios := new(Termios) 15 | _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcgets, uintptr(unsafe.Pointer(termios)), 0, 0, 0) 16 | if err != 0 { 17 | return nil, err 18 | } 19 | return termios, nil 20 | } 21 | 22 | func setTermios(fd int, termios *Termios) error { 23 | _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcsets, uintptr(unsafe.Pointer(termios)), 0, 0, 0) 24 | if err != 0 { 25 | return err 26 | } 27 | return nil 28 | } 29 | -------------------------------------------------------------------------------- 
/readline/term_windows.go: -------------------------------------------------------------------------------- 1 | package readline 2 | 3 | import ( 4 | "golang.org/x/sys/windows" 5 | ) 6 | 7 | type State struct { 8 | mode uint32 9 | } 10 | 11 | // IsTerminal checks if the given file descriptor is associated with a terminal 12 | func IsTerminal(fd int) bool { 13 | var st uint32 14 | err := windows.GetConsoleMode(windows.Handle(fd), &st) 15 | return err == nil 16 | } 17 | 18 | func SetRawMode(fd int) (*State, error) { 19 | var st uint32 20 | if err := windows.GetConsoleMode(windows.Handle(fd), &st); err != nil { 21 | return nil, err 22 | } 23 | 24 | // this enables raw mode by turning off various flags in the console mode: https://pkg.go.dev/golang.org/x/sys/windows#pkg-constants 25 | raw := st &^ (windows.ENABLE_ECHO_INPUT | windows.ENABLE_PROCESSED_INPUT | windows.ENABLE_LINE_INPUT | windows.ENABLE_PROCESSED_OUTPUT) 26 | 27 | // turn on ENABLE_VIRTUAL_TERMINAL_INPUT to enable escape sequences 28 | raw |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT 29 | if err := windows.SetConsoleMode(windows.Handle(fd), raw); err != nil { 30 | return nil, err 31 | } 32 | return &State{st}, nil 33 | } 34 | 35 | func UnsetRawMode(fd int, state any) error { 36 | s := state.(*State) 37 | return windows.SetConsoleMode(windows.Handle(fd), s.mode) 38 | } 39 | -------------------------------------------------------------------------------- /readline/types.go: -------------------------------------------------------------------------------- 1 | package readline 2 | 3 | const ( 4 | CharNull = 0 5 | CharLineStart = 1 6 | CharBackward = 2 7 | CharInterrupt = 3 8 | CharDelete = 4 9 | CharLineEnd = 5 10 | CharForward = 6 11 | CharBell = 7 12 | CharCtrlH = 8 13 | CharTab = 9 14 | CharCtrlJ = 10 15 | CharKill = 11 16 | CharCtrlL = 12 17 | CharEnter = 13 18 | CharNext = 14 19 | CharPrev = 16 20 | CharBckSearch = 18 21 | CharFwdSearch = 19 22 | CharTranspose = 20 23 | CharCtrlU = 21 24 | CharCtrlW = 23 25 | CharCtrlY = 25 26 | CharCtrlZ = 26 27 | CharEsc = 27 28 | CharSpace = 32 29 | CharEscapeEx = 91 30 | CharBackspace = 127 31 | ) 32 | 33 | const ( 34 | KeyDel = 51 35 | KeyUp = 65 36 | KeyDown = 66 37 | KeyRight = 67 38 | KeyLeft = 68 39 | MetaEnd = 70 40 | MetaStart = 72 41 | ) 42 | 43 | const ( 44 | CursorUp = "\033[1A" 45 | CursorDown = "\033[1B" 46 | CursorRight = "\033[1C" 47 | CursorLeft = "\033[1D" 48 | 49 | CursorSave = "\033[s" 50 | CursorRestore = "\033[u" 51 | 52 | CursorUpN = "\033[%dA" 53 | CursorDownN = "\033[%dB" 54 | CursorRightN = "\033[%dC" 55 | CursorLeftN = "\033[%dD" 56 | 57 | CursorEOL = "\033[E" 58 | CursorBOL = "\033[1G" 59 | CursorHide = "\033[?25l" 60 | CursorShow = "\033[?25h" 61 | 62 | ClearToEOL = "\033[K" 63 | ClearLine = "\033[2K" 64 | ClearScreen = "\033[2J" 65 | CursorReset = "\033[0;0f" 66 | 67 | ColorGrey = "\033[38;5;245m" 68 | ColorDefault = "\033[0m" 69 | 70 | StartBracketedPaste = "\033[?2004h" 71 | EndBracketedPaste = "\033[?2004l" 72 | ) 73 | 74 | const ( 75 | CharBracketedPaste = 50 76 | CharBracketedPasteStart = "00~" 77 | CharBracketedPasteEnd = "01~" 78 | ) 79 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | usage() { 6 | echo "usage: $(basename $0) VERSION" 7 | exit 1 8 | } 9 | 10 | [ "$#" -eq 1 ] || usage 11 | 12 | export VERSION="$1" 13 | 14 | # build universal MacOS binary 15 | sh $(dirname $0)/build_darwin.sh 16 
| 17 | # # build arm64 and amd64 Linux binaries 18 | sh $(dirname $0)/build_linux.sh 19 | 20 | # # build arm64 and amd64 Docker images 21 | sh $(dirname $0)/build_docker.sh 22 | -------------------------------------------------------------------------------- /scripts/build_darwin.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} 6 | export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" 7 | 8 | mkdir -p dist 9 | 10 | for TARGETARCH in arm64 amd64; do 11 | rm -rf llm/llama.cpp/build 12 | GOOS=darwin GOARCH=$TARGETARCH go generate ./... 13 | CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -o dist/ollama-darwin-$TARGETARCH 14 | CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -cover -o dist/ollama-darwin-$TARGETARCH-cov 15 | done 16 | 17 | lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64 18 | rm -f dist/ollama-darwin-arm64 dist/ollama-darwin-amd64 19 | if [ -n "$APPLE_IDENTITY" ]; then 20 | codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama 21 | else 22 | echo "Skipping code signing - set APPLE_IDENTITY" 23 | fi 24 | chmod +x dist/ollama 25 | 26 | # build and optionally sign the mac app 27 | npm install --prefix macapp 28 | if [ -n "$APPLE_IDENTITY" ]; then 29 | npm run --prefix macapp make:sign 30 | else 31 | npm run --prefix macapp make 32 | fi 33 | cp macapp/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip 34 | 35 | # sign the binary and rename it 36 | if [ -n "$APPLE_IDENTITY" ]; then 37 | codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama 38 | else 39 | echo "WARNING: Skipping code signing - set APPLE_IDENTITY" 40 | fi 41 | ditto -c -k --keepParent dist/ollama dist/temp.zip 42 | if [ -n "$APPLE_IDENTITY" ]; then 43 | xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID 44 | fi 45 | mv dist/ollama dist/ollama-darwin 46 | rm -f dist/temp.zip 47 | -------------------------------------------------------------------------------- /scripts/build_linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} 6 | export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" 7 | 8 | BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} 9 | export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""} 10 | mkdir -p dist 11 | 12 | for TARGETARCH in ${BUILD_ARCH}; do 13 | docker build \ 14 | --platform=linux/$TARGETARCH \ 15 | --build-arg=GOFLAGS \ 16 | --build-arg=CGO_CFLAGS \ 17 | --build-arg=OLLAMA_CUSTOM_CPU_DEFS \ 18 | --build-arg=AMDGPU_TARGETS \ 19 | --target build-$TARGETARCH \ 20 | -f Dockerfile \ 21 | -t builder:$TARGETARCH \ 22 | . 
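# The compiled binary lives inside the builder image; create a stopped
# container from it so `docker cp` can copy the artifact out into ./dist,
# then remove the container below.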
23 | docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH 24 | docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/ollama ./dist/ollama-linux-$TARGETARCH 25 | 26 | if [ "$TARGETARCH" = "amd64" ]; then 27 | docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/deps/ ./dist/ 28 | fi 29 | 30 | docker rm builder-$TARGETARCH 31 | done 32 | -------------------------------------------------------------------------------- /scripts/build_remote.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import subprocess 3 | import sys 4 | from urllib.parse import urlparse 5 | from git import Repo 6 | 7 | # Helper script to be able to build on remote repos using git to push local changes 8 | # (e.g. particularly helpful to target a remote windows build system) 9 | # 10 | # Typical windows remote git config looks like this: 11 | # 12 | #[remote "windows-pa"] 13 | # url = jdoe@desktop-foo:C:/Users/Jdoe/code/ollama 14 | # fetch = +refs/heads/*:refs/remotes/windows-pa/* 15 | # uploadpack = powershell git upload-pack 16 | # receivepack = powershell git receive-pack 17 | # 18 | 19 | # TODO - add argpare and make this more configurable 20 | # - force flag becomes optional 21 | # - generate, build or test ... 22 | 23 | # Note: remote repo will need this run once: 24 | # git config --local receive.denyCurrentBranch updateInstead 25 | repo = Repo(".") 26 | 27 | # On linux, add links in /usr/local/bin to the go binaries to avoid needing this 28 | # GoCmd = "/usr/local/go/bin/go" 29 | GoCmd = "go" 30 | 31 | if repo.is_dirty(): 32 | print("Tree is dirty. Commit your changes before running this script") 33 | sys.exit(1) 34 | 35 | if len(sys.argv) != 2: 36 | print("Please specify the remote name: " + ', '.join([r.name for r in repo.remotes])) 37 | sys.exit(1) 38 | remote_name = sys.argv[1] 39 | 40 | remote = {r.name: r for r in repo.remotes}[remote_name] 41 | raw_url = list(remote.urls)[0] 42 | url = urlparse(raw_url) 43 | # Windows urls don't quite parse properly 44 | if url.scheme == "" and url.netloc == "": 45 | url = urlparse("ssh://" + raw_url) 46 | print("URL: " + str(url)) 47 | netloc = url.netloc.split(":")[0] 48 | path = url.path 49 | branch_name = repo.active_branch.name 50 | 51 | print("Force pushing content to remote...") 52 | # Use with care given the force push 53 | remote.push(force=True).raise_if_error() 54 | 55 | print("Ensuring correct branch checked out on remote via ssh...") 56 | subprocess.check_call(['ssh', netloc, 'cd', path, ';', 'git', 'checkout', branch_name]) 57 | 58 | 59 | # TODO - add some hardening to try to figure out how to set up the path properly 60 | # subprocess.check_call(['ssh', netloc, 'cd', path, ';', 'env']) 61 | # TODO - or consider paramiko maybe 62 | 63 | print("Running Windows Build Script") 64 | subprocess.check_call(['ssh', netloc, 'cd', path, ';', "powershell", "-ExecutionPolicy", "Bypass", "-File", "./scripts/build_windows.ps1"]) 65 | 66 | # print("Building") 67 | # subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.']) 68 | 69 | print("Copying built result") 70 | subprocess.check_call(['scp', netloc +":"+ path + "/ollama.exe", './dist/']) 71 | 72 | print("Copying installer") 73 | subprocess.check_call(['scp', netloc +":"+ path + "/dist/Ollama Setup.exe", './dist/']) 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /scripts/publish.sh: 
-------------------------------------------------------------------------------- 1 | # Set your variables here. 2 | REPO="jmorganca/ollama" 3 | 4 | # Check if VERSION is set 5 | if [[ -z "${VERSION}" ]]; then 6 | echo "VERSION is not set. Please set the VERSION environment variable." 7 | exit 1 8 | fi 9 | 10 | OS=$(go env GOOS) 11 | 12 | ./script/build_${OS}.sh 13 | 14 | # Create a new tag if it doesn't exist. 15 | if ! git rev-parse v$VERSION >/dev/null 2>&1; then 16 | git tag v$VERSION 17 | fi 18 | 19 | git push origin v$VERSION 20 | 21 | # Create a new release. 22 | gh release create -p v$VERSION -t v$VERSION 23 | 24 | # Upload the zip file. 25 | gh release upload v$VERSION ./dist/* --clobber 26 | -------------------------------------------------------------------------------- /scripts/push_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | export VERSION=${VERSION:-0.0.0} 6 | export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" 7 | 8 | docker build \ 9 | --push \ 10 | --platform=linux/arm64,linux/amd64 \ 11 | --build-arg=VERSION \ 12 | --build-arg=GOFLAGS \ 13 | -f Dockerfile \ 14 | -t ollama/ollama -t ollama/ollama:$VERSION \ 15 | . 16 | -------------------------------------------------------------------------------- /scripts/rh_linux_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Script for common Dockerfile dependency installation in redhat linux based images 4 | 5 | set -ex 6 | MACHINE=$(uname -m) 7 | 8 | if grep -i "centos" /etc/system-release >/dev/null; then 9 | # Centos 7 derivatives have too old of a git version to run our generate script 10 | # uninstall and ignore failures 11 | yum remove -y git 12 | yum -y install epel-release centos-release-scl 13 | yum -y install dnf 14 | if [ "${MACHINE}" = "x86_64" ]; then 15 | yum -y install https://repo.ius.io/ius-release-el7.rpm 16 | dnf install -y git236 17 | else 18 | dnf install -y rh-git227-git 19 | ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git 20 | fi 21 | dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ 22 | elif grep -i "rocky" /etc/system-release >/dev/null; then 23 | # Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC) 24 | cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo 25 | [vault] 26 | name=Rocky Vault 27 | baseurl=https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ 28 | gpgcheck=1 29 | enabled=1 30 | countme=1 31 | gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-rockyofficial 32 | EOF 33 | dnf install -y git \ 34 | gcc-toolset-10-gcc-10.2.1-8.2.el8 \ 35 | gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 36 | else 37 | echo "ERROR Unexpected distro" 38 | exit 1 39 | fi 40 | 41 | if [ -n "${CMAKE_VERSION}" ]; then 42 | curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 43 | fi 44 | 45 | if [ -n "${GOLANG_VERSION}" ]; then 46 | if [ "${MACHINE}" = "x86_64" ]; then 47 | GO_ARCH="amd64" 48 | else 49 | GO_ARCH="arm64" 50 | fi 51 | mkdir -p /usr/local 52 | curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-${GO_ARCH}.tar.gz | tar xz -C /usr/local 53 | ln -s /usr/local/go/bin/go /usr/local/bin/go 54 | ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt 55 | fi 56 | 
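# Illustrative invocation only (an assumption, not taken verbatim from this
# repo's Dockerfile): export the desired toolchain versions before running
# the script, e.g.
#   CMAKE_VERSION=3.27.6 GOLANG_VERSION=1.22.1 sh ./scripts/rh_linux_deps.sh
# CMAKE_VERSION and GOLANG_VERSION are optional; when either is unset the
# corresponding install step above is skipped.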
-------------------------------------------------------------------------------- /scripts/tag_latest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eu 4 | 5 | # We use 2 different image repositories to handle combining architecture images into multiarch manifest 6 | # (The ROCm image is x86 only and is not a multiarch manifest) 7 | # For developers, you can override the DOCKER_ORG to generate multiarch manifests 8 | # DOCKER_ORG=jdoe VERSION=0.1.30 PUSH=1 ./scripts/tag_latest.sh 9 | DOCKER_ORG=${DOCKER_ORG:-"ollama"} 10 | RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"} 11 | FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"} 12 | 13 | # Set PUSH to a non-empty string to trigger push instead of load 14 | PUSH=${PUSH:-""} 15 | 16 | echo "Assembling manifest and tagging latest" 17 | docker manifest rm ${FINAL_IMAGE_REPO}:latest || true 18 | docker manifest create ${FINAL_IMAGE_REPO}:latest \ 19 | ${RELEASE_IMAGE_REPO}:$VERSION-amd64 \ 20 | ${RELEASE_IMAGE_REPO}:$VERSION-arm64 21 | 22 | docker pull ${RELEASE_IMAGE_REPO}:$VERSION-rocm 23 | docker tag ${RELEASE_IMAGE_REPO}:$VERSION-rocm ${FINAL_IMAGE_REPO}:rocm 24 | 25 | if [ -n "${PUSH}" ]; then 26 | echo "Pushing latest tags up..." 27 | docker manifest push ${FINAL_IMAGE_REPO}:latest 28 | docker push ${FINAL_IMAGE_REPO}:rocm 29 | else 30 | echo "Not pushing ${FINAL_IMAGE_REPO}:latest and ${FINAL_IMAGE_REPO}:rocm" 31 | fi 32 | 33 | 34 | -------------------------------------------------------------------------------- /server/auth.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | "crypto/rand" 6 | "crypto/sha256" 7 | "encoding/base64" 8 | "encoding/hex" 9 | "encoding/json" 10 | "fmt" 11 | "io" 12 | "net/http" 13 | "net/url" 14 | "strconv" 15 | "strings" 16 | "time" 17 | 18 | "github.com/ollama/ollama/api" 19 | "github.com/ollama/ollama/auth" 20 | ) 21 | 22 | type registryChallenge struct { 23 | Realm string 24 | Service string 25 | Scope string 26 | } 27 | 28 | func (r registryChallenge) URL() (*url.URL, error) { 29 | redirectURL, err := url.Parse(r.Realm) 30 | if err != nil { 31 | return nil, err 32 | } 33 | 34 | values := redirectURL.Query() 35 | values.Add("service", r.Service) 36 | for _, s := range strings.Split(r.Scope, " ") { 37 | values.Add("scope", s) 38 | } 39 | 40 | values.Add("ts", strconv.FormatInt(time.Now().Unix(), 10)) 41 | 42 | nonce, err := auth.NewNonce(rand.Reader, 16) 43 | if err != nil { 44 | return nil, err 45 | } 46 | 47 | values.Add("nonce", nonce) 48 | 49 | redirectURL.RawQuery = values.Encode() 50 | return redirectURL, nil 51 | } 52 | 53 | func getAuthorizationToken(ctx context.Context, challenge registryChallenge) (string, error) { 54 | redirectURL, err := challenge.URL() 55 | if err != nil { 56 | return "", err 57 | } 58 | 59 | sha256sum := sha256.Sum256(nil) 60 | data := []byte(fmt.Sprintf("%s,%s,%s", http.MethodGet, redirectURL.String(), base64.StdEncoding.EncodeToString([]byte(hex.EncodeToString(sha256sum[:]))))) 61 | 62 | headers := make(http.Header) 63 | signature, err := auth.Sign(ctx, data) 64 | if err != nil { 65 | return "", err 66 | } 67 | 68 | headers.Add("Authorization", signature) 69 | 70 | response, err := makeRequest(ctx, http.MethodGet, redirectURL, headers, nil, nil) 71 | if err != nil { 72 | return "", err 73 | } 74 | defer response.Body.Close() 75 | 76 | body, err := io.ReadAll(response.Body) 77 | if err != nil { 78 | 
return "", fmt.Errorf("%d: %v", response.StatusCode, err) 79 | } 80 | 81 | if response.StatusCode >= http.StatusBadRequest { 82 | if len(body) > 0 { 83 | return "", fmt.Errorf("%d: %s", response.StatusCode, body) 84 | } else { 85 | return "", fmt.Errorf("%d", response.StatusCode) 86 | } 87 | } 88 | 89 | var token api.TokenResponse 90 | if err := json.Unmarshal(body, &token); err != nil { 91 | return "", err 92 | } 93 | 94 | return token.Token, nil 95 | } 96 | -------------------------------------------------------------------------------- /server/fixblobs.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "strings" 7 | ) 8 | 9 | // fixBlobs walks the provided dir and replaces (":") to ("-") in the file 10 | // prefix. (e.g. sha256:1234 -> sha256-1234) 11 | func fixBlobs(dir string) error { 12 | return filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { 13 | if err != nil { 14 | return err 15 | } 16 | baseName := filepath.Base(path) 17 | typ, sha, ok := strings.Cut(baseName, ":") 18 | if ok && typ == "sha256" { 19 | newPath := filepath.Join(filepath.Dir(path), typ+"-"+sha) 20 | if err := os.Rename(path, newPath); err != nil { 21 | return err 22 | } 23 | } 24 | return nil 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /server/fixblobs_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "io/fs" 5 | "os" 6 | "path/filepath" 7 | "runtime" 8 | "slices" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | func TestFixBlobs(t *testing.T) { 14 | cases := []struct { 15 | path []string 16 | want []string 17 | }{ 18 | {path: []string{"sha256-1234"}, want: []string{"sha256-1234"}}, 19 | {path: []string{"sha256:1234"}, want: []string{"sha256-1234"}}, 20 | {path: []string{"sha259:5678"}, want: []string{"sha259:5678"}}, 21 | {path: []string{"sha256:abcd"}, want: []string{"sha256-abcd"}}, 22 | {path: []string{"x/y/sha256:abcd"}, want: []string{"x/y/sha256-abcd"}}, 23 | {path: []string{"x:y/sha256:abcd"}, want: []string{"x:y/sha256-abcd"}}, 24 | {path: []string{"x:y/sha256:abcd"}, want: []string{"x:y/sha256-abcd"}}, 25 | {path: []string{"x:y/sha256:abcd", "sha256:1234"}, want: []string{"x:y/sha256-abcd", "sha256-1234"}}, 26 | {path: []string{"x:y/sha256:abcd", "sha256-1234"}, want: []string{"x:y/sha256-abcd", "sha256-1234"}}, 27 | } 28 | 29 | for _, tt := range cases { 30 | t.Run(strings.Join(tt.path, "|"), func(t *testing.T) { 31 | hasColon := slices.ContainsFunc(tt.path, func(s string) bool { return strings.Contains(s, ":") }) 32 | if hasColon && runtime.GOOS == "windows" { 33 | t.Skip("skipping test on windows") 34 | } 35 | 36 | rootDir := t.TempDir() 37 | for _, path := range tt.path { 38 | fullPath := filepath.Join(rootDir, path) 39 | fullDir, _ := filepath.Split(fullPath) 40 | 41 | t.Logf("creating dir %s", fullDir) 42 | if err := os.MkdirAll(fullDir, 0o755); err != nil { 43 | t.Fatal(err) 44 | } 45 | 46 | t.Logf("writing file %s", fullPath) 47 | if err := os.WriteFile(fullPath, nil, 0o644); err != nil { 48 | t.Fatal(err) 49 | } 50 | } 51 | 52 | if err := fixBlobs(rootDir); err != nil { 53 | t.Fatal(err) 54 | } 55 | 56 | got := slurpFiles(os.DirFS(rootDir)) 57 | 58 | slices.Sort(tt.want) 59 | slices.Sort(got) 60 | if !slices.Equal(got, tt.want) { 61 | t.Fatalf("got = %v, want %v", got, tt.want) 62 | } 63 | }) 64 | } 65 | } 66 | 67 | func slurpFiles(fsys 
fs.FS) []string { 68 | var sfs []string 69 | fn := func(path string, d fs.DirEntry, err error) error { 70 | if err != nil { 71 | return err 72 | } 73 | if d.IsDir() { 74 | return nil 75 | } 76 | sfs = append(sfs, path) 77 | return nil 78 | } 79 | if err := fs.WalkDir(fsys, ".", fn); err != nil { 80 | panic(err) 81 | } 82 | return sfs 83 | } 84 | -------------------------------------------------------------------------------- /server/layer.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "crypto/sha256" 5 | "fmt" 6 | "io" 7 | "os" 8 | ) 9 | 10 | type Layer struct { 11 | MediaType string `json:"mediaType"` 12 | Digest string `json:"digest"` 13 | Size int64 `json:"size"` 14 | From string `json:"from,omitempty"` 15 | status string 16 | } 17 | 18 | func NewLayer(r io.Reader, mediatype string) (*Layer, error) { 19 | blobs, err := GetBlobsPath("") 20 | if err != nil { 21 | return nil, err 22 | } 23 | 24 | temp, err := os.CreateTemp(blobs, "sha256-") 25 | if err != nil { 26 | return nil, err 27 | } 28 | defer temp.Close() 29 | defer os.Remove(temp.Name()) 30 | 31 | sha256sum := sha256.New() 32 | n, err := io.Copy(io.MultiWriter(temp, sha256sum), r) 33 | if err != nil { 34 | return nil, err 35 | } 36 | 37 | if err := temp.Close(); err != nil { 38 | return nil, err 39 | } 40 | 41 | digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil)) 42 | blob, err := GetBlobsPath(digest) 43 | if err != nil { 44 | return nil, err 45 | } 46 | 47 | status := "using existing layer" 48 | if _, err := os.Stat(blob); err != nil { 49 | status = "creating new layer" 50 | if err := os.Rename(temp.Name(), blob); err != nil { 51 | return nil, err 52 | } 53 | } 54 | 55 | return &Layer{ 56 | MediaType: mediatype, 57 | Digest: digest, 58 | Size: n, 59 | status: fmt.Sprintf("%s %s", status, digest), 60 | }, nil 61 | } 62 | 63 | func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) { 64 | blob, err := GetBlobsPath(digest) 65 | if err != nil { 66 | return nil, err 67 | } 68 | 69 | fi, err := os.Stat(blob) 70 | if err != nil { 71 | return nil, err 72 | } 73 | 74 | return &Layer{ 75 | MediaType: mediatype, 76 | Digest: digest, 77 | Size: fi.Size(), 78 | From: from, 79 | status: fmt.Sprintf("using existing layer %s", digest), 80 | }, nil 81 | } 82 | 83 | func (l *Layer) Open() (io.ReadSeekCloser, error) { 84 | blob, err := GetBlobsPath(l.Digest) 85 | if err != nil { 86 | return nil, err 87 | } 88 | 89 | return os.Open(blob) 90 | } 91 | 92 | func (l *Layer) Remove() error { 93 | ms, err := Manifests() 94 | if err != nil { 95 | return err 96 | } 97 | 98 | for _, m := range ms { 99 | for _, layer := range append(m.Layers, m.Config) { 100 | if layer.Digest == l.Digest { 101 | // something is using this layer 102 | return nil 103 | } 104 | } 105 | } 106 | 107 | blob, err := GetBlobsPath(l.Digest) 108 | if err != nil { 109 | return err 110 | } 111 | 112 | return os.Remove(blob) 113 | } 114 | -------------------------------------------------------------------------------- /server/routes_delete_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/ollama/ollama/api" 10 | ) 11 | 12 | func TestDelete(t *testing.T) { 13 | p := t.TempDir() 14 | t.Setenv("OLLAMA_MODELS", p) 15 | var s Server 16 | 17 | w := createRequest(t, s.CreateModelHandler, api.CreateRequest{ 18 | Name: "test", 19 | Modelfile: 
fmt.Sprintf("FROM %s", createBinFile(t)), 20 | }) 21 | 22 | if w.Code != http.StatusOK { 23 | t.Fatalf("expected status code 200, actual %d", w.Code) 24 | } 25 | 26 | w = createRequest(t, s.CreateModelHandler, api.CreateRequest{ 27 | Name: "test2", 28 | Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t)), 29 | }) 30 | 31 | if w.Code != http.StatusOK { 32 | t.Fatalf("expected status code 200, actual %d", w.Code) 33 | } 34 | 35 | checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ 36 | filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"), 37 | filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"), 38 | }) 39 | 40 | checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ 41 | filepath.Join(p, "blobs", "sha256-8f2c2167d789c6b2302dff965160fa5029f6a24096d262c1cbb469f21a045382"), 42 | filepath.Join(p, "blobs", "sha256-a4e5e156ddec27e286f75328784d7106b60a4eb1d246e950a001a3f944fbda99"), 43 | filepath.Join(p, "blobs", "sha256-ca239d7bd8ea90e4a5d2e6bf88f8d74a47b14336e73eb4e18bed4dd325018116"), 44 | filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"), 45 | }) 46 | 47 | w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"}) 48 | 49 | if w.Code != http.StatusOK { 50 | t.Fatalf("expected status code 200, actual %d", w.Code) 51 | } 52 | 53 | checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{ 54 | filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"), 55 | }) 56 | 57 | checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ 58 | filepath.Join(p, "blobs", "sha256-8f2c2167d789c6b2302dff965160fa5029f6a24096d262c1cbb469f21a045382"), 59 | filepath.Join(p, "blobs", "sha256-a4e5e156ddec27e286f75328784d7106b60a4eb1d246e950a001a3f944fbda99"), 60 | filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"), 61 | }) 62 | 63 | w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test2"}) 64 | 65 | if w.Code != http.StatusOK { 66 | t.Fatalf("expected status code 200, actual %d", w.Code) 67 | } 68 | 69 | checkFileExists(t, filepath.Join(p, "manifests", "*", "*", "*", "*"), []string{}) 70 | checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{}) 71 | } 72 | -------------------------------------------------------------------------------- /server/routes_list_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "slices" 8 | "testing" 9 | 10 | "github.com/ollama/ollama/api" 11 | ) 12 | 13 | func TestList(t *testing.T) { 14 | t.Setenv("OLLAMA_MODELS", t.TempDir()) 15 | 16 | expectNames := []string{ 17 | "mistral:7b-instruct-q4_0", 18 | "zephyr:7b-beta-q5_K_M", 19 | "apple/OpenELM:latest", 20 | "boreas:2b-code-v1.5-q6_K", 21 | "notus:7b-v1-IQ2_S", 22 | // TODO: host:port currently fails on windows (#4107) 23 | // "localhost:5000/library/eurus:700b-v0.5-iq3_XXS", 24 | "mynamespace/apeliotes:latest", 25 | "myhost/mynamespace/lips:code", 26 | } 27 | 28 | var s Server 29 | for _, n := range expectNames { 30 | createRequest(t, s.CreateModelHandler, api.CreateRequest{ 31 | Name: n, 32 | Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)), 33 | }) 34 | } 35 | 36 | w := createRequest(t, s.ListModelsHandler, nil) 37 | if w.Code != http.StatusOK { 38 | t.Fatalf("expected status code 200, actual %d", 
--------------------------------------------------------------------------------
/server/routes_list_test.go:
--------------------------------------------------------------------------------
package server

import (
	"encoding/json"
	"fmt"
	"net/http"
	"slices"
	"testing"

	"github.com/ollama/ollama/api"
)

func TestList(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	expectNames := []string{
		"mistral:7b-instruct-q4_0",
		"zephyr:7b-beta-q5_K_M",
		"apple/OpenELM:latest",
		"boreas:2b-code-v1.5-q6_K",
		"notus:7b-v1-IQ2_S",
		// TODO: host:port currently fails on windows (#4107)
		// "localhost:5000/library/eurus:700b-v0.5-iq3_XXS",
		"mynamespace/apeliotes:latest",
		"myhost/mynamespace/lips:code",
	}

	var s Server
	for _, n := range expectNames {
		createRequest(t, s.CreateModelHandler, api.CreateRequest{
			Name:      n,
			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t)),
		})
	}

	w := createRequest(t, s.ListModelsHandler, nil)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d", w.Code)
	}

	var resp api.ListResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}

	if len(resp.Models) != len(expectNames) {
		t.Fatalf("expected %d models, actual %d", len(expectNames), len(resp.Models))
	}

	actualNames := make([]string, len(resp.Models))
	for i, m := range resp.Models {
		actualNames[i] = m.Name
	}

	slices.Sort(actualNames)
	slices.Sort(expectNames)

	if !slices.Equal(actualNames, expectNames) {
		t.Fatalf("expected slices to be equal %v", actualNames)
	}
}
--------------------------------------------------------------------------------
/types/errtypes/errtypes.go:
--------------------------------------------------------------------------------
// Package errtypes contains custom error types
package errtypes

import (
	"fmt"
	"strings"
)

const UnknownOllamaKeyErrMsg = "unknown ollama key"
const InvalidModelNameErrMsg = "invalid model name"

// TODO: This should have a structured response from the API
type UnknownOllamaKey struct {
	Key string
}

func (e *UnknownOllamaKey) Error() string {
	return fmt.Sprintf("unauthorized: %s %q", UnknownOllamaKeyErrMsg, strings.TrimSpace(e.Key))
}
--------------------------------------------------------------------------------
/types/model/testdata/fuzz/FuzzName/d37463aa416f6bab:
--------------------------------------------------------------------------------
go test fuzz v1
string("00@")
--------------------------------------------------------------------------------
/version/version.go:
--------------------------------------------------------------------------------
package version

var Version string = "0.0.0"
--------------------------------------------------------------------------------
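To close, an illustrative sketch (not part of the repository) showing how the UnknownOllamaKey error defined in types/errtypes/errtypes.go above might be constructed and detected with errors.As; the program and the key string are placeholders.

// Sketch only: build and inspect the custom error type from types/errtypes.
package main

import (
	"errors"
	"fmt"

	"github.com/ollama/ollama/types/errtypes"
)

func main() {
	var err error = &errtypes.UnknownOllamaKey{Key: "ssh-ed25519 AAAA... placeholder"}

	var uke *errtypes.UnknownOllamaKey
	if errors.As(err, &uke) {
		fmt.Println("rejected key:", uke.Key)
	}
	fmt.Println(err) // unauthorized: unknown ollama key "ssh-ed25519 AAAA... placeholder"
}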