├── .dockerignore ├── .gitattributes ├── .github └── workflows │ ├── ci.yml │ ├── eval.yml │ ├── publish.yml │ └── site.yml ├── .gitignore ├── .openapi-generator-ignore ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── assets └── demo.gif ├── cli ├── go.mod ├── go.sum ├── main.go ├── main_test.go └── sdk │ ├── .gitignore │ ├── .openapi-generator-ignore │ ├── .openapi-generator │ ├── FILES │ └── VERSION │ ├── README.md │ ├── api │ └── openapi.yaml │ ├── api_default.go │ ├── client.go │ ├── configuration.go │ ├── model_health.go │ ├── model_http_validation_error.go │ ├── model_model.go │ ├── model_run_request.go │ ├── model_run_response.go │ ├── model_validation_error.go │ ├── model_validation_error_loc_inner.go │ ├── response.go │ ├── test │ └── api_default_test.go │ └── utils.go ├── contrib ├── providers │ ├── fireworks │ │ ├── LICENSE │ │ ├── README.md │ │ ├── provider_fireworks.py │ │ └── pyproject.toml │ ├── google-genai │ │ ├── LICENSE │ │ ├── README.md │ │ ├── provider_google_genai.py │ │ └── pyproject.toml │ └── groq │ │ ├── LICENSE │ │ ├── README.md │ │ ├── provider_groq.py │ │ └── pyproject.toml ├── tests │ └── tools │ │ ├── mysql │ │ ├── fixtures │ │ │ └── docker-compose.yml │ │ └── test_mysql_runtime.py │ │ └── postgres │ │ ├── fixtures │ │ ├── docker-compose.yml │ │ └── init.sql │ │ └── test_postgres_runtime.py └── tools │ ├── mysql │ ├── LICENSE │ ├── README.md │ ├── fixtures │ │ ├── docker-compose.yml │ │ └── mydb.sql │ ├── mysql │ │ ├── __init__.py │ │ ├── runtime.py │ │ └── tool.py │ └── pyproject.toml │ └── postgres │ ├── LICENSE │ ├── README.md │ ├── fixtures │ ├── docker-compose.yml │ └── ecommerce.sql │ ├── postgres │ ├── __init__.py │ ├── runtime.py │ └── tool.py │ └── pyproject.toml ├── docs ├── CLI │ ├── chat.md │ ├── db-migrate.md │ ├── db-revisions.md │ ├── db-rollback.md │ ├── index.md │ ├── ingest-prometheus-metrics-metadata.md │ ├── ingest.md │ ├── install.md │ ├── list-contexts.md │ ├── list-models.md │ ├── 
list-runtimes.md │ ├── list-tools.md │ ├── reset.md │ ├── run.md │ ├── schedule-embeddings-reindex.md │ ├── serve.md │ ├── solve.md │ ├── uninstall.md │ └── worker.md ├── CNAME ├── assets │ ├── asciinema-player.css │ ├── asciinema-player.min.js │ ├── docker-compose-runtime.cast │ ├── gce-runtime.cast │ └── ssh-runtime.cast ├── configurations │ ├── OTel.md │ ├── add-new-llm-providers.md │ ├── advanced-knowledge-retrieval.md │ ├── integrate-with-new-runtime.md │ ├── llm-configurations.md │ └── use-cloud-storage-for-embeddings-storage.md ├── cookbooks │ ├── 5-levels-of-workflow-orchestration.ipynb │ ├── automation-using-python-runtime.ipynb │ ├── docker-runtime.md │ ├── index.md │ ├── interacting-with-mysql-using-opsmate.md │ ├── k8s-runtime.md │ ├── knowledge-management.ipynb │ ├── manage-vms.md │ ├── plugin-system.ipynb │ └── plugins │ │ └── prom.py ├── development.md ├── experiments │ └── prometheus.ipynb ├── how-to-o11y-with-tempo.md ├── index.md ├── macros.py ├── production.md ├── providers │ ├── anthropic.md │ ├── fireworks-ai.md │ ├── google-genai.md │ ├── groq.md │ ├── index.md │ ├── ollama.md │ ├── openai.md │ └── xai.md └── tools │ ├── index.md │ ├── loki.md │ ├── mysql.md │ ├── postgres.md │ └── prometheus.md ├── evals ├── apps │ ├── audit-server │ │ ├── Dockerfile │ │ └── app.py │ └── innovation-lab │ │ ├── Dockerfile │ │ └── app.py ├── eval_main.py ├── kind.yaml ├── scenarios │ ├── investigation-001-oom-deploy.yaml │ ├── investigation-002-misconfigured-readiness-probe.yaml │ ├── investigation-003-unschedulable-deploy.yaml │ ├── investigation-004-image-pull-backoff.yaml │ ├── investigation-005-db-connection.yaml │ ├── investigation-006-rbac-issue.yaml │ ├── investigation-007-network-policy-issue.yaml │ ├── text-edit-001-missing-resources-config.yaml │ ├── text-edit-002-remove-config.yaml │ ├── text-edit-003-insert.yaml │ └── text-edit-004-search.yaml ├── scorers.py └── setup.sh ├── examples ├── runtime │ └── gce │ │ ├── README.md │ │ ├── gce.py │ │ └── 
pyproject.toml └── tools │ └── calculator │ ├── calculator.py │ └── pyproject.toml ├── experiments └── sandbox │ └── docker-compose.yml ├── hack └── gen-docs.py ├── mkdocs.yml ├── opsmate ├── __init__.py ├── apiserver │ ├── __init__.py │ └── apiserver.py ├── cli │ ├── __init__.py │ └── cli.py ├── config │ └── __init__.py ├── contexts │ ├── __init__.py │ ├── cli.py │ ├── cli_lite.py │ ├── k8s.py │ └── terraform.py ├── dbq │ ├── __init__.py │ └── dbq.py ├── dbqapp │ └── app.py ├── dino │ ├── __init__.py │ ├── context.py │ ├── dino.py │ ├── provider │ │ ├── __init__.py │ │ ├── anthropic.py │ │ ├── base.py │ │ ├── ollama.py │ │ ├── openai.py │ │ └── xai.py │ ├── react.py │ ├── tools.py │ ├── types.py │ └── utils.py ├── gui │ ├── app.py │ ├── assets.py │ ├── components.py │ ├── config.py │ ├── models.py │ ├── seed.py │ ├── steps.py │ └── views.py ├── ingestions │ ├── __init__.py │ ├── base.py │ ├── chunk.py │ ├── fs.py │ ├── github.py │ ├── jobs.py │ └── models.py ├── knowledgestore │ ├── __init__.py │ └── models.py ├── libs │ ├── config │ │ ├── __init__.py │ │ └── base_settings.py │ └── core │ │ └── trace │ │ └── __init__.py ├── migrations │ ├── README │ ├── alembic.ini │ ├── env.py │ ├── script.py.mako │ └── versions │ │ ├── 79fe7d287ba8_init_db_schema.py │ │ └── b86047adede9_introduce_the_concept_of_named_queue_in_.py ├── plugins │ ├── __init__.py │ └── plugins.py ├── polya │ ├── __init__.py │ ├── execution.py │ ├── models.py │ ├── planning.py │ └── understanding.py ├── runtime │ ├── __init__.py │ ├── docker.py │ ├── k8s.py │ ├── local.py │ ├── runtime.py │ └── ssh.py ├── tests │ ├── __init__.py │ ├── apiserver │ │ └── test_apiserver.py │ ├── base.py │ ├── cli │ │ └── test_cli.py │ ├── core │ │ ├── __init__.py │ │ └── trace │ │ │ └── test_traceit.py │ ├── dbq │ │ └── test_dbq.py │ ├── dino │ │ ├── fixtures │ │ │ └── contexts │ │ │ │ └── gcloud.py │ │ ├── test_agentic.py │ │ ├── test_context.py │ │ ├── test_dino.py │ │ ├── test_react.py │ │ └── test_utils.py │ ├── gui 
│ │ ├── test_gui.py │ │ └── test_models.py │ ├── ingestions │ │ ├── fixtures │ │ │ ├── SOMETHINGELSE.txt │ │ │ ├── TEST.md │ │ │ └── nested │ │ │ │ └── TEST2.md │ │ ├── test_fs.py │ │ ├── test_github.py │ │ ├── test_ingestions.py │ │ └── test_jobs.py │ ├── knowledgestore │ │ └── test_knowledgestore.py │ ├── plugins │ │ ├── fixtures │ │ │ ├── conflicts │ │ │ │ └── plugin.py │ │ │ └── plugins │ │ │ │ └── essentials.py │ │ └── test_plugins.py │ ├── polya │ │ ├── __init__.py │ │ ├── test_planning.py │ │ └── test_understanding.py │ ├── runtime │ │ ├── test_docker.py │ │ └── test_runtime.py │ ├── textsplitters │ │ └── test_recursive.py │ ├── tools │ │ ├── test_aci.py │ │ ├── test_command_line.py │ │ ├── test_knowledge_retrieval.py │ │ ├── test_loki.py │ │ └── test_system.py │ └── workflow │ │ └── test_workflow.py ├── textsplitters │ ├── __init__.py │ ├── base.py │ ├── markdown_header.py │ └── recursive.py ├── tools │ ├── __init__.py │ ├── aci.py │ ├── command_line.py │ ├── datetime.py │ ├── github_operation.py │ ├── knowledge_retrieval.py │ ├── loki.py │ ├── prom.py │ ├── system.py │ ├── thinking.py │ └── utils.py └── workflow │ ├── __init__.py │ ├── models.py │ └── workflow.py ├── pyproject.toml ├── pytest.ini ├── scripts └── api-gen.py ├── sdk └── spec │ └── apiserver │ └── openapi.json ├── tempo ├── .gitignore ├── docker-compose.yaml ├── grafana-datasources.yaml └── tempo.yaml └── uv.lock /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .pytest_cache 3 | .venv 4 | .env 5 | .git 6 | .gitignore 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | sdk/** linguist-generated=true 2 | cli/sdk/** linguist-generated=true 3 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: 
-------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | environment: ci 11 | strategy: 12 | max-parallel: 1 13 | matrix: 14 | # python-version: ["3.12", "3.13"] 15 | python-version: ["3.12"] 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Install dependencies 23 | run: | 24 | pipx install uv --python $(which python${{ matrix.python-version }}) 25 | uv sync 26 | - name: Run tests 27 | env: 28 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 29 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 30 | CI: "true" 31 | run: | 32 | uv run pytest ./opsmate/tests -n auto -m "not serial" 33 | - name: Docker runtime tests 34 | env: 35 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 36 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 37 | CI: "true" 38 | run: | 39 | uv run pytest ./opsmate/tests -m serial 40 | - name: mysql tool tests 41 | env: 42 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 43 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 44 | CI: "true" 45 | run: | 46 | uv run --package opsmate-tool-mysql pytest ./contrib/tests/tools/mysql 47 | - name: postgres tool tests 48 | env: 49 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 50 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 51 | CI: "true" 52 | run: | 53 | uv run --package opsmate-tool-postgres pytest ./contrib/tests/tools/postgres 54 | 55 | build-image: 56 | runs-on: ubuntu-latest 57 | steps: 58 | - name: Checkout repository 59 | uses: actions/checkout@v4 60 | - name: Build image 61 | run: | 62 | make docker-build 63 | -------------------------------------------------------------------------------- /.github/workflows/eval.yml: -------------------------------------------------------------------------------- 1 | # name: 
Run Opsmate evals 2 | 3 | # on: 4 | # workflow_dispatch: 5 | # schedule: 6 | # - cron: "0 0 * * 1" # every Monday at 00:00 UTC 7 | 8 | # jobs: 9 | # eval: 10 | # name: Run evals 11 | # runs-on: ubuntu-latest 12 | # environment: ci 13 | # strategy: 14 | # matrix: 15 | # python-version: ["3.12"] 16 | # steps: 17 | # - name: Checkout 18 | # id: checkout 19 | # uses: actions/checkout@v4 20 | # with: 21 | # fetch-depth: 0 22 | 23 | # - name: Set up Python 24 | # uses: actions/setup-python@v4 25 | # with: 26 | # python-version: ${{ matrix.python-version }} 27 | 28 | # - name: Install dependencies 29 | # run: | 30 | # pipx install uv --python $(which python${{ matrix.python-version }}) 31 | # uv sync --group eval 32 | 33 | # - name: Install kubectl 34 | # run: | 35 | # curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" 36 | # chmod +x ./kubectl 37 | # sudo mv ./kubectl /usr/local/bin/kubectl 38 | 39 | # - name: Start the test cluster 40 | # run: | 41 | # make kind-cluster 42 | 43 | # - name: Run Evals 44 | # env: 45 | # BRAINTRUST_PROJECT_ID: ${{ secrets.BRAINTRUST_PROJECT_ID }} 46 | # BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} 47 | # ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 48 | # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 49 | # CI: "true" 50 | # run: | 51 | # uv run braintrust eval ./evals/ --num-workers 1 52 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'pyproject.toml' 9 | - '.github/workflows/publish.yml' 10 | - 'contrib/providers/fireworks/pyproject.toml' 11 | - 'contrib/providers/groq/pyproject.toml' 12 | - 'contrib/tools/mysql/pyproject.toml' 13 | 14 | jobs: 15 | pypi-publish: 16 | runs-on: ubuntu-latest 17 | strategy: 18 | matrix: 19 | package: 20 | 
- name: opsmate 21 | url: https://pypi.org/p/opsmate 22 | - name: opsmate-provider-google-genai 23 | url: https://pypi.org/p/opsmate-provider-google-genai 24 | - name: opsmate-provider-groq 25 | url: https://pypi.org/p/opsmate-provider-groq 26 | - name: opsmate-provider-fireworks 27 | url: https://pypi.org/p/opsmate-provider-fireworks 28 | - name: opsmate-tool-mysql 29 | url: https://pypi.org/p/opsmate-tool-mysql 30 | - name: opsmate-tool-postgres 31 | url: https://pypi.org/p/opsmate-tool-postgres 32 | environment: 33 | name: pypi 34 | url: ${{ matrix.package.url }} 35 | permissions: 36 | id-token: write 37 | steps: 38 | - name: Checkout repository 39 | uses: actions/checkout@v4 40 | 41 | - name: Set up Python 42 | uses: actions/setup-python@v4 43 | with: 44 | python-version: '3.12' 45 | 46 | - name: Install dependencies 47 | run: | 48 | pipx install uv --python $(which python3.12) 49 | 50 | - name: Build package 51 | run: | 52 | uv build --package ${{ matrix.package.name }} 53 | 54 | - name: Publish release distributions to PyPI 55 | uses: pypa/gh-action-pypi-publish@release/v1 56 | with: 57 | skip-existing: true 58 | 59 | build-and-push-image: 60 | runs-on: ubuntu-latest 61 | permissions: 62 | contents: read 63 | packages: write 64 | attestations: write 65 | id-token: write 66 | steps: 67 | - name: Checkout repository 68 | uses: actions/checkout@v4 69 | # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. 
70 | - name: Log in to the Container registry 71 | uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 72 | with: 73 | registry: ghcr.io 74 | username: ${{ github.actor }} 75 | password: ${{ secrets.GITHUB_TOKEN }} 76 | - name: Build image 77 | run: | 78 | make docker-build 79 | - name: Push image 80 | run: | 81 | make docker-push 82 | -------------------------------------------------------------------------------- /.github/workflows/site.yml: -------------------------------------------------------------------------------- 1 | name: site 2 | on: 3 | push: 4 | branches: 5 | - main 6 | permissions: 7 | contents: write 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Configure Git Credentials 14 | run: | 15 | git config user.name github-actions[bot] 16 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 17 | - uses: actions/setup-python@v5 18 | with: 19 | python-version: '3.12' 20 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 21 | - uses: actions/cache@v4 22 | with: 23 | key: mkdocs-material-${{ env.cache_id }} 24 | path: .cache 25 | restore-keys: | 26 | mkdocs-material- 27 | - name: Install dependencies 28 | run: | 29 | pipx install uv --python $(which python3.12) 30 | - name: Install docs dependencies 31 | run: | 32 | uv sync --group docs --python $(which python3.12) 33 | - name: Deploy docs 34 | run: | 35 | uv run mkdocs gh-deploy --force 36 | -------------------------------------------------------------------------------- /.openapi-generator-ignore: -------------------------------------------------------------------------------- 1 | .travis.yml 2 | .gitlab-ci.yml 3 | .github 4 | git_push.sh 5 | docs/ 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Build stage 2 | FROM python:3.12.3-slim-bullseye AS builder 3 | 4 | 
COPY --from=ghcr.io/astral-sh/uv:0.6.5 /uv /uvx /bin/ 5 | 6 | WORKDIR /app 7 | 8 | COPY pyproject.toml uv.lock README.md LICENSE /app/ 9 | COPY opsmate /app/opsmate 10 | RUN uv build 11 | 12 | # Final stage 13 | FROM python:3.12.3-slim-bullseye 14 | 15 | LABEL org.opencontainers.image.source=https://github.com/opsmate-ai/opsmate 16 | 17 | # Install only kubectl without keeping unnecessary files 18 | RUN apt-get update && \ 19 | apt-get install -y --no-install-recommends curl && \ 20 | curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" && \ 21 | curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl.sha256" && \ 22 | echo "$(cat kubectl.sha256) kubectl" | sha256sum --check && \ 23 | install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && \ 24 | rm kubectl kubectl.sha256 && \ 25 | apt-get autoremove -y && \ 26 | apt-get clean && \ 27 | rm -rf /var/lib/apt/lists/* 28 | 29 | WORKDIR /app 30 | 31 | COPY --from=builder /app/dist/opsmate-*.whl /tmp/dist/ 32 | 33 | RUN pip install --no-cache-dir /tmp/dist/opsmate-*.whl && opsmate version 34 | 35 | ENTRYPOINT ["opsmate"] 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jingkai He 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies 
or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) 2 | ifeq (,$(shell go env GOBIN)) 3 | GOBIN=$(shell go env GOPATH)/bin 4 | else 5 | GOBIN=$(shell go env GOBIN) 6 | endif 7 | 8 | VERSION=$(shell awk '/^\[project\]/{p=1;next} /^\[/{p=0} p&&/^version = /{print}' pyproject.toml | sed 's/version = "\(.*\)"/\1/') 9 | IMAGE_NAME=opsmate 10 | CONTAINER_REGISTRY=ghcr.io/opsmate-ai 11 | 12 | SHELL = /usr/bin/env bash -o pipefail 13 | .SHELLFLAGS = -ec 14 | 15 | LOCALBIN ?= $(shell pwd)/.bin 16 | 17 | KIND ?= $(LOCALBIN)/kind 18 | 19 | ## Location to install dependencies to 20 | LOCALBIN ?= $(shell pwd)/bin 21 | $(LOCALBIN): 22 | mkdir -p $(LOCALBIN) 23 | 24 | docker-build: 25 | docker build -t $(CONTAINER_REGISTRY)/$(IMAGE_NAME):$(VERSION) . 
26 | 27 | docker-push: 28 | docker push $(CONTAINER_REGISTRY)/$(IMAGE_NAME):$(VERSION) 29 | docker tag $(CONTAINER_REGISTRY)/$(IMAGE_NAME):$(VERSION) $(CONTAINER_REGISTRY)/$(IMAGE_NAME):latest 30 | docker push $(CONTAINER_REGISTRY)/$(IMAGE_NAME):latest 31 | 32 | gen-docs: # generate the docs for the CLI 33 | uv run python hack/gen-docs.py 34 | 35 | .PHONY: kind 36 | kind: $(LOCALBIN) 37 | test -s $(LOCALBIN)/kind || curl -Lo $(LOCALBIN)/kind https://kind.sigs.k8s.io/dl/v0.24.0/kind-linux-amd64 && chmod +x $(LOCALBIN)/kind 38 | 39 | .PHONY: kind-cluster 40 | kind-cluster: kind 41 | $(KIND) create cluster --config evals/kind.yaml || true 42 | ./evals/setup.sh 43 | 44 | .PHONY: kind-destroy 45 | kind-destroy: kind 46 | $(KIND) delete cluster --name troubleshooting-eval 47 | 48 | .PHONY: api-gen 49 | api-gen: # generate the api spec 50 | echo "Generating the api spec..." 51 | uv run python scripts/api-gen.py 52 | 53 | .PHONY: python-sdk-codegen 54 | python-sdk-codegen: api-gen # generate the python sdk 55 | echo "Generating the python sdk..." 56 | sudo rm -rf sdk/python 57 | mkdir -p sdk/python 58 | cp .openapi-generator-ignore sdk/python/.openapi-generator-ignore 59 | docker run --rm \ 60 | -v $(PWD)/sdk:/local/sdk \ 61 | openapitools/openapi-generator-cli:v7.10.0 generate \ 62 | -i /local/sdk/spec/apiserver/openapi.json \ 63 | --api-package api \ 64 | --model-package models \ 65 | -g python \ 66 | --package-name opsmatesdk \ 67 | -o /local/sdk/python \ 68 | --additional-properties=packageVersion=$(VERSION) 69 | sudo chown -R $(USER):$(USER) sdk 70 | 71 | .PHONY: go-sdk-codegen 72 | go-sdk-codegen: # generate the go sdk 73 | echo "Generating the go sdk..." 
74 | sudo rm -rf cli/sdk 75 | mkdir -p cli/sdk 76 | cp .openapi-generator-ignore cli/sdk/.openapi-generator-ignore 77 | docker run --rm \ 78 | -v $(PWD)/cli/sdk:/local/cli/sdk \ 79 | -v $(PWD)/sdk/spec/apiserver/openapi.json:/local/openapi.json \ 80 | openapitools/openapi-generator-cli:v7.10.0 generate \ 81 | -i /local/openapi.json \ 82 | --api-package api \ 83 | --model-package models \ 84 | -g go \ 85 | --package-name opsmatesdk \ 86 | --git-user-id jingkaihe \ 87 | --git-repo-id opsmate/cli/sdk \ 88 | -o /local/cli/sdk \ 89 | --additional-properties=packageVersion=$(VERSION),withGoMod=false 90 | sudo chown -R $(USER):$(USER) cli/sdk 91 | -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opsmate-ai/opsmate/7a43754727aa6dfe98a59fe843b30faca9757b2c/assets/demo.gif -------------------------------------------------------------------------------- /cli/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/opsmate-ai/opsmate/cli 2 | 3 | go 1.22.2 4 | 5 | require ( 6 | github.com/olekukonko/tablewriter v0.0.5 7 | github.com/stretchr/testify v1.9.0 8 | github.com/urfave/cli/v2 v2.27.5 9 | ) 10 | 11 | require ( 12 | github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect 13 | github.com/davecgh/go-spew v1.1.1 // indirect 14 | github.com/mattn/go-runewidth v0.0.9 // indirect 15 | github.com/pmezard/go-difflib v1.0.0 // indirect 16 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 17 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect 18 | gopkg.in/yaml.v3 v3.0.1 // indirect 19 | ) 20 | -------------------------------------------------------------------------------- /cli/go.sum: -------------------------------------------------------------------------------- 1 | github.com/cpuguy83/go-md2man/v2 v2.0.5 
h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= 2 | github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= 6 | github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= 7 | github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= 8 | github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= 9 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 10 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 11 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 12 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 13 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 14 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 15 | github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= 16 | github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= 17 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= 18 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= 19 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 20 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 21 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 22 | 
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 23 | -------------------------------------------------------------------------------- /cli/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestOsInventory(t *testing.T) { 10 | inventory, err := osInventory() 11 | assert.NoError(t, err) 12 | 13 | t.Logf("inventory: %+v", inventory) 14 | 15 | assert.NotNil(t, inventory) 16 | assert.NotEmpty(t, inventory["os"]) 17 | assert.NotEmpty(t, inventory["arch"]) 18 | assert.NotEmpty(t, inventory["cpus"]) 19 | assert.NotEmpty(t, inventory["memInGB"]) 20 | } 21 | -------------------------------------------------------------------------------- /cli/sdk/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | -------------------------------------------------------------------------------- /cli/sdk/.openapi-generator-ignore: -------------------------------------------------------------------------------- 1 | .travis.yml 2 | .gitlab-ci.yml 3 | .github 4 | git_push.sh 5 | docs/ 6 | -------------------------------------------------------------------------------- /cli/sdk/.openapi-generator/FILES: -------------------------------------------------------------------------------- 1 | .gitignore 2 | README.md 3 | api/openapi.yaml 4 | api_default.go 5 | client.go 6 | configuration.go 7 | model_health.go 8 | model_http_validation_error.go 9 | model_model.go 10 | model_run_request.go 11 | 
model_run_response.go 12 | model_validation_error.go 13 | model_validation_error_loc_inner.go 14 | response.go 15 | test/api_default_test.go 16 | utils.go 17 | -------------------------------------------------------------------------------- /cli/sdk/.openapi-generator/VERSION: -------------------------------------------------------------------------------- 1 | 7.10.0 2 | -------------------------------------------------------------------------------- /cli/sdk/model_validation_error_loc_inner.go: -------------------------------------------------------------------------------- 1 | /* 2 | FastAPI 3 | 4 | No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) 5 | 6 | API version: 0.1.0 7 | */ 8 | 9 | // Code generated by OpenAPI Generator (https://openapi-generator.tech); DO NOT EDIT. 10 | 11 | package opsmatesdk 12 | 13 | import ( 14 | "encoding/json" 15 | "fmt" 16 | ) 17 | 18 | 19 | // ValidationErrorLocInner struct for ValidationErrorLocInner 20 | type ValidationErrorLocInner struct { 21 | Int32 *int32 22 | String *string 23 | } 24 | 25 | // Unmarshal JSON data into any of the pointers in the struct 26 | func (dst *ValidationErrorLocInner) UnmarshalJSON(data []byte) error { 27 | var err error 28 | // try to unmarshal JSON data into Int32 29 | err = json.Unmarshal(data, &dst.Int32); 30 | if err == nil { 31 | jsonInt32, _ := json.Marshal(dst.Int32) 32 | if string(jsonInt32) == "{}" { // empty struct 33 | dst.Int32 = nil 34 | } else { 35 | return nil // data stored in dst.Int32, return on the first match 36 | } 37 | } else { 38 | dst.Int32 = nil 39 | } 40 | 41 | // try to unmarshal JSON data into String 42 | err = json.Unmarshal(data, &dst.String); 43 | if err == nil { 44 | jsonString, _ := json.Marshal(dst.String) 45 | if string(jsonString) == "{}" { // empty struct 46 | dst.String = nil 47 | } else { 48 | return nil // data stored in dst.String, return on the first match 49 | } 50 | } else { 51 | dst.String = nil 52 
| } 53 | 54 | return fmt.Errorf("data failed to match schemas in anyOf(ValidationErrorLocInner)") 55 | } 56 | 57 | // Marshal data from the first non-nil pointers in the struct to JSON 58 | func (src *ValidationErrorLocInner) MarshalJSON() ([]byte, error) { 59 | if src.Int32 != nil { 60 | return json.Marshal(&src.Int32) 61 | } 62 | 63 | if src.String != nil { 64 | return json.Marshal(&src.String) 65 | } 66 | 67 | return nil, nil // no data in anyOf schemas 68 | } 69 | 70 | 71 | type NullableValidationErrorLocInner struct { 72 | value *ValidationErrorLocInner 73 | isSet bool 74 | } 75 | 76 | func (v NullableValidationErrorLocInner) Get() *ValidationErrorLocInner { 77 | return v.value 78 | } 79 | 80 | func (v *NullableValidationErrorLocInner) Set(val *ValidationErrorLocInner) { 81 | v.value = val 82 | v.isSet = true 83 | } 84 | 85 | func (v NullableValidationErrorLocInner) IsSet() bool { 86 | return v.isSet 87 | } 88 | 89 | func (v *NullableValidationErrorLocInner) Unset() { 90 | v.value = nil 91 | v.isSet = false 92 | } 93 | 94 | func NewNullableValidationErrorLocInner(val *ValidationErrorLocInner) *NullableValidationErrorLocInner { 95 | return &NullableValidationErrorLocInner{value: val, isSet: true} 96 | } 97 | 98 | func (v NullableValidationErrorLocInner) MarshalJSON() ([]byte, error) { 99 | return json.Marshal(v.value) 100 | } 101 | 102 | func (v *NullableValidationErrorLocInner) UnmarshalJSON(src []byte) error { 103 | v.isSet = true 104 | return json.Unmarshal(src, &v.value) 105 | } 106 | 107 | 108 | -------------------------------------------------------------------------------- /cli/sdk/response.go: -------------------------------------------------------------------------------- 1 | /* 2 | FastAPI 3 | 4 | No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) 5 | 6 | API version: 0.1.0 7 | */ 8 | 9 | // Code generated by OpenAPI Generator (https://openapi-generator.tech); DO NOT EDIT. 
10 | 11 | package opsmatesdk 12 | 13 | import ( 14 | "net/http" 15 | ) 16 | 17 | // APIResponse stores the API response returned by the server. 18 | type APIResponse struct { 19 | *http.Response `json:"-"` 20 | Message string `json:"message,omitempty"` 21 | // Operation is the name of the OpenAPI operation. 22 | Operation string `json:"operation,omitempty"` 23 | // RequestURL is the request URL. This value is always available, even if the 24 | // embedded *http.Response is nil. 25 | RequestURL string `json:"url,omitempty"` 26 | // Method is the HTTP method used for the request. This value is always 27 | // available, even if the embedded *http.Response is nil. 28 | Method string `json:"method,omitempty"` 29 | // Payload holds the contents of the response body (which may be nil or empty). 30 | // This is provided here as the raw response.Body() reader will have already 31 | // been drained. 32 | Payload []byte `json:"-"` 33 | } 34 | 35 | // NewAPIResponse returns a new APIResponse object. 36 | func NewAPIResponse(r *http.Response) *APIResponse { 37 | 38 | response := &APIResponse{Response: r} 39 | return response 40 | } 41 | 42 | // NewAPIResponseWithError returns a new APIResponse object with the provided error message. 
43 | func NewAPIResponseWithError(errorMessage string) *APIResponse { 44 | 45 | response := &APIResponse{Message: errorMessage} 46 | return response 47 | } 48 | -------------------------------------------------------------------------------- /cli/sdk/test/api_default_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | FastAPI 3 | 4 | Testing DefaultAPIService 5 | 6 | */ 7 | 8 | // Code generated by OpenAPI Generator (https://openapi-generator.tech); 9 | 10 | package opsmatesdk 11 | 12 | import ( 13 | "context" 14 | "github.com/stretchr/testify/assert" 15 | "github.com/stretchr/testify/require" 16 | "testing" 17 | openapiclient "github.com/jingkaihe/opsmate/cli/sdk" 18 | ) 19 | 20 | func Test_opsmatesdk_DefaultAPIService(t *testing.T) { 21 | 22 | configuration := openapiclient.NewConfiguration() 23 | apiClient := openapiclient.NewAPIClient(configuration) 24 | 25 | t.Run("Test DefaultAPIService HealthV1HealthzGet", func(t *testing.T) { 26 | 27 | t.Skip("skip test") // remove to run test 28 | 29 | resp, httpRes, err := apiClient.DefaultAPI.HealthV1HealthzGet(context.Background()).Execute() 30 | 31 | require.Nil(t, err) 32 | require.NotNil(t, resp) 33 | assert.Equal(t, 200, httpRes.StatusCode) 34 | 35 | }) 36 | 37 | t.Run("Test DefaultAPIService ModelsV1ModelsGet", func(t *testing.T) { 38 | 39 | t.Skip("skip test") // remove to run test 40 | 41 | resp, httpRes, err := apiClient.DefaultAPI.ModelsV1ModelsGet(context.Background()).Execute() 42 | 43 | require.Nil(t, err) 44 | require.NotNil(t, resp) 45 | assert.Equal(t, 200, httpRes.StatusCode) 46 | 47 | }) 48 | 49 | t.Run("Test DefaultAPIService RunV1RunPost", func(t *testing.T) { 50 | 51 | t.Skip("skip test") // remove to run test 52 | 53 | resp, httpRes, err := apiClient.DefaultAPI.RunV1RunPost(context.Background()).Execute() 54 | 55 | require.Nil(t, err) 56 | require.NotNil(t, resp) 57 | assert.Equal(t, 200, httpRes.StatusCode) 58 | 59 | }) 60 | 61 | } 62 | 
-------------------------------------------------------------------------------- /contrib/providers/fireworks/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jingkai He 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /contrib/providers/fireworks/README.md: -------------------------------------------------------------------------------- 1 | # opsmate-provider-fireworks 2 | 3 | `opsmate-provider-fireworks` provides selected models from [Fireworks](https://fireworks.ai). 4 | 5 | ## Installation 6 | 7 | ```bash 8 | opsmate install opsmate-provider-fireworks 9 | ``` 10 | 11 | After installation you can list all the models via 12 | 13 | ```bash 14 | $ opsmate list-models 15 | ``` 16 | 17 | ## Usage 18 | 19 | You can specify the fireworks model in the `-m` flag. 
```bash
export FIREWORKS_API_KEY="fw_..." # ideally save it in your shell profile
opsmate chat -m accounts/fireworks/models/deepseek-v3-0324
```
m.content} for m in messages] 44 | 45 | filtered_kwargs = cls._filter_kwargs(kwargs) 46 | return await client.chat.completions.create( 47 | response_model=response_model, 48 | messages=messages, 49 | max_retries=max_retries, 50 | validation_context=validation_context, 51 | context=context, 52 | strict=strict, 53 | **filtered_kwargs, 54 | ) 55 | 56 | @classmethod 57 | @cache 58 | def _default_client(cls) -> AsyncInstructor: 59 | return instructor.from_fireworks( 60 | AsyncFireworks( 61 | base_url=os.getenv("FIREWORKS_BASE_URL", cls.DEFAULT_BASE_URL), 62 | api_key=os.getenv("FIREWORKS_API_KEY"), 63 | ), 64 | mode=instructor.Mode.FIREWORKS_TOOLS, 65 | ) 66 | -------------------------------------------------------------------------------- /contrib/providers/fireworks/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "opsmate-provider-fireworks" 3 | version = "0.1.1a0" 4 | description = "Fireworks AI provider for opsmate" 5 | authors = [ 6 | { name="Jingkai He", email="jingkai@hey.com" }, 7 | ] 8 | readme = "README.md" 9 | license = { file = "LICENSE" } 10 | requires-python = "<4.0,>=3.10" 11 | dependencies = [ 12 | "opsmate", 13 | "fireworks-ai>=0.15.12", 14 | ] 15 | 16 | [tool.uv.sources] 17 | opsmate = { workspace = true } 18 | 19 | [build-system] 20 | requires = ["hatchling"] 21 | build-backend = "hatchling.build" 22 | 23 | [tool.hatch.build.targets.wheel] 24 | include = ["provider_fireworks.py"] 25 | 26 | [project.entry-points."opsmate.dino.providers"] 27 | fireworks = "provider_fireworks:FireworksProvider" 28 | -------------------------------------------------------------------------------- /contrib/providers/google-genai/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jingkai He 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated 
documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /contrib/providers/google-genai/README.md: -------------------------------------------------------------------------------- 1 | # opsmate-provider-google-genai 2 | 3 | `opsmate-provider-google-genai` provides selected models from [Google GenAI](https://cloud.google.com/vertex-ai/generative-ai). 4 | 5 | ## Installation 6 | 7 | ```bash 8 | opsmate install opsmate-provider-google-genai 9 | ``` 10 | 11 | After installation you can list all the models via 12 | 13 | ```bash 14 | $ opsmate list-models 15 | ``` 16 | 17 | ## Limitations 18 | 19 | ### Only Vertex AI models are supported 20 | :warning: This provider currently does not support [Gemini API](https://ai.google.dev/gemini-api/docs/api-key), because the Gemini API [does not support `default` value](https://github.com/googleapis/python-genai/blob/edf6ee359fdce14d03e1e2c7b2dc50fa5b0fdee3/google/genai/_transformers.py#L653-L657) in the response schema. 
from opsmate.dino.provider import Provider, register_provider
from opsmate.dino.types import Message
from typing import Any, Awaitable, List
from instructor.client import T
from instructor import AsyncInstructor
from tenacity import AsyncRetrying
from functools import cache
from google import genai
import instructor


@register_provider("google-genai")
class GoogleGenAIProvider(Provider):
    """Google GenAI (Vertex AI) model provider for opsmate."""

    # Vertex AI Gemini models exposed by this provider.
    models = [
        "gemini-2.5-pro-preview-03-25",
        "gemini-2.5-pro-exp-03-25",
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-lite",
    ]

    @classmethod
    async def chat_completion(
        cls,
        response_model: type[T],
        messages: List[Message],
        max_retries: int | AsyncRetrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        client: AsyncInstructor | None = None,
        **kwargs: Any,
    ) -> Awaitable[T]:
        """Run a structured chat completion against Google GenAI.

        Uses the supplied instructor client when given, otherwise the
        provider's default client for the requested model.
        """
        model = kwargs.get("model")
        client = client or cls.default_client(model)
        kwargs.pop("client", None)

        # GenAI uses the role "model" where OpenAI-style APIs use "assistant";
        # map roles and flatten Message objects to wire dicts in one pass.
        wire_messages = [
            {
                "role": "model" if m.role == "assistant" else m.role,
                "content": m.content,
            }
            for m in messages
        ]

        filtered_kwargs = cls._filter_kwargs(kwargs)
        return await client.chat.completions.create(
            response_model=response_model,
            messages=wire_messages,
            max_retries=max_retries,
            validation_context=validation_context,
            context=context,
            strict=strict,
            **filtered_kwargs,
        )

    @classmethod
    @cache
    def _default_client(cls) -> AsyncInstructor:
        """Memoised instructor client backed by Vertex AI."""
        genai_client = genai.Client(vertexai=True)
        return instructor.from_genai(
            genai_client,
            mode=instructor.Mode.GENAI_STRUCTURED_OUTPUTS,
            use_async=True,
        )
-------------------------------------------------------------------------------- /contrib/providers/groq/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jingkai He 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /contrib/providers/groq/README.md: -------------------------------------------------------------------------------- 1 | # opsmate-provider-groq 2 | 3 | `opsmate-provider-groq` provides selected models from [Groq](https://groq.com). 
4 | 5 | ## Installation 6 | 7 | ```bash 8 | opsmate install opsmate-provider-groq 9 | ``` 10 | 11 | After installation you can list all the models via 12 | 13 | ```bash 14 | $ opsmate list-models --provider groq 15 | Models 16 | ┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 17 | ┃ Provider ┃ Model ┃ 18 | ┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 19 | │ groq │ qwen-qwq-32b │ 20 | ├──────────┼───────────────────────────────┤ 21 | │ groq │ qwen-2.5-32b │ 22 | ├──────────┼───────────────────────────────┤ 23 | │ groq │ mistral-saba-24b │ 24 | ├──────────┼───────────────────────────────┤ 25 | │ groq │ deepseek-r1-distill-qwen-32b │ 26 | ├──────────┼───────────────────────────────┤ 27 | │ groq │ deepseek-r1-distill-llama-70b │ 28 | ├──────────┼───────────────────────────────┤ 29 | │ groq │ llama-3.3-70b-versatile │ 30 | └──────────┴───────────────────────────────┘ 31 | ``` 32 | 33 | You will notice that the models from Groq are automatically added to the list of models. 34 | 35 | You can use the `-m` flag to specify the model to use. For example: 36 | 37 | ```bash 38 | export OPSMATE_LOGLEVEL=ERROR 39 | $ opsmate run -n --tools HtmlToText -m llama-3.3-70b-versatile "find me top 10 news on the hacker news, titl 40 | e only in bullet points" 41 | The top 10 news on Hacker News are: 42 | * The most unhinged video wall, made out of Chromebooks 43 | * Show HN: Berlin Swapfest – Electronics flea market 44 | * GLP-1 drugs – the biggest economic disruptor since the internet? 
from opsmate.dino.provider import Provider, register_provider
from opsmate.dino.types import Message
from typing import Any, Awaitable, List
from instructor.client import T
from instructor import AsyncInstructor
from tenacity import AsyncRetrying
from functools import cache
from groq import AsyncGroq
import instructor
import os


@register_provider("groq")
class GroqProvider(Provider):
    """Groq model provider for opsmate."""

    DEFAULT_BASE_URL = "https://api.groq.com"

    # Here is the full list of models that support tool use https://console.groq.com/docs/tool-use
    models = [
        "qwen-qwq-32b",
        "qwen-2.5-32b",
        "mistral-saba-24b",
        "deepseek-r1-distill-qwen-32b",
        "deepseek-r1-distill-llama-70b",
        "llama-3.3-70b-versatile",
        # commented out as it cannot reliably use tools
        # "llama-3.1-8b-instant",
        # "mixtral-8x7b-32768",
        # "gemma2-9b-it",
    ]

    @classmethod
    async def chat_completion(
        cls,
        response_model: type[T],
        messages: List[Message],
        max_retries: int | AsyncRetrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        client: AsyncInstructor | None = None,
        **kwargs: Any,
    ) -> Awaitable[T]:
        """Run a structured chat completion against Groq.

        Uses the supplied instructor client when given, otherwise the
        provider's default client for the requested model.
        """
        model = kwargs.get("model")
        client = client or cls.default_client(model)
        kwargs.pop("client", None)

        # Flatten Message objects into the wire-format role/content dicts.
        wire_messages = [{"role": m.role, "content": m.content} for m in messages]

        filtered_kwargs = cls._filter_kwargs(kwargs)
        return await client.chat.completions.create(
            response_model=response_model,
            messages=wire_messages,
            max_retries=max_retries,
            validation_context=validation_context,
            context=context,
            strict=strict,
            **filtered_kwargs,
        )

    @classmethod
    @cache
    def _default_client(cls) -> AsyncInstructor:
        """Memoised instructor client backed by the Groq API.

        Reads GROQ_BASE_URL and GROQ_API_KEY from the environment; falls back
        to DEFAULT_BASE_URL when no base URL override is set.
        """
        groq_client = AsyncGroq(
            base_url=os.getenv("GROQ_BASE_URL", cls.DEFAULT_BASE_URL),
            api_key=os.getenv("GROQ_API_KEY"),
        )
        return instructor.from_groq(groq_client, mode=instructor.Mode.JSON)
-------------------------------------------------------------------------------- 1 | services: 2 | mysql-server: 3 | image: mysql:9.2.0 4 | environment: 5 | MYSQL_ROOT_PASSWORD: my-secret-pw 6 | ports: 7 | - "3306:3306" 8 | healthcheck: 9 | test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root", "-pmy-secret-pw"] 10 | interval: 5s 11 | timeout: 5s 12 | retries: 10 13 | -------------------------------------------------------------------------------- /contrib/tests/tools/postgres/fixtures/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | postgres-server: 3 | image: postgres:15.12-bullseye 4 | environment: 5 | POSTGRES_USER: postgres 6 | POSTGRES_PASSWORD: postgres 7 | POSTGRES_DB: testdb 8 | ports: 9 | - "5432:5432" 10 | healthcheck: 11 | test: ["CMD", "pg_isready", "-U", "postgres"] 12 | interval: 5s 13 | timeout: 5s 14 | retries: 10 15 | volumes: 16 | - ./init.sql:/docker-entrypoint-initdb.d/init.sql 17 | -------------------------------------------------------------------------------- /contrib/tests/tools/postgres/fixtures/init.sql: -------------------------------------------------------------------------------- 1 | \c testdb; 2 | 3 | CREATE TABLE test ( 4 | id SERIAL PRIMARY KEY, 5 | name VARCHAR(255) NOT NULL 6 | ); 7 | 8 | INSERT INTO test (name) VALUES ('test'); 9 | -------------------------------------------------------------------------------- /contrib/tools/mysql/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jingkai He 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to 
from opsmate.dino.types import (
    ToolCall,
    ToolCallConfig,
    register_tool,
    PresentationMixin,
)
from pydantic import Field
from typing import Any, Tuple, Dict, Union, List
from .runtime import MySQLRuntime, RuntimeError
import pandas as pd

# A query result is a sequence of row dicts (tuple or list of them).
ResultType = Union[
    Tuple[Dict[str, Any], ...],
    List[Dict[str, Any]],
]


class MySQLToolConfig(ToolCallConfig):
    # Name of the runtime used for the tool call; overridable via the
    # MYSQL_TOOL_RUNTIME setting (see alias below).
    runtime: str = Field(
        alias="MYSQL_TOOL_RUNTIME",
        description="The runtime to use for the tool call",
        default="mysql",
    )


@register_tool(config=MySQLToolConfig)
class MySQLTool(ToolCall[ResultType], PresentationMixin):
    """MySQL tool"""

    class Config:
        arbitrary_types_allowed = True

    query: str = Field(description="The query to execute")
    timeout: int = Field(
        default=30, ge=1, le=120, description="The timeout for the query in seconds"
    )

    async def __call__(self, context: dict[str, Any] = {}):
        """Execute ``self.query`` against the MySQL runtime found in ``context``.

        Returns the runtime's result rows on success, or a single-element tuple
        with a ``status``/``message`` dict when the user cancels or the runtime
        reports an error. Raises RuntimeError when no usable runtime is present.
        ``context`` is only read, never mutated.
        """
        runtime = self.maybe_runtime(context)
        if runtime is None:
            raise RuntimeError("MySQL runtime not found")

        if not isinstance(runtime, MySQLRuntime):
            raise RuntimeError(f"Runtime {runtime} is not a MySQLRuntime")

        if not await self.confirmation_prompt(context):
            return (
                {
                    "status": "cancelled",
                    "message": "Query execution cancelled by user, try something else.",
                },
            )

        try:
            return await runtime.run(self.query, timeout=self.timeout)
        except RuntimeError as e:
            # Surface runtime failures as a structured result instead of raising.
            # Any other exception type propagates to the caller unchanged (the
            # previous `except Exception: raise` clause was a no-op and removed).
            return (
                {
                    "status": "error",
                    "message": str(e),
                },
            )

    def markdown(self, context: dict[str, Any] = {}):
        """Render the executed query and its tabulated result as markdown."""
        result = pd.DataFrame(self.output)
        return f"""
## MySQL Query

```sql
{self.query}
```

## Result

{result.to_markdown()}
"""

    def confirmation_fields(self) -> List[str]:
        """Fields shown to the user in the confirmation prompt."""
        return ["query"]

    def maybe_runtime(self, context: dict[str, Any] = {}):
        """Return the runtime registered under "MySQLTool" in ``context``, or None."""
        runtimes = context.get("runtimes", {})
        if len(runtimes) == 0:
            return None

        return runtimes.get("MySQLTool", None)
-------------------------------------------------------------------------------- /contrib/tools/mysql/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "opsmate-tool-mysql" 3 | version = "0.1.2a0" 4 | description = "MySQL tool for opsmate" 5 | authors = [ 6 | { name="Jingkai He", email="jingkai@hey.com" }, 7 | ] 8 | readme = "README.md" 9 | license = { file = "LICENSE" } 10 | requires-python = "<4.0,>=3.10" 11 | dependencies = [ 12 | "opsmate", 13 | "pymysql[rsa]", 14 | ] 15 | 16 | [tool.uv.sources] 17 | opsmate = { workspace = true } 18 | 19 | [build-system] 20 | requires = ["hatchling"] 21 | build-backend = "hatchling.build" 22 | 23 | [tool.hatch.build.targets.wheel] 24 | packages = ["mysql"] 25 | 26 | [project.entry-points."opsmate.tools"] 27 | tool = "mysql.tool:MySQLTool" 28 | 29 | [project.entry-points."opsmate.runtime.runtimes"] 30 | runtime = "mysql.runtime:MySQLRuntime" 31 | -------------------------------------------------------------------------------- /contrib/tools/postgres/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jingkai He 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
## Configurable Options
from opsmate.dino.types import (
    ToolCall,
    ToolCallConfig,
    register_tool,
    PresentationMixin,
)
from pydantic import Field
from typing import Any, Tuple, Dict, Union, List
from .runtime import PostgresRuntime, RuntimeError
import pandas as pd

# A query result is a sequence of row dicts (tuple or list of them).
ResultType = Union[
    Tuple[Dict[str, Any], ...],
    List[Dict[str, Any]],
]


class PostgresToolConfig(ToolCallConfig):
    # Name of the runtime used for the tool call; overridable via the
    # POSTGRES_TOOL_RUNTIME setting (see alias below).
    runtime: str = Field(
        alias="POSTGRES_TOOL_RUNTIME",
        description="The runtime to use for the tool call",
        default="postgres",
    )


@register_tool(config=PostgresToolConfig)
class PostgresTool(ToolCall[ResultType], PresentationMixin):
    """PostgreSQL tool"""

    class Config:
        arbitrary_types_allowed = True

    query: str = Field(description="The query to execute")
    timeout: int = Field(
        default=30, ge=1, le=120, description="The timeout for the query in seconds"
    )

    async def __call__(self, context: dict[str, Any] = {}):
        """Execute ``self.query`` against the PostgreSQL runtime found in ``context``.

        Returns the runtime's result rows on success, or a single-element tuple
        with a ``status``/``message`` dict when the user cancels or the runtime
        reports an error. Raises RuntimeError when no usable runtime is present.
        ``context`` is only read, never mutated.
        """
        runtime = self.maybe_runtime(context)
        if runtime is None:
            raise RuntimeError("PostgreSQL runtime not found")

        if not isinstance(runtime, PostgresRuntime):
            raise RuntimeError(f"Runtime {runtime} is not a PostgresRuntime")

        if not await self.confirmation_prompt(context):
            return (
                {
                    "status": "cancelled",
                    "message": "Query execution cancelled by user, try something else.",
                },
            )

        try:
            return await runtime.run(self.query, timeout=self.timeout)
        except RuntimeError as e:
            # Surface runtime failures as a structured result instead of raising.
            # Any other exception type propagates to the caller unchanged (the
            # previous `except Exception: raise` clause was a no-op and removed).
            return (
                {
                    "status": "error",
                    "message": str(e),
                },
            )

    def markdown(self, context: dict[str, Any] = {}):
        """Render the executed query and its tabulated result as markdown."""
        result = pd.DataFrame(self.output)
        return f"""
## PostgreSQL Query

```sql
{self.query}
```

## Result

{result.to_markdown()}
"""

    def confirmation_fields(self) -> List[str]:
        """Fields shown to the user in the confirmation prompt."""
        return ["query"]

    def maybe_runtime(self, context: dict[str, Any] = {}):
        """Return the runtime registered under "PostgresTool" in ``context``, or None."""
        runtimes = context.get("runtimes", {})
        if len(runtimes) == 0:
            return None

        return runtimes.get("PostgresTool", None)
13 | ``` 14 | 15 | ## EXAMPLES 16 | 17 | ### Migrating to the latest version 18 | 19 | ```bash 20 | opsmate db-migrate 21 | ``` 22 | 23 | ### Migrating to a specific version 24 | 25 | ```bash 26 | opsmate db-migrate --revision 27 | ``` 28 | 29 | To list all the versions available, run `opsmate db-revisions`. 30 | 31 | ## SEE ALSO 32 | 33 | - [opsmate db-revisions](./db-revisions.md) 34 | - [opsmate db-rollback](./db-rollback.md) 35 | 36 | ## OPTIONS 37 | 38 | ``` 39 | Usage: opsmate db-migrate [OPTIONS] 40 | 41 | Apply migrations. 42 | 43 | Options: 44 | -r, --revision TEXT Revision to upgrade to [default: head] 45 | --help Show this message and exit. 46 | ``` 47 | -------------------------------------------------------------------------------- /docs/CLI/db-revisions.md: -------------------------------------------------------------------------------- 1 | `db-revisions` shows all the revisions of the opsmate database. 2 | 3 | ## OPTIONS 4 | 5 | ``` 6 | Usage: opsmate db-revisions [OPTIONS] 7 | 8 | List all the revisions available. 9 | 10 | Options: 11 | --help Show this message and exit. 12 | ``` 13 | 14 | ## SEE ALSO 15 | 16 | - [opsmate db-migrate](./db-migrate.md) 17 | - [opsmate db-rollback](./db-rollback.md) 18 | -------------------------------------------------------------------------------- /docs/CLI/db-rollback.md: -------------------------------------------------------------------------------- 1 | `db-rollback` is used to rollback the opsmate database to the previous version. 2 | 3 | ## OPTIONS 4 | 5 | ``` 6 | Usage: opsmate db-rollback [OPTIONS] 7 | 8 | Rollback migrations. 9 | 10 | Options: 11 | -r, --revision TEXT Revision to downgrade to [default: -1] 12 | --help Show this message and exit. 
13 | ``` 14 | 15 | ## EXAMPLES 16 | 17 | ```bash 18 | opsmate db-rollback 19 | ``` 20 | 21 | To rollback to a specific version, run: 22 | 23 | ```bash 24 | opsmate db-rollback --revision 25 | ``` 26 | 27 | To list all the versions available, run `opsmate db-revisions`. 28 | 29 | ## SEE ALSO 30 | 31 | - [opsmate db-revisions](./db-revisions.md) 32 | - [opsmate db-migrate](./db-migrate.md) 33 | -------------------------------------------------------------------------------- /docs/CLI/index.md: -------------------------------------------------------------------------------- 1 | # CLI 2 | 3 | This documentation highlights some of the most common use cases of Opsmate CLI tools. 4 | 5 | ## Natural Language CLI run 6 | 7 | One of the most simple use case of Opsmate is to run commands using natural language. This comes handy when you need to run a command that you don't know/remember the exact instruction. 8 | 9 | ```bash 10 | $ opsmate run "what's the gpu of the vm" 11 | Command 12 | # Check the GPU installed on the VM using lspci command and filter for VGA or compatible graphics device. 13 | lspci | grep -i 'vga\|3d\|2d' 14 | 15 | 16 | Output 17 | 04:00.0 VGA compatible controller: Red Hat, Inc. Virtio 1.0 GPU (rev 01) 18 | 19 | The VM is using a VGA compatible controller with a Red Hat, Inc. Virtio 1.0 GPU (rev 01). 20 | ``` 21 | 22 | ## Advanced reasoning 23 | A more advanced use case is to leverage Opsmate to perform reasoning and problem solving of production issues via using the `solve` command as you can see in the following example. Like a human SRE, Opsmate can make mistakes but with the advanced reasoning ability it can reflect on its mistakes and correct itself. 24 | 25 | ```bash 26 | opsmate solve "what's the k8s distro of the current context" 27 | 28 | Thought process 29 | Thought: To determine the Kubernetes distribution of the current context, I need to access the Kubernetes configuration and context details. 
30 | Action: Run the command kubectl version --short or check the Kubernetes configuration using kubectl config current-context to get information about the server and its version. 31 | 32 | ... 33 | 34 | 35 | Output 36 | error: unknown flag: --short 37 | See 'kubectl version --help' for usage. 38 | 39 | ... 40 | 41 | Thought: I need to run a valid command to get cluster details without the --short option. 42 | Action: Run kubectl version to get the full version details which might give us clues about the distribution in use. 43 | ... 44 | 45 | Answer: The Kubernetes distribution of the current context is K3s, as indicated by the +k3s1 suffix in the server version output from kubectl version. 46 | ``` 47 | 48 | ## Chat with Opsmate 49 | To have the human-in-the-loop experience you can run 50 | 51 | ```bash 52 | opsmate chat 53 | ``` 54 | 55 | ## API and Web UI 56 | 57 | To serve the Opsmate with a web interface and API you can run the following command: 58 | 59 | ```bash 60 | opsmate serve 61 | ``` 62 | 63 | You can access the web interface at [http://localhost:8080](http://localhost:8080). 64 | 65 | API documentation is available at [http://localhost:8080/api/docs](http://localhost:8080/api/docs). 66 | -------------------------------------------------------------------------------- /docs/CLI/install.md: -------------------------------------------------------------------------------- 1 | `opsmate install` installs Opsmate plugins as python packages. 2 | 3 | Currently there are two types python package based plugins that are supported: 4 | 5 | - `opsmate-provider-*`: These are the language model providers. 6 | - `opsmate-runtime-*`: These are the runtime environments that can be used to run the Opsmate. 7 | 8 | 9 | ## OPTIONS 10 | 11 | ``` 12 | Usage: opsmate install [OPTIONS] [PACKAGES]... 13 | 14 | Install the opsmate plugins. 
15 | 16 | Options: 17 | -U, --upgrade Upgrade the given packages to the latest version 18 | --force-reinstall Reinstall all packages even if they are already up-to- 19 | date 20 | -e, --editable TEXT Install a project in editable mode (i.e. setuptools 21 | "develop mode") from a local project path or a VCS url 22 | --no-cache-dir Disable the cache 23 | --help Show this message and exit. 24 | ``` 25 | 26 | ## SEE ALSO 27 | 28 | - [Add New LLM Providers](../configurations/add-new-llm-providers.md) 29 | - [Integrate with New Runtime](../configurations/integrate-with-new-runtime.md) 30 | - [Uninstall](./uninstall.md) 31 | -------------------------------------------------------------------------------- /docs/CLI/list-models.md: -------------------------------------------------------------------------------- 1 | `opsmate list-models` lists all the models available. 2 | 3 | Currently on Opsmate we cherry-pick the models that are suitable for performing SRE/DevOps oriented tasks that being said in the future we will look into supporting extra models through the plugin system. 4 | 5 | ## OPTIONS 6 | 7 | ``` 8 | Usage: opsmate list-models [OPTIONS] 9 | 10 | List all the models available. 11 | 12 | Options: 13 | --provider TEXT Provider to list the models for 14 | --help Show this message and exit. 
15 | ``` 16 | 17 | ## USAGE 18 | 19 | ```bash 20 | Models 21 | ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 22 | ┃ Provider ┃ Model ┃ 23 | ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 24 | │ openai │ gpt-4o │ 25 | ├───────────┼────────────────────────────┤ 26 | │ openai │ gpt-4o-mini │ 27 | ├───────────┼────────────────────────────┤ 28 | │ openai │ o1 │ 29 | ├───────────┼────────────────────────────┤ 30 | │ openai │ o3-mini │ 31 | ├───────────┼────────────────────────────┤ 32 | │ anthropic │ claude-3-5-sonnet-20241022 │ 33 | ├───────────┼────────────────────────────┤ 34 | │ anthropic │ claude-3-7-sonnet-20250219 │ 35 | ├───────────┼────────────────────────────┤ 36 | │ xai │ grok-2-1212 │ 37 | ├───────────┼────────────────────────────┤ 38 | │ xai │ grok-2-vision-1212 │ 39 | ├───────────┼────────────────────────────┤ 40 | │ xai │ grok-3-mini-fast-beta │ 41 | ├───────────┼────────────────────────────┤ 42 | │ xai │ grok-3-mini-beta │ 43 | ├───────────┼────────────────────────────┤ 44 | │ xai │ grok-3-fast-beta │ 45 | ├───────────┼────────────────────────────┤ 46 | │ xai │ grok-3-beta │ 47 | └───────────┴────────────────────────────┘ 48 | ``` 49 | 50 | ## SEE ALSO 51 | 52 | - [Add new LLM providers](../configurations/add-new-llm-providers.md) 53 | -------------------------------------------------------------------------------- /docs/CLI/list-runtimes.md: -------------------------------------------------------------------------------- 1 | `opsmate list-runtimes` lists all the runtimes available. 2 | 3 | ## OPTIONS 4 | 5 | ``` 6 | Usage: opsmate list-runtimes [OPTIONS] 7 | 8 | List all the runtimes available. 9 | 10 | Options: 11 | --help Show this message and exit. 12 | ``` 13 | 14 | ## USAGE 15 | 16 | The command below will list all the runtimes available to Opsmate. 
17 | 18 | ```bash 19 | opsmate list-runtimes 20 | Runtimes 21 | ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 22 | ┃ Name ┃ Description ┃ 23 | ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 24 | │ local │ Local runtime allows model to execute tool calls within the same namespace as the opsmate process. │ 25 | ├────────┼────────────────────────────────────────────────────────────────────────────────────────────────────┤ 26 | │ docker │ Docker runtime allows model to execute tool calls within a docker container. │ 27 | ├────────┼────────────────────────────────────────────────────────────────────────────────────────────────────┤ 28 | │ ssh │ SSH runtime allows model to execute tool calls on a remote server via SSH. │ 29 | └────────┴────────────────────────────────────────────────────────────────────────────────────────────────────┘ 30 | ``` 31 | -------------------------------------------------------------------------------- /docs/CLI/reset.md: -------------------------------------------------------------------------------- 1 | `opsmate reset` deletes all the data used by Opsmate. Note that it DOES NOT delete the plugins. 2 | 3 | ## OPTIONS 4 | 5 | ``` 6 | Usage: opsmate reset [OPTIONS] 7 | 8 | Reset the Opsmate database and embeddings db. Note that if the database is 9 | using litestream it will not be reset. Same applies to the embeddings db, if 10 | the embedding db is using GCS, S3 or Azure Blob Storage, it will not be 11 | reset. 12 | 13 | Options: 14 | --tools TEXT The tools to use for the session. Run 15 | `opsmate list-tools` to see the available 16 | tools. By default the tools from the context 17 | are used. 
(env: OPSMATE_TOOLS) [default: 18 | ""] 19 | --loglevel TEXT Set loglevel (env: OPSMATE_LOGLEVEL) 20 | [default: INFO] 21 | --categorise BOOLEAN Whether to categorise the embeddings (env: 22 | OPSMATE_CATEGORISE) [default: True] 23 | --reranker-name TEXT The name of the reranker model (env: 24 | OPSMATE_RERANKER_NAME) [default: ""] 25 | --embedding-model-name TEXT The name of the embedding model (env: 26 | OPSMATE_EMBEDDING_MODEL_NAME) [default: 27 | text-embedding-ada-002] 28 | --embedding-registry-name TEXT The name of the embedding registry (env: 29 | OPSMATE_EMBEDDING_REGISTRY_NAME) [default: 30 | openai] 31 | --embeddings-db-path TEXT The path to the lance db. When s3:// is used 32 | for AWS S3, az:// is used for Azure Blob 33 | Storage, and gs:// is used for Google Cloud 34 | Storage (env: OPSMATE_EMBEDDINGS_DB_PATH) 35 | [default: /root/.opsmate/embeddings] 36 | -c, --context TEXT The context to use for the session. Run 37 | `opsmate list-contexts` to see the available 38 | contexts. (env: OPSMATE_CONTEXT) [default: 39 | cli] 40 | --contexts-dir TEXT Set contexts_dir (env: OPSMATE_CONTEXTS_DIR) 41 | [default: /root/.opsmate/contexts] 42 | --plugins-dir TEXT Set plugins_dir (env: OPSMATE_PLUGINS_DIR) 43 | [default: /root/.opsmate/plugins] 44 | -m, --model TEXT The model to use for the session. Run 45 | `opsmate list-models` to see the available 46 | models. (env: OPSMATE_MODEL) [default: 47 | gpt-4o] 48 | --db-url TEXT Set db_url (env: OPSMATE_DB_URL) [default: 49 | sqlite:////root/.opsmate/opsmate.db] 50 | --skip-confirm Skip confirmation 51 | --help Show this message and exit. 52 | ``` 53 | 54 | ## EXAMPLES 55 | 56 | ### Reset the Opsmate 57 | 58 | This will reset the database and the vector store. You will be prompted to confirm the reset. 59 | 60 | ```bash 61 | opsmate reset 62 | ``` 63 | 64 | ### Reset the Opsmate without confirmation 65 | 66 | This will reset the databases without confirmation. 
67 | 68 | ```bash 69 | opsmate reset --skip-confirm 70 | ``` 71 | -------------------------------------------------------------------------------- /docs/CLI/uninstall.md: -------------------------------------------------------------------------------- 1 | `opsmate uninstall` uninstalls Opsmate plugins. 2 | 3 | ## OPTIONS 4 | 5 | ``` 6 | Usage: opsmate uninstall [OPTIONS] PACKAGES... 7 | 8 | Uninstall the given packages. 9 | 10 | Options: 11 | -y, --yes Do not prompt for confirmation 12 | --help Show this message and exit. 13 | ``` 14 | 15 | ## SEE ALSO 16 | 17 | - [Install](./install.md) 18 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | docs.tryopsmate.ai 2 | -------------------------------------------------------------------------------- /docs/configurations/OTel.md: -------------------------------------------------------------------------------- 1 | Opsmate provides built-in integration with [OpenTelemetry](https://opentelemetry.io/) for distributed tracing. This allows you to monitor and troubleshoot your application's performance and behavior. 
2 | 3 | ## Setup 4 | 5 | To enable OTel tracing, set the following environment variables: 6 | 7 | ```bash 8 | # Required: OTLP endpoint 9 | export OTEL_EXPORTER_OTLP_ENDPOINT=http://your-collector:4317 10 | 11 | # Optional: Protocol - defaults to HTTP if not specified 12 | export OTEL_EXPORTER_OTLP_PROTOCOL=grpc # or "http" 13 | 14 | # Optional: Service name - defaults to "opsmate" 15 | export SERVICE_NAME= 16 | 17 | # Optional: OTel header - typically for the purpose of breaer or basic auth 18 | export OTEL_EXPORTER_OTEL_HEADER= 19 | ``` 20 | 21 | Here is the official documentation for the OTel configuration: 22 | 23 | - [OTLP Exporter](https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/) 24 | 25 | 26 | After setting up, the following commands are OTel traced: 27 | 28 | - [opsmate run](../CLI/run.md) 29 | - [opsmate solve](../CLI/solve.md) 30 | - [opsmate chat](../CLI/chat.md) 31 | - [opsmate serve](../CLI/serve.md) 32 | - [opsmate worker](../CLI/worker.md) 33 | 34 | ## Automatic Instrumentation 35 | 36 | Out of the box, the following integrations are automatically instrumented: 37 | 38 | - OpenAI API and OpenAI compatible providers API calls 39 | - Anthropic API calls 40 | - SQLAlchemy database calls (when the database operations are performed) 41 | - Starlette HTTP requests (when running in server mode) 42 | 43 | ## Disable Tracing 44 | 45 | To disable tracing, set the following environment variable: 46 | 47 | ```bash 48 | export OPSMATE_DISABLE_OTEL=true 49 | 50 | # or 51 | 52 | unset OTEL_EXPORTER_OTLP_ENDPOINT 53 | ``` 54 | -------------------------------------------------------------------------------- /docs/configurations/advanced-knowledge-retrieval.md: -------------------------------------------------------------------------------- 1 | Opsmate out of box uses `openai/text-embedding-ada-002" for text embeddings, and no rerankers are being used during the retrieval. 
2 | 3 | That being said alternative embeddings and rerankers are available. This document outlines how to setup and use them. 4 | 5 | ## Embeddings 6 | 7 | Opsmate supports two types of embeddings: 8 | 9 | 1. OpenAI embeddings 10 | 2. Sentence Transformers embeddings 11 | 12 | ### OpenAI embeddings 13 | 14 | To explicitlyuse OpenAI embeddings, you need to set the following environment variables: 15 | 16 | - `OPSMATE_EMBEDDING_REGISTRY_NAME=openai` 17 | - `OPSMATE_EMBEDDING_MODEL_NAME=text-embedding-ada-002` 18 | 19 | ### Sentence Transformers embeddings 20 | 21 | By default the sentence transformers is not installed. To install it, run: 22 | 23 | === "pip" 24 | ```bash 25 | pip install -U opsmate[sentence-transformers] 26 | ``` 27 | 28 | === "pipx" 29 | ```bash 30 | pipx install opsmate[sentence-transformers] --force 31 | ``` 32 | 33 | Once installed, Opmsmate will automatically use the Sentence Transformers for embeddings. 34 | 35 | You can explicitly specify the Sentence Transformers embeddings by setting the following environment variables: 36 | 37 | - `OPSMATE_EMBEDDING_REGISTRY_NAME=sentence-transformers` 38 | - `OPSMATE_EMBEDDING_MODEL_NAME=BAAI/bge-small-en-v1.5` 39 | 40 | :warning: At the moment we do not officially support embedding models switch once the knowledge base is created. :warning: 41 | 42 | To switch between embedding models, you need to delete the existing knowledge base and re-ingest. 43 | 44 | ## Rerankers 45 | 46 | Opsmate supports the following rerankers: 47 | 48 | 1. RRF reranker 49 | 2. AnswerDotAI reranker 50 | 3. Cohere reranker 51 | 4. OpenAI reranker 52 | 53 | 54 | ### RRF reranker 55 | 56 | To use the RRF reranker, you need to set the following environment variables: 57 | 58 | - `OPSMATE_RERANKER_NAME=rrf` 59 | 60 | ### AnswerDotAI reranker 61 | 62 | Out of box, the AnswerDotAI reranker is not installed. 
To install it, run: 63 | 64 | === "pip" 65 | ```bash 66 | pip install -U opsmate[reranker-answerdotai] 67 | ``` 68 | 69 | === "pipx" 70 | ```bash 71 | pipx install opsmate[reranker-answerdotai] --force 72 | ``` 73 | 74 | To use the AnswerDotAI reranker, you need to set the following environment variables: 75 | 76 | - `OPSMATE_RERANKER_NAME=answerdotai` 77 | 78 | ### Cohere reranker 79 | 80 | Out of box, the Cohere reranker is not installed. To install it, run: 81 | 82 | === "pip" 83 | ```bash 84 | pip install -U opsmate[reranker-cohere] 85 | ``` 86 | 87 | === "pipx" 88 | ```bash 89 | pipx install opsmate[reranker-cohere] --force 90 | ``` 91 | 92 | To use the Cohere reranker, you need to set the following environment variables: 93 | 94 | - `OPSMATE_RERANKER_NAME=cohere` 95 | - `COHERE_API_KEY=` 96 | 97 | ### OpenAI reranker 98 | 99 | To use the OpenAI reranker, you need to set the following environment variables: 100 | 101 | - `OPSMATE_RERANKER_NAME=openai` 102 | - `OPENAI_API_KEY=` 103 | -------------------------------------------------------------------------------- /docs/configurations/llm-configurations.md: -------------------------------------------------------------------------------- 1 | ## Default LLM Model 2 | 3 | The default LLM model can be specified via `--model` or `-m` flag through the command line on the program startup. 4 | 5 | It can also be specified as `OPSMATE_MODEL` environment variable, e.g. 6 | 7 | ```bash 8 | export OPSMATE_MODEL="gpt-4.1" 9 | ``` 10 | 11 | Alternatively, you can also save the model configuration in the `~/.opsmate/config.yaml` file. 12 | 13 | ```yaml 14 | --- 15 | # ... 16 | OPSMATE_MODEL: claude-3-7-sonnet-20250219 17 | # ... 18 | ``` 19 | 20 | ## LLM Configuration 21 | 22 | There are more nuanced configurations for the LLM, such as temperatures, top_p, thinking budget, etc. 
23 | 24 | Here is the out of box configurations: 25 | 26 | ```yaml 27 | OPSMATE_MODELS_CONFIG: 28 | claude-3-7-sonnet-20250219: 29 | thinking: 30 | budget_tokens: 1024 31 | type: enabled 32 | grok-3-mini-beta: 33 | reasoning_effort: medium 34 | tool_call_model: grok-3-beta 35 | grok-3-mini-fast-beta: 36 | reasoning_effort: medium 37 | tool_call_model: grok-3-beta 38 | o1: 39 | reasoning_effort: medium 40 | tool_call_model: gpt-4.1 41 | o3: 42 | reasoning_effort: medium 43 | tool_call_model: gpt-4.1 44 | o3-mini: 45 | reasoning_effort: medium 46 | tool_call_model: gpt-4.1 47 | o4-mini: 48 | reasoning_effort: medium 49 | tool_call_model: gpt-4.1 50 | ``` 51 | 52 | Note that in the configuration above, we use tool call models as the supplemental model for the reasoning models, this is because while reasoning models are capable of reasoning, in many use cases they are not very effective at tool calling. 53 | 54 | To override the default configurations, you can copy and paste the above configurations to your `~/.opsmate/config.yaml` file, and override the specific configurations you want to change. 55 | 56 | ## Claude 3.7 Sonnet for Thinking 57 | 58 | The `claude-3-7-sonnet-20250219` model is a powerful reasoning model with `thinking` enabled. To enable you can add the following configuration to your `~/.opsmate/config.yaml` file: 59 | 60 | ```yaml 61 | OPSMATE_MODELS_CONFIG: 62 | claude-3-7-sonnet-20250219: 63 | thinking: 64 | budget_tokens: 1024 65 | type: enabled 66 | # ... 67 | ``` 68 | -------------------------------------------------------------------------------- /docs/configurations/use-cloud-storage-for-embeddings-storage.md: -------------------------------------------------------------------------------- 1 | Opsmate uses [LanceDB](https://lancedb.github.io/lancedb/) to store knowledge bases. By default we store the knowledge base in the local filesystem, default at `~/.opsmate/embeddings`, and configures as `OPSMATE_EMBEDDINGS_DB_PATH`. 
2 | 3 | The full pros and cons of storage considerations are covered in the [LanceDB Storage `documentation](https://lancedb.github.io/lancedb/concepts/storage). 4 | 5 | Currently In addition to local filesystem, Opsmate officially supports AWS S3 and Azure Blob Storage based cloud storage. That being said we expect other approaches suggested by LanceDB to work as well. 6 | 7 | 8 | ## Prerequisites 9 | 10 | - You must have already provisioned the cloud storage bucket. 11 | - You must have read-only+ access to the cloud storage bucket. 12 | 13 | ## How to use cloud storage for embeddings storage 14 | 15 | === "Environment Variable" 16 | Simply set the `OPSMATE_EMBEDDINGS_DB_PATH` environment variable to the cloud storage path. 17 | ```bash 18 | # AWS S3 19 | OPSMATE_EMBEDDINGS_DB_PATH=s3://bucket/path 20 | # Azure Blob Storage 21 | OPSMATE_EMBEDDINGS_DB_PATH=az://bucket/path 22 | # Google Cloud Storage 23 | OPSMATE_EMBEDDINGS_DB_PATH=gs://bucket/path 24 | ``` 25 | 26 | === "CLI" 27 | Use the `--embeddings-db-path` flag to specify the cloud storage path. 28 | ```bash 29 | # AWS S3 30 | opsmate ingest --embeddings-db-path=s3://bucket/path 31 | # Azure Blob Storage 32 | opsmate ingest --embeddings-db-path=az://bucket/path 33 | # Google Cloud Storage 34 | opsmate ingest --embeddings-db-path=gs://bucket/path 35 | ``` 36 | 37 | === "Config File" 38 | Alternatively you can also set the config in `~/.opsmate/config.yaml`: 39 | ```yaml 40 | # AWS S3 41 | OPSMATE_EMBEDDINGS_DB_PATH: s3://bucket/path 42 | # Azure Blob Storage 43 | OPSMATE_EMBEDDINGS_DB_PATH: az://bucket/path 44 | # Google Cloud Storage 45 | OPSMATE_EMBEDDINGS_DB_PATH: gs://bucket/path 46 | ``` 47 | 48 | Please refer to the [LanceDB Configure Cloud Storage](https://lancedb.github.io/lancedb/guides/storage/) for more details. 
49 | -------------------------------------------------------------------------------- /docs/cookbooks/docker-runtime.md: -------------------------------------------------------------------------------- 1 | This cookbook will guide you through how to interact with Docker container using Opsmate's docker runtime. 2 | 3 | ## Prerequisites 4 | 5 | - Docker installed on your machine 6 | - Opsmate installed on your machine 7 | 8 | ## Example 1: Interact with a pre-existing docker container 9 | 10 | First thing first let's create a docker container running in the background. 11 | 12 | ```bash 13 | docker run -d --name testbox --rm ubuntu:20.04 sleep infinity 14 | ``` 15 | 16 | Now with the container running, we can interact with it using Opsmate's docker runtime. 17 | 18 | ```bash 19 | # -nt only prints out the answer 20 | $ opsmate run -nt --shell-command-runtime docker --runtime-docker-container-name testbox "what is the os distro" 21 | The OS distribution is Ubuntu 20.04.6 LTS (Focal Fossa). 22 | ``` 23 | 24 | You can also use [solve](../CLI/solve.md) and [chat](../CLI/chat.md) to interact with the container. 25 | 26 | ## Example 2: Interact with a docker container from docker-compose 27 | 28 | [Docker Compose](https://docs.docker.com/compose/) is a tool for defining and running multi-container Docker applications. In conjunction with Opsmate's docker runtime, you can achieve goals such as: 29 | 30 | - Executing exploratory experiments within a containerised environment. 31 | - Use the containerised runtime as a workstation powered by AI, such as the [three-musketeers](https://3musketeers.pages.dev/) approach 32 | - You need to use a containerised runtime to run complicated evaluation tasks, which otherwise is not feasible to run on your host space. 
33 | 34 | Let's say we have the following `docker-compose.yml` file: 35 | 36 | ```yaml 37 | services: 38 | default: 39 | image: ubuntu:24.04 40 | init: true 41 | entrypoint: ["sleep", "infinity"] 42 | redis: 43 | image: redis:latest 44 | ``` 45 | 46 | To interact with the environment you can run: 47 | 48 | ```bash 49 | opsmate chat --shell-command-runtime docker 50 | ``` 51 | 52 | By default it will auto detect the `docker-compose.yml` file in the current directory, and use the `default` service as the container to interact with. 53 | 54 | You can also specify the `docker-compose.yml` file and the service you want to interact with: 55 | 56 | ```bash 57 | # investigate the redis service 58 | opsmate solve \ 59 | --runtime docker \ 60 | --runtime-docker-compose-file ./docker-compose.yml \ 61 | --runtime-docker-service-name redis \ 62 | "what are the name of the processes that are running, find it out using the /proc directory" 63 | ``` 64 | 65 | {{ asciinema("/assets/docker-compose-runtime.cast") }} 66 | 67 | Here are some of the common configuration options for the docker runtime: 68 | 69 | ```bash 70 | --runtime-docker-service-name TEXT 71 | --runtime-docker-compose-file TEXT 72 | Path to the docker compose file (env: 73 | docker-compose.yml] 74 | --runtime-docker-shell TEXT Set shell_cmd (env: RUNTIME_DOCKER_SHELL) 75 | --runtime-docker-container-name TEXT 76 | ``` 77 | 78 | ## See Also 79 | 80 | - [Kubernetes Runtime](k8s-runtime.md) 81 | - [SSH Runtime](manage-vms.md) 82 | -------------------------------------------------------------------------------- /docs/cookbooks/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Comprehensive Opsmate Cookbook Collection 3 | description: Explore diverse use case of Opsmate with cookbook examples. 4 | --- 5 | 6 | ## Cookbooks 7 | 8 | Welcome to the Opsmate Cookbook Collection. 
Here you will find a variety of examples and recipes to help you to use Opsmate effectively. 9 | 10 | ## Links 11 | 12 | - [Use Opsmate for Automation](automation-using-python-runtime.ipynb) 13 | - [Use Opsmate to Manage VMs](manage-vms.md) 14 | - [Plugin System](plugin-system.ipynb) 15 | - [5 Levels of Workflow Orchestration](5-levels-of-workflow-orchestration.ipynb) 16 | - [Knowledge Management](knowledge-management.ipynb) 17 | - [Docker Runtime](docker-runtime.md) 18 | - [Kubernetes Runtime](k8s-runtime.md) 19 | - [Interacting with MySQL using Opsmate](interacting-with-mysql-using-opsmate.md) 20 | -------------------------------------------------------------------------------- /docs/cookbooks/k8s-runtime.md: -------------------------------------------------------------------------------- 1 | This cookbook demonstrates how to use the Kubernetes runtime to interact with a Kubernetes pod. 2 | 3 | ## Prerequisites 4 | 5 | - A Kubernetes cluster 6 | - Opsmate installed on your machine 7 | 8 | 9 | ## Interact with a pre-existing Kubernetes pod 10 | 11 | First let's create a pod in the Kubernetes cluster. 12 | 13 | ```bash 14 | kubectl run -i --tty --rm debug --image=alpine -- sh 15 | ``` 16 | 17 | Now that we have a pod running, we can interact with it using Opsmate's Kubernetes runtime. 18 | 19 | ```bash 20 | opsmate run -nt --runtime 21 | k8s --runtime-k8s-pod debug "what's the distro of this container?" --tools ShellCommand 22 | The container is running Alpine Linux, version 3.21.3. 
23 | ``` 24 | 25 | Here are some of the common configuration options for the Kubernetes runtime: 26 | 27 | ```bash 28 | --runtime-k8s-shell TEXT Set shell_cmd (env: RUNTIME_K8S_SHELL) 29 | --runtime-k8s-container TEXT Name of the container of the pod, if not 30 | --runtime-k8s-pod TEXT Set pod_name (env: RUNTIME_K8S_POD) 31 | --runtime-k8s-namespace TEXT Set namespace (env: RUNTIME_K8S_NAMESPACE) 32 | ``` 33 | 34 | ## See Also 35 | 36 | - [Docker Runtime](docker-runtime.md) 37 | - [SSH Runtime](manage-vms.md) 38 | -------------------------------------------------------------------------------- /docs/cookbooks/manage-vms.md: -------------------------------------------------------------------------------- 1 | # Manage VMs via SSH 2 | 3 | In this cookbook we will demonstrate how to manage VMs using Opsmate. 4 | 5 | By default Opsmate runs shell commands in the same namespace as the opsmate process, but it also provides a `ssh` runtime that allows you to manage VMs using SSH. This is particularly useful when the virtual machine (VM) is: 6 | 7 | - not accessible via the internet or running in an air-gapped network. 8 | - cannot directly access the large language model (LLM) provider. 9 | - a legacy system that cannot accommodate the runtime requirements of Opsmate (e.g. python 3.10+). 10 | 11 | 12 | ## Prerequisites 13 | 14 | - A VM instance 15 | - Opsmate CLI 16 | 17 | ## How to use the SSH runtime 18 | 19 | The remote runtime is available to `run`, `solve` and `chat` commands. 20 | 21 | Here is an example of how you can `chat` with a remote VM. 22 | 23 | ```bash 24 | opsmate chat --shell-command-runtime ssh \ 25 | --runtime-ssh-host \ 26 | --runtime-ssh-username 27 | ``` 28 | 29 | The following asciinema demo shows how to use the SSH runtime to "chat" with a remote VM. 
30 | 31 | {{ asciinema("/assets/ssh-runtime.cast") }} 32 | 33 | Here are some of the common configuration options for the SSH runtime: 34 | 35 | ```bash 36 | --runtime-ssh-connect-retries INTEGER 37 | Set connect_retries (env: 38 | RUNTIME_SSH_CONNECT_RETRIES) [default: 3] 39 | --runtime-ssh-timeout INTEGER Set timeout (env: RUNTIME_SSH_TIMEOUT) 40 | [default: 10] 41 | --runtime-ssh-shell TEXT Set shell_cmd (env: RUNTIME_SSH_SHELL) 42 | [default: /bin/bash] 43 | --runtime-ssh-key-file TEXT Set key_file (env: RUNTIME_SSH_KEY_FILE) 44 | --runtime-ssh-password TEXT Set password (env: RUNTIME_SSH_PASSWORD) 45 | --runtime-ssh-username TEXT Set username (env: RUNTIME_SSH_USERNAME) 46 | [default: ""] 47 | --runtime-ssh-port INTEGER Set port (env: RUNTIME_SSH_PORT) [default: 48 | 22] 49 | --runtime-ssh-host TEXT Set host (env: RUNTIME_SSH_HOST) [default: 50 | ""] 51 | ``` 52 | -------------------------------------------------------------------------------- /docs/development.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | ## Install dependencies 4 | 5 | ```bash 6 | uv sync 7 | ``` 8 | 9 | ## Running tests 10 | 11 | ```bash 12 | uv run pytest ./opsmate/tests -n auto 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/how-to-o11y-with-tempo.md: -------------------------------------------------------------------------------- 1 | # How to setup observability with Tempo 2 | 3 | To run Grafana and Tempo locally: 4 | 5 | ```bash 6 | ( 7 | cd tempo 8 | docker compose up -d 9 | ) 10 | ``` 11 | 12 | To run Opsmate with tracing enabled: 13 | 14 | ```bash 15 | opsmate chat 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Opsmate, The AI SRE teammate to free you from the toils of production engineering. 
2 | 3 | Opsmate is an LLM-powered SRE copilot for understanding and solving production problems. By encoding expert troubleshooting patterns and operational knowledge, Opsmate lets users describe problem statements and intentions in natural language, eliminating the need to memorise complex command line or domain-specific tool syntax. 4 | 5 | Opsmate can not only perform problem solving autonomously, but also allow human operators to provide feedback and take over the control when needed. It accelerates incident response, reduces mean time to repair (MTTR), and empowers teams to focus on solving problems rather than wrestling with tooling. 6 | 7 | ## Getting Started 8 | 9 | You can start using Opsmate by running it locally on your workstation. There are several ways to install Opsmate on your workstation: 10 | 11 | 12 | 13 | === "pip" 14 | ```bash 15 | pip install -U opsmate 16 | ``` 17 | 18 | === "pipx" 19 | ```bash 20 | pipx install opsmate 21 | # or 22 | pipx upgrade opsmate 23 | ``` 24 | === "uvx" 25 | ```bash 26 | uvx opsmate [OPTIONS] COMMAND [ARGS]... 27 | ``` 28 | 29 | === "Docker" 30 | ```bash 31 | # Note this is less useful as you cannot access the host from the container 32 | # But still useful to interact with cloud API in an isolated containerised environment 33 | docker pull ghcr.io/opsmate-ai/opsmate:latest # or the specific version if you prefer not living on the edge 34 | alias opsmate="docker run -it --rm --env OPENAI_API_KEY=$OPENAI_API_KEY -v $HOME/.opsmate:/root/.opsmate ghcr.io/opsmate-ai/opsmate:latest" 35 | ``` 36 | 37 | === "Source" 38 | ```bash 39 | git clone git@github.com:opsmate-ai/opsmate.git 40 | cd opsmate 41 | 42 | uv build 43 | 44 | pipx install ./dist/opsmate-*.whl 45 | ``` 46 | 47 | Note that the Opsmate is powered by large language models. 
At the moment it supports 48 | 49 | * [OpenAI](https://platform.openai.com/api-keys) 50 | * [Anthropic](https://console.anthropic.com/settings/keys) 51 | * [xAI](https://x.ai/api) 52 | 53 | To use Opsmate, you need to set any one of the `OPENAI_API_KEY`, `ANTHROPIC_API_KEY` or `XAI_API_KEY` environment variables. 54 | 55 | ```bash 56 | export OPENAI_API_KEY="sk-proj..." 57 | export ANTHROPIC_API_KEY="sk-ant-api03-..." 58 | export XAI_API_KEY="xai-..." 59 | ``` 60 | 61 | ## Quick Start 62 | 63 | Run `opsmate run "what's the distro of the os"` to get the OS distribution of the host. 64 | 65 | Run `opsmate solve "resolve the high cpu usage on the server" --review` to solve the problem step by step and review the solution with a human in the loop. 66 | 67 | Run `opsmate chat --review` to chat with Opsmate. 68 | 69 | Run `opsmate serve` to launch a notebook interface for Opsmate. 70 | 71 | ## Documentation 72 | 73 | - [CLI Reference](./CLI/index.md) for simple command usage. 74 | - [LLM Providers](./providers/index.md) for LLM provider configuration. 75 | - [Tools](./tools/index.md) for tool usage. 76 | - [Integrations](./configurations/add-new-llm-providers.md) and [Cookbooks](./cookbooks/index.md) for advanced usages. 77 | - [Production](production.md) for deploying production-grade Opsmate beyond local workstation usage. 
78 | -------------------------------------------------------------------------------- /docs/macros.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | 4 | def value_str(value): 5 | if isinstance(value, str): 6 | return f'"{value}"' 7 | s = str(value) 8 | if isinstance(value, bool): 9 | s = s.lower() 10 | return s 11 | 12 | 13 | def define_env(env): 14 | """ 15 | This is the hook for defining variables, macros and filters 16 | """ 17 | 18 | @env.macro 19 | def asciinema(file, **kwargs): 20 | html = "" 21 | opts = { 22 | "autoPlay": True, 23 | "controls": True, 24 | "loop": True, 25 | "speed": 1.5, 26 | "theme": "asciinema", 27 | "rows": 24, 28 | } 29 | 30 | # Overwrite defaults with kwargs 31 | for key, value in kwargs.items(): 32 | opts[key] = value 33 | 34 | # Create an empty div that we will use for the player 35 | div_id = "asciinema-" + str(uuid.uuid4()) 36 | div_style = "z-index: 1; position: relative;" 37 | html += '
' 38 | 39 | # Define JS representing creating the player 40 | create_player_js = "" 41 | create_player_js += ( 42 | "AsciinemaPlayer.create('" 43 | + file 44 | + "', document.getElementById('" 45 | + div_id 46 | + "'), {" 47 | ) 48 | for key, value in opts.items(): 49 | # create_player_js += '"' + key + '": ' + value_str(value) + "," 50 | create_player_js += f'"{key}": {value_str(value)},' 51 | create_player_js += "});" 52 | 53 | # Create script tag that will perform cast by either registering for the DOM to 54 | # load or firing immediately if already loaded 55 | html += "" 64 | 65 | return html 66 | -------------------------------------------------------------------------------- /docs/providers/anthropic.md: -------------------------------------------------------------------------------- 1 | [Anthropic](https://www.anthropic.com/) is a large language model provider that, based on the vibe and evaluation metrics by far provides the best results. 2 | 3 | ## Configuration 4 | 5 | Anthropic API key is required to use Anthropic models. You can set the API key using the `ANTHROPIC_API_KEY` environment variable. 6 | 7 | ```bash 8 | export ANTHROPIC_API_KEY= 9 | ``` 10 | 11 | Like OpenAI we only support select models from Anthropic which produces reasonably good results. 12 | 13 | To find all the models supported by Anthropic, you can run: 14 | 15 | ```bash 16 | opsmate list-models --provider anthropic 17 | ``` 18 | 19 | ## Usage 20 | 21 | You can specify the `-m` or `--model` option for the `run`, `solve`, and `chat` commands. 22 | 23 | ```bash 24 | opsmate run -m claude-3-5-sonnet-20241022 "What is the OS?" 25 | 26 | # use claude-3-opus-20240229 27 | opsmate run -m claude-3-7-sonnet-20250219 "What is the OS?" 
28 | ``` 29 | 30 | ## See also 31 | 32 | - [run](../CLI/run.md) 33 | - [solve](../CLI/solve.md) 34 | - [chat](../CLI/chat.md) 35 | - [serve](../CLI/serve.md) 36 | - [list-models](../CLI/list-models.md) 37 | -------------------------------------------------------------------------------- /docs/providers/fireworks-ai.md: -------------------------------------------------------------------------------- 1 | [Fireworks AI](https://www.fireworks.ai/) is another LLM inference provider that supports a wide range of models. Notably it supports models such as deepseek and llama that come with 400B+ parameters at affordable prices. 2 | 3 | ## Installation 4 | 5 | Fireworks AI is not installed by default in Opsmate. You can install it using the following command: 6 | 7 | ```bash 8 | opsmate install opsmate-provider-fireworks 9 | ``` 10 | 11 | ## Configuration 12 | 13 | Fireworks AI API key is required to use Fireworks AI models. You can set the API key using the `FIREWORKS_API_KEY` environment variable. 14 | 15 | ```bash 16 | export FIREWORKS_API_KEY= 17 | 18 | # You can also proxy the API calls to an alternative endpoint 19 | export FIREWORKS_BASE_URL= 20 | ``` 21 | 22 | To find all the models supported by Fireworks AI, you can run: 23 | 24 | ```bash 25 | opsmate list-models --provider fireworks 26 | ``` 27 | 28 | ## Usage 29 | 30 | You can specify the `-m` or `--model` option for the `run`, `solve`, and `chat` commands. 31 | 32 | ```bash 33 | # deepseek-v3-0324 comes with 671B parameters 34 | opsmate run -m accounts/fireworks/models/deepseek-v3-0324 "What is the OS?" 
35 | ``` 36 | 37 | ## See also 38 | 39 | - [run](../CLI/run.md) 40 | - [solve](../CLI/solve.md) 41 | - [chat](../CLI/chat.md) 42 | - [serve](../CLI/serve.md) 43 | -------------------------------------------------------------------------------- /docs/providers/google-genai.md: -------------------------------------------------------------------------------- 1 | [Google GenAI](https://cloud.google.com/vertex-ai/generative-ai) supports a wide range of state-of-the-art generative AI models hosted on Google's advanced, global infrastructure. 2 | 3 | ## Installation 4 | 5 | ```bash 6 | opsmate install opsmate-provider-google-genai 7 | ``` 8 | 9 | After installation you can list all the models via 10 | 11 | ```bash 12 | $ opsmate list-models --provider google-genai 13 | Models 14 | ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 15 | ┃ Provider ┃ Model ┃ 16 | ┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 17 | │ google-genai │ gemini-2.5-pro-preview-03-25 │ 18 | ├──────────────┼──────────────────────────────┤ 19 | │ google-genai │ gemini-2.5-pro-exp-03-25 │ 20 | ├──────────────┼──────────────────────────────┤ 21 | │ google-genai │ gemini-2.0-flash-001 │ 22 | ├──────────────┼──────────────────────────────┤ 23 | │ google-genai │ gemini-2.0-flash-lite │ 24 | └──────────────┴──────────────────────────────┘ 25 | ``` 26 | 27 | 28 | 29 | ## Limitations 30 | 31 | ### Only Vertex AI models are supported 32 | :warning: This provider currently does not support [Gemini API](https://ai.google.dev/gemini-api/docs/api-key), because the Gemini API [does not support `default` value](https://github.com/googleapis/python-genai/blob/edf6ee359fdce14d03e1e2c7b2dc50fa5b0fdee3/google/genai/_transformers.py#L653-L657) in the response schema. 33 | 34 | As the result, currently only [vertex AI](https://cloud.google.com/vertex-ai) models are supported, meaning **you need to have a Google Cloud account in order to use this provider**. 
35 | 36 | ### Limited region support for gemini-2.5-pro 37 | 38 | By the time the provider is published, the `gemini-2.5-pro-preview-03-25` and `gemini-2.5-pro-exp-03-25` models are only available in the `us-central1` region. To use it you will need to set `GOOGLE_CLOUD_LOCATION` as below: 39 | 40 | ```bash 41 | export GOOGLE_CLOUD_LOCATION=us-central1 42 | 43 | # or 44 | export GOOGLE_CLOUD_LOCATION=global 45 | ``` 46 | 47 | ## Usage 48 | 49 | ```bash 50 | export GOOGLE_CLOUD_PROJECT= 51 | export GOOGLE_CLOUD_LOCATION= 52 | 53 | opsmate chat -m gemini-2.0-flash-001 54 | ``` 55 | 56 | ## Uninstall 57 | 58 | ```bash 59 | opsmate uninstall -y opsmate-provider-google-genai 60 | ``` 61 | -------------------------------------------------------------------------------- /docs/providers/groq.md: -------------------------------------------------------------------------------- 1 | [Groq](https://groq.com/) provides fast inference with affordable prices. It supports a wide range of open-weight models. 2 | 3 | ## Installation 4 | 5 | Groq is not installed by default in Opsmate. You can install it using the following command: 6 | 7 | ```bash 8 | opsmate install opsmate-provider-groq 9 | ``` 10 | 11 | ## Configuration 12 | 13 | Groq API key is required to use Groq models. You can set the API key using the `GROQ_API_KEY` environment variable. 14 | 15 | ```bash 16 | export GROQ_API_KEY= 17 | ``` 18 | 19 | To find all the models supported by Groq, you can run: 20 | 21 | ```bash 22 | opsmate list-models --provider groq 23 | ``` 24 | 25 | ## Usage 26 | 27 | You can specify the `-m` or `--model` option for the `run`, `solve`, and `chat` commands. 28 | 29 | ```bash 30 | opsmate run -m llama-3.3-70b-versatile "What is the OS?" 
31 | ``` 32 | 33 | ## See also 34 | 35 | - [run](../CLI/run.md) 36 | - [solve](../CLI/solve.md) 37 | - [chat](../CLI/chat.md) 38 | - [serve](../CLI/serve.md) 39 | - [list-models](../CLI/list-models.md) 40 | -------------------------------------------------------------------------------- /docs/providers/index.md: -------------------------------------------------------------------------------- 1 | ## AI Providers 2 | 3 | Opsmate supports a variety of LLM providers. You can use the `opsmate list-models` command to see the models supported by your provider. 4 | 5 | ## Links 6 | 7 | - [OpenAI](openai.md) 8 | - [Anthropic](anthropic.md) 9 | - [XAI](xai.md) 10 | - [Groq](groq.md) 11 | - [Fireworks AI](fireworks-ai.md) 12 | -------------------------------------------------------------------------------- /docs/providers/ollama.md: -------------------------------------------------------------------------------- 1 | [Ollama](https://ollama.com/) is a popular choice for running LLMs locally. 2 | 3 | ## Prerequisites 4 | 5 | * You already have Ollama [installed on your machine](https://ollama.com/download). 6 | * Ollama is up and running on your machine. 7 | 8 | ## Caution 9 | 10 | Currently we have only experimented with Ollama on Apple Silicon with a bunch of 7b - 12b parameter models. `gemma3:12b` so far is the only model that can produce barely acceptable results, and it's way behind in terms of quality and latency compared to frontier models. 11 | 12 | That being said we encourage you to give it a try and [report any issues](https://github.com/opsmate-ai/opsmate/issues), especially for those who have 128GB+ vRAM to run bigger models. 13 | 14 | ## Usage 15 | 16 | Assuming you have the `gemma3:12b` model pulled via `ollama pull gemma3:12b`, you can run it with: 17 | ```bash 18 | opsmate run --context cli-lite -m gemma3:12b "how many cores on the machine" 19 | ``` 20 | 21 | We strongly recommend using the `cli-lite` context for running 7b - 12b parameter small models. 
You can find the prompt of the `cli-lite` context in [cli_lite.py](https://github.com/opsmate-ai/opsmate/blob/main/opsmate/contexts/cli_lite.py). 22 | 23 | To find all the ollama models you can run: 24 | 25 | ```bash 26 | opsmate list-models --provider ollama 27 | ``` 28 | 29 | Behind the scenes it fetches the list of models from `http://localhost:11434/v1/models`. 30 | 31 | If you have a remote ollama server, you can point to the remote server with: 32 | 33 | ```bash 34 | # by default it's http://localhost:11434/v1 35 | export OLLAMA_BASE_URL=http://$YOUR_REMOTE_SERVER:11434/v1 36 | ``` 37 | 38 | ## Further Exploration 39 | 40 | The `cli-lite` context is far from optimal. To test your own prompt, you can create your own context inside the `~/.opsmate/contexts` directory. The contexts in the directory will be loaded automatically by opsmate on startup. 41 | -------------------------------------------------------------------------------- /docs/providers/openai.md: -------------------------------------------------------------------------------- 1 | By default Opsmate uses [OpenAI](https://openai.com/) as the LLM provider, and `gpt-4o` as the default model. 2 | 3 | To find all the models supported by OpenAI, you can run: 4 | 5 | ```bash 6 | opsmate list-models --provider openai 7 | ``` 8 | 9 | At the moment we only support select models from OpenAI which produce reasonably good results. 10 | 11 | ## Configuration 12 | 13 | OpenAI API key is required to use OpenAI models. You can set the API key using the `OPENAI_API_KEY` environment variable. 14 | 15 | ```bash 16 | export OPENAI_API_KEY= 17 | ``` 18 | 19 | If your request goes through a proxy, you can set the `OPENAI_BASE_URL` environment variable to the proxy URL. 20 | 21 | ```bash 22 | export OPENAI_BASE_URL= 23 | ``` 24 | 25 | Other configuration options such as `OPENAI_PROJECT_ID` and `OPENAI_ORG_ID` can be set using the environment variables. They will be picked up by the OpenAI SDK used by Opsmate. 
26 | 27 | ```bash 28 | export OPENAI_PROJECT_ID= 29 | export OPENAI_ORG_ID= 30 | ``` 31 | 32 | ## Usage 33 | 34 | You can specify the `-m` or `--model` option for the `run`, `solve`, and `chat` commands. 35 | 36 | ```bash 37 | # gpt-4o is the default model 38 | opsmate run -m gpt-4o "What is the OS?" 39 | 40 | # use gpt-4o-mini 41 | opsmate run -m gpt-4o-mini "What is the OS?" 42 | ``` 43 | 44 | ## See also 45 | 46 | - [run](../CLI/run.md) 47 | - [solve](../CLI/solve.md) 48 | - [chat](../CLI/chat.md) 49 | - [serve](../CLI/serve.md) 50 | - [list-models](../CLI/list-models.md) 51 | -------------------------------------------------------------------------------- /docs/providers/xai.md: -------------------------------------------------------------------------------- 1 | We also support [xAI](https://x.ai/) as a provider. 2 | 3 | ## Configuration 4 | 5 | xAI API key is required to use xAI models. You can set the API key using the `XAI_API_KEY` environment variable. 6 | 7 | ```bash 8 | export XAI_API_KEY= 9 | ``` 10 | 11 | To find all the models supported by xAI, you can run: 12 | 13 | ```bash 14 | opsmate list-models --provider xai 15 | ``` 16 | 17 | ## Usage 18 | 19 | You can specify the `-m` or `--model` option for the `run`, `solve`, and `chat` commands. 20 | 21 | ```bash 22 | opsmate run -m grok-2-1212 "What is the OS?" 23 | ``` 24 | 25 | ## See also 26 | 27 | - [run](../CLI/run.md) 28 | - [solve](../CLI/solve.md) 29 | - [chat](../CLI/chat.md) 30 | - [serve](../CLI/serve.md) 31 | - [list-models](../CLI/list-models.md) 32 | -------------------------------------------------------------------------------- /docs/tools/index.md: -------------------------------------------------------------------------------- 1 | Out of the box, Opsmate provides diverse tools for performing different tasks. 2 | 3 | You can find the tools available to you via running [list-tools](../CLI/list-tools.md) command. 
4 | 5 | ```bash 6 | opsmate list-tools 7 | ``` 8 | -------------------------------------------------------------------------------- /docs/tools/loki.md: -------------------------------------------------------------------------------- 1 | [Grafana Loki](https://grafana.com/oss/loki/) is a horizontally scalable, highly available, multi-tenant log aggregation system inspired by Prometheus. It is designed to be very cost effective and easy to operate. It does not index the contents of the logs, but rather a set of labels for each log stream. 2 | 3 | Opsmate offers `LokiQueryTool` to query logs in loki via natural language. 4 | 5 | :warning: This is a highly experimental tool and the API is subject to change. 6 | ## Prerequisites 7 | 8 | * You have your system logs pushed to loki 9 | * You have access to the loki api 10 | 11 | ## Setup 12 | 13 | `LokiQueryTool` is out of box supported by Opsmate without plugin installation. 14 | 15 | Here are the default configuration for the tool: 16 | 17 | ```bash 18 | LOKI_ENDPOINT=http://localhost:3100 19 | LOKI_PATH=/api/v1/query_range 20 | # Optional: LOKI_USER_ID 21 | # Optional: LOKI_API_KEY 22 | ``` 23 | 24 | You can also override the default configuration by setting the environment variables. In the example below we point Opsmate to a loki instance deployed within the Grafana Cloud: 25 | 26 | ```bash 27 | LOKI_ENDPOINT=https://logs-prod-eu-west-0.grafana.net/loki 28 | LOKI_USER_ID=xxxx 29 | LOKI_API_KEY=glc_xxx 30 | ``` 31 | 32 | To use the tool you can specify `LokiQueryTool` as part of the `--tools` option when running `opsmate run`, `opsmate solve`, `opsmate chat` or `opsmate serve`: 33 | 34 | ```bash 35 | opsmate run --tools LokiQueryTool,OtherTool ... 
36 | ``` 37 | 38 | Alternatively you can add the tool in `~/.opsmate/config.yaml` via: 39 | 40 | ```yaml 41 | OPSMATE_TOOLS: 42 | - LokiQueryTool 43 | - OtherTool 44 | ``` 45 | 46 | Once the tool is added to the config, Opsmate will prioritise using Loki for query logs over other tools. 47 | 48 | ## Current Limitations 49 | 50 | * The Loki Tool at the moment is Kubernetes centric meaning it can only query based on the `namespace`, `pod` and `container` labels. 51 | * The tool at the moment only support `logfmt` and `json` for effective log parsing. 52 | -------------------------------------------------------------------------------- /docs/tools/mysql.md: -------------------------------------------------------------------------------- 1 | MySQLTool is a tool that allows you to interact with MySQL databases. 2 | 3 | ## Installation 4 | 5 | The MySQLTool is not pre-installed with Opsmate. You need to install it explicitly: 6 | 7 | ```bash 8 | opsmate install opsmate-tools-mysql 9 | ``` 10 | 11 | Once installed, the tool will be autodiscovered by Opsmate on startup. 
To verify this you can run the following commands: 12 | 13 | ```bash 14 | opsmate list-tools | grep -i mysql 15 | │ MySQLTool │ MySQL tool 16 | ``` 17 | 18 | The command line options will be added to the `opsmate [run|solve|chat|serve]` commands: 19 | 20 | ```bash 21 | # to verify the mysql runtime is autodiscovered 22 | opsmate chat --help | grep -i mysql 23 | --runtime-mysql-timeout INTEGER 24 | The timeout of the MySQL server (env: 25 | RUNTIME_MYSQL_TIMEOUT) [default: 120] 26 | --runtime-mysql-charset TEXT The charset of the MySQL server (env: 27 | RUNTIME_MYSQL_CHARSET) [default: utf8mb4] 28 | --runtime-mysql-database TEXT The database of the MySQL server (env: 29 | RUNTIME_MYSQL_DATABASE) 30 | --runtime-mysql-password TEXT The password of the MySQL server (env: 31 | RUNTIME_MYSQL_PASSWORD) [default: ""] 32 | --runtime-mysql-user TEXT The user of the MySQL server (env: 33 | RUNTIME_MYSQL_USER) [default: root] 34 | --runtime-mysql-port INTEGER The port of the MySQL server (env: 35 | RUNTIME_MYSQL_PORT) [default: 3306] 36 | --runtime-mysql-host TEXT The host of the MySQL server (env: 37 | RUNTIME_MYSQL_HOST) [default: localhost] 38 | ``` 39 | 40 | ## Show Cases 41 | 42 | Here is an example of "chatting" with the `x-for-pet` database using Opsmate: 43 | 44 | 54 | 55 | Here is another example of Claude Sonnet 3.7 conducting database schema analysis (the text size is a bit small, please feel free to zoom in): 56 | 57 | 67 | 68 | 69 | ## Uninstallation 70 | 71 | ```bash 72 | opsmate uninstall -y opsmate-tools-mysql 73 | ``` 74 | -------------------------------------------------------------------------------- /docs/tools/postgres.md: -------------------------------------------------------------------------------- 1 | PostgresTool is a tool that allows you to interact with PostgreSQL databases. 2 | 3 | ## Installation 4 | 5 | The PostgresTool is not pre-installed with Opsmate. 
You need to install it explicitly: 6 | 7 | ```bash 8 | opsmate install opsmate-tool-postgres 9 | ``` 10 | 11 | To verify the installation, you can run: 12 | 13 | ```bash 14 | $ opsmate list-tools | grep -i postgres 15 | │ PostgresTool │ PostgreSQL tool 16 | ``` 17 | 18 | The command line options will be added to the `opsmate [run|solve|chat|serve]` commands: 19 | 20 | ```bash 21 | # to verify the postgres runtime is autodiscovered 22 | opsmate chat --help | grep -i postgres 23 | --postgres-tool-runtime TEXT The runtime to use for the tool call (env: 24 | POSTGRES_TOOL_RUNTIME) [default: postgres] 25 | --runtime-postgres-timeout INTEGER 26 | The timeout of the PostgreSQL server in 27 | seconds (env: RUNTIME_POSTGRES_TIMEOUT) 28 | --runtime-postgres-schema TEXT The schema of the PostgreSQL server (env: 29 | RUNTIME_POSTGRES_SCHEMA) [default: public] 30 | --runtime-postgres-database TEXT 31 | The database of the PostgreSQL server (env: 32 | RUNTIME_POSTGRES_DATABASE) 33 | --runtime-postgres-password TEXT 34 | The password of the PostgreSQL server (env: 35 | RUNTIME_POSTGRES_PASSWORD) [default: ""] 36 | --runtime-postgres-user TEXT The user of the PostgreSQL server (env: 37 | RUNTIME_POSTGRES_USER) [default: postgres] 38 | --runtime-postgres-port INTEGER 39 | The port of the PostgreSQL server (env: 40 | RUNTIME_POSTGRES_PORT) [default: 5432] 41 | --runtime-postgres-host TEXT The host of the PostgreSQL server (env: 42 | RUNTIME_POSTGRES_HOST) [default: localhost] 43 | ``` 44 | 45 | ## Usage 46 | 47 | Similar to the [MySQLTool](./mysql.md), you can interact with the Postgres database by running: 48 | 49 | ```bash 50 | opsmate chat \ 51 | --runtime-postgres-password postgres \ 52 | --runtime-postgres-host localhost \ 53 | --runtime-postgres-database \ 54 | --runtime-postgres-schema \ 55 | --tools PostgresTool 56 | ``` 57 | 58 | ## Uninstall 59 | 60 | ```bash 61 | opsmate uninstall -y opsmate-tool-postgres 62 | ``` 63 | 
-------------------------------------------------------------------------------- /docs/tools/prometheus.md: -------------------------------------------------------------------------------- 1 | PrometheusTool is a tool to query metrics from prometheus tsdb via natural language. The tool itself is out of box supported by opsmate and added to all the prebuilt contexts. 2 | 3 | You can also explicitly add the tool to your session via 4 | 5 | ```bash 6 | opsmate [run|solve|chat|serve] --models PrometheusTool ... 7 | ``` 8 | 9 | You can configure the prometheus endpoint and other parameters via environment variables: 10 | 11 | ```bash 12 | PROMETHEUS_ENDPOINT=http://localhost:9090 # default endpoint 13 | PROMETHEUS_PATH=/api/v1/query # default path 14 | # Optional: PROMETHEUS_USER_ID 15 | # Optional: PROMETHEUS_API_KEY 16 | ``` 17 | 18 | Example usage: 19 | 20 | Here is a simple example of how to use the tool (you probably need to zoom in to see the text): 21 | 22 | 32 | 33 | Note that for LLM to come up with the correct promql query, you need to provide enough information about: 34 | 35 | - the metrics name 36 | - the labels 37 | 38 | In Opsmate you can store the metrics metadata in the vector db and ask LLM to retrieve the metrics semantically on the fly. 39 | 40 | See [ingest-prometheus-metrics-metadata](../CLI/ingest-prometheus-metrics-metadata.md) for more details. 
41 | -------------------------------------------------------------------------------- /evals/apps/audit-server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | RUN pip install flask mysql-connector-python 4 | 5 | COPY app.py /app/app.py 6 | 7 | WORKDIR /app 8 | 9 | CMD ["python", "app.py"] 10 | -------------------------------------------------------------------------------- /evals/apps/audit-server/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import mysql.connector 3 | import os 4 | import logging 5 | 6 | # Configure logging 7 | logging.basicConfig( 8 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" 9 | ) 10 | logger = logging.getLogger(__name__) 11 | 12 | app = Flask(__name__) 13 | 14 | 15 | @app.route("/healthz") 16 | def index(): 17 | # ping mysql db 18 | try: 19 | conn = mysql.connector.connect( 20 | host=os.getenv("MYSQL_HOST"), 21 | user=os.getenv("MYSQL_USER"), 22 | password=os.getenv("MYSQL_PASSWORD"), 23 | database=os.getenv("MYSQL_DATABASE"), 24 | connect_timeout=5, 25 | ) 26 | conn.ping(reconnect=False, attempts=1) 27 | conn.close() 28 | return "OK" 29 | except Exception as e: 30 | logger.error(f"Error connecting to MySQL: {e}") 31 | return "ERROR", 500 32 | 33 | 34 | if __name__ == "__main__": 35 | logger.info("Starting Flask application on port 80") 36 | app.run(host="0.0.0.0", port=80) 37 | -------------------------------------------------------------------------------- /evals/apps/innovation-lab/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | RUN pip install flask mysql-connector-python 4 | 5 | COPY app.py /app/app.py 6 | 7 | WORKDIR /app 8 | 9 | CMD ["python", "app.py"] 10 | -------------------------------------------------------------------------------- 
/evals/apps/innovation-lab/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template_string 2 | import mysql.connector 3 | import os 4 | import logging 5 | 6 | # Configure logging 7 | logging.basicConfig( 8 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" 9 | ) 10 | logger = logging.getLogger(__name__) 11 | 12 | app = Flask(__name__) 13 | 14 | 15 | @app.route("/") 16 | def index(): 17 | host = os.getenv("MYSQL_HOST") 18 | user = os.getenv("MYSQL_USER") 19 | password = os.getenv("MYSQL_PASSWORD") 20 | database = os.getenv("MYSQL_DATABASE") 21 | 22 | html = """ 23 |

Simple Web Application

24 |

Attempting MySQL Connection...

25 | """ 26 | 27 | status_code = 200 # Default status code 28 | 29 | try: 30 | logger.info(f"Attempting to connect to MySQL database at {host}") 31 | conn = mysql.connector.connect( 32 | host=host, user=user, password=password, database=database 33 | ) 34 | if conn.is_connected(): 35 | logger.info("Successfully connected to MySQL database") 36 | html += """ 37 |
38 | Connected successfully to MySQL!
39 | """ 40 | conn.close() 41 | else: 42 | status_code = 500 43 | logger.error( 44 | "Database Connection Error: Could not establish a valid connection to the database." 45 | ) 46 | html += """ 47 |
48 | Database Connection Error:
49 | Could not establish a valid connection to the database.
50 | """ 51 | except Exception as e: 52 | status_code = 500 53 | logger.error( 54 | f"Database Connection Error: Could not connect to MySQL database on '{host}'. Error: {str(e)}" 55 | ) 56 | html += f""" 57 |
58 | Database Connection Error:
59 | Could not connect to MySQL database on '{host}'.
60 | Error: {str(e)}
61 | """ 62 | 63 | return render_template_string(html), status_code 64 | 65 | 66 | if __name__ == "__main__": 67 | logger.info("Starting Flask application on port 80") 68 | app.run(host="0.0.0.0", port=80) 69 | -------------------------------------------------------------------------------- /evals/kind.yaml: -------------------------------------------------------------------------------- 1 | kind: Cluster 2 | apiVersion: kind.x-k8s.io/v1alpha4 3 | name: troubleshooting-eval 4 | # One control plane node and three "workers". 5 | # 6 | # While these will not add more real compute capacity and 7 | # have limited isolation, this can be useful for testing 8 | # rolling updates etc. 9 | # 10 | # The API-server and other control plane components will be 11 | # on the control-plane node. 12 | # 13 | # You probably don't need this unless you are testing Kubernetes itself. 14 | nodes: 15 | - role: control-plane 16 | - role: worker 17 | - role: worker 18 | - role: worker 19 | -------------------------------------------------------------------------------- /evals/scenarios/investigation-001-oom-deploy.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: finance 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: finance-app 11 | namespace: finance 12 | spec: 13 | replicas: 1 14 | selector: 15 | matchLabels: 16 | app: finance-app 17 | template: 18 | metadata: 19 | labels: 20 | app: finance-app 21 | spec: 22 | containers: 23 | - name: finance-app 24 | image: polinux/stress 25 | resources: 26 | requests: 27 | memory: "100Mi" 28 | cpu: "100m" 29 | limits: 30 | memory: "200Mi" 31 | cpu: "200m" 32 | command: ["stress"] 33 | args: ["--vm", "1", "--vm-bytes", "250M", "--vm-hang", "0"] 34 | -------------------------------------------------------------------------------- /evals/scenarios/investigation-002-misconfigured-readiness-probe.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: ecomm 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: ecomm-app 11 | namespace: ecomm 12 | labels: 13 | app: ecomm-app 14 | spec: 15 | replicas: 2 16 | selector: 17 | matchLabels: 18 | app: ecomm-app 19 | template: 20 | metadata: 21 | labels: 22 | app: ecomm-app 23 | spec: 24 | containers: 25 | - name: nginx 26 | image: nginx:1.19 27 | ports: 28 | - containerPort: 80 29 | readinessProbe: 30 | httpGet: 31 | path: /health-check-endpoint-that-doesnt-exist 32 | port: 80 33 | timeoutSeconds: 1 34 | periodSeconds: 2 35 | failureThreshold: 10 36 | initialDelaySeconds: 1 37 | resources: 38 | limits: 39 | memory: "128Mi" 40 | cpu: "100m" 41 | requests: 42 | memory: "64Mi" 43 | cpu: "50m" 44 | --- 45 | apiVersion: v1 46 | kind: Service 47 | metadata: 48 | name: ecomm-service 49 | namespace: ecomm 50 | spec: 51 | selector: 52 | app: ecomm-app 53 | ports: 54 | - port: 80 55 | targetPort: 80 56 | type: ClusterIP 57 | -------------------------------------------------------------------------------- /evals/scenarios/investigation-003-unschedulable-deploy.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: accounting 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: accounting-app 11 | namespace: accounting 12 | spec: 13 | replicas: 1 14 | selector: 15 | matchLabels: 16 | app: accounting-app 17 | template: 18 | metadata: 19 | labels: 20 | app: accounting-app 21 | spec: 22 | nodeSelector: 23 | node-role.kubernetes.io/control-plane: "" 24 | containers: 25 | - name: accounting-app 26 | image: nginx:1.27.4-alpine-slim 27 | resources: 28 | requests: 29 | memory: "100Mi" 30 | cpu: "100m" 31 | limits: 32 | memory: "200Mi" 33 | cpu: "200m" 34 | 
-------------------------------------------------------------------------------- /evals/scenarios/investigation-004-image-pull-backoff.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: hr 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: hr-app 11 | namespace: hr 12 | spec: 13 | replicas: 1 14 | selector: 15 | matchLabels: 16 | app: hr-app 17 | template: 18 | metadata: 19 | labels: 20 | app: hr-app 21 | spec: 22 | containers: 23 | - name: hr-app 24 | image: do-not-exist-image:1.0.1 25 | resources: 26 | requests: 27 | memory: "100Mi" 28 | cpu: "100m" 29 | limits: 30 | memory: "200Mi" 31 | cpu: "200m" 32 | -------------------------------------------------------------------------------- /evals/scenarios/investigation-005-db-connection.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: innovation-lab 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: simple-webapp 11 | namespace: innovation-lab 12 | labels: 13 | app: simple-webapp 14 | spec: 15 | replicas: 1 16 | selector: 17 | matchLabels: 18 | app: simple-webapp 19 | template: 20 | metadata: 21 | labels: 22 | app: simple-webapp 23 | spec: 24 | containers: 25 | - name: webapp 26 | image: innovation-lab-app:v1 27 | ports: 28 | - containerPort: 80 29 | env: 30 | - name: MYSQL_HOST 31 | value: "mysql-db-service" 32 | - name: MYSQL_USER 33 | value: "webapp" 34 | - name: MYSQL_PASSWORD 35 | value: "password123" 36 | - name: MYSQL_DATABASE 37 | value: "appdb" 38 | resources: 39 | limits: 40 | memory: "256Mi" 41 | cpu: "1000m" 42 | requests: 43 | memory: "128Mi" 44 | livenessProbe: 45 | httpGet: 46 | path: / 47 | port: 80 48 | initialDelaySeconds: 10 49 | periodSeconds: 10 50 | --- 51 | apiVersion: v1 52 | kind: Service 53 | metadata: 54 | name: webapp-service 
55 | namespace: innovation-lab 56 | spec: 57 | selector: 58 | app: simple-webapp 59 | ports: 60 | - port: 80 61 | targetPort: 80 62 | type: ClusterIP 63 | -------------------------------------------------------------------------------- /evals/scenarios/investigation-006-rbac-issue.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: content-service 6 | --- 7 | # Create a service account for the content manager application 8 | apiVersion: v1 9 | kind: ServiceAccount 10 | metadata: 11 | name: content-manager-sa 12 | namespace: content-service 13 | --- 14 | apiVersion: rbac.authorization.k8s.io/v1 15 | kind: Role 16 | metadata: 17 | name: content-reader 18 | namespace: content-service 19 | rules: 20 | - apiGroups: [""] 21 | resources: ["pods", "services"] 22 | verbs: ["get", "list", "watch"] 23 | --- 24 | # Bind the role to the service account 25 | apiVersion: rbac.authorization.k8s.io/v1 26 | kind: RoleBinding 27 | metadata: 28 | name: content-manager-binding 29 | namespace: content-service 30 | subjects: 31 | - kind: ServiceAccount 32 | name: content-manager-sa 33 | namespace: content-service 34 | roleRef: 35 | kind: Role 36 | name: content-reader 37 | apiGroup: rbac.authorization.k8s.io 38 | --- 39 | # Create a ConfigMap that the application needs to access 40 | apiVersion: v1 41 | kind: ConfigMap 42 | metadata: 43 | name: content-config 44 | namespace: content-service 45 | data: 46 | DATABASE_URL: "mysql://content-db:3306/content" 47 | API_KEY: "sample-api-key-12345" 48 | --- 49 | # Create a Secret that the application also needs to access 50 | apiVersion: v1 51 | kind: Secret 52 | metadata: 53 | name: content-secrets 54 | namespace: content-service 55 | type: Opaque 56 | data: 57 | db-password: cGFzc3dvcmQxMjM= # base64 encoded "password123" 58 | --- 59 | # Deploy the content manager application 60 | apiVersion: apps/v1 61 | kind: Deployment 62 | metadata: 63 
| name: content-manager 64 | namespace: content-service 65 | spec: 66 | replicas: 1 67 | selector: 68 | matchLabels: 69 | app: content-manager 70 | template: 71 | metadata: 72 | labels: 73 | app: content-manager 74 | spec: 75 | serviceAccountName: content-manager-sa 76 | containers: 77 | - name: content-app 78 | image: nginx:1.21-alpine # Using nginx as a placeholder 79 | ports: 80 | - containerPort: 80 81 | env: 82 | - name: CONFIG_MAP_NAME 83 | value: "content-config" 84 | - name: SECRET_NAME 85 | value: "content-secrets" 86 | command: ["/bin/sh", "-c"] 87 | args: 88 | - | 89 | set -eou pipefail 90 | export CACERT=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt 91 | export TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) 92 | export NAMESPACE=content-service 93 | 94 | echo "fetching ConfigMap from kubernetes..." 95 | curl --fail --cacert $CACERT -H "Authorization: Bearer $TOKEN" https://kubernetes.default.svc/api/v1/namespaces/${NAMESPACE}/configmaps/${CONFIG_MAP_NAME} 96 | echo "fetching Secret from kubernetes..." 97 | curl --fail --cacert $CACERT -H "Authorization: Bearer $TOKEN" https://kubernetes.default.svc/api/v1/namespaces/${NAMESPACE}/secrets/${SECRET_NAME} 98 | 99 | # Sleep to keep container running for investigation 100 | echo "Starting nginx..." 
101 | nginx -g "daemon off;" 102 | --- 103 | # Create a service for the content manager 104 | apiVersion: v1 105 | kind: Service 106 | metadata: 107 | name: content-manager 108 | namespace: content-service 109 | spec: 110 | selector: 111 | app: content-manager 112 | ports: 113 | - port: 80 114 | targetPort: 80 115 | type: ClusterIP 116 | -------------------------------------------------------------------------------- /evals/scenarios/investigation-007-network-policy-issue.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: audit 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: audit-server 11 | namespace: audit 12 | spec: 13 | replicas: 2 14 | selector: 15 | matchLabels: 16 | app: audit-server 17 | template: 18 | metadata: 19 | labels: 20 | app: audit-server 21 | spec: 22 | containers: 23 | - name: audit-server 24 | image: audit-server:v1 25 | ports: 26 | - containerPort: 80 27 | resources: 28 | requests: 29 | cpu: 100m 30 | memory: 128Mi 31 | limits: 32 | cpu: 200m 33 | memory: 256Mi 34 | livenessProbe: 35 | httpGet: 36 | path: /healthz 37 | port: 80 38 | initialDelaySeconds: 10 39 | periodSeconds: 10 40 | failureThreshold: 20 41 | env: 42 | - name: MYSQL_HOST 43 | value: "mysql-db-service" 44 | - name: MYSQL_USER 45 | value: "webapp" 46 | - name: MYSQL_PASSWORD 47 | value: "password123" 48 | - name: MYSQL_DATABASE 49 | value: "appdb" 50 | --- 51 | apiVersion: apps/v1 52 | kind: Deployment 53 | metadata: 54 | name: mysql-db 55 | namespace: audit 56 | spec: 57 | replicas: 1 58 | selector: 59 | matchLabels: 60 | app: mysql-db 61 | template: 62 | metadata: 63 | labels: 64 | app: mysql-db 65 | spec: 66 | containers: 67 | - name: mysql-db 68 | image: mysql:latest 69 | ports: 70 | - containerPort: 3306 71 | resources: 72 | requests: 73 | cpu: 100m 74 | memory: 128Mi 75 | env: 76 | - name: MYSQL_USER 77 | value: "webapp" 78 | - name: 
MYSQL_PASSWORD 79 | value: "password123" 80 | - name: MYSQL_ROOT_PASSWORD 81 | value: "password123" 82 | - name: MYSQL_DATABASE 83 | value: "appdb" 84 | --- 85 | apiVersion: v1 86 | kind: Service 87 | metadata: 88 | name: mysql-db-service 89 | namespace: audit 90 | spec: 91 | selector: 92 | app: mysql-db 93 | ports: 94 | - protocol: TCP 95 | port: 3306 96 | targetPort: 3306 97 | 98 | --- 99 | apiVersion: networking.k8s.io/v1 100 | kind: NetworkPolicy 101 | metadata: 102 | name: mysql-access 103 | namespace: audit 104 | spec: 105 | podSelector: 106 | matchLabels: 107 | app: mysql-db 108 | ingress: 109 | - from: 110 | - podSelector: 111 | matchLabels: 112 | app: audit-app 113 | policyTypes: 114 | - Ingress 115 | -------------------------------------------------------------------------------- /evals/scenarios/text-edit-001-missing-resources-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: construction 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: construction-app 11 | namespace: construction 12 | spec: 13 | replicas: 1 14 | selector: 15 | matchLabels: 16 | app: construction-app 17 | template: 18 | metadata: 19 | labels: 20 | app: construction-app 21 | spec: 22 | containers: 23 | - name: construction-app 24 | image: nginx:1.27.4-alpine-slim 25 | livenessProbe: 26 | httpGet: 27 | path: / 28 | port: 80 29 | timeoutSeconds: 1 30 | periodSeconds: 2 31 | failureThreshold: 10 32 | --- 33 | apiVersion: v1 34 | kind: Service 35 | metadata: 36 | name: construction-app-service 37 | namespace: construction 38 | spec: 39 | selector: 40 | app: construction-app 41 | ports: 42 | - protocol: TCP 43 | port: 80 44 | targetPort: 80 45 | -------------------------------------------------------------------------------- /evals/scenarios/text-edit-002-remove-config.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: film-production 6 | --- 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | metadata: 10 | name: film-production-app 11 | namespace: film-production 12 | spec: 13 | replicas: 1 14 | selector: 15 | matchLabels: 16 | app: film-production-app 17 | template: 18 | metadata: 19 | labels: 20 | app: film-production-app 21 | spec: 22 | containers: 23 | - name: film-production-app 24 | image: nginx:1.27.4-alpine-slim 25 | livenessProbe: 26 | httpGet: 27 | path: / 28 | port: 80 29 | timeoutSeconds: 1 30 | periodSeconds: 2 31 | failureThreshold: 10 32 | --- 33 | apiVersion: v1 34 | kind: Service 35 | metadata: 36 | name: film-production-app-service 37 | namespace: film-production 38 | spec: 39 | selector: 40 | app: film-production-app 41 | ports: 42 | - protocol: TCP 43 | port: 80 44 | targetPort: 80 45 | -------------------------------------------------------------------------------- /evals/scenarios/text-edit-003-insert.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: team-a 6 | -------------------------------------------------------------------------------- /evals/scenarios/text-edit-004-search.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: eastegg 6 | -------------------------------------------------------------------------------- /evals/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | scriptDir=$(dirname -- "$(readlink -f -- "$BASH_SOURCE")") 6 | 7 | ( 8 | cd $scriptDir/apps/innovation-lab 9 | docker build -t innovation-lab-app:v1 . 
10 | kind load docker-image innovation-lab-app:v1 --name troubleshooting-eval 11 | ) 12 | 13 | ( 14 | cd $scriptDir/apps/audit-server 15 | docker build -t audit-server:v1 . 16 | kind load docker-image audit-server:v1 --name troubleshooting-eval 17 | ) 18 | 19 | kubectl apply -f $scriptDir/scenarios/ 20 | 21 | ( 22 | rm -rf /tmp/kube-prometheus 23 | git clone https://github.com/prometheus-operator/kube-prometheus --depth 1 /tmp/kube-prometheus 24 | cd /tmp/kube-prometheus 25 | kubectl apply --server-side -f manifests/setup 26 | kubectl wait \ 27 | --for condition=Established \ 28 | --all CustomResourceDefinition \ 29 | --namespace=monitoring 30 | kubectl apply -f manifests/ 31 | ) 32 | 33 | 34 | echo "Waiting for all the pods in the monitoring namespace to be ready..." 35 | kubectl wait --for=condition=ready --all pod --namespace=monitoring --timeout=300s 36 | echo "All the pods in the monitoring namespace are ready" 37 | -------------------------------------------------------------------------------- /examples/runtime/gce/README.md: -------------------------------------------------------------------------------- 1 | # GCE Runtime 2 | 3 | This example demonstrates how to register a new runtime. In this case we are registering [GCE](https://cloud.google.com/compute) as the runtime for Opsmate. 4 | 5 | ## Installation 6 | 7 | ```bash 8 | opsmate install -e . 9 | ``` 10 | 11 | After installation you can list the runtimes via 12 | 13 | ```bash 14 | Runtimes 15 | ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 16 | ┃ Name ┃ Description ┃ 17 | ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 18 | │ local │ Local runtime allows model to execute tool calls within the same namespace as the opsmate process. 
│ 19 | ├────────┼────────────────────────────────────────────────────────────────────────────────────────────────────┤ 20 | │ docker │ Docker runtime allows model to execute tool calls within a docker container. │ 21 | ├────────┼────────────────────────────────────────────────────────────────────────────────────────────────────┤ 22 | │ ssh │ SSH runtime allows model to execute tool calls on a remote server via SSH. │ 23 | ├────────┼────────────────────────────────────────────────────────────────────────────────────────────────────┤ 24 | │ gce │ GCE runtime allows model to execute tool calls on a GCE instance using gcloud compute ssh. │ 25 | └────────┴────────────────────────────────────────────────────────────────────────────────────────────────────┘ 26 | ``` 27 | 28 | You will notice that the GCE runtime is automatically added to the list of runtimes. 29 | 30 | Here is an example of how to use the GCE runtime: 31 | 32 | ```bash 33 | opsmate chat --shell-command-runtime gce \ 34 | --runtime-gce-instance my-vm \ 35 | --runtime-gce-zone europe-west1-c 36 | ``` 37 | 38 | This will start a chat with the GCE instance `my-vm` in the zone `europe-west1-c`. 
from opsmate.dino.types import ToolCall, PresentationMixin
from typing import Dict, Any
from pydantic import Field
import ast
import operator

# Arithmetic operations the calculator is allowed to perform, keyed by the
# AST operator node type.  Kept at module level so pydantic does not try to
# treat it as a model field / private attribute.
_ALLOWED_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod,
    ast.Pow: operator.pow,
    ast.USub: operator.neg,
    ast.UAdd: operator.pos,
}


def _safe_eval(node) -> float:
    """Recursively evaluate a whitelisted arithmetic subset of Python's AST.

    Supports numeric literals, binary arithmetic and unary +/-.  Anything
    else (names, calls, attribute access, ...) raises ValueError.
    """
    if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
        return node.value
    if isinstance(node, ast.BinOp) and type(node.op) in _ALLOWED_OPS:
        return _ALLOWED_OPS[type(node.op)](
            _safe_eval(node.left), _safe_eval(node.right)
        )
    if isinstance(node, ast.UnaryOp) and type(node.op) in _ALLOWED_OPS:
        return _ALLOWED_OPS[type(node.op)](_safe_eval(node.operand))
    raise ValueError(f"unsupported expression element: {ast.dump(node)}")


class Calculator(ToolCall[int], PresentationMixin):
    """Calculator tool that evaluates basic arithmetic expressions.

    The expression originates from the model, i.e. untrusted input, so it is
    evaluated with a restricted AST walker rather than ``eval`` (which would
    allow arbitrary code execution).
    """

    expr: str = Field(description="The expression to evaluate")

    def __call__(self) -> float:
        """Evaluate ``self.expr`` and return the numeric result.

        Raises:
            ValueError: if the expression contains anything other than
                numbers and basic arithmetic operators.
            SyntaxError: if the expression is not parseable at all.
        """
        # NOTE(review): the class is parameterized as ToolCall[int] but the
        # result may be a float (e.g. true division) — confirm the intended
        # output type with the ToolCall generic.
        tree = ast.parse(self.expr, mode="eval")
        return _safe_eval(tree.body)

    def markdown(self, context: Dict[str, Any] = {}):
        # Render the expression and its result as a fenced code block.
        return f"```\n{self.expr} = {self.output}\n```"
#! /usr/bin/env python3

"""Regenerate the ## OPTIONS section of the CLI docs from `--help` output.

Runs `docker run ghcr.io/opsmate-ai/opsmate:<version> <cmd> --help` for every
CLI subcommand concurrently, then splices the captured help text into the
matching `docs/CLI/<cmd>.md` page.
"""

# get the version from the pyproject.toml file
import tomllib
import asyncio
import re
import subprocess

with open("pyproject.toml", "rb") as f:
    pyproject = tomllib.load(f)

version = pyproject["project"]["version"]

container_image = f"ghcr.io/opsmate-ai/opsmate:{version}"


async def gen_help(subcommand: str) -> str:
    """Return the `--help` output of an opsmate subcommand.

    The command is executed inside the released container image so the docs
    always reflect the published CLI.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    # Run the command and capture the output asynchronously
    process = await asyncio.create_subprocess_shell(
        f"docker run --rm {container_image} {subcommand} --help",
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await process.communicate()

    if process.returncode != 0:
        raise subprocess.CalledProcessError(
            process.returncode, f"{subcommand} --help", stderr
        )

    return stdout.decode().strip()


def update_options_section(content: str, help_text: str) -> str:
    """Return `content` with its ## OPTIONS section replaced (or appended).

    Args:
        content: the full markdown text of a docs/CLI page.
        help_text: the raw `--help` output to embed.
    """
    # Format the help text as markdown with proper code block
    formatted_help = f"\n```\n{help_text}\n```\n"

    if "## OPTIONS" in content:
        # Replace everything between ## OPTIONS and the next heading (or end
        # of file).
        pattern = r"(## OPTIONS\n)(?:.*?)(?=\n## |\Z)"
        return re.sub(pattern, r"\1" + formatted_help, content, flags=re.DOTALL)

    # If OPTIONS section doesn't exist, append it
    return content + f"\n## OPTIONS\n{formatted_help}"


async def main():
    """Collect help text for every subcommand and rewrite the doc pages."""
    cmds = [
        "chat",
        "db-migrate",
        "db-revisions",
        "db-rollback",
        "ingest-prometheus-metrics-metadata",
        "ingest",
        "install",
        "list-contexts",
        "list-models",
        "list-tools",
        "list-runtimes",
        "reset",
        "run",
        "schedule-embeddings-reindex",
        "serve",
        "solve",
        "uninstall",
        "worker",
    ]

    help_texts = {}
    lock = asyncio.Lock()
    # Process all commands concurrently
    tasks = [process_command(cmd, lock, help_texts) for cmd in cmds]
    await asyncio.gather(*tasks)

    for cmd, help_text in help_texts.items():
        # Update the docs in `docs/CLI/` specifically the `## OPTIONS` section
        path = f"docs/CLI/{cmd}.md"
        with open(path, "r") as f:
            content = f.read()

        with open(path, "w") as f:
            f.write(update_options_section(content, help_text))


async def process_command(cmd, lock, help_texts):
    # Run the subcommand WITHOUT holding the lock: awaiting gen_help inside
    # `async with lock` would serialize every command and defeat the
    # concurrent gather above.  The lock only guards the shared dict write.
    help_text = await gen_help(cmd)
    async with lock:
        help_texts[cmd] = help_text


if __name__ == "__main__":
    asyncio.run(main())
from .cli_lite import cli_lite_ctx 5 | 6 | __all__ = ["k8s_ctx", "terraform_ctx", "cli_ctx", "cli_lite_ctx"] 7 | -------------------------------------------------------------------------------- /opsmate/contexts/cli.py: -------------------------------------------------------------------------------- 1 | from opsmate.tools import ( 2 | ShellCommand, 3 | KnowledgeRetrieval, 4 | ACITool, 5 | HtmlToText, 6 | PrometheusTool, 7 | Thinking, 8 | ) 9 | from opsmate.dino.context import context 10 | from opsmate.runtime import Runtime 11 | from jinja2 import Template 12 | 13 | 14 | @context( 15 | name="cli", 16 | tools=[ 17 | ShellCommand, 18 | KnowledgeRetrieval, 19 | ACITool, 20 | HtmlToText, 21 | PrometheusTool, 22 | Thinking, 23 | ], 24 | ) 25 | async def cli_ctx(runtimes: dict[str, Runtime] = {}) -> str: 26 | """System Admin Assistant""" 27 | 28 | # Pre-fetch all runtime information asynchronously 29 | runtime_info = {} 30 | for runtime_name, runtime in runtimes.items(): 31 | runtime_info[runtime_name] = { 32 | "os_info": await runtime.os_info(), 33 | "whoami": await runtime.whoami(), 34 | "runtime_info": await runtime.runtime_info(), 35 | "has_systemd": await runtime.has_systemd(), 36 | } 37 | 38 | template = Template( 39 | """ 40 | 41 | You are a world class SRE who is good at solving problems. You are given access to the terminal for solving problems. 42 | 43 | 44 | You have access to the following runtimes: 45 | 46 | 47 | {% for runtime_name, info in runtime_info.items() %} 48 | 49 | 50 | {{ info.os_info }} 51 | 52 | 53 | {{ info.whoami }} 54 | 55 | 56 | {{ info.runtime_info }} 57 | 58 | 59 | {{ info.has_systemd }} 60 | 61 | 62 | {% endfor %} 63 | 64 | 65 | 66 | - If you anticipate the command will generates a lot of output, you should limit the output via piping it to `tail -n 100` command or grepping it with a specific pattern. 67 | - Do not run any command that runs in interactive mode. 68 | - Do not run any command that requires manual intervention. 
69 | - Do not run any command that requires user input. 70 | 71 | """ 72 | ) 73 | 74 | rendered_template = template.render(runtime_info=runtime_info) 75 | return rendered_template 76 | -------------------------------------------------------------------------------- /opsmate/contexts/cli_lite.py: -------------------------------------------------------------------------------- 1 | from opsmate.tools import ( 2 | ShellCommand, 3 | ) 4 | from opsmate.dino.context import context 5 | from opsmate.runtime import Runtime 6 | from jinja2 import Template 7 | 8 | 9 | @context( 10 | name="cli-lite", 11 | tools=[ShellCommand], 12 | ) 13 | async def cli_lite_ctx(runtimes: dict[str, Runtime] = {}) -> str: 14 | """System Admin Assistant running on small LLM""" 15 | 16 | # Pre-fetch all runtime information asynchronously 17 | runtime_info = {} 18 | for runtime_name, runtime in runtimes.items(): 19 | runtime_info[runtime_name] = { 20 | "os_info": await runtime.os_info(), 21 | "whoami": await runtime.whoami(), 22 | } 23 | 24 | template = Template( 25 | """ 26 | 27 | You are a world class SRE who is good at solving problems. You are given access to the terminal for solving problems. 
28 | 29 | 30 | You have access to the following runtimes: 31 | 32 | 33 | {% for runtime_name, info in runtime_info.items() %} 34 | 35 | 36 | {{ info.whoami }} 37 | 38 | 39 | {{ info.os_info }} 40 | 41 | 42 | {% endfor %} 43 | 44 | 45 | """ 46 | ) 47 | 48 | rendered_template = template.render(runtime_info=runtime_info) 49 | return rendered_template 50 | -------------------------------------------------------------------------------- /opsmate/contexts/k8s.py: -------------------------------------------------------------------------------- 1 | from opsmate.tools import ( 2 | ShellCommand, 3 | KnowledgeRetrieval, 4 | ACITool, 5 | HtmlToText, 6 | PrometheusTool, 7 | Thinking, 8 | ) 9 | from opsmate.dino.context import context 10 | from opsmate.runtime import Runtime 11 | from jinja2 import Template 12 | 13 | 14 | @context( 15 | name="k8s", 16 | tools=[ 17 | ShellCommand, 18 | KnowledgeRetrieval, 19 | ACITool, 20 | HtmlToText, 21 | PrometheusTool, 22 | Thinking, 23 | ], 24 | ) 25 | async def k8s_ctx(runtimes: dict[str, Runtime] = {}) -> str: 26 | """Kubernetes SME""" 27 | 28 | # Pre-fetch all runtime information asynchronously 29 | k8s_info = {} 30 | if runtimes and "ShellCommand" in runtimes: 31 | k8s_info = { 32 | "kube_contexts": await __kube_contexts(runtimes), 33 | "namespaces": await __namespaces(runtimes), 34 | } 35 | else: 36 | raise ValueError("ShellCommand runtime not found") 37 | 38 | template = Template( 39 | """ 40 | 41 | You are a world class SRE who is an expert in kubernetes. You are tasked to help with kubernetes related problem solving 42 | 43 | 44 | 45 | - When you do `kubectl logs ...` do not log more than 50 lines. 46 | - When you look into any issues scoped to the namespaces, look into the events in the given namespaces. 47 | - Always use `kubectl get --show-labels` for querying resources when `-ojson` or `-oyaml` are not being used. 48 | - When running kubectl, always make sure that you are using the right context and namespace. 
For example never do `kuebctl get po xxx` without specifying the namespace. 49 | - Never run interactive commands that cannot automatically exit, such as `vim`, `view`, `tail -f`, or `less`. 50 | - Always include the `-y` flag with installation commands like `apt-get install` or `apt-get update` to prevent interactive prompts. 51 | - Avoid any command that requires user input after execution. 52 | - When it's unclear what causes error from the logs, you can view the k8s resources to have a holistic view of the situation. 53 | - DO NOT create resources using `kubectl apply -f - < 55 | 56 | 57 | {{ k8s_info.kube_contexts }} 58 | 59 | 60 | 61 | {{ k8s_info.namespaces }} 62 | 63 | 64 | 65 | - kubectl 66 | - helm 67 | - and all the conventional command line tools such as grep, awk, wc, etc. 68 | 69 | """ 70 | ) 71 | 72 | rendered_template = template.render(k8s_info=k8s_info) 73 | return rendered_template 74 | 75 | 76 | async def __namespaces(runtimes: dict[str, Runtime]) -> str: 77 | return await runtimes["ShellCommand"].run( 78 | "kubectl get ns -o jsonpath='{.items[*].metadata.name}'" 79 | ) 80 | 81 | 82 | async def __kube_contexts(runtimes: dict[str, Runtime]) -> str: 83 | return await runtimes["ShellCommand"].run("kubectl config get-contexts") 84 | -------------------------------------------------------------------------------- /opsmate/contexts/terraform.py: -------------------------------------------------------------------------------- 1 | from opsmate.tools import ( 2 | ShellCommand, 3 | KnowledgeRetrieval, 4 | ACITool, 5 | HtmlToText, 6 | Thinking, 7 | ) 8 | from opsmate.dino.context import context 9 | from opsmate.runtime import Runtime 10 | 11 | 12 | @context( 13 | name="terraform", 14 | tools=[ 15 | ShellCommand, 16 | KnowledgeRetrieval, 17 | ACITool, 18 | HtmlToText, 19 | Thinking, 20 | ], 21 | ) 22 | async def terraform_ctx(runtime: Runtime) -> str: 23 | """Terraform SME""" 24 | 25 | return f""" 26 | 27 | You are a world class SRE who is an expert in 
from opsmate.dbq.dbq import Worker
from opsmate.config import config
import asyncio
import structlog
import signal

logger = structlog.get_logger()


async def main(worker_count: int = 10, worker_queue: str = "default"):
    """Run a dbq worker until SIGTERM/SIGINT requests a graceful stop.

    Args:
        worker_count: number of concurrent worker slots.
        worker_queue: name of the queue the worker consumes from.
    """
    engine = config.db_engine()

    worker = Worker(engine, worker_count, queue_name=worker_queue)

    loop = asyncio.get_running_loop()
    # asyncio only keeps weak references to tasks, so hold a reference to the
    # in-flight stop task to prevent it being garbage collected mid-shutdown.
    stop_tasks = []

    def handle_signal(signal_number):
        logger.info("Received signal", signal_number=signal_number)
        stop_tasks.append(asyncio.create_task(worker.stop()))

    for sig in (signal.SIGTERM, signal.SIGINT):
        try:
            # Preferred path: the handler runs as an event-loop callback, so
            # calling asyncio.create_task inside it is safe.
            loop.add_signal_handler(sig, handle_signal, sig)
        except NotImplementedError:
            # Fallback for platforms without add_signal_handler (e.g.
            # Windows); signal.signal handlers run between bytecodes, where
            # create_task is best-effort.
            signal.signal(sig, lambda signum, frame: handle_signal(signum))

    await worker.start()


if __name__ == "__main__":
    asyncio.run(main())
from .base import register_provider
from .openai import OpenAIProvider
from instructor import AsyncInstructor
from openai import AsyncOpenAI, OpenAI
from functools import cache
import os
import httpx
import instructor
import structlog

logger = structlog.get_logger(__name__)


# to deal with the fact that @property on a classmethod is deprecated
class classproperty:
    """Descriptor exposing a method as a read-only class-level property."""

    def __init__(self, method=None):
        self.method = method

    def __get__(self, instance, cls=None):
        return self.method(cls)


@register_provider("ollama")
class OllamaProvider(OpenAIProvider):
    """Provider backed by a local Ollama server's OpenAI-compatible API."""

    DEFAULT_BASE_URL = "http://localhost:11434/v1"
    # Cast to float: os.getenv returns a *string* whenever the variable is
    # set, and httpx.Timeout expects a number of seconds.
    OLLAMA_CONNECT_TIMEOUT = float(os.getenv("OPSMATE_OLLAMA_CONNECT_TIMEOUT", 0.1))

    @classproperty
    @cache
    def models(cls) -> list[str]:
        """List the model ids served by the local Ollama instance.

        Falls back to the previously cached model list when the server is
        unreachable.
        """
        try:
            client = OpenAI(
                base_url=os.getenv("OLLAMA_BASE_URL", cls.DEFAULT_BASE_URL),
                # connection timeout isn't exactly respected, but it's the
                # best we can do
                http_client=httpx.Client(
                    timeout=httpx.Timeout(None, connect=cls.OLLAMA_CONNECT_TIMEOUT)
                ),
            )
            models = client.models.list()
            model_ids = [model.id for model in models]
            cls._cache_models(model_ids)
            return model_ids
        except Exception as e:
            # `warn` is a deprecated alias for `warning` in stdlib logging;
            # use the canonical name.
            logger.warning(
                "Error fetching models from ollama, using cached models", error=e
            )
            return cls._get_cached_models()

    @classmethod
    @cache
    def default_client(cls, model: str) -> AsyncInstructor:
        """Build an instructor-wrapped async client pointed at Ollama.

        The api_key "ollama" is a placeholder: the local server does not
        authenticate, but the OpenAI client requires a non-empty key.
        """
        client = instructor.from_openai(
            AsyncOpenAI(
                base_url=os.getenv("OLLAMA_BASE_URL", cls.DEFAULT_BASE_URL),
                api_key="ollama",
            ),
            mode=instructor.Mode.JSON,
        )
        client.on("parse:error", cls._handle_parse_error)
        return client
AsyncInstructor: 39 | return instructor.from_openai( 40 | AsyncOpenAI( 41 | base_url=os.getenv("XAI_BASE_URL", cls.DEFAULT_BASE_URL), 42 | api_key=os.getenv("XAI_API_KEY"), 43 | ), 44 | ) 45 | 46 | @classmethod 47 | def _default_reasoning_client(cls) -> AsyncInstructor: 48 | return instructor.from_openai( 49 | AsyncOpenAI( 50 | base_url=os.getenv("XAI_BASE_URL", cls.DEFAULT_BASE_URL), 51 | api_key=os.getenv("XAI_API_KEY"), 52 | ), 53 | mode=instructor.Mode.JSON_O1, 54 | ) 55 | 56 | @classmethod 57 | def is_reasoning_model(cls, model: str) -> bool: 58 | return model in cls.reasoning_models 59 | -------------------------------------------------------------------------------- /opsmate/dino/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | from inspect import signature 3 | 4 | 5 | def args_dump(fn: Callable, cbk: Callable, args, kwargs): 6 | """dump the matching args and kwargs from the function to the callback 7 | 8 | Args: 9 | fn: Source function whose arguments are being passed 10 | cbk: Callback function to match arguments against 11 | args: Positional arguments passed to fn 12 | kwargs: Keyword arguments passed to fn 13 | 14 | Returns: 15 | Tuple of (matched_args, matched_kwargs) for the callback function 16 | 17 | Example: 18 | def fn(a, b, c=1, d=2): 19 | pass 20 | 21 | def cbk(a, d=2): 22 | pass 23 | 24 | args_dump(fn, cbk, (1, 2), {"c": 3, "d": 4}) 25 | >> ( (1,), {"d": 4}) 26 | """ 27 | fn_params = list(signature(fn).parameters.keys()) 28 | cbk_params = set(signature(cbk).parameters.keys()) 29 | 30 | # Match positional arguments 31 | matched_args = tuple( 32 | arg for i, arg in enumerate(args) if fn_params[i] in cbk_params 33 | ) 34 | 35 | # Match keyword arguments 36 | matched_kwargs = {k: v for k, v in kwargs.items() if k in cbk_params} 37 | 38 | return matched_args, matched_kwargs 39 | -------------------------------------------------------------------------------- 
/opsmate/gui/config.py: -------------------------------------------------------------------------------- 1 | from opsmate.config import Config as OpsmateConfig 2 | from pydantic import Field 3 | from opsmate.plugins import PluginRegistry 4 | from opsmate.dino.context import ContextRegistry 5 | 6 | 7 | class Config(OpsmateConfig): 8 | session_name: str = Field(default="session", alias="OPSMATE_SESSION_NAME") 9 | token: str = Field(default="", alias="OPSMATE_TOKEN") 10 | 11 | system_prompt: str = Field( 12 | alias="OPSMATE_SYSTEM_PROMPT", 13 | default="", 14 | ) 15 | 16 | def addon_discovery(self): 17 | PluginRegistry.discover(self.plugins_dir) 18 | ContextRegistry.discover(self.contexts_dir) 19 | 20 | 21 | config = Config() 22 | -------------------------------------------------------------------------------- /opsmate/ingestions/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseIngestion 2 | from .fs import FsIngestion 3 | from .github import GithubIngestion 4 | from typing import List 5 | from opsmate.config import Config 6 | import structlog 7 | from opsmate.ingestions.jobs import ingest 8 | from sqlmodel import Session 9 | from opsmate.dbq.dbq import enqueue_task 10 | from sqlalchemy import Engine 11 | from opsmate.knowledgestore.models import init_table 12 | 13 | logger = structlog.get_logger(__name__) 14 | 15 | __all__ = ["BaseIngestion", "FsIngestion", "GithubIngestion"] 16 | 17 | 18 | def ingestions_from_config(cfg: Config) -> List[BaseIngestion]: 19 | ingestions = [] 20 | github_ingestions = GithubIngestion.from_configmap(cfg.github_embeddings_config) 21 | fs_ingestions = FsIngestion.from_configmap(cfg.fs_embeddings_config) 22 | ingestions.extend(github_ingestions) 23 | ingestions.extend(fs_ingestions) 24 | 25 | return ingestions 26 | 27 | 28 | async def ingest_from_config( 29 | cfg: Config, engine: Engine | None = None 30 | ) -> List[BaseIngestion]: 31 | """ 32 | Ingest the data based on 
the env var config. 33 | """ 34 | ingestions = ingestions_from_config(cfg) 35 | 36 | await init_table() 37 | # db_conn = await aconn() 38 | # table = await db_conn.open_table("knowledge_store") 39 | 40 | with Session(engine) as session: 41 | for ingestion in ingestions: 42 | if ingestion.data_source_provider() == "github": 43 | enqueue_task( 44 | session, 45 | ingest, 46 | ingestor_type="github", 47 | ingestor_config={ 48 | "repo": ingestion.repo, 49 | "branch": ingestion.branch, 50 | "path": ingestion.path, 51 | "glob": ingestion.glob, 52 | }, 53 | splitter_config=cfg.splitter_config, 54 | ) 55 | elif ingestion.data_source_provider() == "fs": 56 | enqueue_task( 57 | session, 58 | ingest, 59 | ingestor_type="fs", 60 | ingestor_config={ 61 | "local_path": ingestion.local_path, 62 | "glob_pattern": ingestion.glob_pattern, 63 | }, 64 | splitter_config=cfg.splitter_config, 65 | ) 66 | 67 | logger.info("Ingestion tasks enqueued") 68 | -------------------------------------------------------------------------------- /opsmate/ingestions/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import AsyncGenerator, Callable, Awaitable 3 | from pydantic import BaseModel, Field 4 | from opsmate.textsplitters.base import Chunk 5 | import structlog 6 | 7 | logger = structlog.get_logger(__name__) 8 | 9 | 10 | class Document(BaseModel): 11 | metadata: dict = Field(default_factory=dict) 12 | data_provider: str = Field(default="unknown") 13 | data_source: str = Field(default="unknown") 14 | content: str 15 | 16 | 17 | PostChunkHook = Callable[[Chunk], Awaitable[Chunk]] 18 | 19 | 20 | class BaseIngestion(ABC, BaseModel): 21 | class Config: 22 | arbitrary_types_allowed = True 23 | 24 | @abstractmethod 25 | async def load(self) -> AsyncGenerator[Document, None]: 26 | """ 27 | Load the documents from the ingestion source. 
28 | """ 29 | pass 30 | 31 | @abstractmethod 32 | def data_source(self) -> str: 33 | """ 34 | The data source of the ingestion. 35 | """ 36 | pass 37 | 38 | @abstractmethod 39 | def data_source_provider(self) -> str: 40 | """ 41 | The data source provider of the ingestion. 42 | """ 43 | pass 44 | -------------------------------------------------------------------------------- /opsmate/ingestions/chunk.py: -------------------------------------------------------------------------------- 1 | from opsmate.ingestions.base import Document 2 | from opsmate.textsplitters import TextSplitter 3 | import structlog 4 | 5 | logger = structlog.get_logger(__name__) 6 | 7 | 8 | async def chunk_document(splitter: TextSplitter, document: Document): 9 | """ 10 | Chunk the individual document. 11 | """ 12 | for chunk_idx, chunk in enumerate(splitter.split_text(document.content)): 13 | logger.info( 14 | "chunking document", document=document.metadata["path"], chunk_idx=chunk_idx 15 | ) 16 | ch = chunk.model_copy() 17 | for key, value in document.metadata.items(): 18 | ch.metadata[key] = value 19 | ch.id = chunk_idx 20 | ch.metadata["data_source"] = document.data_source 21 | ch.metadata["data_source_provider"] = document.data_provider 22 | 23 | yield ch 24 | -------------------------------------------------------------------------------- /opsmate/ingestions/fs.py: -------------------------------------------------------------------------------- 1 | from typing import AsyncGenerator 2 | from .base import BaseIngestion, Document 3 | from pydantic import Field 4 | from glob import glob 5 | from os import path 6 | from pathlib import Path 7 | from typing import Dict, List 8 | from hashlib import sha256 9 | 10 | 11 | class FsIngestion(BaseIngestion): 12 | local_path: str = Field(..., description="The local path to the files") 13 | glob_pattern: str = Field("**/*", description="The glob pattern to match the files") 14 | 15 | async def load(self) -> AsyncGenerator[Document, None]: 16 | 
glob_pattern = path.join(self.local_path, self.glob_pattern) 17 | files = glob(glob_pattern, recursive=True) 18 | for filename in files: 19 | # skip if filename is a directory 20 | if path.isdir(filename): 21 | continue 22 | with open(filename, "r") as f: 23 | content = f.read() 24 | base_name = path.basename(filename) 25 | full_path = path.abspath(filename) 26 | sha = sha256(content.encode("utf-8")).hexdigest() 27 | yield Document( 28 | data_provider=self.data_source_provider(), 29 | data_source=self.data_source(), 30 | content=content, 31 | metadata={ 32 | "name": base_name, 33 | "path": full_path, 34 | "sha": sha, 35 | }, 36 | ) 37 | 38 | def data_source(self) -> str: 39 | return str(Path(self.local_path) / self.glob_pattern) 40 | 41 | def data_source_provider(self) -> str: 42 | return "fs" 43 | 44 | @classmethod 45 | def from_configmap(cls, config: Dict[str, str]) -> List["FsIngestion"]: 46 | ingestions = [] 47 | for path, glob_pattern in config.items(): 48 | ingestions.append(cls(local_path=path, glob_pattern=glob_pattern)) 49 | return ingestions 50 | -------------------------------------------------------------------------------- /opsmate/knowledgestore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opsmate-ai/opsmate/7a43754727aa6dfe98a59fe843b30faca9757b2c/opsmate/knowledgestore/__init__.py -------------------------------------------------------------------------------- /opsmate/libs/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_settings import BaseSettings 2 | 3 | __all__ = ["BaseSettings"] 4 | -------------------------------------------------------------------------------- /opsmate/migrations/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. 
-------------------------------------------------------------------------------- /opsmate/migrations/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config 4 | from sqlalchemy import pool 5 | 6 | from alembic import context 7 | from opsmate.config import config as opsmate_config 8 | 9 | from opsmate.workflow.models import SQLModel as WorkflowSQLModel 10 | from opsmate.ingestions.models import SQLModel as IngestionModel 11 | from opsmate.dbq.dbq import SQLModel as DBQSQLModel 12 | from opsmate.gui.models import SQLModel as GUISQLModel 13 | 14 | 15 | # this is the Alembic Config object, which provides 16 | # access to the values within the .ini file in use. 17 | config = context.config 18 | 19 | # Interpret the config file for Python logging. 20 | # This line sets up loggers basically. 21 | if config.config_file_name is not None: 22 | fileConfig(config.config_file_name) 23 | 24 | # add your model's MetaData object here 25 | # for 'autogenerate' support 26 | # from myapp import mymodel 27 | # target_metadata = mymodel.Base.metadata 28 | 29 | target_metadata = [ 30 | WorkflowSQLModel.metadata, 31 | IngestionModel.metadata, 32 | DBQSQLModel.metadata, 33 | GUISQLModel.metadata, 34 | ] 35 | 36 | # other values from the config, defined by the needs of env.py, 37 | # can be acquired: 38 | # my_important_option = config.get_main_option("my_important_option") 39 | # ... etc. 40 | 41 | 42 | def run_migrations_offline() -> None: 43 | """Run migrations in 'offline' mode. 44 | 45 | This configures the context with just a URL 46 | and not an Engine, though an Engine is acceptable 47 | here as well. By skipping the Engine creation 48 | we don't even need a DBAPI to be available. 49 | 50 | Calls to context.execute() here emit the given string to the 51 | script output. 
52 | 53 | """ 54 | context.configure( 55 | url=opsmate_config.db_url, 56 | target_metadata=target_metadata, 57 | literal_binds=True, 58 | dialect_opts={"paramstyle": "named"}, 59 | ) 60 | 61 | with context.begin_transaction(): 62 | context.run_migrations() 63 | 64 | 65 | def run_migrations_online() -> None: 66 | """Run migrations in 'online' mode. 67 | 68 | In this scenario we need to create an Engine 69 | and associate a connection with the context. 70 | 71 | """ 72 | cfg = config.get_section(config.config_ini_section, {}) 73 | cfg["sqlalchemy.url"] = opsmate_config.db_url 74 | connectable = engine_from_config( 75 | cfg, 76 | prefix="sqlalchemy.", 77 | poolclass=pool.NullPool, 78 | ) 79 | 80 | with connectable.connect() as connection: 81 | context.configure(connection=connection, target_metadata=target_metadata) 82 | 83 | with context.begin_transaction(): 84 | context.run_migrations() 85 | 86 | 87 | if context.is_offline_mode(): 88 | run_migrations_offline() 89 | else: 90 | run_migrations_online() 91 | -------------------------------------------------------------------------------- /opsmate/migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from typing import Sequence, Union 9 | 10 | from alembic import op 11 | import sqlalchemy as sa 12 | import sqlmodel 13 | ${imports if imports else ""} 14 | 15 | # revision identifiers, used by Alembic. 
16 | revision: str = ${repr(up_revision)} 17 | down_revision: Union[str, None] = ${repr(down_revision)} 18 | branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} 19 | depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} 20 | 21 | 22 | def upgrade() -> None: 23 | ${upgrades if upgrades else "pass"} 24 | 25 | 26 | def downgrade() -> None: 27 | ${downgrades if downgrades else "pass"} 28 | -------------------------------------------------------------------------------- /opsmate/migrations/versions/b86047adede9_introduce_the_concept_of_named_queue_in_.py: -------------------------------------------------------------------------------- 1 | """introduce the concept of named-queue in db queue 2 | 3 | Revision ID: b86047adede9 4 | Revises: 79fe7d287ba8 5 | Create Date: 2025-03-06 12:33:48.109644 6 | 7 | """ 8 | 9 | from typing import Sequence, Union 10 | 11 | from alembic import op 12 | import sqlalchemy as sa 13 | import sqlmodel 14 | 15 | 16 | # revision identifiers, used by Alembic. 
17 | revision: str = "b86047adede9" 18 | down_revision: Union[str, None] = "79fe7d287ba8" 19 | branch_labels: Union[str, Sequence[str], None] = None 20 | depends_on: Union[str, Sequence[str], None] = None 21 | 22 | 23 | def upgrade() -> None: 24 | op.add_column( 25 | "taskitem", 26 | sa.Column( 27 | "queue_name", 28 | sqlmodel.sql.sqltypes.AutoString(), 29 | nullable=False, 30 | server_default=sa.text("'default'"), 31 | ), 32 | ) 33 | 34 | 35 | def downgrade() -> None: 36 | op.drop_column("taskitem", "queue_name") 37 | -------------------------------------------------------------------------------- /opsmate/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .plugins import PluginRegistry 2 | 3 | auto_discover = PluginRegistry.auto_discover 4 | -------------------------------------------------------------------------------- /opsmate/polya/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opsmate-ai/opsmate/7a43754727aa6dfe98a59fe843b30faca9757b2c/opsmate/polya/__init__.py -------------------------------------------------------------------------------- /opsmate/runtime/__init__.py: -------------------------------------------------------------------------------- 1 | from opsmate.runtime.runtime import Runtime, RuntimeError, discover_runtimes 2 | from opsmate.runtime.local import LocalRuntime 3 | from opsmate.runtime.docker import DockerRuntime 4 | from opsmate.runtime.ssh import SSHRuntime 5 | from opsmate.runtime.k8s import K8sRuntime 6 | 7 | __all__ = [ 8 | "Runtime", 9 | "LocalRuntime", 10 | "RuntimeError", 11 | "DockerRuntime", 12 | "SSHRuntime", 13 | "K8sRuntime", 14 | ] 15 | 16 | discover_runtimes() 17 | -------------------------------------------------------------------------------- /opsmate/runtime/runtime.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, 
abstractmethod 2 | from typing import Type, List 3 | from opsmate.libs.config.base_settings import BaseSettings 4 | import pkg_resources 5 | import structlog 6 | import subprocess 7 | import traceback 8 | 9 | logger = structlog.get_logger(__name__) 10 | 11 | 12 | class RuntimeConfig(BaseSettings): ... 13 | 14 | 15 | class Runtime(ABC): 16 | runtimes: dict[str, Type["Runtime"]] = {} 17 | configs: dict[str, Type[RuntimeConfig]] = {} 18 | 19 | @abstractmethod 20 | async def run(self, *args, **kwargs): 21 | pass 22 | 23 | @abstractmethod 24 | async def connect(self): 25 | pass 26 | 27 | @abstractmethod 28 | async def disconnect(self): 29 | pass 30 | 31 | @abstractmethod 32 | async def os_info(self): 33 | pass 34 | 35 | @abstractmethod 36 | async def whoami(self): 37 | pass 38 | 39 | @abstractmethod 40 | async def runtime_info(self): 41 | pass 42 | 43 | @abstractmethod 44 | async def has_systemd(self): 45 | pass 46 | 47 | 48 | class RuntimeError(Exception): 49 | """ 50 | Exception raised when a runtime operation fails. 
51 | """ 52 | 53 | def __init__(self, message: str, output: str | None = None): 54 | self.message = message 55 | self.output = output 56 | super().__init__(self.message) 57 | 58 | def __str__(self): 59 | return f"{self.message}\n{self.output}" 60 | 61 | 62 | def register_runtime(name: str, config: Type[RuntimeConfig]): 63 | def wrapper(cls: Type[Runtime]): 64 | Runtime.runtimes[name] = cls 65 | Runtime.configs[name] = config 66 | 67 | return cls 68 | 69 | return wrapper 70 | 71 | 72 | def discover_runtimes(group_name="opsmate.runtime.runtimes"): 73 | for entry_point in pkg_resources.iter_entry_points(group_name): 74 | try: 75 | cls = entry_point.load() 76 | if not issubclass(cls, Runtime): 77 | logger.error( 78 | "Runtime must inherit from the Runtime class", name=entry_point.name 79 | ) 80 | continue 81 | except Exception as e: 82 | logger.error( 83 | "Error loading runtime", 84 | name=entry_point.name, 85 | error=e, 86 | traceback=traceback.format_exc(), 87 | ) 88 | 89 | 90 | def co(cmd, **kwargs): 91 | """ 92 | Check output of a command. 93 | Return the exit code and output of the command. 94 | If timeout is specified, the command will be terminated after timeout seconds. 95 | Return code for timeout is 124 (consistent with the timeout command). 
96 | """ 97 | kwargs["stderr"] = subprocess.STDOUT 98 | kwargs["text"] = True 99 | 100 | try: 101 | output = subprocess.check_output(cmd, **kwargs).strip() 102 | return 0, output 103 | except subprocess.CalledProcessError as e: 104 | return e.returncode, e.output 105 | except subprocess.TimeoutExpired as e: 106 | return 124, e.stdout 107 | -------------------------------------------------------------------------------- /opsmate/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opsmate-ai/opsmate/7a43754727aa6dfe98a59fe843b30faca9757b2c/opsmate/tests/__init__.py -------------------------------------------------------------------------------- /opsmate/tests/apiserver/test_apiserver.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastapi.testclient import TestClient 3 | from opsmate.apiserver import app 4 | 5 | 6 | class TestApiServer: 7 | @pytest.fixture 8 | def client(self): 9 | return TestClient(app) 10 | 11 | def test_health(self, client): 12 | response = client.get("/api/v1/healthz") 13 | assert response.status_code == 200 14 | assert response.json() == {"status": "ok"} 15 | 16 | def test_models(self, client): 17 | response = client.get("/api/v1/models") 18 | assert response.status_code == 200 19 | assert isinstance(response.json(), list) 20 | assert len(response.json()) > 0 21 | -------------------------------------------------------------------------------- /opsmate/tests/base.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import os 3 | import pytest 4 | from opsmate.config import config 5 | from opsmate.knowledgestore.models import init_table 6 | import structlog 7 | import asyncio 8 | 9 | logger = structlog.get_logger() 10 | 11 | 12 | class BaseTestCase: 13 | @pytest.fixture(scope="session", autouse=True) 14 | def setup_embeddings_db(self): 15 | pid = 
os.getpid() 16 | prefix = f"opsmate-embeddings-{pid}" 17 | tempdir = tempfile.mkdtemp(prefix=prefix) 18 | config.embeddings_db_path = tempdir 19 | logger.info("Created temp dir for embeddings", path=config.embeddings_db_path) 20 | asyncio.run(init_table()) 21 | 22 | yield 23 | 24 | logger.info("Removing temp dir for embeddings", path=config.embeddings_db_path) 25 | os.system(f"rm -rf {config.embeddings_db_path}") 26 | -------------------------------------------------------------------------------- /opsmate/tests/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opsmate-ai/opsmate/7a43754727aa6dfe98a59fe843b30faca9757b2c/opsmate/tests/core/__init__.py -------------------------------------------------------------------------------- /opsmate/tests/dino/fixtures/contexts/gcloud.py: -------------------------------------------------------------------------------- 1 | from opsmate.dino import context 2 | from opsmate.tools import ShellCommand 3 | 4 | 5 | @context( 6 | name="gcloud", 7 | tools=[ 8 | ShellCommand, 9 | ], 10 | ) 11 | async def gcloud(): 12 | """gcloud sme""" 13 | return "you are a gcloud SME who is specialised calling gcloud CLI" 14 | -------------------------------------------------------------------------------- /opsmate/tests/dino/test_agentic.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from opsmate.dino import dino, dtool, run_react 3 | from opsmate.contexts.k8s import k8s_ctx 4 | from opsmate.dino.types import ReactAnswer 5 | from typing import Annotated 6 | import structlog 7 | 8 | logger = structlog.get_logger(__name__) 9 | 10 | 11 | @dtool 12 | async def k8s_agent( 13 | problem: Annotated[str, "High level problem to solve"], 14 | question: Annotated[str, "The question to solve"], 15 | ) -> str: 16 | """ 17 | k8s_agent is a tool that solves a problem using kubectl. 
18 | """ 19 | logger.info("solving query", problem=problem, question=question) 20 | 21 | async for result in run_react( 22 | question, 23 | context=k8s_ctx.resolve_contexts(), 24 | tools=k8s_ctx.resolve_tools(), 25 | ): 26 | logger.info(result) 27 | 28 | if isinstance(result, ReactAnswer): 29 | return result.answer 30 | 31 | 32 | @dino("gpt-4o", response_model=str, tools=[k8s_agent]) 33 | async def sre_manager(query: str): 34 | """ 35 | You are a world class SRE manager who manages a team of SREs. 36 | """ 37 | return f"answer the query: {query}" 38 | 39 | 40 | @dino("gpt-4o-mini", response_model=int) 41 | async def extract_number(text: str): 42 | return f"extract the number from {text}" 43 | 44 | 45 | # @pytest.mark.asyncio 46 | # async def test_k8s_agent(): 47 | # result = await sre_manager("how many pods are running in the cluster?") 48 | # assert await extract_number(result) == 18 49 | -------------------------------------------------------------------------------- /opsmate/tests/dino/test_context.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from os import path 3 | 4 | from opsmate.dino import context 5 | from opsmate.dino.context import ContextRegistry 6 | from opsmate.tools import ShellCommand 7 | 8 | 9 | @pytest.fixture(scope="session", autouse=True) 10 | def context_dir(): 11 | current_dir = path.dirname(path.abspath(__file__)) 12 | context_dir = path.join(current_dir, "fixtures/contexts") 13 | ContextRegistry.reset() 14 | ContextRegistry.discover(context_dir) 15 | yield 16 | ContextRegistry.reset() 17 | 18 | 19 | @pytest.mark.asyncio 20 | async def test_builtin_contexts(context_dir): 21 | contexts = [ 22 | "k8s", 23 | "cli", 24 | "terraform", 25 | ] 26 | 27 | for context in contexts: 28 | assert context in ContextRegistry.get_contexts() 29 | 30 | 31 | @pytest.mark.asyncio 32 | async def test_custom_context_load(context_dir): 33 | gcloud_plugin = ContextRegistry.get_context("gcloud") 34 | assert 
gcloud_plugin is not None 35 | 36 | assert ( 37 | await gcloud_plugin.system_prompt() 38 | == "you are a gcloud SME who is specialised calling gcloud CLI" 39 | ) 40 | assert gcloud_plugin.description == "gcloud sme" 41 | assert gcloud_plugin.tools == [ShellCommand] 42 | -------------------------------------------------------------------------------- /opsmate/tests/dino/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from opsmate.dino.utils import args_dump 4 | 5 | 6 | def test_args_dump(): 7 | def fn(a, b, c=1, d=2): 8 | pass 9 | 10 | def cbk(a, d=2): 11 | pass 12 | 13 | assert args_dump(fn, cbk, (1, 2), {"c": 3, "d": 4}) == ((1,), {"d": 4}) 14 | 15 | 16 | def test_args_dump_with_unmatching(): 17 | def fn(a, b, c=1, d=2): 18 | pass 19 | 20 | def cbk(a, d=2, e=3): 21 | pass 22 | 23 | assert args_dump(fn, cbk, (1, 2), {"c": 3, "d": 4}) == ((1,), {"d": 4}) 24 | 25 | 26 | @pytest.mark.asyncio 27 | async def test_args_dump_async(): 28 | async def fn(a, b, c=1, d=2): 29 | pass 30 | 31 | async def cbk(a, d=2): 32 | pass 33 | 34 | assert args_dump(fn, cbk, (1, 2), {"c": 3, "d": 4}) == ((1,), {"d": 4}) 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_args_dump_async_to_sync_with_kwargs(): 39 | async def fn(a, b, c=1, d=2): 40 | pass 41 | 42 | def cbk(a, d=2): 43 | pass 44 | 45 | assert args_dump(fn, cbk, (1, 2), {"c": 3, "d": 4}) == ((1,), {"d": 4}) 46 | 47 | 48 | def test_args_dump_sync_to_async_with_kwargs(): 49 | def fn(a, b, c=1, d=2): 50 | pass 51 | 52 | async def cbk(a, d=2): 53 | pass 54 | 55 | assert args_dump(fn, cbk, (1, 2), {"c": 3, "d": 4}) == ((1,), {"d": 4}) 56 | -------------------------------------------------------------------------------- /opsmate/tests/ingestions/fixtures/SOMETHINGELSE.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opsmate-ai/opsmate/7a43754727aa6dfe98a59fe843b30faca9757b2c/opsmate/tests/ingestions/fixtures/SOMETHINGELSE.txt -------------------------------------------------------------------------------- /opsmate/tests/ingestions/fixtures/TEST.md: -------------------------------------------------------------------------------- 1 | # this is a test file for the document ingestion 2 | 3 | This document is used to test the document ingestion. 4 | 5 | ## test 1 6 | 7 | Hello this is test 1 8 | 9 | ## test 2 10 | 11 | Hello this is test 2, here is some code: 12 | 13 | ```go 14 | package main 15 | 16 | func main() { 17 | println("Hello, World!") 18 | } 19 | ``` 20 | 21 | ### How to run this code 22 | 23 | ```bash 24 | go run main.go 25 | ``` 26 | 27 | ## test 3 28 | 29 | Hello this is test 3, here is some yaml: 30 | 31 | ```yaml 32 | apiVersion: v1 33 | kind: Service 34 | metadata: 35 | name: nginx-service 36 | spec: 37 | selector: 38 | app: nginx 39 | ``` 40 | -------------------------------------------------------------------------------- /opsmate/tests/ingestions/fixtures/nested/TEST2.md: -------------------------------------------------------------------------------- 1 | # test 2 2 | 3 | This is a test 2 4 | -------------------------------------------------------------------------------- /opsmate/tests/ingestions/test_ingestions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from opsmate.ingestions import ingestions_from_config 3 | from opsmate.tests.base import BaseTestCase 4 | from opsmate.config import Config 5 | from opsmate.ingestions import GithubIngestion, FsIngestion 6 | import os 7 | 8 | 9 | class TestIngestions(BaseTestCase): 10 | def test_ingestions_from_env(self): 11 | old_token = os.getenv("GITHUB_TOKEN") 12 | os.environ["GITHUB_TOKEN"] = "env-token" 13 | cfg = Config() 14 | cfg.github_embeddings_config = { 15 | "opsmate/opsmate:dev": "*.md", 16 | "opsmate/opsmate2": 
"*.txt", 17 | } 18 | cfg.fs_embeddings_config = { 19 | "your_repo_path": "*.md", 20 | "your_repo_path2": "*.txt", 21 | } 22 | ingestions = ingestions_from_config(cfg) 23 | assert len(ingestions) == 4 24 | assert isinstance(ingestions[0], GithubIngestion) 25 | assert ingestions[0].data_source_provider() == "github" 26 | assert ingestions[0].data_source() == "opsmate/opsmate" 27 | assert ingestions[0].repo == "opsmate/opsmate" 28 | assert ingestions[0].branch == "dev" 29 | assert ingestions[0].glob == "*.md" 30 | 31 | assert isinstance(ingestions[1], GithubIngestion) 32 | assert ingestions[1].data_source_provider() == "github" 33 | assert ingestions[1].data_source() == "opsmate/opsmate2" 34 | assert ingestions[1].repo == "opsmate/opsmate2" 35 | assert ingestions[1].branch == "main" 36 | assert ingestions[1].glob == "*.txt" 37 | 38 | assert isinstance(ingestions[2], FsIngestion) 39 | assert ingestions[2].data_source_provider() == "fs" 40 | assert ingestions[2].data_source() == "your_repo_path/*.md" 41 | assert ingestions[2].local_path == "your_repo_path" 42 | assert ingestions[2].glob_pattern == "*.md" 43 | 44 | assert isinstance(ingestions[3], FsIngestion) 45 | assert ingestions[3].data_source_provider() == "fs" 46 | assert ingestions[3].data_source() == "your_repo_path2/*.txt" 47 | assert ingestions[3].local_path == "your_repo_path2" 48 | assert ingestions[3].glob_pattern == "*.txt" 49 | 50 | if old_token: 51 | os.environ["GITHUB_TOKEN"] = old_token 52 | else: 53 | del os.environ["GITHUB_TOKEN"] 54 | -------------------------------------------------------------------------------- /opsmate/tests/plugins/fixtures/conflicts/plugin.py: -------------------------------------------------------------------------------- 1 | from opsmate.plugins import auto_discover 2 | from opsmate.dino import dino 3 | from typing import Literal 4 | 5 | 6 | @auto_discover( 7 | author="opsmate", 8 | version="0.1.0", 9 | ) 10 | @dino(model="gpt-4o-mini", response_model=Literal["anthropic", 
"openai"]) 11 | async def my_creator(): 12 | """you are a LLM""" 13 | return "your creator" 14 | -------------------------------------------------------------------------------- /opsmate/tests/plugins/fixtures/plugins/essentials.py: -------------------------------------------------------------------------------- 1 | from opsmate.plugins import auto_discover 2 | from opsmate.dino import dino, dtool 3 | from typing import Literal 4 | 5 | 6 | @auto_discover( 7 | author="opsmate", 8 | version="0.1.0", 9 | ) 10 | @dino(model="gpt-4o-mini", response_model=Literal["anthropic", "openai"]) 11 | async def my_creator(): 12 | """you are a LLM""" 13 | return "your creator" 14 | 15 | 16 | @dtool 17 | def get_weather(location: str) -> str: 18 | return f"The location is {location}. if it's London return raining other wise return sunny" 19 | 20 | 21 | @auto_discover( 22 | name="fake_weather", 23 | description="get the weather", 24 | author="opsmate", 25 | version="0.1.0", 26 | ) 27 | @dino( 28 | model="gpt-4o-mini", response_model=Literal["sunny", "rainy"], tools=[get_weather] 29 | ) 30 | async def weather(location: str): 31 | """the the current weather""" 32 | return f"check the weather for {location}" 33 | 34 | 35 | @auto_discover( 36 | name="fake_weather_sync", 37 | description="the the current weather", 38 | author="opsmate", 39 | version="0.1.0", 40 | ) 41 | @dino( 42 | model="gpt-4o-mini", response_model=Literal["sunny", "rainy"], tools=[get_weather] 43 | ) 44 | # same function name is supported as long as it's auto-discovered with a different name 45 | def weather(location: str) -> str: 46 | """the the current weather""" 47 | return f"check the weather for {location}" 48 | -------------------------------------------------------------------------------- /opsmate/tests/plugins/test_plugins.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from os import path 3 | 4 | from opsmate.plugins import PluginRegistry 5 | 6 | 7 | 
@pytest.fixture(scope="session", autouse=True) 8 | def plugins_dir(): 9 | current_dir = path.dirname(path.abspath(__file__)) 10 | plugins_dir = path.join(current_dir, "fixtures/plugins") 11 | PluginRegistry.discover(plugins_dir) 12 | yield 13 | PluginRegistry.clear() 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_builtin_tools_registry(plugins_dir): 18 | tools = [ 19 | "ShellCommand", 20 | "KnowledgeRetrieval", 21 | "current_time", 22 | "datetime_extraction", 23 | "HttpGet", 24 | "HttpCall", 25 | "HtmlToText", 26 | "FileRead", 27 | "FileWrite", 28 | "FileAppend", 29 | "FileDelete", 30 | "FilesList", 31 | "FilesFind", 32 | "SysStats", 33 | "SysEnv", 34 | ] 35 | for tool in tools: 36 | assert tool in PluginRegistry.get_tools() 37 | 38 | 39 | @pytest.mark.asyncio 40 | async def test_plugin_registry_basic(plugins_dir): 41 | my_creator = PluginRegistry.get_plugin("my_creator") 42 | assert my_creator.metadata.description == "you are a LLM" 43 | assert my_creator.metadata.author == "opsmate" 44 | assert my_creator.metadata.version == "0.1.0" 45 | assert my_creator.metadata.source.endswith("fixtures/plugins/essentials.py") 46 | 47 | assert await my_creator.execute(model="gpt-4o-mini") == "openai" 48 | assert await my_creator.execute(model="claude-3-5-sonnet-20241022") == "anthropic" 49 | 50 | 51 | @pytest.mark.asyncio 52 | async def test_plugin_registry_override(plugins_dir): 53 | weather = PluginRegistry.get_plugin("fake_weather") 54 | assert weather.metadata.name == "fake_weather" 55 | assert weather.metadata.description == "get the weather" 56 | assert weather.metadata.author == "opsmate" 57 | assert weather.metadata.version == "0.1.0" 58 | 59 | 60 | @pytest.mark.asyncio 61 | async def test_plugin_registy_with_tool(plugins_dir): 62 | weather = PluginRegistry.get_plugin("fake_weather") 63 | assert await weather.execute(location="London") == "rainy" 64 | assert await weather.execute(location="San Francisco") == "sunny" 65 | 66 | 67 | @pytest.mark.asyncio 68 | 
async def test_plugin_with_sync_tool(plugins_dir): 69 | weather = PluginRegistry.get_plugin("fake_weather_sync") 70 | assert await weather.execute(location="London") == "rainy" 71 | assert await weather.execute(location="San Francisco") == "sunny" 72 | 73 | 74 | @pytest.mark.asyncio 75 | async def test_plugin_with_conflicts(plugins_dir): 76 | current_dir = path.dirname(path.abspath(__file__)) 77 | conflicts_dir = path.join(current_dir, "fixtures/conflicts") 78 | with pytest.raises(ValueError, match="Plugin my_creator already exists"): 79 | PluginRegistry.discover(conflicts_dir, ignore_conflicts=False) 80 | 81 | 82 | @pytest.mark.asyncio 83 | async def test_load_dtools(plugins_dir): 84 | get_weather = PluginRegistry.get_tool("get_weather") 85 | assert get_weather is not None 86 | assert ( 87 | await get_weather(location="London").run() 88 | == "The location is London. if it's London return raining other wise return sunny" 89 | ) 90 | 91 | assert "get_weather" in PluginRegistry.get_tools() 92 | -------------------------------------------------------------------------------- /opsmate/tests/polya/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opsmate-ai/opsmate/7a43754727aa6dfe98a59fe843b30faca9757b2c/opsmate/tests/polya/__init__.py -------------------------------------------------------------------------------- /opsmate/tests/polya/test_planning.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from opsmate.polya.planning import planning 4 | from opsmate.polya.models import Task, TaskPlan, Solution, ReportExtracted 5 | 6 | 7 | def test_topological_sort(): 8 | subtasks = [ 9 | Task( 10 | id=4, 11 | task="Restart the payment-service deployment to apply the changes.", 12 | subtasks=[3], 13 | ), 14 | Task( 15 | id=5, 16 | task="Monitor the payment-service pods to ensure readiness probe success and stable rollout.", 17 | 
subtasks=[4], 18 | ), 19 | Task( 20 | id=3, 21 | task="Update the readiness probe configuration with the correct endpoint.", 22 | subtasks=[1, 2], 23 | ), 24 | Task( 25 | id=1, 26 | task="Verify the current readiness probe configuration for the payment-service deployment.", 27 | subtasks=[], 28 | ), 29 | Task( 30 | id=2, 31 | task="Identify the correct health check endpoint for the payment-service application.", 32 | subtasks=[], 33 | ), 34 | ] 35 | task_plan = TaskPlan( 36 | goal="Fix the deployment payment-service in the payment namespace", 37 | subtasks=subtasks, 38 | ) 39 | 40 | sorted = task_plan.topological_sort() 41 | 42 | sorted_ids = [task.id for task in sorted] 43 | assert sorted_ids == [1, 2, 3, 4, 5] 44 | 45 | 46 | @pytest.mark.asyncio 47 | async def test_planning(): 48 | report_extracted = ReportExtracted( 49 | summary="The 'payment-service' deployment in the 'payment' namespace is facing rollout issues due to 'ProgressDeadlineExceeded' status, unhealthy pod statuses from failed readiness probes returning HTTP 404 errors, and back-off behavior from repeatedly restarting a failed container.", 50 | potential_solutions=[ 51 | Solution( 52 | findings=["ProgressDeadlineExceeded Status", "Readiness Probe Failure"], 53 | solution="Check and update the readiness probe configuration to ensure it's targeting an existent and correct endpoint.", 54 | probability=50, 55 | ), 56 | Solution( 57 | findings=["Back-Off Event"], 58 | solution="Inspect and amend the container setup, verifying commands and environment configurations as correct to enable stable application start-up.", 59 | probability=30, 60 | ), 61 | Solution( 62 | findings=["Undocumented Configuration Changes"], 63 | solution="Manually review recent commits or configuration changes to identify any hidden issues or missing settings impacting the rollout.", 64 | probability=20, 65 | ), 66 | ], 67 | ) 68 | 69 | plan = await planning( 70 | summary=report_extracted.potential_solutions[0].summarize( 71 | 
report_extracted.summary 72 | ), 73 | facts=[], 74 | instruction="can you solve the problem based on the context?", 75 | ) 76 | 77 | assert plan.goal is not None 78 | assert len(plan.subtasks) > 0 79 | -------------------------------------------------------------------------------- /opsmate/tests/textsplitters/test_recursive.py: -------------------------------------------------------------------------------- 1 | from opsmate.textsplitters.recursive import RecursiveTextSplitter 2 | from opsmate.textsplitters.base import Chunk 3 | 4 | 5 | def test_recursive_text_splitter(): 6 | text = "Apple,banana,orange and tomato." 7 | splitter = RecursiveTextSplitter( 8 | chunk_size=7, chunk_overlap=3, separators=[".", ","] 9 | ) 10 | output = splitter.split_text(text) 11 | expected_output = [ 12 | Chunk(content="Apple", metadata={"seperator": ","}), 13 | Chunk(content="banana", metadata={"seperator": ","}), 14 | Chunk(content="orange and tomato", metadata={"seperator": ","}), 15 | ] 16 | assert output == expected_output 17 | 18 | text = "This is a piece of text." 19 | splitter = RecursiveTextSplitter(chunk_size=10, chunk_overlap=5) 20 | output = splitter.split_text(text) 21 | expected_output = [ 22 | Chunk(content="This is a", metadata={"seperator": " "}), 23 | Chunk(content="piece of text", metadata={"seperator": " "}), 24 | Chunk(content="text", metadata={"seperator": " "}), 25 | ] 26 | assert output == expected_output 27 | 28 | text = "This is a piece of text." 29 | splitter = RecursiveTextSplitter(chunk_size=10, chunk_overlap=0) 30 | output = splitter.split_text(text) 31 | expected_output = [ 32 | Chunk(content="This is a", metadata={"seperator": " "}), 33 | Chunk(content="piece of", metadata={"seperator": " "}), 34 | Chunk(content="text", metadata={"seperator": " "}), 35 | ] 36 | assert output == expected_output 37 | 38 | text = "This is a piece of text." 
39 | splitter = RecursiveTextSplitter(chunk_size=10, chunk_overlap=0, separators=[" "]) 40 | output = splitter.split_text(text) 41 | expected_output = [ 42 | Chunk(content="This is a", metadata={"seperator": " "}), 43 | Chunk(content="piece of", metadata={"seperator": " "}), 44 | Chunk(content="text.", metadata={"seperator": " "}), 45 | ] 46 | assert output == expected_output 47 | -------------------------------------------------------------------------------- /opsmate/tests/tools/test_command_line.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from opsmate.tools.command_line import ShellCommand 4 | from opsmate.tests.base import BaseTestCase 5 | 6 | 7 | class TestCommandLine(BaseTestCase): 8 | @pytest.mark.asyncio 9 | async def test_command_line(self): 10 | tool = ShellCommand( 11 | command="ls -l", 12 | description="List the contents of the current directory", 13 | ) 14 | assert tool.output is None 15 | 16 | result = await tool.run() 17 | assert result is not None 18 | assert result == tool.output 19 | 20 | assert tool.markdown() is not None 21 | assert tool.output in tool.markdown() 22 | 23 | @pytest.mark.asyncio 24 | async def test_command_line_with_context(self): 25 | tool = ShellCommand( 26 | command="echo $TEST", 27 | description="List the contents of the current directory", 28 | ) 29 | result = await tool.run(context={"envvars": {"TEST": "test"}}) 30 | assert result is not None 31 | assert result == "test\n" 32 | 33 | result = await tool.run(context={"envvars": {"TEST": "test2"}}) 34 | assert result is not None 35 | assert result == "test2\n" 36 | -------------------------------------------------------------------------------- /opsmate/tests/tools/test_knowledge_retrieval.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from opsmate.tools.knowledge_retrieval import KnowledgeRetrieval 4 | from opsmate.tests.base import BaseTestCase 5 
| 6 | 7 | class TestKnowledgeRetrieval(BaseTestCase): 8 | @pytest.mark.asyncio 9 | async def test_knowledge_retrieval(self): 10 | tool = KnowledgeRetrieval( 11 | query="What is the meaning of life?", 12 | ) 13 | assert tool.output is None 14 | 15 | aconn = await tool.aconn() 16 | assert aconn is not None 17 | 18 | result = await tool.run(context={"with_reranking": False}) 19 | assert result is not None 20 | assert result == tool.output 21 | 22 | assert tool.markdown().startswith("\n## Knowledge") 23 | -------------------------------------------------------------------------------- /opsmate/textsplitters/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import TextSplitter 2 | from .recursive import RecursiveTextSplitter 3 | from .markdown_header import MarkdownHeaderTextSplitter 4 | from typing import Dict, Any 5 | 6 | __all__ = ["TextSplitter", "RecursiveTextSplitter", "MarkdownHeaderTextSplitter"] 7 | 8 | RECURSIVE_SPLITTER = "recursive" 9 | MARKDOWN_HEADER_SPLITTER = "markdown_header" 10 | 11 | SPLITTERS = { 12 | RECURSIVE_SPLITTER: RecursiveTextSplitter, 13 | MARKDOWN_HEADER_SPLITTER: MarkdownHeaderTextSplitter, 14 | } 15 | 16 | 17 | def splitter_from_config(config: Dict[str, Any]) -> TextSplitter: 18 | name = config.pop("splitter", RECURSIVE_SPLITTER) 19 | if name not in SPLITTERS: 20 | raise ValueError( 21 | f"Unknown splitter type: {name}, must be one of {', '.join(SPLITTERS.keys())}" 22 | ) 23 | return SPLITTERS[name](**config) 24 | -------------------------------------------------------------------------------- /opsmate/textsplitters/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class Chunk(BaseModel): 7 | id: int | None = None 8 | metadata: dict = Field(default_factory=dict) 9 | content: str 10 | 11 | 12 | class TextSplitter(ABC): 13 
| default_separators = [ 14 | "\n\n", 15 | "\n", 16 | ".", 17 | "?", 18 | "!", 19 | ";", 20 | ",", 21 | " ", 22 | "", 23 | ] 24 | 25 | def __init__( 26 | self, 27 | chunk_size: int = 1000, 28 | # practically, we don't want any overlap 29 | chunk_overlap: int = 0, 30 | separators: List[str] = [], 31 | ): 32 | """ 33 | Initialize the text splitter 34 | Args: 35 | chunk_size: The size of the chunks to split the text into 36 | chunk_overlap: The overlap between the chunks 37 | separator: The separators to use to split the text 38 | """ 39 | self.chunk_size = chunk_size 40 | self.chunk_overlap = chunk_overlap 41 | if separators: 42 | self.separators = separators 43 | else: 44 | self.separators = self.default_separators 45 | 46 | @abstractmethod 47 | def split_text(self, text: str) -> List[Chunk]: ... 48 | -------------------------------------------------------------------------------- /opsmate/textsplitters/recursive.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from .base import TextSplitter, Chunk 3 | 4 | 5 | class RecursiveTextSplitter(TextSplitter): 6 | def split_text(self, text: str) -> List[str]: 7 | """ 8 | Split the text into chunks of size chunk_size with overlap chunk_overlap 9 | """ 10 | splits = self._split_text(text, 0) 11 | splits = self._merge_splits(splits) 12 | return self._handle_overlap(splits) 13 | 14 | def _split_text(self, text: str, separatorLevel: int) -> List[Chunk]: 15 | if separatorLevel == len(self.separators): 16 | return [ 17 | Chunk( 18 | content=text, 19 | metadata={"seperator": self.separators[-1]}, 20 | ) 21 | ] 22 | 23 | if len(text) <= self.chunk_size: 24 | return [ 25 | Chunk( 26 | content=text, 27 | metadata={"seperator": self.separators[separatorLevel - 1]}, 28 | ) 29 | ] 30 | 31 | separator = self.separators[separatorLevel] 32 | splits = text.split(separator) 33 | splits = [split for split in splits if split] 34 | 35 | result = [] 36 | for split in splits: 37 | 
result.extend(self._split_text(split, separatorLevel + 1)) 38 | 39 | return result 40 | 41 | def _merge_splits(self, splits: List[Chunk]) -> List[Chunk]: 42 | result = [] 43 | idx = 0 44 | while idx < len(splits): 45 | sep1, add = splits[idx].metadata["seperator"], splits[idx].content 46 | idx += 1 47 | while idx < len(splits): 48 | sep2, chunk = splits[idx].metadata["seperator"], splits[idx].content 49 | if len(add) + len(sep2) + len(chunk) <= self.chunk_size: 50 | add += sep2 + chunk 51 | idx += 1 52 | else: 53 | break 54 | result.append( 55 | Chunk( 56 | content=add, 57 | metadata={"seperator": sep1}, 58 | ) 59 | ) 60 | return result 61 | 62 | def _handle_overlap(self, splits: List[Chunk]) -> List[Chunk]: 63 | result = [] 64 | for idx, split in enumerate(splits): 65 | sep1, add = split.metadata["seperator"], split.content 66 | overlap_remain = self.chunk_overlap + self.chunk_size - len(add) 67 | 68 | while overlap_remain > 0: 69 | for idx2 in range(idx + 1, len(splits)): 70 | sep2, chunk = ( 71 | splits[idx2].metadata["seperator"], 72 | splits[idx2].content, 73 | ) 74 | if len(sep2) + len(chunk) <= overlap_remain: 75 | add += sep2 + chunk 76 | overlap_remain -= len(chunk) - len(sep2) 77 | else: 78 | break 79 | break 80 | result.append( 81 | Chunk( 82 | content=add, 83 | metadata={"seperator": sep1}, 84 | ) 85 | ) 86 | 87 | return result 88 | -------------------------------------------------------------------------------- /opsmate/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .command_line import ShellCommand 2 | from .knowledge_retrieval import KnowledgeRetrieval 3 | from .github_operation import GithubCloneAndCD, GithubRaisePR 4 | from .aci import ACITool 5 | from .datetime import current_time, datetime_extraction 6 | from .system import ( 7 | HttpGet, 8 | HttpCall, 9 | HtmlToText, 10 | SysEnv, 11 | SysStats, 12 | FilesFind, 13 | FileDelete, 14 | FilesList, 15 | FileRead, 16 | FileWrite, 17 | 
FileAppend, 18 | SysEnv, 19 | SysStats, 20 | ) 21 | from .prom import PrometheusTool 22 | from .thinking import Thinking 23 | from .loki import LokiQueryTool 24 | from opsmate.dino.tools import discover_tools 25 | 26 | __all__ = [ 27 | "current_time", 28 | "datetime_extraction", 29 | "ShellCommand", 30 | "KnowledgeRetrieval", 31 | "ACITool", 32 | "GithubCloneAndCD", 33 | "GithubRaisePR", 34 | "HttpGet", 35 | "HttpCall", 36 | "HtmlToText", 37 | "FilesFind", 38 | "FilesList", 39 | "FileRead", 40 | "FileWrite", 41 | "FileAppend", 42 | "FileDelete", 43 | "FileStats", 44 | "SysEnv", 45 | "SysStats", 46 | "PrometheusTool", 47 | "Thinking", 48 | "LokiQueryTool", 49 | ] 50 | 51 | discover_tools() 52 | -------------------------------------------------------------------------------- /opsmate/tools/datetime.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | from pydantic import BaseModel, Field, model_validator, computed_field 3 | import pytz 4 | from typing import ClassVar, Annotated 5 | from opsmate.dino import dtool, dino 6 | from opsmate.dino.types import register_tool 7 | import structlog 8 | 9 | logger = structlog.get_logger(__name__) 10 | 11 | 12 | class DatetimeRange(BaseModel): 13 | start: str = Field( 14 | description="The start time of the query in %Y-%m-%dT%H:%M:%SZ format", 15 | default_factory=lambda: ( 16 | datetime.now(pytz.UTC) - timedelta(minutes=30) 17 | ).strftime("%Y-%m-%dT%H:%M:%SZ"), 18 | ) 19 | end: str = Field( 20 | description="The end time of the query in %Y-%m-%dT%H:%M:%SZ format", 21 | default_factory=lambda: datetime.now(pytz.UTC).strftime("%Y-%m-%dT%H:%M:%SZ"), 22 | ) 23 | 24 | _FMT: ClassVar[str] = "%Y-%m-%dT%H:%M:%SZ" 25 | 26 | @model_validator(mode="after") 27 | def validate_start_end(cls, v): 28 | try: 29 | datetime.strptime(v.start, cls._FMT) 30 | except ValueError: 31 | raise ValueError(f"Invalid start date format: {v.start}") 32 | 33 | try: 34 | 
datetime.strptime(v.end, cls._FMT) 35 | except ValueError: 36 | raise ValueError(f"Invalid end date format: {v.end}") 37 | 38 | return v 39 | 40 | @computed_field 41 | def start_dt(self) -> datetime: 42 | return datetime.strptime(self.start, self._FMT) 43 | 44 | @computed_field 45 | def end_dt(self) -> datetime: 46 | return datetime.strptime(self.end, self._FMT) 47 | 48 | 49 | @register_tool() 50 | @dtool 51 | async def current_time() -> str: 52 | """ 53 | Get the current time in %Y-%m-%dT%H:%M:%SZ format 54 | """ 55 | return datetime.now(pytz.UTC).strftime("%Y-%m-%dT%H:%M:%SZ") 56 | 57 | 58 | @register_tool() 59 | @dtool 60 | @dino( 61 | model="gpt-4o-mini", 62 | response_model=DatetimeRange, 63 | tools=[current_time], 64 | ) 65 | async def datetime_extraction( 66 | text: Annotated[ 67 | str, "The text to extract the datetime range from" 68 | ] = "last 30 minutes", 69 | ) -> DatetimeRange: 70 | """ 71 | You are tasked to extract the datetime range from the text 72 | 73 | The `current_time` tool must be called to understand the current time 74 | """ 75 | logger.info("datetime_extraction", text=text) 76 | return text 77 | -------------------------------------------------------------------------------- /opsmate/tools/thinking.py: -------------------------------------------------------------------------------- 1 | from opsmate.dino.types import ToolCall, PresentationMixin, register_tool 2 | from typing import Any 3 | from pydantic import Field 4 | import structlog 5 | 6 | logger = structlog.get_logger(__name__) 7 | 8 | 9 | @register_tool() 10 | class Thinking(ToolCall[str], PresentationMixin): 11 | """ 12 | Use the tool to think about something. 13 | It will not obtain new information or change the system state, 14 | but just append the thought to the log. 15 | 16 | Use this tool when there is no obvious action to take. 
17 | """ 18 | 19 | thought: str = Field(description="The thought to think about") 20 | 21 | async def __call__(self, context: dict[str, Any] = {}): 22 | logger.info("thinking", thought=self.thought) 23 | return self.thought 24 | 25 | def markdown(self, context: dict[str, Any] = {}): 26 | return f""" 27 | ### Thought 28 | 29 | {self.thought} 30 | """ 31 | -------------------------------------------------------------------------------- /opsmate/tools/utils.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | 3 | 4 | def maybe_truncate_text(text: str, max_length: int = 10000) -> str: 5 | tmp_file = _write_text_to_tmp_file(text) 6 | 7 | truncate_notice = f""" 8 | The initial content is truncated due to the maximum text length reached 9 | Please refer to the file for the full content: {tmp_file} 10 | 11 | """ 12 | if len(text) > max_length: 13 | return truncate_notice + text[len(text) - max_length :] 14 | return text 15 | 16 | 17 | def _write_text_to_tmp_file(text: str) -> str: 18 | tmp_file = tempfile.NamedTemporaryFile(delete=False) 19 | tmp_file.write(text.encode()) 20 | tmp_file.close() 21 | return tmp_file.name 22 | -------------------------------------------------------------------------------- /opsmate/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .workflow import Workflow, WorkflowContext, step 2 | 3 | __all__ = ["Workflow", "WorkflowContext", "step"] 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "opsmate" 3 | version = "0.2.3a2" 4 | description = "opsmate is a SRE AI assistant" 5 | authors = [ 6 | { name="Jingkai", email="jingkai@hey.com" }, 7 | ] 8 | readme = "README.md" 9 | license = { file = "LICENSE" } 10 | requires-python = "<4.0,>=3.10" 11 | dependencies = [ 12 | "plotext 
(>=5.3.2,<6.0.0)", 13 | "pydantic (>=2.8.2,<3.0.0)", 14 | "jinja2 (>=3.1.6,<4.0.0)", 15 | "instructor[anthropic]>=1.7.9,<2.0.0", 16 | "structlog (>=24.4.0,<25.0.0)", 17 | "pyyaml (>=6.0.2,<7.0.0)", 18 | "click (>=8.1.7,<9.0.0)", 19 | "rich (>=13.8.1,<14.0.0)", 20 | "pydantic-settings (>=2.6.1,<3.0.0)", 21 | "fastapi[standard] (>=0.115.5,<1.0.0)", 22 | "python-fasthtml (>=0.10.0,<1.0.0)", 23 | "sqlmodel (>=0.0.22,<1.0.0)", 24 | "graphviz (>=0.20.3,<1.0.0)", 25 | "httpx (>=0.27.2,<1.0.0)", 26 | "html2text (==2024.2.26)", 27 | "pytz (>=2025.1,<2026.0)", 28 | "lancedb (==0.20.0)", 29 | "alembic (>=1.14.1,<2.0.0)", 30 | "plotly (>=6.0.0,<7.0.0)", 31 | "pandas (>=2.2.3,<3.0.0)", 32 | "matplotlib (>=3.10.1,<4.0.0)", 33 | "opentelemetry-exporter-otlp>=1.31.0", 34 | "opentelemetry-instrumentation-openai>=0.38.12", 35 | "opentelemetry-instrumentation-anthropic>=0.38.12", 36 | "opentelemetry-instrumentation-sqlalchemy>=0.52b0", 37 | "opentelemetry-instrumentation-starlette>=0.52b0", 38 | "setuptools>=76.0.0", 39 | "tabulate>=0.9.0", 40 | "aiohttp!=3.11.13,>=3.11.0", # because 3.11.13 is yanked 41 | "pip>=25.0.1", 42 | ] 43 | 44 | [project.optional-dependencies] 45 | reranker-cohere = [ 46 | "cohere (>=5.14.0,<6.0.0)", 47 | ] 48 | reranker-answerdotai = [ 49 | "rerankers[transformers]", 50 | ] 51 | sentence-transformers = [ 52 | "sentence-transformers (>=3.4.1,<4.0.0)", 53 | ] 54 | 55 | [project.scripts] 56 | opsmate = "opsmate.cli:opsmate_cli" 57 | 58 | [tool.uv] 59 | default-groups = ["dev", "docs"] 60 | package = true 61 | 62 | [dependency-groups] 63 | dev = [ 64 | "black >=24.4.2,<25.0.0", 65 | "pytest >=8.3.3,<9.0.0", 66 | "pytest-xdist >=3.6.1,<4.0.0", 67 | "jupyterlab >=4.3.0,<5.0.0", 68 | "ipywidgets >=8.1.5,<9.0.0", 69 | "pandas >=2.2.3,<3.0.0", 70 | "pytest-asyncio >=0.24.0,<1.0.0", 71 | "respx >=0.22.0,<1.0.0", 72 | "snakeviz >=2.2.2,<3.0.0", 73 | ] 74 | docs = [ 75 | "mkdocs-material[imaging]>=9.5.49,<10.0.0", 76 | "mkdocs-jupyter >=0.25.1,<1.0.0", 77 | 
"mkdocs-macros-plugin>=1.3.7", 78 | ] 79 | eval = [ 80 | "autoevals>=0.0.124", 81 | "braintrust>=0.0.191", 82 | ] 83 | 84 | [build-system] 85 | requires = ["hatchling"] 86 | build-backend = "hatchling.build" 87 | 88 | 89 | [tool.uv.sources] 90 | opsmate-provider-groq = { workspace = true } 91 | opsmate-provider-fireworks = { workspace = true } 92 | opsmate-provider-google-genai = { workspace = true } 93 | opsmate-tool-mysql = { workspace = true } 94 | opsmate-tool-postgres = { workspace = true } 95 | 96 | [tool.uv.workspace] 97 | members = [ 98 | "contrib/providers/*", 99 | "contrib/tools/*", 100 | ] 101 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | asyncio_mode=auto 3 | asyncio_default_fixture_loop_scope="function" 4 | 5 | markers = 6 | serial: mark test as serial thus excluded from parallel test 7 | -------------------------------------------------------------------------------- /scripts/api-gen.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from opsmate.apiserver import api_app 4 | 5 | schema = api_app.openapi() 6 | 7 | 8 | spec_dir = os.path.join("sdk", "spec", "apiserver") 9 | api_file_path = os.path.join(spec_dir, "openapi.json") 10 | 11 | os.makedirs(spec_dir, exist_ok=True) 12 | 13 | with open(api_file_path, "w") as f: 14 | json.dump(schema, f, indent=2) 15 | -------------------------------------------------------------------------------- /tempo/.gitignore: -------------------------------------------------------------------------------- 1 | tempo-data 2 | -------------------------------------------------------------------------------- /tempo/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | # Tempo runs as user 10001, and docker compose creates the volume as root. 
4 | # As such, we need to chown the volume in order for Tempo to start correctly. 5 | init: 6 | image: &tempoImage grafana/tempo:latest 7 | user: root 8 | entrypoint: 9 | - "chown" 10 | - "10001:10001" 11 | - "/var/tempo" 12 | volumes: 13 | - ./tempo-data:/var/tempo 14 | 15 | memcached: 16 | image: memcached:1.6.29 17 | container_name: memcached 18 | ports: 19 | - "11211:11211" 20 | environment: 21 | - MEMCACHED_MAX_MEMORY=64m # Set the maximum memory usage 22 | - MEMCACHED_THREADS=4 # Number of threads to use 23 | 24 | tempo: 25 | image: *tempoImage 26 | command: [ "-config.file=/etc/tempo.yaml" ] 27 | volumes: 28 | - ./tempo.yaml:/etc/tempo.yaml 29 | - ./tempo-data:/var/tempo 30 | ports: 31 | - "14268:14268" # jaeger ingest 32 | - "3200:3200" # tempo 33 | - "9095:9095" # tempo grpc 34 | - "4317:4317" # otlp grpc 35 | - "4318:4318" # otlp http 36 | - "9411:9411" # zipkin 37 | depends_on: 38 | - init 39 | - memcached 40 | 41 | grafana: 42 | image: grafana/grafana:11.0.0 43 | volumes: 44 | - ./grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml 45 | environment: 46 | - GF_AUTH_ANONYMOUS_ENABLED=true 47 | - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin 48 | - GF_AUTH_DISABLE_LOGIN_FORM=true 49 | - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor traceQLStreaming metricsSummary 50 | ports: 51 | - "3000:3000" 52 | -------------------------------------------------------------------------------- /tempo/grafana-datasources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: Tempo 5 | type: tempo 6 | access: proxy 7 | orgId: 1 8 | url: http://tempo:3200 9 | basicAuth: false 10 | isDefault: true 11 | version: 1 12 | editable: false 13 | apiVersion: 1 14 | uid: tempo 15 | jsonData: 16 | httpMethod: GET 17 | serviceMap: 18 | datasourceUid: prometheus 19 | -------------------------------------------------------------------------------- /tempo/tempo.yaml: 
-------------------------------------------------------------------------------- 1 | stream_over_http_enabled: true 2 | server: 3 | http_listen_port: 3200 4 | log_level: info 5 | 6 | 7 | cache: 8 | background: 9 | writeback_goroutines: 5 10 | caches: 11 | - roles: 12 | - frontend-search 13 | memcached: 14 | host: localhost:11211 15 | 16 | query_frontend: 17 | search: 18 | duration_slo: 5s 19 | throughput_bytes_slo: 1.073741824e+09 20 | trace_by_id: 21 | duration_slo: 5s 22 | 23 | distributor: 24 | receivers: # this configuration will listen on all ports and protocols that tempo is capable of. 25 | jaeger: # the receives all come from the OpenTelemetry collector. more configuration information can 26 | protocols: # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver 27 | thrift_http: # 28 | grpc: # for a production deployment you should only enable the receivers you need! 29 | thrift_binary: 30 | thrift_compact: 31 | zipkin: 32 | otlp: 33 | protocols: 34 | http: 35 | grpc: 36 | opencensus: 37 | 38 | ingester: 39 | max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally 40 | 41 | compactor: 42 | compaction: 43 | block_retention: 1h # overall Tempo trace retention. set for demo purposes 44 | 45 | storage: 46 | trace: 47 | backend: local # backend configuration to use 48 | wal: 49 | path: /var/tempo/wal # where to store the wal locally 50 | local: 51 | path: /var/tempo/blocks 52 | 53 | overrides: 54 | defaults: 55 | metrics_generator: 56 | processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator 57 | generate_native_histograms: both 58 | --------------------------------------------------------------------------------