├── VERSION
├── enova
│ ├── algo
│ │ ├── __init__.py
│ │ ├── server.py
│ │ └── resource.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── prom_api.py
│ │ ├── serving_api.py
│ │ └── app_api.py
│ ├── app
│ │ ├── __init__.py
│ │ ├── utils.py
│ │ ├── server.py
│ │ ├── resource.py
│ │ └── db_modles.py
│ ├── job
│ │ ├── __init__.py
│ │ └── job_manager.py
│ ├── webui
│ │ ├── __init__.py
│ │ └── chat.py
│ ├── common
│ │ ├── __init__.py
│ │ ├── g_vars.py
│ │ ├── local.py
│ │ ├── constant.py
│ │ ├── encoder.py
│ │ ├── logger.py
│ │ └── error.py
│ ├── database
│ │ ├── __init__.py
│ │ └── relation
│ │   ├── __init__.py
│ │   ├── orm
│ │   │ └── __init__.py
│ │   └── transaction
│ │     └── __init__.py
│ ├── server
│ │ ├── __init__.py
│ │ ├── restful
│ │ │ ├── __init__.py
│ │ │ ├── serializer.py
│ │ │ └── router.py
│ │ ├── exception
│ │ │ ├── __init__.py
│ │ │ └── handler.py
│ │ └── middleware
│ │   ├── __init__.py
│ │   ├── trace.py
│ │   ├── base.py
│ │   └── response.py
│ ├── serving
│ │ ├── __init__.py
│ │ ├── middlewares
│ │ │ ├── auth.py
│ │ │ └── base.py
│ │ ├── backend
│ │ │ ├── hf
│ │ │ │ ├── __init__.py
│ │ │ │ └── handler.py
│ │ │ ├── sglang.py
│ │ │ ├── transformers.py
│ │ │ ├── utils.py
│ │ │ └── vllm.py
│ │ └── apiserver.py
│ ├── entry
│ │ ├── command
│ │ │ ├── __init__.py
│ │ │ ├── algo.py
│ │ │ ├── mon.py
│ │ │ ├── webui.py
│ │ │ └── serving.py
│ │ └── cli.py
│ ├── .gitignore
│ └── template
│   └── deployment
│     └── docker-compose
│       ├── webui
│       │ └── webui.yaml
│       ├── traffic-injector
│       │ ├── data.csv
│       │ ├── jmeter.Dockerfile
│       │ ├── compose.yaml
│       │ └── jmeter-config-template.xml
│       ├── .gitignore
│       ├── grafana
│       │ └── grafana_provisioning
│       │   ├── datasources
│       │   │ └── enova-datasource.yaml
│       │   └── dashboards
│       │     └── enova-dashboards.yaml
│       ├── prometheus
│       │ └── prometheus.yml
│       ├── haproxy
│       │ └── haproxy.cfg
│       ├── escaler
│       │ └── conf
│       │   └── settings.json
│       ├── nginx
│       │ └── nginx.conf
│       ├── otel-collector
│       │ └── collector-config.yaml
│       ├── tempo
│       │ └── tempo.yaml
│       └── webui-nginx
│         └── nginx.conf
├── tests
│ └── enova
│   ├── conftest.py
│   ├── test_requirements.txt
│   └── test_eapp.py
├── front
│ ├── .dockerignore
│ ├── .env.development
│ ├── .env.production
│ ├── env.d.ts
│ ├── src
│ │ ├── styles
│ │ │ ├── index.scss
│ │ │ ├── index.css
│ │ │ └── element
│ │ │   └── index.scss
│ │ ├── assets
│ │ │ ├── empty.png
│ │ │ ├── filter.png
│ │ │ ├── logo
│ │ │ │ ├── emergingai_b.png
│ │ │ │ └── emergingai_w.png
│ │ │ └── svg
│ │ │   ├── user.svg
│ │ │   ├── info.svg
│ │ │   ├── setup.svg
│ │ │   ├── auto.svg
│ │ │   ├── toggle.svg
│ │ │   ├── log.svg
│ │ │   ├── home.svg
│ │ │   ├── autoRefresh.svg
│ │ │   ├── cross.svg
│ │ │   ├── docker.svg
│ │ │   └── earth.svg
│ │ ├── main.ts
│ │ ├── App.vue
│ │ ├── components
│ │ │ ├── SummaryTip.vue
│ │ │ ├── SearchInput.vue
│ │ │ ├── instance
│ │ │ │ └── InstanceDetail.vue
│ │ │ ├── experiment
│ │ │ │ └── TestDetail.vue
│ │ │ ├── SvgIcon.vue
│ │ │ ├── Drawer.vue
│ │ │ ├── Pagination.vue
│ │ │ ├── Language.vue
│ │ │ └── TimeRangePicker.vue
│ │ ├── locales
│ │ │ └── index.ts
│ │ ├── stores
│ │ │ ├── app.ts
│ │ │ ├── config.ts
│ │ │ ├── experiment.ts
│ │ │ └── instance.ts
│ │ ├── layout
│ │ │ ├── header
│ │ │ │ └── index.vue
│ │ │ ├── index.vue
│ │ │ └── sidebar
│ │ │   └── index.vue
│ │ ├── router
│ │ │ └── index.ts
│ │ ├── utils
│ │ │ └── request.ts
│ │ ├── api
│ │ │ └── instance.ts
│ │ └── hooks
│ │   └── useInitQueryRange.ts
│ ├── public
│ │ └── favicon.ico
│ ├── postcss.config.js
│ ├── .prettierrc.json
│ ├── tsconfig.json
│ ├── auto-imports.d.ts
│ ├── index.html
│ ├── tsconfig.app.json
│ ├── .eslintrc.cjs
│ ├── .gitignore
│ ├── tsconfig.node.json
│ ├── tailwind.config.js
│ ├── README.md
│ ├── package.json
│ └── vite.config.ts
├── requirements-docker-no-deps.txt
├── MANIFEST.in
├── escaler
│ ├── scripts
│ │ ├── local_docker_run.sh
│ │ ├── generate_mock_files.sh
│ │ ├── build_swagger.sh
│ │ └── generate_ot_clientset.sh
│ ├── pkg
│ │ ├── api
│ │ │ ├── types.go
│ │ │ ├── prom.go
│ │ │ ├── api.go
│ │ │ └── enovaalgo.go
│ │ ├── utils
│ │ │ ├── utils.go
│ │ │ └── cache.go
│ │ ├── resource
│ │ │ ├── clients.go
│ │ │ ├── utils
│ │ │ │ └── cmd.go
│ │ │ └── k8s.go
│ │ ├── queue
│ │ │ └── queue.go
│ │ ├── httpserver
│ │ │ ├── utils
│ │ │ │ └── utils.go
│ │ │ ├── middleware
│ │ │ │ ├── trace.go
│ │ │ │ ├── logger.go
│ │ │ │ └── response.go
│ │ │ └── server
│ │ │   └── router.go
│ │ ├── meta
│ │ │ └── task.go
│ │ ├── logger
│ │ │ └── logger.go
│ │ ├── redis
│ │ │ └── redis.go
│ │ └── scaler
│ │   └── scaler.go
│ ├── build.sh
│ ├── conf
│ │ └── settings.json
│ └── cmd
│   └── escaler
│     ├── mock_enovaalgo.go
│     └── main.go
├── .github
│ └── assets
│   ├── ENOVA.png
│   ├── trace.png
│   ├── webui.png
│   ├── gpu_metrics.png
│   ├── llm_instance.png
│   ├── test_results.png
│   ├── request_inject.png
│   └── monitoring_metrics.png
├── llmo
│ └── enova-instrumentation-llmo
│   ├── enova
│   │ └── llmo
│   │   ├── metrics_adapter
│   │   │ ├── __init__.py
│   │   │ └── vllm_logging_metrics.py
│   │   ├── instrumentation
│   │   │ ├── __init__.py
│   │   │ └── fastapi
│   │   │   └── __init__.py
│   │   └── __init__.py
│   ├── pyproject.toml
│   └── README.md
├── .dockerignore
├── .gitattributes
├── scripts
│ ├── pack_whl.llmo.sh
│ └── pack_whl.enova.sh
├── docker
│ ├── Dockerfile.jmeter
│ ├── build_image.enova.base.sh
│ ├── build_image.jmeter.sh
│ ├── Dockerfile.requirements
│ ├── Dockerfile
│ ├── Dockerfile.enova
│ ├── Dockerfile.enova.base.npu
│ ├── Dockerfile.enova.npu
│ ├── build_image.enova.npu.sh
│ ├── build_image.enova.sh
│ ├── build_image.escaler.sh
│ ├── Dockerfile.escaler
│ └── Dockerfile.enova.base
├── .pre-commit-config.yaml
├── requirements.txt
├── requirements-docker.txt
├── requirements-docker.npu.txt
├── pyproject.toml
└── .gitignore
/VERSION:
--------------------------------------------------------------------------------
1 | 0.1.0
--------------------------------------------------------------------------------
/enova/algo/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/api/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/app/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/job/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/webui/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/enova/conftest.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/database/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/job/job_manager.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/server/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/serving/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/entry/command/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/server/restful/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/serving/middlewares/auth.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/front/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules
--------------------------------------------------------------------------------
/enova/database/relation/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/server/exception/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/server/middleware/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/serving/backend/hf/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/database/relation/orm/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/front/.env.development:
--------------------------------------------------------------------------------
1 | VITE_APP_BASE_URL="/"
--------------------------------------------------------------------------------
/front/.env.production:
--------------------------------------------------------------------------------
1 | VITE_APP_BASE_URL="/"
--------------------------------------------------------------------------------
/enova/database/relation/transaction/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/.gitignore:
--------------------------------------------------------------------------------
1 | web_statics/*
2 | !web_statics/.gitkeep
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/webui/webui.yaml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/front/env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 |
--------------------------------------------------------------------------------
/front/src/styles/index.scss:
--------------------------------------------------------------------------------
1 | @import './element-ui.scss';
--------------------------------------------------------------------------------
/requirements-docker-no-deps.txt:
--------------------------------------------------------------------------------
1 | vllm==0.8.5.post1
2 |
--------------------------------------------------------------------------------
/tests/enova/test_requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-asyncio
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include enova/web_statics/static *
2 |
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/data.csv:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/escaler/scripts/local_docker_run.sh:
--------------------------------------------------------------------------------
1 | redis-server &
2 | escaler "$@"
3 |
--------------------------------------------------------------------------------
/front/src/styles/index.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
--------------------------------------------------------------------------------
/.github/assets/ENOVA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/.github/assets/ENOVA.png
--------------------------------------------------------------------------------
/.github/assets/trace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/.github/assets/trace.png
--------------------------------------------------------------------------------
/.github/assets/webui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/.github/assets/webui.png
--------------------------------------------------------------------------------
/front/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/front/public/favicon.ico
--------------------------------------------------------------------------------
/front/src/assets/empty.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/front/src/assets/empty.png
--------------------------------------------------------------------------------
/front/src/assets/filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/front/src/assets/filter.png
--------------------------------------------------------------------------------
/.github/assets/gpu_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/.github/assets/gpu_metrics.png
--------------------------------------------------------------------------------
/.github/assets/llm_instance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/.github/assets/llm_instance.png
--------------------------------------------------------------------------------
/.github/assets/test_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/.github/assets/test_results.png
--------------------------------------------------------------------------------
/.github/assets/request_inject.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/.github/assets/request_inject.png
--------------------------------------------------------------------------------
/.github/assets/monitoring_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/.github/assets/monitoring_metrics.png
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/.gitignore:
--------------------------------------------------------------------------------
1 | tempo-data
2 | single-demo
3 | enova_compose*.yaml
4 | bin/docker-compose*
--------------------------------------------------------------------------------
/front/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 |   plugins: {
3 |     tailwindcss: {},
4 |     autoprefixer: {},
5 |   },
6 | }
7 |
--------------------------------------------------------------------------------
/front/src/assets/logo/emergingai_b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/front/src/assets/logo/emergingai_b.png
--------------------------------------------------------------------------------
/front/src/assets/logo/emergingai_w.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/HEAD/front/src/assets/logo/emergingai_w.png
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/metrics_adapter/__init__.py:
--------------------------------------------------------------------------------
1 | from .vllm_logging_metrics import VLLMLogMetricsAdapter
2 |
--------------------------------------------------------------------------------
/enova/app/utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 |
4 | def compute_actual_duration(value, unit):
5 |     return int(pd.Timedelta(f"{value}{unit}").total_seconds())
6 |
--------------------------------------------------------------------------------
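A quick usage sketch for the helper above (illustrative, not part of the repository): pandas parses the concatenated value/unit string as a timedelta, so (5, "m") means five minutes.

    # relies only on compute_actual_duration as defined above
    from enova.app.utils import compute_actual_duration

    assert compute_actual_duration(5, "m") == 300    # 5 minutes -> 300 seconds
    assert compute_actual_duration(2, "h") == 7200   # 2 hours -> 7200 seconds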
/llmo/enova-instrumentation-llmo/enova/llmo/instrumentation/__init__.py:
--------------------------------------------------------------------------------
1 | from .vllm import EnovaVllmInstrumentor
2 | from .fastapi import EnovaFastAPIInstrumentor
3 |
--------------------------------------------------------------------------------
/escaler/scripts/generate_mock_files.sh:
--------------------------------------------------------------------------------
1 | mockgen -source=vendor/github.com/docker/docker/client/interface.go -destination=cmd/escaler/mock_docker_client.go -package=main
2 |
--------------------------------------------------------------------------------
/escaler/pkg/api/types.go:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | type EnvoaResponse struct {
4 |     Code    int
5 |     Message string
6 |     Result  interface{}
7 |     TraceId string
8 |     Version string
9 | }
10 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | build
3 | dist
4 | enova.egg-info
5 | *.log
6 | .gitignore
7 | var
8 | .pre-commit-config.yaml
9 | tests
10 | front/node_modules
11 | front/package-lock.json
12 | front/yarn.lock
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *whl filter=lfs diff=lfs merge=lfs -text
2 | docker-compose-* filter=lfs diff=lfs merge=lfs -text
3 | *tgz filter=lfs diff=lfs merge=lfs -text
4 | *tar.gz filter=lfs diff=lfs merge=lfs -text
5 |
--------------------------------------------------------------------------------
/escaler/scripts/build_swagger.sh:
--------------------------------------------------------------------------------
1 | export GOPATH=$(go env GOPATH | awk -F ':' '{print $1}')
2 | export PATH=$PATH:$GOPATH/bin
3 | swag init -g cmd/escaler/main.go -o cmd/escaler/docs --parseDependency --parseInternal
--------------------------------------------------------------------------------
/front/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json.schemastore.org/prettierrc",
3 | "semi": false,
4 | "tabWidth": 2,
5 | "singleQuote": true,
6 | "printWidth": 100,
7 | "trailingComma": "none"
8 | }
--------------------------------------------------------------------------------
/front/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "files": [],
3 | "references": [
4 | {
5 | "path": "./tsconfig.node.json"
6 | },
7 | {
8 | "path": "./tsconfig.app.json"
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/escaler/build.sh:
--------------------------------------------------------------------------------
1 |
2 | go mod download
3 | # go install github.com/swaggo/swag/cmd/swag@latest
4 |
5 | # swag init -g cmd/escaler/main.go -o cmd/escaler/docs --parseDependency --parseInternal
6 | mkdir -p dist/bin
7 | go env && go build -o dist/bin/escaler cmd/escaler/main.go
8 |
--------------------------------------------------------------------------------
/front/auto-imports.d.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable */
2 | /* prettier-ignore */
3 | // @ts-nocheck
4 | // noinspection JSUnusedGlobalSymbols
5 | // Generated by unplugin-auto-import
6 | export {}
7 | declare global {
8 |   const ElMessage: typeof import('element-plus/es')['ElMessage']
9 | }
10 |
--------------------------------------------------------------------------------
/scripts/pack_whl.llmo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | echo "Runing packing wheel of llmo using ${PWD}"
5 |
6 | SCRIPT=$(realpath "$0")
7 | BASEDIR=$(dirname "$SCRIPT")
8 | BASEDIR=$(dirname "$BASEDIR")
9 |
10 | # pack
11 | cd $BASEDIR/llmo/enova-instrumentation-llmo
12 | poetry build
13 |
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/jmeter.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:centos7
2 | WORKDIR /opt
3 | ADD jdk-8u361-linux-x64.tar.gz /usr/local/
4 | ADD apache-jmeter-5.6.3.tgz /opt/
5 | ENV JAVA_HOME=/usr/local/jdk1.8.0_361 \
6 | PATH=/usr/local/jdk1.8.0_361/bin:/opt/apache-jmeter-5.6.3/bin:$PATH
7 |
--------------------------------------------------------------------------------
/escaler/pkg/utils/utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import "reflect"
4 |
5 | func GetAllField(s interface{}) []reflect.StructField {
6 |     ret := []reflect.StructField{}
7 |     t := reflect.TypeOf(s)
8 | 
9 |     for i := 0; i < t.NumField(); i++ {
10 |         field := t.Field(i)
11 |         ret = append(ret, field)
12 |     }
13 |     return ret
14 | }
15 |
--------------------------------------------------------------------------------
/enova/server/exception/handler.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from fastapi import Request
3 |
4 |
5 | class BaseExceptionHandler(metaclass=abc.ABCMeta):
6 |
7 |     @abc.abstractmethod
8 |     def get_exception_class(self):
9 |         """"""
10 | 
11 |     @abc.abstractmethod
12 |     def exception_handler(self, request: Request, exc):
13 |         """"""
14 |
--------------------------------------------------------------------------------
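A minimal sketch of a concrete handler built on the abstract base above. ValueErrorHandler, the status code, and the payload shape are illustrative assumptions; the repository defines only the interface.

    from fastapi import Request
    from fastapi.responses import JSONResponse

    from enova.server.exception.handler import BaseExceptionHandler

    class ValueErrorHandler(BaseExceptionHandler):
        def get_exception_class(self):
            # the exception type this handler should be registered for
            return ValueError

        def exception_handler(self, request: Request, exc):
            # payload shape mirrors common FastAPI handlers; not dictated by the repo
            return JSONResponse(status_code=400, content={"message": str(exc)})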
/escaler/pkg/resource/clients.go:
--------------------------------------------------------------------------------
1 | package resource
2 |
3 | import "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
4 |
5 | type ClientInterface interface {
6 |     DeployTask(spec meta.TaskSpec)
7 |     DeleteTask(spec meta.TaskSpec)
8 |     IsTaskExist(spec meta.TaskSpec) bool
9 |     IsTaskRunning(spec meta.TaskSpec) bool
10 |     GetRuntimeInfos(spec meta.TaskSpec) *meta.RuntimeInfo
11 | }
12 |
--------------------------------------------------------------------------------
/front/index.html:
--------------------------------------------------------------------------------
[HTML markup stripped during text extraction; only the page title "Enova" survives]
--------------------------------------------------------------------------------
/front/src/styles/element/index.scss:
--------------------------------------------------------------------------------
1 | @forward 'element-plus/theme-chalk/src/common/var.scss' with (
2 |   $colors: (
3 |     'primary': (
4 |       'base': #303133,
5 |     ),
6 |   ),
7 |   $table: (
8 |     'header-bg-color': #EBEDF0,
9 |     'header-text-color': #606266
10 |   ),
11 |   $collapse: (
12 |     'header-height': 36px,
13 |     'header-bg-color': #F0F2F5
14 |   )
15 |
16 | );
--------------------------------------------------------------------------------
/enova/algo/server.py:
--------------------------------------------------------------------------------
1 | from enova.common.config import CONFIG
2 | from enova.server.server import ApiServer
3 | from enova.common.constant import ApiServerType
4 |
5 |
6 | def get_algo_api_server(api_server_type=ApiServerType.ENOVA_ALGO.value):
7 |     api_config = getattr(CONFIG, api_server_type)
8 |     CONFIG.api.update(api_config)
9 | 
10 |     api_server = ApiServer(api_config)
11 | 
12 |     return api_server
13 |
--------------------------------------------------------------------------------
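A hedged launch sketch: ApiServer is defined in enova/server/server.py, which is not included in this dump, so the run() entrypoint below is an assumption about its interface.

    from enova.algo.server import get_algo_api_server

    server = get_algo_api_server()
    server.run()  # assumption: a blocking entrypoint serving the CONFIG-driven host/port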
/enova/api/prom_api.py:
--------------------------------------------------------------------------------
1 | from enova.common.config import CONFIG
2 | from enova.api.base import ASyncAPI
3 | from enova.common.constant import HttpMethod
4 |
5 |
6 | PROM_API_HOST = CONFIG.enova_app["prom_api_host"]
7 |
8 |
9 | class _PromApi:
10 |     def __init__(self) -> None:
11 |         self.query_range = ASyncAPI(method=HttpMethod.GET.value, url=PROM_API_HOST + "/api/v1/query_range")
12 |
13 |
14 | PromApi = _PromApi()
15 |
--------------------------------------------------------------------------------
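A hedged usage sketch: ASyncAPI's call convention lives in enova/api/base.py, which is not shown in this dump, so the sketch assumes instances are awaitable callables taking keyword arguments. The parameter names themselves (query, start, end, step) are the ones Prometheus's /api/v1/query_range endpoint expects.

    import asyncio

    from enova.api.prom_api import PromApi

    async def main():
        # PromQL query over a one-hour window at 15-second resolution
        result = await PromApi.query_range(
            query="up", start=1700000000, end=1700003600, step="15s"
        )
        print(result)

    asyncio.run(main())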
/front/src/main.ts:
--------------------------------------------------------------------------------
1 | import './styles/index.css'
2 |
3 | import { createApp } from 'vue'
4 | import { createPinia } from 'pinia'
5 | import 'virtual:svg-icons-register'
6 | import i18n from './locales'
7 | import App from './App.vue'
8 | import router from './router'
9 | import './styles/index.scss'
10 |
11 | const app = createApp(App)
12 |
13 | app.use(createPinia())
14 | app.use(router)
15 | app.use(i18n)
16 | app.mount('#app')
17 |
--------------------------------------------------------------------------------
/docker/Dockerfile.jmeter:
--------------------------------------------------------------------------------
1 | FROM centos:centos7
2 |
3 | WORKDIR /data
4 |
5 | # TODO: add jdk and jmeter from url
6 | ADD ./docker/jdk-8u401-linux-x64.tar.gz /usr/local/
7 | ADD ./docker/apache-jmeter-5.6.3.tgz /opt/
8 |
9 | RUN mv /usr/local/jdk1.8.0_401 /usr/local/jdk && \
10 | mv /opt/apache-jmeter-5.6.3 /opt/apache-jmeter
11 |
12 | ENV JAVA_HOME=/usr/local/jdk \
13 | PATH=/usr/local/jdk/bin:/opt/apache-jmeter/bin:$PATH
14 |
--------------------------------------------------------------------------------
/docker/build_image.enova.base.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | echo "Runing build image enova:base using ${PWD}"
5 |
6 | SCRIPT=$(realpath "$0")
7 | BASEDIR=$(dirname "$SCRIPT")
8 | BASEDIR=$(dirname "$BASEDIR")
9 |
10 |
11 | export HARBOR_PATH=emergingai
12 |
13 | # build enova
14 | cd $BASEDIR
15 | docker build -f $BASEDIR/docker/Dockerfile.enova.base -t $HARBOR_PATH/enova:base --build-arg HARBOR_PATH="$HARBOR_PATH" $BASEDIR
16 |
--------------------------------------------------------------------------------
/front/src/App.vue:
--------------------------------------------------------------------------------
[Vue SFC markup stripped during text extraction]
--------------------------------------------------------------------------------
/front/tsconfig.app.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "@vue/tsconfig/tsconfig.dom.json",
3 | "include": ["env.d.ts", "src/**/*", "src/**/*.vue", "**/*.d.ts", "src/**/*.ts"],
4 | "exclude": ["src/**/__tests__/*"],
5 | "compilerOptions": {
6 | "composite": true,
7 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
8 |
9 | "baseUrl": ".",
10 | "paths": {
11 | "@/*": ["./src/*"]
12 | }
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/front/src/components/SummaryTip.vue:
--------------------------------------------------------------------------------
[Vue SFC markup stripped during text extraction; surviving template text: "{{ title }} ({{ count }})"]
--------------------------------------------------------------------------------
/enova/api/serving_api.py:
--------------------------------------------------------------------------------
1 | from enova.common.config import CONFIG
2 | from enova.api.base import ASyncEmergingaiAPI
3 | from enova.common.constant import HttpMethod
4 |
5 |
6 | SERVING_API_HOST = CONFIG.enova_app["serving_api_host"]
7 |
8 |
9 | class _ServingApi:
10 |     def __init__(self) -> None:
11 |         self.engine_args = ASyncEmergingaiAPI(method=HttpMethod.GET.value, url=SERVING_API_HOST + "/v1/model/info/args")
12 |
13 |
14 | ServingApi = _ServingApi()
15 |
--------------------------------------------------------------------------------
/enova/server/middleware/trace.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | from fastapi import Request
3 | from enova.server.middleware.base import BaseMiddleware
4 | from enova.common.local import set_contextvars
5 |
6 |
7 | class TraceMiddleware(BaseMiddleware):
8 |
9 |     async def _process_request(self, request: Request):
10 |         """get header trace_id"""
11 |         trace_id = request.headers.get('trace_id') or uuid.uuid4().hex
12 |         set_contextvars('trace_id', trace_id)
13 |
--------------------------------------------------------------------------------
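Because the middleware prefers an incoming `trace_id` header and only generates one when it is absent, a client can pin its own id end to end. A sketch (the URL and port are illustrative):

    import uuid

    import httpx

    headers = {"trace_id": uuid.uuid4().hex}
    # any route served behind TraceMiddleware will adopt this id
    response = httpx.get("http://localhost:8080/v1/healthz", headers=headers)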
/front/.eslintrc.cjs:
--------------------------------------------------------------------------------
1 | /* eslint-env node */
2 | require('@rushstack/eslint-patch/modern-module-resolution')
3 |
4 | module.exports = {
5 |   root: true,
6 |   'extends': [
7 |     'plugin:vue/vue3-essential',
8 |     'eslint:recommended',
9 |     '@vue/eslint-config-typescript',
10 |     '@vue/eslint-config-prettier/skip-formatting'
11 |   ],
12 |   parserOptions: {
13 |     ecmaVersion: 'latest'
14 |   },
15 |   globals: {
16 |     ElMessage: 'readonly',
17 |   }
18 | }
19 |
--------------------------------------------------------------------------------
/front/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 |
10 | node_modules
11 | .DS_Store
12 | dist
13 | dist-ssr
14 | coverage
15 | *.local
16 |
17 | /cypress/videos/
18 | /cypress/screenshots/
19 |
20 | # Editor directories and files
21 | .vscode/*
22 | !.vscode/extensions.json
23 | .idea
24 | *.suo
25 | *.ntvs*
26 | *.njsproj
27 | *.sln
28 | *.sw?
29 |
30 | *.tsbuildinfo
31 |
--------------------------------------------------------------------------------
/docker/build_image.jmeter.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | IMAGE_VERSION=v`cat VERSION`
5 |
6 | echo "Runing build image enova-jmoter:${IMAGE_VERSION} using ${PWD}"
7 |
8 | SCRIPT=$(realpath "$0")
9 | BASEDIR=$(dirname "$SCRIPT")
10 | BASEDIR=$(dirname "$BASEDIR")
11 | echo "BASEDIR: " ${BASEDIR}
12 |
13 |
14 | export HARBOR_PATH=emergingai
15 |
16 | # build enova
17 | cd $BASEDIR
18 | docker build -f $BASEDIR/docker/Dockerfile.jmeter -t $HARBOR_PATH/enova-jmeter:$IMAGE_VERSION $BASEDIR
19 |
--------------------------------------------------------------------------------
/front/src/assets/svg/user.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/escaler/pkg/queue/queue.go:
--------------------------------------------------------------------------------
1 | package queue
2 |
3 | import "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
4 |
5 | type TaskQueue interface {
6 |     Append(meta.TaskSpecInterface)
7 |     Pop() (meta.TaskSpecInterface, bool)
8 | }
9 | 
10 | type InnerChanTaskQueue struct {
11 |     Ch chan meta.TaskSpecInterface
12 | }
13 | 
14 | func (q *InnerChanTaskQueue) Append(task meta.TaskSpecInterface) {
15 |     q.Ch <- task
16 | }
17 | 
18 | func (q *InnerChanTaskQueue) Pop() (meta.TaskSpecInterface, bool) {
19 |     task, ok := <-q.Ch
20 |     return task, ok
21 | }
22 |
--------------------------------------------------------------------------------
/front/tsconfig.node.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "@tsconfig/node20/tsconfig.json",
3 | "include": [
4 | "vite.config.*",
5 | "vitest.config.*",
6 | "cypress.config.*",
7 | "nightwatch.conf.*",
8 | "playwright.config.*"
9 | ],
10 | "compilerOptions": {
11 | "composite": true,
12 | "noEmit": true,
13 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
14 |
15 | "module": "ESNext",
16 | "moduleResolution": "Bundler",
17 | "types": ["node"]
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/escaler/pkg/httpserver/utils/utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "reflect"
5 | "strconv"
6 | )
7 |
8 | func HasMethod(s interface{}, methodName string) bool {
9 | typ := reflect.TypeOf(s)
10 | _, ok := typ.MethodByName(methodName)
11 | return ok
12 | }
13 |
14 | // ParseUnixTimestamp
15 | func ParseUnixTimestamp(ts int64) string {
16 | if ts >= (1 << 32) {
17 | // The timestamp is in milliseconds. Convert it to seconds.
18 | ts /= 1000
19 | }
20 | return strconv.FormatFloat(float64(ts), 'g', -1, 64)
21 | }
22 |
--------------------------------------------------------------------------------
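For readers following the Python side of the codebase, the same normalization in Python (illustrative, not in the repository): the 1 << 32 cutoff works because Unix timestamps in seconds stay below 2**32 until the year 2106, so anything larger is assumed to be milliseconds.

    def parse_unix_timestamp(ts: int) -> str:
        if ts >= 1 << 32:
            ts //= 1000  # value was in milliseconds; convert to seconds
        return format(float(ts), 'g')

    assert parse_unix_timestamp(1700000000000) == parse_unix_timestamp(1700000000)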
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/pycqa/flake8
3 |     rev: 3.9.2
4 |     hooks:
5 |       - id: flake8
6 |         args:
7 |           - --max-line-length=150
8 |           - --max-complexity=60
9 | 
10 |   - repo: https://github.com/psf/black
11 |     rev: stable # Use the specific revision or tag you want to pin to
12 |     hooks:
13 |       - id: black
14 |         args:
15 |           - --line-length=150
16 | 
17 |   - repo: https://github.com/pre-commit/pre-commit-hooks
18 |     rev: v4.0.1
19 |     hooks:
20 |       - id: check-merge-conflict
21 | 
--------------------------------------------------------------------------------
/escaler/scripts/generate_ot_clientset.sh:
--------------------------------------------------------------------------------
1 | go install k8s.io/code-generator/cmd/client-gen
2 | export GOPATH=$(go env GOPATH | awk -F ':' '{print $1}')
3 | export PATH=$PATH:$GOPATH/bin
4 | client-gen \
5 |     --input-base="/root/go/pkg/mod/github.com/open-telemetry/opentelemetry-operator@v1.51.0/apis/v1alpha1" \
6 |     --input="" \
7 |     --output-pkg="github.com/Emerging-AI/ENOVA/escaler/pkg/generated/ot/clientset" \
8 |     --output-dir=./pkg/generated/ot/clientset \
9 |     --clientset-name="versioned" \
10 |     --go-header-file="./hack/boilerplate.go.txt"
11 |
--------------------------------------------------------------------------------
/docker/Dockerfile.requirements:
--------------------------------------------------------------------------------
1 | ARG HARBOR_PATH=emergingai
2 |
3 | FROM ${HARBOR_PATH}/python:base
4 |
5 | RUN apt-get install -y \
6 | ocl-icd-libopencl1 \
7 | opencl-headers \
8 | clinfo
9 |
10 | RUN mkdir -p /etc/OpenCL/vendors && \
11 | echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
12 |
13 | COPY ./dist/enova-0.1.0-py3-none-any.whl .
14 | COPY ./llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-0.1.0-py3-none-any.whl .
15 |
16 | RUN pip install enova_instrumentation_llmo-0.1.0-py3-none-any.whl enova-0.1.0-py3-none-any.whl
17 |
18 | RUN pip install vllm
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | # FROM nvcr.io/nvidia/pytorch:24.03-py3
2 | FROM emergingai/enova:base
3 |
4 | RUN apt update && apt install net-tools -y
5 |
6 | COPY ./dist/enova-0.1.0-py3-none-any.whl /tmp/
7 | COPY ./llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-0.1.0-py3-none-any.whl /tmp/
8 |
9 | RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
10 | pip uninstall enova enova-instrumentation-llmo -y && \
11 | pip install --no-cache-dir /tmp/enova_instrumentation_llmo-0.1.0-py3-none-any.whl && \
12 | pip install --no-cache-dir /tmp/enova-0.1.0-py3-none-any.whl
13 |
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/grafana/grafana_provisioning/datasources/enova-datasource.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: 1
2 |
3 | datasources:
4 |   - name: Enova-Prometheus
5 |     type: prometheus
6 |     uid: prometheus
7 |     url: http://prometheus:9090
8 |     isDefault: true
9 |     access: proxy
10 |     editable: true
11 |     orgId: 1
12 | 
13 |   - name: Enova-Tempo
14 |     type: tempo
15 |     uid: tempo
16 |     url: http://tempo:3200
17 |     isDefault: false
18 |     access: proxy
19 |     orgId: 1
20 |     editable: true
21 |     jsonData:
22 |       httpMethod: GET
23 |       serviceMap:
24 |         datasourceUid: prometheus
25 |
26 |
27 |
--------------------------------------------------------------------------------
/front/src/assets/svg/info.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/compose.yaml:
--------------------------------------------------------------------------------
1 | version: "3.8"
2 |
3 | services:
4 |   traffic_injector:
5 |     image: 60.204.135.2/emergingai/enova-jmeter:v0.0.2
6 |     command:
7 |       - sh
8 |       - -c
9 |       - |
10 |         rm -rf /data/report
11 |         mkdir /data/report
12 |         jmeter -n -t /data/jmeter-config.xml -l /data/report/report.log -e -o /data/report
13 |     volumes:
14 |       - ${DATA_FILE}:/opt/data.csv
15 |       - ${OUTPUT}:/data
16 |     networks:
17 |       - enova-net
18 | 
19 | volumes:
20 |   output:
21 | 
22 | networks:
23 |   enova-net:
24 |     enable_ipv6: false
25 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | httpx==0.24.1
2 | fastapi==0.108.0
3 | huggingface_hub
4 | hf-transfer
5 | transformers
6 | locate
7 | python-rapidjson
8 | opentelemetry-api
9 | opentelemetry-sdk
10 | opentelemetry-exporter-otlp
11 | opentelemetry-distro
12 | opentelemetry-instrumentation-fastapi
13 | streamlit
14 | pymysql==1.1.0
15 | aiomysql==0.2.0
16 | sqlalchemy==2.0.29
17 | sqlalchemy-utils
18 | aiosqlite
19 | greenlet
20 | uvicorn
21 | ulid-py
22 | pyopencl
23 | py-cpuinfo
24 | pytz
25 | tzlocal
26 | openai
27 | packaging
28 | ray
29 | enova-instrumentation-llmo==0.1.0
30 | addict
31 | sglang==0.3.6
32 | python-multipart
33 | orjson
34 | siphash24
--------------------------------------------------------------------------------
/enova/common/g_vars.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | from typing import Union
3 | from enova.common.local import get_contextvars, set_contextvars
4 |
5 |
6 | def get_traceid() -> Union[str, None]:
7 |     trace_id = get_contextvars("trace_id")
8 |     if trace_id is None:
9 |         trace_id = uuid.uuid4().hex
10 |         set_contextvars("trace_id", trace_id)
11 |     return trace_id
12 | 
13 | 
14 | def get_realip() -> Union[str, None]:
15 |     real_ip = get_contextvars("real_ip")
16 |     # TODO: using LOGGER here would cause a cyclic reference
17 |     # if real_ip is None:
18 |     #     LOGGER.warn("RealIPMiddleware may not be set up.")
19 |     return real_ip
20 |
--------------------------------------------------------------------------------
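The contextvar-backed trace id is generate-once, reuse-thereafter within a single context, which the following check (relying only on get_traceid as defined above) makes concrete:

    from enova.common.g_vars import get_traceid

    first = get_traceid()   # no id stored yet: one is generated and cached
    second = get_traceid()  # later calls in the same context reuse it
    assert first == second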
/front/src/locales/index.ts:
--------------------------------------------------------------------------------
1 | import { createI18n } from 'vue-i18n'
2 | import zhLocales from './lang/zh'
3 | import enLocales from './lang/en'
4 | const getLocale = (): string => {
5 |   let locale = localStorage.getItem('lang')
6 |   if (!locale) {
7 |     locale = navigator.language.split('-')[0]
8 |   }
9 |   if (!locale || locale === 'zh') {
10 |     locale = 'zh_CN'
11 |   }
12 |   return locale
13 | }
14 | 
15 | const i18n = createI18n({
16 |   locale: getLocale(),
17 |   legacy: false,
18 |   globalInjection: true,
19 |   fallbackLocale: 'zh_CN',
20 |   messages: {
21 |     zh_CN: { ...zhLocales },
22 |     en: { ...enLocales }
23 |   }
24 | })
25 |
26 | export default i18n
27 |
--------------------------------------------------------------------------------
/front/src/stores/app.ts:
--------------------------------------------------------------------------------
1 | import { defineStore } from 'pinia'
2 | import navImg from '@/assets/logo/emergingai_w.png'
3 | import loginImg from '@/assets/logo/emergingai_b.png'
4 |
5 | export const useAppStore = defineStore('app', {
6 |   state: () => ({
7 |     navLogo: {
8 |       src: navImg,
9 |       width: 'auto',
10 |       height: '56px',
11 |       alt: 'Emergingai'
12 |     },
13 |     loginLogo: {
14 |       src: loginImg,
15 |       width: '220px',
16 |       height: 'auto',
17 |       alt: 'Emergingai'
18 |     },
19 |     sidebarStatus: true
20 |   }),
21 |   actions: {
22 |     toggleSideBar(): void {
23 |       this.sidebarStatus = !this.sidebarStatus
24 |     }
25 |   }
26 | })
27 |
--------------------------------------------------------------------------------
/escaler/pkg/utils/cache.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "github.com/Emerging-AI/ENOVA/escaler/pkg/redis"
5 | )
6 |
7 | type TTLCache interface {
8 | Set(key string, value string, timeout int64)
9 | Get(key string) string
10 | }
11 |
12 | type RedisTTLCache struct {
13 | Redis *redis.RedisClient
14 | }
15 |
16 | func NewRedisTTLCache(addr string, passwd string, db int) *RedisTTLCache {
17 | return &RedisTTLCache{
18 | redis.NewRedisClient(addr, passwd, db),
19 | }
20 | }
21 |
22 | func (r *RedisTTLCache) Set(key string, value string, timeout int64) {
23 | r.Redis.Set(key, value, timeout)
24 | }
25 |
26 | func (r *RedisTTLCache) Get(key string) string {
27 | return r.Redis.Get(key)
28 | }
29 |
--------------------------------------------------------------------------------
/front/src/components/SearchInput.vue:
--------------------------------------------------------------------------------
[Vue SFC markup stripped during text extraction]
--------------------------------------------------------------------------------
/escaler/pkg/httpserver/middleware/trace.go:
--------------------------------------------------------------------------------
1 | package middleware
2 |
3 | import (
4 | "github.com/gin-gonic/gin"
5 | uuid "github.com/google/uuid"
6 | )
7 |
8 | const TraceIdKey = "trace_id"
9 |
10 | func GenerateTraceId() string {
11 | v4, err := uuid.NewUUID()
12 | if err != nil {
13 | panic(err)
14 | }
15 | return v4.String()
16 | }
17 |
18 | func GetTraceId() gin.HandlerFunc {
19 | return func(c *gin.Context) {
20 | traceId := c.GetHeader(TraceIdKey)
21 |
22 | if traceId == "" {
23 | traceId = GenerateTraceId()
24 | c.Request.Header.Set(TraceIdKey, traceId)
25 | c.Set(TraceIdKey, traceId)
26 | }
27 |
28 | // Set TraceIdKey header
29 | c.Writer.Header().Set(TraceIdKey, traceId)
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/front/src/components/instance/InstanceDetail.vue:
--------------------------------------------------------------------------------
[Vue SFC markup stripped during text extraction]
--------------------------------------------------------------------------------
/front/src/components/experiment/TestDetail.vue:
--------------------------------------------------------------------------------
[Vue SFC markup stripped during text extraction]
--------------------------------------------------------------------------------
/enova/api/app_api.py:
--------------------------------------------------------------------------------
1 | from enova.common.config import CONFIG
2 | from enova.api.base import ASyncRestfulEmergingaiAPI, ASyncEmergingaiAPI
3 | from enova.common.constant import HttpMethod
4 |
5 |
6 | APP_API_HOST = CONFIG.enova_app["app_api_host"]
7 |
8 |
9 | class _EnovaAppApi:
10 |     def __init__(self) -> None:
11 |         self.healthz = ASyncEmergingaiAPI(method=HttpMethod.GET.value, url=APP_API_HOST + "/v1/healthz")
12 | 
13 |         self.serving = ASyncRestfulEmergingaiAPI(
14 |             url=APP_API_HOST + "/v1/serving",
15 |             resource_key="instance_id",
16 |         )
17 | 
18 |         self.delete_serving_by_name = ASyncEmergingaiAPI(method=HttpMethod.DELETE.value, url=APP_API_HOST + "/v1/serving/name")
19 |
20 |
21 | EnovaAppApi = _EnovaAppApi()
22 |
--------------------------------------------------------------------------------
/docker/Dockerfile.enova:
--------------------------------------------------------------------------------
1 | FROM emergingai/enova:base
2 |
3 | COPY ./llmo /opt/enova/llmo
4 |
5 | COPY ./scripts /opt/enova/scripts
6 |
7 | RUN bash /opt/enova/scripts/pack_whl.llmo.sh
8 |
9 | ARG LLMO_VERSION=0.1.0
10 | RUN pip install /opt/enova/llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-${LLMO_VERSION}-py3-none-any.whl --no-deps --no-cache-dir
11 |
12 | ARG CACHEBUST=1
13 |
14 | COPY . /opt/enova
15 |
16 | RUN cd /opt/enova && bash ./scripts/pack_whl.enova.sh
17 | ARG ENOVA_VERSION=0.1.0
18 |
19 | RUN pip install -r /opt/enova/requirements.txt --no-deps --no-cache-dir && \
20 | pip install /opt/enova/dist/enova-${ENOVA_VERSION}-py3-none-any.whl --no-deps --no-cache-dir && \
21 | pip uninstall -y transformer-engine && mkdir -p /workspace/model
22 |
--------------------------------------------------------------------------------
/front/src/assets/svg/setup.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docker/Dockerfile.enova.base.npu:
--------------------------------------------------------------------------------
1 | # image enova:base-npu
2 | FROM ascendai/cann:8.1.rc1-910b-ubuntu22.04-py3.11
3 |
4 | ENV DEBIAN_FRONTEND=noninteractive
5 |
6 | RUN apt-get update && \
7 | apt-get install -y --no-install-recommends \
8 | gcc \
9 | g++ \
10 | cmake \
11 | libnuma-dev \
12 | wget \
13 | git \
14 | net-tools \
15 | ocl-icd-libopencl1 \
16 | opencl-headers \
17 | clinfo && \
18 | rm -rf /var/lib/apt/lists/*
19 |
20 | COPY ./requirements-docker.npu.txt /opt/enova/requirements.txt
21 |
22 | RUN pip install build --no-cache-dir && \
23 | pip install pip setuptools setuptools_scm[toml]==8.3.1 toml poetry && \
24 | pip install -r /opt/enova/requirements.txt --no-cache-dir
25 |
26 |
--------------------------------------------------------------------------------
/docker/Dockerfile.enova.npu:
--------------------------------------------------------------------------------
1 | # syntax=docker/dockerfile:1
2 | FROM emergingai/enova:base-npu
3 |
4 | COPY ./llmo /opt/enova/llmo
5 |
6 | COPY ./scripts /opt/enova/scripts
7 |
8 | RUN bash /opt/enova/scripts/pack_whl.llmo.sh
9 |
10 | ARG LLMO_VERSION=0.1.0
11 | RUN pip install /opt/enova/llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-${LLMO_VERSION}-py3-none-any.whl --no-deps --no-cache-dir
12 |
13 | ARG CACHEBUST=1
14 |
15 | COPY . /opt/enova
16 |
17 |
18 | RUN cd /opt/enova && rm MANIFEST.in && bash ./scripts/pack_whl.enova.sh
19 | ARG ENOVA_VERSION=0.1.0
20 |
21 | RUN pip install -r /opt/enova/requirements-docker.npu.txt --no-deps --no-cache-dir && \
22 | pip install /opt/enova/dist/enova-${ENOVA_VERSION}-py3-none-any.whl --no-deps --no-cache-dir && \
23 | pip uninstall -y transformer-engine && mkdir -p /workspace/model
24 |
25 |
--------------------------------------------------------------------------------
/escaler/pkg/httpserver/server/router.go:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import "github.com/gin-gonic/gin"
4 |
5 | type BaseResource struct {
6 | }
7 |
8 | func (r BaseResource) SetResult(c *gin.Context, result interface{}) {
9 |     c.Set("Data", result)
10 | }
11 | 
12 | func (r BaseResource) SetErrorResult(c *gin.Context, result interface{}) {
13 |     c.Set("ErrorResult", result)
14 | }
15 | 
16 | type PathResourceInterface interface {
17 |     Path() string
18 | }
19 | 
20 | type GetResourceInterface interface {
21 |     Get(c *gin.Context)
22 | }
23 | 
24 | type ListResourceInterface interface {
25 |     List(c *gin.Context)
26 | }
27 | 
28 | type PostResourceInterface interface {
29 |     Post(c *gin.Context)
30 | }
31 | 
32 | type PutResourceInterface interface {
33 |     Put(c *gin.Context)
34 | }
35 | 
36 | type DeleteResourceInterface interface {
37 |     Delete(c *gin.Context)
38 | }
39 |
--------------------------------------------------------------------------------
/front/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | export default {
3 |   content: [
4 |     "./index.html",
5 |     "./src/**/*.{vue,js,ts,jsx,tsx}",
6 |   ],
7 |   theme: {
8 |     extend: {
9 |       colors: {
10 |         primary: '#303133',
11 |         secondary: '#1272FF',
12 |         disabled: '#A8ABB2',
13 |         regular: '#606266',
14 |         gray1: '#EEF3FF',
15 |         gray2: '#EBEEF5',
16 |         gray3: '#F0F2F5',
17 |         gray4: '#7588A3',
18 |         gray5: '#909399',
19 |         gray7: '#DCDFE6',
20 |         gray8: '#F5F7FA',
21 |         black1: '#1E252E'
22 | 
23 |       },
24 |       boxShadow: {
25 |         tableShadow: 'inset 0px -1px 0px 0px #EBEEF5'
26 |       },
27 |       backgroundImage: {
28 |         'filter-icon': 'url("../assets/filter.png")'
29 |       }
30 |     },
31 |   },
32 |   plugins: [],
33 | }
34 |
35 |
--------------------------------------------------------------------------------
/front/src/assets/svg/auto.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/front/src/components/SvgIcon.vue:
--------------------------------------------------------------------------------
[Vue SFC markup stripped during text extraction]
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/prometheus/prometheus.yml:
--------------------------------------------------------------------------------
1 | global:
2 |   scrape_interval: 15s
3 |   scrape_timeout: 10s
4 |   evaluation_interval: 15s
5 | alerting:
6 |   alertmanagers:
7 |     - static_configs:
8 |         - targets: []
9 |       scheme: http
10 |       timeout: 10s
11 |       api_version: v1
12 | scrape_configs:
13 |   - job_name: prometheus
14 |     honor_timestamps: true
15 |     scrape_interval: 15s
16 |     scrape_timeout: 10s
17 |     metrics_path: /metrics
18 |     scheme: http
19 |     static_configs:
20 |       - targets:
21 |           - prometheus:9090
22 |   - job_name: 'otel-collector'
23 |     scrape_interval: 10s
24 |     static_configs:
25 |       - targets: ['otel-collector:8888']
26 |       - targets: ['otel-collector:8889']
27 | 
28 |   - job_name: 'dcgm'
29 |     static_configs:
30 |       - targets: ['dcgm-exporter:9400']
31 | 
32 |   - job_name: 'enovaserving'
33 |     static_configs:
34 |       - targets: ['enova-serving:9199']
35 |
--------------------------------------------------------------------------------
/front/src/layout/header/index.vue:
--------------------------------------------------------------------------------
[Vue SFC markup stripped during text extraction]
--------------------------------------------------------------------------------
/docker/build_image.enova.npu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | echo "Runing build image enova:base using ${PWD}"
5 |
6 | SCRIPT=$(realpath "$0")
7 | BASEDIR=$(dirname "$SCRIPT")
8 | BASEDIR=$(dirname "$BASEDIR")
9 |
10 |
11 | export HARBOR_PATH=emergingai
12 |
13 | # build enova
14 | cd $BASEDIR
15 | docker build -f $BASEDIR/docker/Dockerfile.enova.base.npu -t $HARBOR_PATH/enova:base-npu --build-arg HARBOR_PATH="$HARBOR_PATH" $BASEDIR
16 | IMAGE_VERSION=v`cat VERSION`
17 | ENOVA_VERSION=`cat VERSION`
18 | LLMO_VERSION="0.1.0"
19 |
20 | echo "Runing build image enova:${IMAGE_VERSION} using ${PWD}"
21 |
22 |
23 | docker build -f $BASEDIR/docker/Dockerfile.enova.npu -t $HARBOR_PATH/enova:$IMAGE_VERSION-npu \
24 | --build-arg ENOVA_VERSION="${ENOVA_VERSION}" \
25 | --build-arg LLMO_VERSION="${LLMO_VERSION}" \
26 | --build-arg HARBOR_PATH="$HARBOR_PATH" \
27 | --build-arg CACHEBUST=$(date +%s) \
28 | $BASEDIR
29 |
30 |
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/haproxy/haproxy.cfg:
--------------------------------------------------------------------------------
1 | defaults
2 |     mode tcp
3 |     log global
4 |     option tcplog
5 |     option dontlognull
6 |     option http-server-close
7 |     option redispatch
8 |     retries 3
9 |     timeout http-request 10s
10 |     timeout queue 1m
11 |     timeout connect 10s
12 |     timeout client 1m
13 |     timeout server 1m
14 |     timeout http-keep-alive 10s
15 |     timeout check 10s
16 |     maxconn 3000
17 | 
18 | resolvers mydns
19 |     nameserver dns1 127.0.0.1:53
20 |     resolve_retries 3
21 |     timeout resolve 1s
22 |     timeout retry 1s
23 |     hold valid 10s
24 | 
25 | frontend http_front
26 |     bind *:9199
27 |     default_backend http_back
28 | 
29 | backend http_back
30 |     balance roundrobin
31 |     server-template srv 1-3 enova.serving.com:9199 check inter 5s fall 3 rise 2 resolvers mydns init-addr last,libc,none
32 |
--------------------------------------------------------------------------------
/front/src/assets/svg/toggle.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/escaler/pkg/meta/task.go:
--------------------------------------------------------------------------------
1 | package meta
2 |
3 | import "github.com/Emerging-AI/ENOVA/escaler/pkg/api"
4 |
5 | type TaskStatus string
6 |
7 | const (
8 |     TaskStatusCreated    TaskStatus = "created"
9 |     TaskStatusScheduling TaskStatus = "scheduling"
10 |     TaskStatusRunning    TaskStatus = "running"
11 |     TaskStatusError      TaskStatus = "error"
12 |     TaskStatusFinished   TaskStatus = "finished"
13 | )
14 | 
15 | type DetectTask struct {
16 |     TaskSpec TaskSpecInterface
17 |     Status   TaskStatus
18 | }
19 | 
20 | type AnomalyRecommendResult struct {
21 |     Timestamp             int64                     `json:"timestamp"`
22 |     IsAnomaly             bool                      `json:"isAnomaly"`
23 |     ConfigRecommendResult api.ConfigRecommendResult `json:"configRecommendResult"`
24 |     CurrentConfig         api.ConfigRecommendResult `json:"currentConfig"`
25 | }
26 | 
27 | type TaskInfo struct {
28 |     Name   string `json:"name"`
29 |     Status string `json:"status"`
30 | }
31 |
--------------------------------------------------------------------------------
/enova/serving/backend/hf/handler.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | import functools
3 | from typing import Callable, Dict
4 |
5 |
6 | @dataclasses.dataclass
7 | class RemoteFunc:
8 |     method: str
9 |     path: str
10 |     func: Callable
11 |     kwarg: Dict
12 | 
13 | 
14 | REMOTE_FUNC_TAG = "__remote_func__"
15 | 
16 | 
17 | @dataclasses.dataclass
18 | class HuggingFaceHandler:
19 |     """"""
20 | 
21 |     model: str
22 |     name: str = "serving"
23 | 
24 |     @classmethod
25 |     def remote_func(cls, method, path=None, **kwarg):
26 |         def decorator(func):
27 |             actual_path = f"/{func.__name__}" if path is None else path
28 | 
29 |             @functools.wraps(func)
30 |             def wrapped_func(self, *args, **kwargs):
31 |                 return func(self, *args, **kwargs)
32 | 
33 |             setattr(wrapped_func, REMOTE_FUNC_TAG, RemoteFunc(method, actual_path, func, kwarg))
34 |             return wrapped_func
35 | 
36 |         return decorator
37 |
--------------------------------------------------------------------------------
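A hypothetical subclass showing how the remote_func decorator tags methods for later discovery; EchoHandler and its route are illustrative, not part of the repository.

    import dataclasses

    from enova.serving.backend.hf.handler import REMOTE_FUNC_TAG, HuggingFaceHandler

    @dataclasses.dataclass
    class EchoHandler(HuggingFaceHandler):
        @HuggingFaceHandler.remote_func("POST", path="/echo")
        def echo(self, text: str) -> str:
            return text

    # a server can discover routable methods by reflecting on the tag
    print(getattr(EchoHandler.echo, REMOTE_FUNC_TAG))  # RemoteFunc(method='POST', path='/echo', ...)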
/front/src/router/index.ts:
--------------------------------------------------------------------------------
1 | import { createRouter, createWebHistory } from 'vue-router'
2 | import Layout from '@/layout/index.vue'
3 |
4 | const router = createRouter({
5 |   history: createWebHistory(import.meta.env.BASE_URL),
6 |   routes: [
7 |     {
8 |       path: '/',
9 |       name: 'home',
10 |       component: Layout,
11 |       redirect: '/instance',
12 |       children: [
13 |         {
14 |           path: '/instance',
15 |           name: 'instance',
16 |           component: () => import('../views/Instance.vue'),
17 |           meta: {
18 |             title: 'service',
19 |             icon: 'docker'
20 |           }
21 |         },
22 |         {
23 |           path: '/record',
24 |           name: 'testRecord',
25 |           component: () => import('../views/TestRecord.vue'),
26 |           meta: {
27 |             title: 'record',
28 |             icon: 'log'
29 |           }
30 |         }
31 |       ]
32 |     }
33 |   ]
34 | })
35 |
36 | export default router
37 |
--------------------------------------------------------------------------------
/docker/build_image.enova.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | IMAGE_VERSION=v`cat VERSION`
5 | ENOVA_VERSION=`cat VERSION`
6 | LLMO_VERSION="0.1.0"
7 |
8 | echo "Runing build image enova:${IMAGE_VERSION} using ${PWD}"
9 |
10 | SCRIPT=$(realpath "$0")
11 | BASEDIR=$(dirname "$SCRIPT")
12 | BASEDIR=$(dirname "$BASEDIR")
13 | echo "BASEDIR: " ${BASEDIR}
14 |
15 | # build front
16 | cd $BASEDIR/front
17 | rm -rf $BASEDIR/enova/web_statics
18 | npm install
19 | npm run build
20 | # yarn
21 | # yarn build
22 |
23 | echo $BASEDIR/front/dist $BASEDIR/enova/web_statics
24 | mv $BASEDIR/front/dist $BASEDIR/enova/web_statics
25 |
26 | export HARBOR_PATH=emergingai
27 |
28 | # build enova
29 | cd $BASEDIR
30 | docker build -f $BASEDIR/docker/Dockerfile.enova -t $HARBOR_PATH/enova:$IMAGE_VERSION \
31 | --build-arg ENOVA_VERSION="${ENOVA_VERSION}" \
32 | --build-arg LLMO_VERSION="${LLMO_VERSION}" \
33 | --build-arg HARBOR_PATH="$HARBOR_PATH" \
34 | --build-arg CACHEBUST=$(date +%s) \
35 | $BASEDIR
36 |
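37 | # Usage sketch (assumes it is run from the repository root so `cat VERSION` resolves):
38 | #   bash docker/build_image.enova.sh
39 | # The script first rebuilds the front-end into enova/web_statics, then builds the image.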
--------------------------------------------------------------------------------
/docker/build_image.escaler.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | IMAGE_VERSION=v`cat VERSION`
5 |
6 | echo "Runing build image enova:${IMAGE_VERSION} using ${PWD}"
7 |
8 | SCRIPT=$(realpath "$0")
9 | BASEDIR=$(dirname "$SCRIPT")
10 | BASEDIR=$(dirname "$BASEDIR")
11 | echo "BASEDIR: " ${BASEDIR}
12 |
13 |
14 | export MIRROR_PATH=emergingai
15 |
16 | # check golang tar.gz
17 | GOLANG_TAR=dependencies/go1.22.2.linux-amd64.tar.gz
18 | DOWNLOAD_URL=https://go.dev/dl/go1.22.2.linux-amd64.tar.gz
19 |
20 | if [ ! -f "$GOLANG_TAR" ]; then
21 | mkdir -p dependencies
22 |
23 | echo "golang tar $GOLANG_TAR is not existed, start to download..."
24 | cd dependencies
25 | wget "$DOWNLOAD_URL"
26 | cd ../
27 | if [ $? -eq 0 ]; then
28 | echo "download sucessfully"
29 | else
30 | echo "failed to download"
31 | fi
32 | fi
33 |
34 | # build enova
35 | cd $BASEDIR
36 | docker build -f $BASEDIR/docker/Dockerfile.escaler -t $MIRROR_PATH/enova-escaler:$IMAGE_VERSION $BASEDIR
37 |
--------------------------------------------------------------------------------
/scripts/pack_whl.enova.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | echo "Runing packing wheel using ${PWD}"
5 |
6 | SCRIPT=$(realpath "$0")
7 | BASEDIR=$(dirname "$SCRIPT")
8 | BASEDIR=$(dirname "$BASEDIR")
9 |
10 | DOCKER_COMPOSE_BIN=enova/template/deployment/docker-compose/bin/docker-compose-linux-x86_64
11 | DOWNLOAD_URL=https://github.com/docker/compose/releases/download/v2.24.5/docker-compose-linux-x86_64
12 |
13 |
14 | if [ ! -f "$DOCKER_COMPOSE_BIN" ]; then
15 | echo "PWD: " $PWD
16 | mkdir -p enova/template/deployment/docker-compose/bin/
17 |
18 | echo "docker-compose binary $DOCKER_COMPOSE_BIN is not existed, start to download..."
19 | cd enova/template/deployment/docker-compose/bin/
20 | wget -q "$DOWNLOAD_URL"
21 |
22 | chmod +x docker-compose-linux-x86_64
23 | cd $BASEDIR
24 | if [ $? -eq 0 ]; then
25 | echo "download sucessfully"
26 | else
27 | echo "failed to download"
28 | fi
29 | fi
30 |
31 | # pack
32 | cd $BASEDIR
33 | python -m build --no-isolation
34 |
35 |
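36 | # Usage sketch (assumes build deps such as `build`, setuptools_scm and toml are already
37 | # installed, since --no-isolation skips creating an isolated build environment):
38 | #   bash scripts/pack_whl.enova.sh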
--------------------------------------------------------------------------------
/front/src/assets/svg/log.svg:
--------------------------------------------------------------------------------
1 |
6 |
--------------------------------------------------------------------------------
/requirements-docker.txt:
--------------------------------------------------------------------------------
1 | httpx==0.24.1
2 | fastapi==0.115.0
3 | vllm==0.8.5.post1
4 | sglang==0.3.6
5 | huggingface_hub
6 | hf-transfer
7 | transformers
8 | locate
9 | python-rapidjson
10 | opentelemetry-api
11 | opentelemetry-sdk
12 | opentelemetry-exporter-otlp
13 | opentelemetry-distro
14 | opentelemetry-instrumentation-fastapi
15 | streamlit
16 | pymysql==1.1.0
17 | aiomysql==0.2.0
18 | sqlalchemy==2.0.29
19 | sqlalchemy-utils
20 | aiosqlite
21 | greenlet
22 | uvicorn
23 | ulid-py
24 | pyopencl
25 | py-cpuinfo
26 | pytz
27 | tzlocal
28 | openai
29 | packaging
30 | # ray
31 | python-multipart
32 | addict
33 | orjson
34 | siphash24
35 | # msgspec
36 | # compressed_tensors
37 | # gguf
38 | # sentencepiece
39 | # mistral_common
40 |
41 | # filelock
42 | # lm-format-enforcer==0.10.3
43 | # ninja
44 | # nvidia-ml-py
45 | # outlines
46 | # pillow
47 | # prometheus-client
48 | # prometheus-fastapi-instrumentator
49 | # psutil
50 | # sentencepiece
51 | # tiktoken
52 | # tokenizers
53 | # typing-extensions
54 | # vllm-flash-attn==2.5.9.post1
55 | # xformers==0.0.27
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/grafana/grafana_provisioning/dashboards/enova-dashboards.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: 1
2 |
3 | providers:
4 | # a unique provider name. Required
5 | - name: 'ENOVA-LLMO-dashboards'
6 | # Org id. Default to 1
7 | orgId: 1
8 | # name of the dashboard folder.
9 | folder: ''
10 | # folder UID. will be automatically generated if not specified
11 | folderUid: ''
12 | # provider type. Default to 'file'
13 | type: file
14 | # disable dashboard deletion
15 | disableDeletion: false
16 | # how often Grafana will scan for changed dashboards
17 | updateIntervalSeconds: 10
18 | # allow updating provisioned dashboards from the UI
19 | allowUiUpdates: false
20 | options:
21 | # path to dashboard files on disk. Required when using the 'file' type
22 | path: /etc/dashboards
23 | # use folder names from filesystem to create folders in Grafana
24 | foldersFromFilesStructure: true
--------------------------------------------------------------------------------
/front/src/assets/svg/home.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/front/src/utils/request.ts:
--------------------------------------------------------------------------------
1 | import axios, { type AxiosResponse } from 'axios'
2 |
3 | const service = axios.create({
4 | baseURL: '/',
5 | timeout: 10000
6 | })
7 |
8 | service.interceptors.request.use(
9 | (config) => {
10 | return config
11 | },
12 | (error) => {
13 | return Promise.reject(error)
14 | }
15 | )
16 |
17 | service.interceptors.response.use(
18 | (response: AxiosResponse) => {
19 | const res = response.data
20 | if (Number(res.code) === 0 || res.status === 'success') {
21 | return Number(res.code) === 0 ? res.result : res
22 | } else {
23 | ElMessage({
24 | message: res.message || 'Error',
25 | type: 'error',
26 | duration: 5 * 1000
27 | })
28 | return Promise.reject(res)
29 | }
30 | },
31 | (error) => {
32 | ElMessage({
33 | message: error.response?.data?.message || error.message || 'Error',
34 | type: 'error',
35 | duration: 5 * 1000
36 | })
37 | return Promise.reject(error)
38 | }
39 | )
40 |
41 | export default service
42 |
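43 | // Usage sketch ('/v1/serving' mirrors src/api/instance.ts; the await shape is illustrative):
44 | //   const data = await service({ url: '/v1/serving', method: 'get' })
45 | // When the payload carries code === 0 the interceptor above unwraps it and returns res.result.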
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "enova-instrumentation-llmo"
3 | version = "0.1.0"
4 | description = "enova-instrumentation-llmo"
5 | requires-python = ">=3.10"
6 | dynamic = [
7 | "dependencies"
8 | ]
9 | authors = [
10 | { name="wenxinxie", email="wenxin@emergingai-tech.com" },
11 | ]
12 | readme = "README.md"
13 |
14 |
15 | [tool.coverage.run]
16 | branch = true
17 | source = [ "enova/llmo" ]
18 |
19 | [build-system]
20 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2,<9", "toml"]
21 |
22 | [tool.poetry]
23 | name = "enova-instrumentation-llmo"
24 | version = "0.1.0"
25 | description = "llmo instrumentation for OpenTelemetry"
26 | authors = ["wenxinxie "]
27 |
28 | [[tool.poetry.packages]]
29 | include = "enova/llmo"
30 |
31 | [tool.poetry.dependencies]
32 | python = "^3.10"
33 | opentelemetry-api = "*"
34 | opentelemetry-sdk = "*"
35 | vllm = "0.8.5.post1"
36 | fastapi = "*"
37 | opentelemetry-exporter-otlp = "*"
38 | opentelemetry-distro = "*"
39 | opentelemetry-instrumentation-fastapi = "*"
40 |
--------------------------------------------------------------------------------
/front/README.md:
--------------------------------------------------------------------------------
1 | # enova-web
2 |
3 | This template should help get you started developing with Vue 3 in Vite.
4 |
5 | ## Recommended IDE Setup
6 |
7 | [VSCode](https://code.visualstudio.com/) + [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) (and disable Vetur).
8 |
9 | ## Type Support for `.vue` Imports in TS
10 |
11 | TypeScript cannot handle type information for `.vue` imports by default, so we replace the `tsc` CLI with `vue-tsc` for type checking. In editors, we need [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) to make the TypeScript language service aware of `.vue` types.
12 |
13 | ## Customize configuration
14 |
15 | See [Vite Configuration Reference](https://vitejs.dev/config/).
16 |
17 | ## Project Setup
18 |
19 | ```sh
20 | npm install
21 | ```
22 |
23 | ### Compile and Hot-Reload for Development
24 |
25 | ```sh
26 | npm run dev
27 | ```
28 |
29 | ### Type-Check, Compile and Minify for Production
30 |
31 | ```sh
32 | npm run build
33 | ```
34 |
35 | ### Lint with [ESLint](https://eslint.org/)
36 |
37 | ```sh
38 | npm run lint
39 | ```
40 |
--------------------------------------------------------------------------------
/requirements-docker.npu.txt:
--------------------------------------------------------------------------------
1 | httpx==0.24.1
2 | fastapi==0.108.0
3 | vllm==0.9.0
4 | vllm-ascend==v0.9.0rc2
5 | # sglang==0.3.6
6 | huggingface_hub
7 | hf-transfer
8 | transformers==4.51.1
9 | locate
10 | python-rapidjson
11 | opentelemetry-api==1.36.0
12 | opentelemetry-sdk==1.36.0
13 | opentelemetry-exporter-otlp==1.36.0
14 | opentelemetry-distro
15 | opentelemetry-instrumentation-fastapi
16 | streamlit
17 | pymysql==1.1.0
18 | aiomysql==0.2.0
19 | sqlalchemy==2.0.29
20 | sqlalchemy-utils
21 | aiosqlite
22 | greenlet
23 | uvicorn
24 | ulid-py
25 | # pyopencl
26 | py-cpuinfo
27 | pytz
28 | tzlocal
29 | openai
30 | packaging
31 | ray
32 | python-multipart
33 | addict
34 | orjson
35 | siphash24
36 | uvloop
37 | qwen-vl-utils
38 | watchfiles
39 | # msgspec
40 | # compressed_tensors
41 | # gguf
42 | # sentencepiece
43 | # mistral_common
44 |
45 | # filelock
46 | # lm-format-enforcer==0.10.3
47 | # ninja
48 | # nvidia-ml-py
49 | # outlines
50 | # pillow
51 | # prometheus-client
52 | # prometheus-fastapi-instrumentator
53 | # psutil
54 | # sentencepiece
55 | # tiktoken
56 | # tokenizers
57 | # typing-extensions
58 | # vllm-flash-attn==2.5.9.post1
59 | # xformers==0.0.27
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/escaler/conf/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "resource_backend": {
3 | "type": "docker"
4 | },
5 | "docker": {
6 |
7 | },
8 | "detector": {
9 | "prom": {
10 | "host": "enova-prometheus",
11 | "port": 9090
12 | },
13 | "api": {
14 | "host": "0.0.0.0",
15 | "port": 8183,
16 | "version": "v1",
17 | "url_prefix": "/escaler"
18 | },
19 | "detect_interval": 30
20 | },
21 | "scaler": {},
22 | "zmq": {
23 | "host": "127.0.0.1",
24 | "port": 4321
25 | },
26 | "redis": {
27 | "addr": "127.0.0.1:6379",
28 | "password": "",
29 | "db": 0
30 | },
31 | "enova_algo": {
32 | "host": "enova-algo:8181"
33 | },
34 | "serving": {
35 | "image": "emergingai/enova:v0.1.0",
36 | "start_cmd": [
37 | ],
38 | "network": "enova-mon_enova-net",
39 | "network_alias": "enova-serving",
40 | "name": "enova"
41 | },
42 | "logger": {
43 | "name": "server",
44 | "path": "./var/log/emergingai",
45 | "level": "debug"
46 | }
47 | }
--------------------------------------------------------------------------------
/escaler/conf/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "resource_backend": {
3 | "type": "docker"
4 | },
5 | "docker": {
6 |
7 | },
8 | "detector": {
9 | "prom": {
10 | "host": "enova-prometheus",
11 | "port": 9090
12 | },
13 | "api": {
14 | "host": "0.0.0.0",
15 | "port": 8183,
16 | "version": "v1",
17 | "url_prefix": "/escaler"
18 | },
19 | "detect_interval": 30
20 | },
21 | "scaler": {},
22 | "zmq": {
23 | "host": "127.0.0.1",
24 | "port": 4321
25 | },
26 | "redis": {
27 | "addr": "127.0.0.1:6379",
28 | "password": "",
29 | "db": 0
30 | },
31 | "enova_algo": {
32 | "host": "127.0.0.1:8181"
33 | },
34 | "serving": {
35 | "image": "emergingai/enova:v0.1.0",
36 | "start_cmd": [
37 | "sleep",
38 | "inf"
39 | ],
40 | "network": "enova-mon_enova-net",
41 | "network_alias": "enova-serving",
42 | "name": "enova"
43 | },
44 | "logger": {
45 | "name": "server",
46 | "path": "./var/log/emergingai",
47 | "level": "debug"
48 | }
49 | }
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/nginx/nginx.conf:
--------------------------------------------------------------------------------
1 | worker_processes 8;
2 | worker_rlimit_nofile 65535;
3 |
4 | events {
5 | worker_connections 20480;
6 | }
7 |
8 |
9 | http {
10 |
11 | client_max_body_size 4096M;
12 | client_header_buffer_size 512k;
13 | large_client_header_buffers 4 512k;
14 |
15 | access_log /var/log/nginx/access.log;
16 | error_log /var/log/nginx/error.log;
17 |
18 | resolver 127.0.0.11 valid=1s;
19 | upstream backend {
20 | server enova-serving:9199 max_fails=1 fail_timeout=1s;
21 | }
22 |
23 | server {
24 | underscores_in_headers on;
25 | ignore_invalid_headers off;
26 |
27 | listen 9199;
28 | server_name artrefine_proxy;
29 | keepalive_timeout 3600;
30 |
31 | access_log /var/log/nginx/enova_access.log;
32 | error_log /var/log/nginx/enova_error.log;
33 |
34 | location / {
35 | proxy_read_timeout 3600;
36 | proxy_pass http://backend;
37 | proxy_set_header Host $proxy_host;
38 | proxy_set_header X-Real-IP $remote_addr;
39 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
40 | }
41 |
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "enova"
3 | description = "enova"
4 | requires-python = ">=3.8"
5 | dynamic = ["dependencies", "version"]
6 | authors = [
7 | { name = "kyokagong", email = "kyokagong@emergingai-tech.com" },
8 | { name = "wenxinxie", email = "wenxin@emergingai-tech.com" },
9 | { name = "jockyhawk", email = "jockyhawk@emergingai-tech.com" },
10 | { name = "kimzhao", email = "kimzhao@emergingai-tech.com" },
11 | ]
12 | readme = "README.md"
13 |
14 | [project.scripts]
15 | enova = "enova.entry.cli:main"
16 |
17 | [project.optional-dependencies]
18 | lint = ["black==23.12.0"]
19 | test = ["pytest", "pytest-cov", "responses", "respx"]
20 |
21 |
22 | [build-system]
23 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2,<9", "toml"]
24 | build-backend = "setuptools.build_meta"
25 |
26 |
27 | [tool.setuptools.packages.find]
28 | where = ["."]
29 | include = ["enova.*"]
30 | namespaces = true
31 |
32 | [tool.setuptools.package-data]
33 | "*" = ["*.csv", "docker-compose-*"]
34 | "enova.web_statics" = ["*", "*/*"]
35 |
36 | [tool.setuptools.dynamic]
37 | dependencies = { file = ["requirements.txt"] }
38 | version = {file = ["VERSION"]}
39 |
40 | [tool.coverage.run]
41 | omit = ["*/tests/test_*.py"]
42 |
--------------------------------------------------------------------------------
/docker/Dockerfile.escaler:
--------------------------------------------------------------------------------
1 | FROM ubuntu:22.04
2 |
3 | RUN apt update && apt install build-essential redis libzmq3-dev ca-certificates pkg-config net-tools iputils-ping -y
4 | COPY dependencies/go1.22.2.linux-amd64.tar.gz /tmp/go1.22.2.linux-amd64.tar.gz
5 | RUN cd /tmp && tar -xf go1.22.2.linux-amd64.tar.gz && cp -r go /usr/local/go
6 | ENV PATH=/usr/local/go/bin:$PATH
7 | ENV GO111MODULE="on"
8 | ENV APK_REP="mirrors.ustc.edu.cn"
9 |
10 | #ENV GOPROXY="https://goproxy.io,direct"
11 | #ENV GOPROXY=https://proxy.golang.org,direct
12 | ENV GOPROXY=https://goproxy.cn,direct
13 | #ENV GOPROXY=https://mirrors.aliyun.com/goproxy/,direct
14 | #ENV GOCACHE=/go-cache
15 |
16 | # create and set cache directory permissions
17 | RUN mkdir /go-cache && chmod -R 777 /go-cache
18 |
19 | WORKDIR /app
20 |
21 | COPY escaler .
22 |
23 | # copy go module file to workdir
24 | COPY escaler/go.mod escaler/go.sum ./
25 |
26 | # download dependencies on go module
27 | RUN go mod download
28 |
29 | # download swagger toolset
30 | RUN go install github.com/swaggo/swag/cmd/swag@latest
31 |
32 | # compile and install
33 | RUN go env \
34 | && CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o escaler cmd/escaler/main.go && \
35 | cp escaler /usr/local/bin/escaler
36 |
--------------------------------------------------------------------------------
/enova/entry/command/algo.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import click
3 |
4 | from enova.common.cli_helper import ArgumentHelper
5 | from enova.common.config import CONFIG
6 |
7 |
8 | class EnovaAlgo:
9 | # TODO: support run compose
10 | def run(self):
11 | args_helper = ArgumentHelper(self, sys._getframe())
12 | CONFIG.update_config(args_helper.args_map)
13 |
14 | import uvicorn
15 |
16 | from enova.algo.server import get_algo_api_server
17 |
18 | api_server = get_algo_api_server()
19 | uvicorn.run(api_server.app, host=CONFIG.enova_algo["host"], port=CONFIG.enova_algo["port"])
20 |
21 |
22 | pass_enova_algo = click.make_pass_decorator(EnovaAlgo)
23 |
24 |
25 | @click.group(name="algo")
26 | @click.pass_context
27 | def algo_cli(ctx):
28 | """
29 | Run the algo (config recommendation and anomaly detection) service.
30 | """
31 | ctx.obj = EnovaAlgo()
32 |
33 |
34 | @algo_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"])
35 | @pass_enova_algo
36 | @click.pass_context
37 | def mon_run(ctx, enova_algo: EnovaAlgo):
38 | enova_algo.run()
39 |
40 |
41 | @algo_cli.command(name="stop")
42 | @pass_enova_algo
43 | @click.pass_context
44 | def mon_stop(ctx, enova_algo: EnovaAlgo):
45 | enova_algo.stop()
46 |
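47 | # CLI sketch (the flag names are assumptions; extra args are parsed by ArgumentHelper):
48 | #   enova algo run --host 0.0.0.0 --port 8181
49 | # The run command merges the parsed args into CONFIG and serves enova.algo.server via uvicorn.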
--------------------------------------------------------------------------------
/front/src/assets/svg/autoRefresh.svg:
--------------------------------------------------------------------------------
1 |
6 |
--------------------------------------------------------------------------------
/front/src/components/Drawer.vue:
--------------------------------------------------------------------------------
1 |
2 |
11 |
12 |
13 | {{ title }}
14 | - {{ titleDesc }}
15 |
16 |
17 |
18 |
19 |
20 |
44 |
54 |
--------------------------------------------------------------------------------
/enova/serving/middlewares/base.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import List
3 | from fastapi import Request
4 |
5 |
6 | def get_dependencies() -> List:
7 | return []
8 |
9 |
10 | class BaseMiddleware(metaclass=abc.ABCMeta):
11 | """"""
12 |
13 |
14 | class EnovaAIMultiMiddlewares:
15 | def __init__(self) -> None:
16 | self.middlewares: List[BaseMiddleware] = []
17 | self.request_middlewares: List[BaseMiddleware] = []
18 | self.response_middlewares: List[BaseMiddleware] = []
19 |
20 | def register(self, middleware: BaseMiddleware):
21 | self.middlewares.append(middleware)
22 | if hasattr(middleware, "_process_request"):
23 | self.request_middlewares.append(middleware)
24 | if hasattr(middleware, "_process_response"):
25 | self.response_middlewares.append(middleware)
26 |
27 | async def process(self, request: Request, call_next):
28 | # request
29 | for middleware in self.request_middlewares:
30 | if hasattr(middleware, "_process_request"):
31 | await middleware._process_request(request)
32 | response = await call_next(request)
33 | # response
34 | for middleware in self.response_middlewares:
35 | response = await middleware._process_response(request, response)
36 | return response
37 |
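38 | # Registration sketch (LoggingMiddleware is hypothetical): a middleware opts into the
39 | # request/response phases simply by defining _process_request / _process_response.
40 | #
41 | # class LoggingMiddleware(BaseMiddleware):
42 | #     async def _process_request(self, request):
43 | #         print(request.url)
44 | #
45 | #     async def _process_response(self, request, response):
46 | #         return response
47 | #
48 | # middlewares = EnovaAIMultiMiddlewares()
49 | # middlewares.register(LoggingMiddleware())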
--------------------------------------------------------------------------------
/enova/serving/apiserver.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | from enova.common.constant import ServingBackend
3 | from enova.common.config import CONFIG
4 | from enova.serving.backend.transformers import TransformersBackend
5 | from enova.serving.backend.vllm import VllmBackend
6 | from enova.serving.backend.sglang import SglangBackend
7 |
8 |
9 | @dataclasses.dataclass
10 | class EApiServer:
11 | """
12 | Need to adapt to multiple task, text2text, text2image, image2image
13 | support multiple api according to different task
14 | """
15 |
16 | host: str
17 | port: int
18 | model: str
19 | backend: str
20 |
21 | def __post_init__(self):
22 | self.backend_ins = None
23 |
24 | def get_backend_ins(self):
25 | engine_map = {
26 | ServingBackend.HF.value: TransformersBackend,
27 | ServingBackend.VLLM.value: VllmBackend,
28 | ServingBackend.SGLANG.value: SglangBackend}
29 | if self.backend not in engine_map:
30 | raise ValueError(f"serving.backend: {self.backend} is not in {ServingBackend.values()}")
31 | return engine_map[self.backend](self.backend, self.model)
32 |
33 | def local_run(self):
34 | """"""
35 | self.backend_ins = self.get_backend_ins()
36 | self.backend_ins.local_run(host=self.host, port=self.port)
37 |
--------------------------------------------------------------------------------
/front/src/assets/svg/cross.svg:
--------------------------------------------------------------------------------
1 |
3 |
9 |
--------------------------------------------------------------------------------
/front/src/assets/svg/docker.svg:
--------------------------------------------------------------------------------
1 |
6 |
--------------------------------------------------------------------------------
/escaler/cmd/escaler/mock_enovaalgo.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "net/http"
5 |
6 | "github.com/Emerging-AI/ENOVA/escaler/pkg/api"
7 | "github.com/gin-gonic/gin"
8 | )
9 |
10 | func StartMockEnovaAlgoServer() {
11 | r := gin.Default()
12 | r.POST("/api/enovaalgo/v1/config_recommend", func(c *gin.Context) {
13 | c.JSON(http.StatusOK, api.EnvoaResponse{
14 | Message: "",
15 | Code: 0,
16 | Result: api.ConfigRecommendResult{
17 | MaxNumSeqs: 32,
18 | TensorParallelSize: 1,
19 | GpuMemoryUtilization: 0.8,
20 | Replicas: 1,
21 | },
22 | TraceId: "TraceId",
23 | Version: "v1",
24 | })
25 | })
26 |
27 | r.POST("/api/enovaalgo/v1/anomaly_detect", func(c *gin.Context) {
28 | c.JSON(http.StatusOK, api.EnvoaResponse{
29 | Message: "",
30 | Code: 0,
31 | Result: api.AnomalyDetectResponse{
32 | IsAnomaly: 0,
33 | },
34 | TraceId: "TraceId",
35 | Version: "v1",
36 | })
37 | })
38 |
39 | r.POST("/api/enovaalgo/v1/anomaly_recover", func(c *gin.Context) {
40 | c.JSON(http.StatusOK, api.EnvoaResponse{
41 | Message: "",
42 | Code: 0,
43 | Result: api.ConfigRecommendResult{
44 | MaxNumSeqs: 32,
45 | TensorParallelSize: 1,
46 | GpuMemoryUtilization: 0.8,
47 | Replicas: 1,
48 | },
49 | TraceId: "TraceId",
50 | Version: "v1",
51 | })
52 | })
53 | r.Run(":8181")
54 | }
55 |
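56 | // Smoke-test sketch against the mock server (the empty payload is illustrative):
57 | //   curl -X POST http://127.0.0.1:8181/api/enovaalgo/v1/config_recommend -d '{}'
58 | // Every handler replies with a fixed api.EnvoaResponse, so the escaler can be
59 | // exercised without a real enova-algo service.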
--------------------------------------------------------------------------------
/enova/server/middleware/base.py:
--------------------------------------------------------------------------------
1 | import abc
2 | import dataclasses
3 | from typing import List
4 | from fastapi import Request
5 |
6 |
7 | def get_dependencies() -> List:
8 | return []
9 |
10 |
11 | @dataclasses.dataclass
12 | class BaseMiddleware(metaclass=abc.ABCMeta):
13 | """"""
14 |
15 | api_config: dict
16 |
17 |
18 | class EmergingAIMultiMiddlewares:
19 |
20 | def __init__(self) -> None:
21 | self.middlewares: List[BaseMiddleware] = []
22 | self.request_middlewares: List[BaseMiddleware] = []
23 | self.response_middlewares: List[BaseMiddleware] = []
24 |
25 | def register(self, middleware: BaseMiddleware):
26 | self.middlewares.append(middleware)
27 | if hasattr(middleware, "_process_request"):
28 | self.request_middlewares.append(middleware)
29 | if hasattr(middleware, "_process_response"):
30 | self.response_middlewares.append(middleware)
31 |
32 | async def process(self, request: Request, call_next):
33 | # request
34 | for middleware in self.request_middlewares:
35 | if hasattr(middleware, "_process_request"):
36 | await middleware._process_request(request)
37 | response = await call_next(request)
38 | # response
39 | for middleware in self.response_middlewares:
40 | response = await middleware._process_response(request, response)
41 | return response
42 |
--------------------------------------------------------------------------------
/escaler/pkg/api/prom.go:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import (
4 | "fmt"
5 | "sync"
6 |
7 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
8 | )
9 |
10 | var promClientInitOnce sync.Once
11 |
12 | type Metric map[string]string
13 |
14 | type ValueSet []interface{}
15 |
16 | type Series struct {
17 | Metric Metric `json:"metric"`
18 | Values []ValueSet `json:"values"`
19 | }
20 |
21 | type PromData struct {
22 | ResultType string `json:"resultType"`
23 | Result []Series `json:"result"`
24 | }
25 |
26 | type PromResponse struct {
27 | Status string
28 | Data PromData
29 | }
30 |
31 | type promClient struct {
32 | Query HttpApi[PromResponse]
33 | QueryRange HttpApi[PromResponse]
34 | }
35 |
36 | var PromClient *promClient
37 |
38 | func GetPromClient() *promClient {
39 | promClientInitOnce.Do(func() {
40 | PromClient = &promClient{
41 | Query: HttpApi[PromResponse]{
42 | Method: "GET",
43 | Url: fmt.Sprintf("http://%s:%d/api/v1/query", config.GetEConfig().Detector.Prom.Host, config.GetEConfig().Detector.Prom.Port),
44 | HeaderBuilder: &EmptyHeaderBuilder{},
45 | },
46 | QueryRange: HttpApi[PromResponse]{
47 | Method: "GET",
48 | Url: fmt.Sprintf("http://%s:%d/api/v1/query_range", config.GetEConfig().Detector.Prom.Host, config.GetEConfig().Detector.Prom.Port),
49 | HeaderBuilder: &EmptyHeaderBuilder{},
50 | },
51 | }
52 | })
53 | return PromClient
54 | }
55 |
--------------------------------------------------------------------------------
/front/src/api/instance.ts:
--------------------------------------------------------------------------------
1 | import service from '@/utils/request'
2 | enum API {
3 | ENODE = '/v1/serving',
4 | MONITOR = '/api/v1/query_range',
5 | PILOT = '/api/escaler/v1'
6 | }
7 |
8 | export const getServing = () => service({
9 | url: API.ENODE,
10 | method: 'get',
11 | });
12 |
13 | export const addServing = () => service({
14 | url: API.ENODE,
15 | method: 'post',
16 | data: {
17 | "instance_name": "enova_test",
18 | "model": "THUDM/chatglm3-6b"
19 | },
20 | })
21 |
22 | export const deleteServing = (id: string) => service({
23 | url: `${API.ENODE}/${id}`,
24 | method: 'delete',
25 | });
26 |
27 | export const getExperiment = (params: string) => service({
28 | url: `${API.ENODE}/instance/test?${params}`,
29 | method: 'get',
30 | })
31 |
32 | export const createTest = (data: any) => service({
33 | url: `${API.ENODE}/instance/test`,
34 | method: 'post',
35 | data
36 | })
37 |
38 | const getPromUrl = (port: number) => {
39 | const { protocol, hostname } = window.location
40 | if (import.meta.env.MODE === 'development') return '/'
41 | return `${protocol}//${hostname}:${port}/`
42 | }
43 |
44 | export const getMonitorData = (params?: string) => service({
45 | url: `${API.MONITOR}?${params}`,
46 | baseURL: getPromUrl(32826),
47 | method: 'get',
48 | })
49 |
50 | export const getDetectHistory = (params?: string) => service({
51 | url: `${API.PILOT}/task/detect/history?${params}`,
52 | baseURL: getPromUrl(8183),
53 | method: 'get',
54 | })
--------------------------------------------------------------------------------
/front/src/stores/config.ts:
--------------------------------------------------------------------------------
1 | interface InstanceType {
2 | instance_id: string
3 | instance_name: string
4 | instance_spec: {
5 | cpu: {
6 | brand_name: string
7 | core_amount: number
8 | }
9 | gpu: {
10 | product: string
11 | video_memory: string
12 | card_amount: number
13 | }
14 | memory: string
15 | }
16 | startup_args: {
17 | exported_job: string
18 | dtype: string
19 | load_format: string
20 | max_num_batched_tokens: number
21 | max_num_seqs: number
22 | max_paddings: number
23 | max_seq_len: number
24 | model: string
25 | tokenizer: string
26 | pipeline_parallel_size: number
27 | tensor_parallel_size: number
28 | quantization: null
29 | }
30 | serving_id: string
31 | deploy_status: string
32 | create_time: string
33 | }
34 |
35 | interface ExperimentType {
36 | test_id: string
37 | instance_id: string
38 | test_spec: {
39 | data_set: string
40 | duration: number
41 | duration_unit: string
42 | distribution: string
43 | tps_mean: number
44 | tps_std?: string
45 | }
46 | param_spec: {
47 | max_tokens: number
48 | temperature: number
49 | top_p: number
50 | others: string
51 | }
52 | test_status: string
53 | prompt_tps: number
54 | generation_tps: number
55 | result: {
56 | total: number
57 | success: number
58 | elasped_avg: number
59 | }
60 | create_time: string
61 | }
62 |
63 | export type { InstanceType, ExperimentType }
64 |
--------------------------------------------------------------------------------
/escaler/cmd/escaler/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "flag"
5 | "fmt"
6 | "sync"
7 |
8 | "github.com/Emerging-AI/ENOVA/escaler/cmd/escaler/docs"
9 |
10 | "github.com/Emerging-AI/ENOVA/escaler/pkg/detector"
11 | "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
12 | "github.com/Emerging-AI/ENOVA/escaler/pkg/scaler"
13 |
14 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
15 |
16 | swaggerfiles "github.com/swaggo/files"
17 | ginSwagger "github.com/swaggo/gin-swagger"
18 | )
19 |
20 | func main() {
21 | confPath := flag.String("conf", "conf/settings.json", "Path to the configuration file")
22 | flag.Parse()
23 |
24 | fmt.Printf("Using configuration file: %s\n", *confPath)
25 | econfig := config.GetEConfig()
26 | econfig.Init(*confPath)
27 | econfig.PrintConfig()
28 |
29 | docs.SwaggerInfo.Title = "Monitor Service API"
30 | docs.SwaggerInfo.Description = "This is a monitor service."
31 | docs.SwaggerInfo.Version = "1.0"
32 | //docs.SwaggerInfo.Host = "121.36.212.78:30080"
33 | docs.SwaggerInfo.Host = "0.0.0.0:8183"
34 | docs.SwaggerInfo.BasePath = "/"
35 | docs.SwaggerInfo.Schemes = []string{"http", "https"}
36 |
37 | var wg sync.WaitGroup
38 |
39 | ch := make(chan meta.TaskSpecInterface)
40 | d := detector.NewDetectorServer(ch, nil)
41 | d.GetEngine().GET("/api/escaler/docs/*any", ginSwagger.WrapHandler(swaggerfiles.Handler))
42 |
43 | s := scaler.NewServingScaler(ch)
44 |
45 | wg.Add(2)
46 | go d.RunInWaitGroup(&wg)
47 | go s.RunInWaitGroup(&wg)
48 |
49 | wg.Wait()
50 | close(ch)
51 | fmt.Println("All tasks finished.")
52 | }
53 |
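54 | // Run sketch (paths relative to escaler/):
55 | //   go run cmd/escaler/main.go -conf conf/settings.json
56 | // Swagger UI is mounted at /api/escaler/docs/*any on port 8183; the exact index
57 | // page path follows the usual gin-swagger convention.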
--------------------------------------------------------------------------------
/enova/entry/cli.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 | from enova.common.config import _get_pkg_version, CONFIG
4 | from enova.entry.command.algo import algo_cli
5 | from enova.entry.command.app import app_cli
6 | from enova.entry.command.serving import serving_cli
7 | from enova.entry.command.injector import injector_cli
8 | from enova.entry.command.mon import mon_cli
9 | from enova.entry.command.pilot import pilot_cli
10 | from enova.entry.command.webui import webui_cli
11 |
12 |
13 | @click.version_option(_get_pkg_version(), "--version", "-v")
14 | @click.group(context_settings=CONFIG.cli["context_settings"])
15 | def cli():
16 | """
17 | \b
18 | ███████╗███╗ ██╗ ██████╗ ██╗ ██╗ █████╗
19 | ██╔════╝████╗ ██║██╔═══██╗██║ ██║██╔══██╗
20 | █████╗ ██╔██╗ ██║██║ ██║██║ ██║███████║
21 | ██╔══╝ ██║╚██╗██║██║ ██║╚██╗ ██╔╝██╔══██║
22 | ███████╗██║ ╚████║╚██████╔╝ ╚████╔╝ ██║ ██║
23 | ╚══════╝╚═╝ ╚═══╝ ╚═════╝ ╚═══╝ ╚═╝ ╚═╝
24 |
25 | \b
26 | ENOVA is an open-source LLM deployment, monitoring, injection and auto-scaling service.
27 | It provides a set of commands to deploy stable serverless serving of LLM on GPU clusters with auto-scaling.
28 | """
29 | pass
30 |
31 |
32 | def main():
33 | cli.add_command(serving_cli)
34 | cli.add_command(app_cli)
35 | cli.add_command(webui_cli)
36 | cli.add_command(mon_cli)
37 | cli.add_command(algo_cli)
38 | cli.add_command(injector_cli)
39 |
40 | cli.add_command(pilot_cli) # all in one
41 |
42 | cli()
43 |
44 |
45 | if __name__ == "__main__":
46 | main()
47 |
--------------------------------------------------------------------------------
/enova/algo/resource.py:
--------------------------------------------------------------------------------
1 | from enova.server.restful.router import BaseResource
2 | from enova.algo.serializer import (
3 | ConfigRecommendRequestSLZ,
4 | ConfigRecommendResponseSLZ,
5 | AnomalyDetectRequestSLZ,
6 | AnomalyDetectResponseSLZ,
7 | AnomalyRecoverRequestSLZ,
8 | AnomalyRecoverResponseSLZ,
9 | )
10 | from enova.algo.service import AlgoService
11 |
12 |
13 | class AlgoBaseResource(BaseResource):
14 | def __init__(self) -> None:
15 | self.service = AlgoService()
16 |
17 |
18 | class ConfigRecommendResource(AlgoBaseResource):
19 | PATH = "/config_recommend"
20 | TAGS = ["Algo"]
21 | GET_INCLUDE_IN_SCHEMA = False
22 | POST_INCLUDE_IN_SCHEMA = False
23 |
24 | async def post(self, params: ConfigRecommendRequestSLZ) -> ConfigRecommendResponseSLZ:
25 | return await self.service.config_recommend(params.dict())
26 |
27 |
28 | class AnomalyDetectResource(AlgoBaseResource):
29 | PATH = "/anomaly_detect"
30 | TAGS = ["Algo"]
31 | GET_INCLUDE_IN_SCHEMA = False
32 | POST_INCLUDE_IN_SCHEMA = False
33 |
34 | async def post(self, params: AnomalyDetectRequestSLZ) -> AnomalyDetectResponseSLZ:
35 | return await self.service.anomaly_detect(params.dict())
36 |
37 |
38 | class AnomalyRecoverResource(AlgoBaseResource):
39 | PATH = "/anomaly_recover"
40 | TAGS = ["Algo"]
41 | GET_INCLUDE_IN_SCHEMA = False
42 | POST_INCLUDE_IN_SCHEMA = False
43 |
44 | async def post(self, params: AnomalyRecoverRequestSLZ) -> AnomalyRecoverResponseSLZ:
45 | return await self.service.anomaly_recover(params.dict())
46 |
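47 | # Request sketch (the /api/enovaalgo/v1 prefix is taken from the escaler mock client):
48 | #   POST /api/enovaalgo/v1/config_recommend
49 | #   POST /api/enovaalgo/v1/anomaly_detect
50 | #   POST /api/enovaalgo/v1/anomaly_recover
51 | # Each resource validates the body with its request SLZ and delegates to AlgoService.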
--------------------------------------------------------------------------------
/enova/serving/backend/sglang.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | import os
3 | from enova.common.logger import LOGGER
4 | from enova.common.config import CONFIG
5 | from enova.serving.backend.base import BaseBackend
6 |
7 |
8 | @dataclasses.dataclass
9 | class SglangBackend(BaseBackend):
10 | def __post_init__(self):
11 | """Initialize the SglangBackend specific components."""
12 |
13 | def _create_app(self):
14 | from sglang.srt.server import app as sglang_app, launch_engine
15 | from sglang.srt.server_args import ServerArgs
16 | from sglang.srt.utils import add_prometheus_middleware, set_prometheus_multiproc_dir
17 | from sglang.srt.metrics.func_timer import enable_func_timer
18 |
19 | if not self.model:
20 | raise RuntimeError("Model path must be specified")
21 |
22 | if "tensor_parallel_size" in CONFIG.sglang:
23 | CONFIG.sglang["tp_size"] = CONFIG.sglang.pop("tensor_parallel_size")
24 | server_args = ServerArgs(host=CONFIG.serving["host"], port=CONFIG.serving["port"], model_path=self.model, **CONFIG.sglang)
25 | launch_engine(server_args)
26 | set_prometheus_multiproc_dir()
27 | os.makedirs(os.environ["PROMETHEUS_MULTIPROC_DIR"], exist_ok=True)
28 | add_prometheus_middleware(sglang_app)
29 | enable_func_timer()
30 |
31 | self.app = sglang_app
32 |
33 | @self.app.get("/v1/model/info/args")
34 | async def get_engine_args():
35 | return {"code": 0, "result": server_args}
36 |
37 | LOGGER.info("SGLangBackend FastAPI app created and routes defined.")
38 |
--------------------------------------------------------------------------------
/docker/Dockerfile.enova.base:
--------------------------------------------------------------------------------
1 | # image enova:base
2 | FROM nvcr.io/nvidia/pytorch:24.07-py3
3 |
4 | RUN apt update && apt install -y \
5 | net-tools \
6 | ocl-icd-libopencl1 \
7 | opencl-headers \
8 | clinfo
9 |
10 | RUN mkdir -p /etc/OpenCL/vendors && \
11 | echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd && \
12 | mkdir -p /opt/enova
13 |
14 | COPY ./requirements-docker.txt /opt/enova/requirements.txt
15 | COPY ./requirements-docker-no-deps.txt /opt/enova/requirements-docker-no-deps.txt
16 |
17 | RUN export https_proxy=http://192.168.3.2:7892 && export http_proxy=http://192.168.3.2:7892 && \
18 | pip install flashinfer-python -i https://flashinfer.ai/whl/cu124/torch2.6 --no-deps --no-cache-dir
19 | RUN pip install build --no-cache-dir && \
20 | pip install pip setuptools setuptools_scm[toml]==7.1.0 toml poetry && \
21 | pip install -r /opt/enova/requirements.txt --no-cache-dir && \
22 | pip install -r /opt/enova/requirements-docker-no-deps.txt --no-deps --no-cache-dir
23 |
24 | RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
25 | pip install build --no-cache-dir && \
26 | pip install pip setuptools setuptools_scm[toml]==7.1.0 toml poetry --index-url https://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com && \
27 | pip install -r /opt/enova/requirements.txt --no-cache-dir --index-url https://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com && \
28 | pip install -r /opt/enova/requirements-docker-no-deps.txt --no-deps --no-cache-dir --index-url https://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
29 |
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/otel-collector/collector-config.yaml:
--------------------------------------------------------------------------------
1 | receivers:
2 | otlp:
3 | protocols:
4 | grpc:
5 | http:
6 | otlp/spanmetrics:
7 | protocols:
8 | grpc:
9 | endpoint: localhost:12345
10 |
11 |
12 | exporters:
13 | debug:
14 | verbosity: detailed
15 | otlp:
16 | endpoint: tempo:4317
17 | tls:
18 | insecure: true
19 | otlp/spanmetrics:
20 | endpoint: "localhost:4317"
21 | tls:
22 | insecure: true
23 | prometheus:
24 | endpoint: 0.0.0.0:8889
25 | # prometheusremotewrite:
26 | # endpoint: "http://prometheus:9090/api/v1/write"
27 |
28 | processors:
29 | batch:
30 | memory_limiter:
31 | check_interval: 5s
32 | limit_percentage: 80
33 | spike_limit_percentage: 25
34 | spanmetrics:
35 | metrics_exporter: otlp/spanmetrics
36 | dimensions:
37 | - name: batch_size
38 | # - name: parameters
39 | attributes/http:
40 | actions:
41 | - action: delete
42 | key: "http.server_name"
43 | - action: delete
44 | key: "http.host"
45 |
46 | extensions:
47 | health_check:
48 |
49 | service:
50 | extensions: [health_check]
51 | pipelines:
52 | traces:
53 | receivers: [otlp]
54 | processors: [spanmetrics, batch]
55 | exporters: [otlp]
56 | metrics/spanmetrics:
57 | receivers: [otlp/spanmetrics]
58 | exporters: [otlp/spanmetrics]
59 | metrics:
60 | receivers: [otlp]
61 | processors: [attributes/http, batch]
62 | exporters: [debug, prometheus]
63 | # logs:
64 | # receivers: [otlp]
65 | # processors: [batch]
66 | # exporters: [debug]
--------------------------------------------------------------------------------
/enova/server/restful/serializer.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Dict, List
3 |
4 | from pydantic import BaseModel, Field
5 | from pydantic.version import VERSION as PYDANTIC_VERSION
6 |
7 |
8 | PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")
9 | if PYDANTIC_V2:
10 | from pydantic._internal._model_construction import ModelMetaclass
11 | else:
12 | from pydantic.main import ModelMetaclass
13 |
14 | from enova.common.config import CONFIG # noqa
15 | from enova.common.constant import OrderBy # noqa
16 |
17 |
18 | class AllFields(ModelMetaclass):
19 | def __new__(cls, name, bases, namespaces, **kwargs):
20 | for field in namespaces:
21 | if not field.startswith("__"):
22 | namespaces[field] = Field(namespaces[field])
23 | return super().__new__(cls, name, bases, namespaces, **kwargs)
24 |
25 |
26 | class EmergingAIBaseModel(BaseModel):
27 | def dict(self, *args, **kwargs):
28 | return json.loads(self.model_dump_json()) if PYDANTIC_V2 else json.loads(self.json())
29 |
30 |
31 | class EmergingAIQueryRequestBaseModel(EmergingAIBaseModel):
32 | page: int = Field(default=1, ge=CONFIG.api["default_min_page"], le=CONFIG.api["default_max_page"])
33 | size: int = Field(default=10, ge=CONFIG.api["default_min_size"], le=CONFIG.api["default_max_size"])
34 | order_by: str | None = None
35 | order_type: OrderBy | None = None
36 | fuzzy: str | None = None
37 | start_time: str | None = None
38 | end_time: str | None = None
39 |
40 |
41 | class EmergingAIQueryResponseBaseModel(EmergingAIBaseModel):
42 | page: int
43 | size: int
44 | total_num: int
45 | total_page: int
46 | num: int
47 | data: List[Dict]
48 |
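49 | # AllFields sketch (DemoSLZ is hypothetical): the metaclass wraps plain class
50 | # attributes into pydantic Field defaults, so
51 | #
52 | # class DemoSLZ(BaseModel, metaclass=AllFields):
53 | #     name: str = "enova"
54 | #
55 | # behaves as if it had been written as name: str = Field("enova").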
--------------------------------------------------------------------------------
/front/src/hooks/useInitQueryRange.ts:
--------------------------------------------------------------------------------
1 | import { useExperimentStore } from '@/stores/experiment'
2 | import { useInstanceStore } from '@/stores/instance'
3 | import dayjs from 'dayjs'
4 | import utc from 'dayjs/plugin/utc'
5 | import { storeToRefs } from 'pinia'
6 |
7 | const getTestDuration = (duration: number, unit: string): number => {
8 | switch (unit) {
9 | case 'hour':
10 | return duration * 60 * 60
11 | case 'min':
12 | return duration * 60
13 | case 'sec':
14 | return Math.min(duration, 10)
15 | default:
16 | return 0
17 | }
18 | }
19 |
20 | const useInitQueryRange = () => {
21 | const { activeExperiment } = storeToRefs(useExperimentStore())
22 | const { chartTimeRange, searchTimePair } = storeToRefs(useInstanceStore())
23 | dayjs.extend(utc)
24 | let startTime = new Date()
25 | let endTime = new Date()
26 |
27 | if (activeExperiment.value != null) {
28 | startTime = new Date(dayjs.utc(activeExperiment.value.create_time).toDate())
29 | const { duration, duration_unit } = activeExperiment.value.test_spec
30 | const testDuration = getTestDuration(duration, duration_unit)
31 | startTime.setTime(startTime.getTime())
32 | endTime.setTime(Math.min(startTime.getTime() + (testDuration + 180) * 1000, Date.now()))
33 | } else {
34 | startTime.setTime(startTime.getTime() - 3600 * 1000)
35 | }
36 | const _start = dayjs(startTime).format('YYYY-MM-DD HH:mm:ss')
37 | const _end = dayjs(endTime).format('YYYY-MM-DD HH:mm:ss')
38 |
39 | chartTimeRange.value = [_start, _end]
40 | searchTimePair.value = [_start, _end]
41 | return { start: _start, end: _end }
42 | }
43 |
44 | export { useInitQueryRange }
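45 | // Usage sketch: views pull the computed window when rendering charts, e.g.
46 | //   const { start, end } = useInitQueryRange()
47 | // With an active experiment the window spans create_time to test duration + 180s, capped at now.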
--------------------------------------------------------------------------------
/front/src/components/Pagination.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
14 |
15 |
16 |
17 |
62 |
--------------------------------------------------------------------------------
/front/src/stores/experiment.ts:
--------------------------------------------------------------------------------
1 | import { defineStore } from 'pinia'
2 | import { getExperiment } from '@/api/instance'
3 | import type { ExperimentType } from './config'
4 | import dayjs from 'dayjs'
5 | import utc from 'dayjs/plugin/utc'
6 | interface ExperimentStoreState {
7 | testList: ExperimentType[]
8 | currentId: string
9 | drawerVisible: boolean
10 | }
11 |
12 | interface ExperimentRes {
13 | data: ExperimentType[]
14 | page: number
15 | size: number
16 | total_num: number
17 | total_page: number
18 | page_size: number
19 | }
20 |
21 | export const useExperimentStore = defineStore('experiment', {
22 | state: (): ExperimentStoreState => ({
23 | testList: [],
24 | currentId: '',
25 | drawerVisible: false
26 | }),
27 | getters: {
28 | activeExperiment: (state): ExperimentType | undefined => {
29 | return state.testList.find((item) => item.test_id === state.currentId) || undefined
30 | }
31 | },
32 | actions: {
33 | getTestList(params: string) {
34 | dayjs.extend(utc)
35 | return new Promise((resolve, reject) => {
36 | getExperiment(params)
37 | .then((res) => {
38 | this.testList =
39 | res.data.length > 0
40 | ? res.data.map((i: ExperimentType) => {
41 | return {
42 | ...i,
43 | create_time: dayjs.utc(i.create_time).toDate()
44 | }
45 | })
46 | : []
47 | resolve(res as unknown as ExperimentRes)
48 | })
49 | .catch((err) => {
50 | reject(err)
51 | })
52 | })
53 | }
54 | }
55 | })
56 |
--------------------------------------------------------------------------------
/escaler/pkg/httpserver/middleware/logger.go:
--------------------------------------------------------------------------------
1 | package middleware
2 |
3 | import (
4 | "bytes"
5 | "io/ioutil"
6 | "strings"
7 | "time"
8 |
9 | "github.com/Emerging-AI/ENOVA/escaler/pkg/logger"
10 |
11 | "github.com/gin-gonic/gin"
12 | )
13 |
14 | func RequestResponseLogger() gin.HandlerFunc {
15 | return func(c *gin.Context) {
16 | // only log requests under /api/enova/v1
17 | if !strings.Contains(c.Request.URL.Path, "/api/enova/v1") {
18 | c.Next()
19 | return
20 | }
21 |
22 | // read the request body (and restore it for downstream handlers)
23 | reqBody, _ := ioutil.ReadAll(c.Request.Body)
24 | c.Request.Body = ioutil.NopCloser(bytes.NewBuffer(reqBody))
25 |
26 | // capture the response body
27 | respWriter := &responseWriter{body: bytes.NewBufferString(""), ResponseWriter: c.Writer}
28 | c.Writer = respWriter
29 |
30 | // process the request
31 | c.Next()
32 |
33 | // log the request and the (truncated) response
34 | respStr := respWriter.body.String()
35 | if respStrLen := len(respStr); respStrLen > 1024 {
36 | respStr = respStr[:1024]
37 | }
38 |
39 | logger.Info("---------------------------------------------------------")
40 | logger.Infof("[INFO] [%s] %s %s %s\n%d %s\n",
41 | time.Now().Format("2006-01-02 15:04:05"),
42 | c.Request.Method, c.Request.URL.Path, string(reqBody),
43 | respWriter.status, respStr,
44 | )
45 | logger.Info("---------------------------------------------------------")
46 | }
47 | }
48 |
49 | type responseWriter struct {
50 | body *bytes.Buffer
51 | gin.ResponseWriter
52 | status int
53 | }
54 |
55 | func (w *responseWriter) Write(b []byte) (int, error) {
56 | w.body.Write(b)
57 | return w.ResponseWriter.Write(b)
58 | }
59 |
60 | func (w *responseWriter) WriteHeader(statusCode int) {
61 | w.status = statusCode
62 | w.ResponseWriter.WriteHeader(statusCode)
63 | }
64 |
--------------------------------------------------------------------------------
/front/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "enova-web",
3 | "version": "1.0.0",
4 | "private": true,
5 | "type": "module",
6 | "scripts": {
7 | "dev": "vite",
8 | "build": "run-p type-check \"build-only {@}\" --",
9 | "preview": "vite preview",
10 | "build-only": "vite build",
11 | "type-check": "vue-tsc --build --force",
12 | "lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs,.ts,.tsx,.cts,.mts --fix --ignore-path .gitignore",
13 | "format": "prettier --write src/"
14 | },
15 | "dependencies": {
16 | "@types/axios": "^0.14.0",
17 | "@vueuse/core": "^10.9.0",
18 | "axios": "^1.6.8",
19 | "dayjs": "^1.11.11",
20 | "echarts": "^5.5.0",
21 | "element-plus": "^2.6.3",
22 | "pinia": "^2.1.7",
23 | "vue": "^3.4.21",
24 | "vue-i18n": "^9.13.1",
25 | "vue-router": "^4.3.0"
26 | },
27 | "devDependencies": {
28 | "@rushstack/eslint-patch": "^1.8.0",
29 | "@tsconfig/node20": "^20.1.4",
30 | "@types/node": "^20.12.5",
31 | "@vitejs/plugin-vue": "^5.0.4",
32 | "@vue/eslint-config-prettier": "^9.0.0",
33 | "@vue/eslint-config-typescript": "^13.0.0",
34 | "@vue/tsconfig": "^0.5.1",
35 | "autoprefixer": "^10.4.19",
36 | "eslint": "^8.57.0",
37 | "eslint-plugin-vue": "^9.23.0",
38 | "npm-run-all2": "^6.1.2",
39 | "postcss": "^8.4.38",
40 | "prettier": "^3.2.5",
41 | "rollup-plugin-visualizer": "^5.12.0",
42 | "sass": "^1.75.0",
43 | "tailwindcss": "^3.4.3",
44 | "typescript": "~5.4.0",
45 | "unplugin-auto-import": "^0.17.5",
46 | "unplugin-vue-components": "^0.26.0",
47 | "vite": "^5.2.8",
48 | "vite-plugin-svg-icons": "^2.0.1",
49 | "vite-plugin-vue-devtools": "^7.0.25",
50 | "vue-tsc": "^2.0.11"
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/front/src/layout/index.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
39 |
--------------------------------------------------------------------------------
/enova/common/local.py:
--------------------------------------------------------------------------------
1 | import contextvars
2 | import functools
3 | import threading
4 |
5 |
6 | context_vars_dict = {}
7 |
8 |
9 | def set_contextvars(key, value):
10 | """"""
11 | if key not in context_vars_dict:
12 | context_vars_dict[key] = contextvars.ContextVar(key)
13 | context_vars_dict[key].set(value)
14 |
15 |
16 | def del_contextvars(key):
17 | """
18 | delete a context var; ContextVar has no clear(), so drop our reference to it
19 | """
20 | if key in context_vars_dict:
21 | context_vars_dict.pop(key, None)
22 |
23 |
24 | def get_contextvars(key, default=None):
25 | """
26 | get a context var's value, falling back to default
27 | """
28 | if key not in context_vars_dict:
29 | return default
30 | try:
31 | return context_vars_dict[key].get()
32 | except LookupError:
33 | return default
34 |
35 |
36 | def has_contextvars(key):
37 | """return True if the context var exists and currently holds a value"""
38 | return key in context_vars_dict and context_vars_dict[key].get(None) is not None
39 |
40 |
41 | _local = threading.local()
42 |
43 |
44 | def set_local_param(key, value):
45 | """
46 | mainly setup the custom vars of threads
47 | """
48 | setattr(_local, key, value)
49 |
50 |
51 | def del_local_param(key):
52 | """
53 | mainly delete the custom vars of threads
54 | """
55 | if hasattr(_local, key):
56 | delattr(_local, key)
57 |
58 |
59 | def get_local_param(key, default=None):
60 | return getattr(_local, key, default)
61 |
62 |
63 | def contextlocal_cache(func):
64 | @functools.wraps(func)
65 | def wrapper(*args, **kwargs):
66 | key = functools._make_key(args, kwargs, False)
67 | key = f"{func.__name__}_{key}"
68 | if has_contextvars(key):
69 | return get_contextvars(key)
70 | ret = func(*args, **kwargs)
71 | set_contextvars(key, ret)
72 | return ret
73 |
74 | return wrapper
75 |
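76 | # Usage sketch (expensive_lookup is hypothetical): the decorator caches one result
77 | # per argument key in the current context, so repeated calls are served from cache.
78 | #
79 | # @contextlocal_cache
80 | # def expensive_lookup(name):
81 | #     return name.upper()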
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/tempo/tempo.yaml:
--------------------------------------------------------------------------------
1 | stream_over_http_enabled: true
2 | server:
3 | http_listen_port: 3200
4 | log_level: info
5 |
6 | query_frontend:
7 | search:
8 | duration_slo: 5s
9 | throughput_bytes_slo: 1.073741824e+09
10 | trace_by_id:
11 | duration_slo: 5s
12 |
13 | distributor:
14 | receivers:
15 | otlp:
16 | protocols:
17 | http:
18 | grpc:
19 |
20 | ingester:
21 | max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally
22 |
23 | compactor:
24 | compaction:
25 | block_retention: 1h # overall Tempo trace retention. set for demo purposes
26 |
27 | metrics_generator:
28 | processor:
29 | local_blocks:
30 | filter_server_spans: false
31 | span_metrics:
32 | dimensions:
33 | - http.method
34 | - http.target
35 | - http.status_code
36 | - service.version
37 | service_graphs:
38 | dimensions:
39 | - http.method
40 | - http.target
41 | - http.status_code
42 | - service.version
43 | registry:
44 | external_labels:
45 | source: tempo
46 | cluster: docker-compose
47 | storage:
48 | path: /tmp/tempo/generator/wal
49 | remote_write:
50 | - url: http://prometheus:9090/api/v1/write
51 | send_exemplars: true
52 | traces_storage:
53 | path: /tmp/tempo/generator/traces
54 |
55 | storage:
56 | trace:
57 | backend: local # backend configuration to use
58 | wal:
59 | path: /tmp/tempo/wal # where to store the wal locally
60 | local:
61 | path: /tmp/tempo/blocks
62 |
63 | overrides:
64 | defaults:
65 | metrics_generator:
66 | processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator
67 |
--------------------------------------------------------------------------------
/escaler/pkg/logger/logger.go:
--------------------------------------------------------------------------------
1 | package logger
2 |
3 | import (
4 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
5 | "github.com/sirupsen/logrus"
6 | )
7 |
8 | var logger *logrus.Logger
9 |
10 | func init() {
11 | logger = GetLogger()
12 | }
13 |
14 | func GetLogger() *logrus.Logger {
15 | config := config.GetEConfig()
16 | logger := logrus.New()
17 |
18 | // set the log level on this logger instance (not on the logrus global)
19 | switch config.Logger.Level {
20 | case "panic":
21 | logger.SetLevel(logrus.PanicLevel)
22 | case "fatal":
23 | logger.SetLevel(logrus.FatalLevel)
24 | case "error":
25 | logger.SetLevel(logrus.ErrorLevel)
26 | case "warn", "warning":
27 | logger.SetLevel(logrus.WarnLevel)
28 | case "info":
29 | logger.SetLevel(logrus.InfoLevel)
30 | case "debug":
31 | logger.SetLevel(logrus.DebugLevel)
32 | case "trace":
33 | logger.SetLevel(logrus.TraceLevel)
34 | default:
35 | logger.Warn("Unknown log level: ", config.Logger.Level)
36 | logger.SetLevel(logrus.InfoLevel) // fall back to the default log level
37 | }
38 | 
39 | // set the log formatter
40 | logger.SetFormatter(&logrus.TextFormatter{
41 | TimestampFormat: "2006-01-02 15:04:05",
42 | })
43 | return logger
44 | }
45 |
46 | func Info(args ...interface{}) {
47 | logger.Infoln(args...)
48 | }
49 |
50 | func Infof(format string, args ...interface{}) {
51 | logger.Infof(format, args...)
52 | }
53 |
54 | func Debug(args ...interface{}) {
55 | logger.Debugln(args...)
56 | }
57 |
58 | func Debugf(format string, args ...interface{}) {
59 | logger.Debugf(format, args...)
60 | }
61 |
62 | func Fatal(args ...interface{}) {
63 | logger.Fatalln(args...)
64 | }
65 |
66 | func Warn(args ...interface{}) {
67 | logger.Warnln(args...)
68 | }
69 |
70 | func Error(args ...interface{}) {
71 | logger.Errorln(args...)
72 | }
73 |
74 | func Errorf(format string, args ...interface{}) {
75 | logger.Errorf(format, args...)
76 | }
77 |
78 | func Panic(args ...interface{}) {
79 | logger.Panicln(args...)
80 | }
81 |
--------------------------------------------------------------------------------
/escaler/pkg/httpserver/middleware/response.go:
--------------------------------------------------------------------------------
1 | package middleware
2 |
3 | import (
4 | "bytes"
5 | "encoding/json"
6 | "net/http"
7 |
8 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
9 |
10 | "github.com/gin-gonic/gin"
11 | )
12 |
13 | type EApiResponse struct {
14 | Message string `json:"message"`
15 | Code int `json:"code"`
16 | Result json.RawMessage `json:"result"`
17 | TraceId string `json:"trace_id"`
18 | Version string `json:"version"`
19 | }
20 |
21 | type responseBodyWriter struct {
22 | gin.ResponseWriter
23 | body *bytes.Buffer
24 | }
25 |
26 | func (w responseBodyWriter) Write(b []byte) (int, error) {
27 | w.body.Write(b)
28 | return w.ResponseWriter.Write(b)
29 | }
30 |
31 | func (w responseBodyWriter) WriteString(s string) (int, error) {
32 | w.body.WriteString(s)
33 | return w.ResponseWriter.WriteString(s)
34 | }
35 |
36 | func ResponseMiddleware() gin.HandlerFunc {
37 | return func(c *gin.Context) {
38 |
39 | // invoke the next middleware / route handler
40 | c.Next()
41 |
42 | // an error result is returned as-is
43 | if errResult, ok := c.Get("ErrorResult"); ok {
44 | c.JSON(http.StatusOK, errResult)
45 | return
46 | }
47 |
48 | // marshal the successful result
49 | var jsonResult json.RawMessage
50 | result, ok := c.Get("Data")
51 | if !ok {
52 | return
53 | }
54 |
55 | jsonResult, err := json.Marshal(result)
56 | if err != nil {
57 | c.AbortWithStatusJSON(http.StatusInternalServerError, EApiResponse{
58 | Message: "Internal error",
59 | Code: 500,
60 | Result: jsonResult,
61 | TraceId: GenerateTraceId(),
62 | Version: config.GetEConfig().Detector.Api.Version,
63 | })
64 | return
65 | }
66 |
67 | c.JSON(http.StatusOK, EApiResponse{
68 | Message: "ok",
69 | Code: 0,
70 | Result: jsonResult,
71 | TraceId: GenerateTraceId(),
72 | Version: config.GetEConfig().Detector.Api.Version,
73 | })
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/enova/common/constant.py:
--------------------------------------------------------------------------------
1 | from enum import Enum as BaseEnum
2 |
3 |
4 | class Enum(BaseEnum):
5 | @classmethod
6 | def values(cls):
7 | return list(e.value for e in cls.__members__.values())
8 |
9 |
10 | class ServingBackend(Enum):
11 | HF = "hf"
12 | VLLM = "vllm"
13 | SGLANG = "sglang"
14 |
15 |
16 | class HttpMethod(Enum):
17 | GET = "get"
18 | POST = "post"
19 | PUT = "put"
20 | DELETE = "delete"
21 |
22 | @classmethod
23 | def methods_with_body(cls):
24 | return [cls.POST.value, cls.PUT.value]
25 |
26 |
27 | class OrderBy(Enum):
28 | ASC = "asc"
29 | DESC = "desc"
30 |
31 |
32 | JSON_RESPONSE_HEADER = "application/json"
33 |
34 |
35 | # --- server scope ---
36 | class ApiServerType(Enum):
37 | ENOVA_ALGO = "enova_algo"
38 | ENOVA_APP = "enova_app"
39 |
40 |
41 | class DeployMode(Enum):
42 | COMPOSE = "compose"
43 | LOCAL = "local"
44 |
45 |
46 | class TrafficDistributionType(Enum):
47 | GAUSSIAN = "gaussian"
48 | POISSON = "poisson"
49 |
50 |
51 | class DurationUnitType(Enum):
52 | SECOND = "sec"
53 | MINUTE = "min"
54 | HOUR = "hour"
55 |
56 |
57 | # --- db_model scope ---
58 | class DeployStatus(Enum):
59 | UNKNOWN = "unknown"
60 | PENDING = "pending"
61 | RUNNING = "running"
62 | FAILED = "failed"
63 |     FINISHED = "finished"
64 |
65 |
66 | class TestStatus(Enum):
67 | UNKNOWN = "unknown"
68 | INIT = "init"
69 | SUCCESS = "success"
70 | FAILED = "failed"
71 | RUNNING = "running"
72 | FINISHED = "finished"
73 |
74 |
75 | class ServeStatus(Enum):
76 | UNKNOWN = "unknown"
77 | OFF_LINE = "off_line"
78 | NORMAL = "normal"
79 | ABNORMAL = "abnormal"
80 |
81 |
82 | class Distribution(Enum):
83 | NORMAL = "normal"
84 | POISSON = "poisson"
85 |
86 |
87 | class VllmMode(Enum):
88 | NORMAL = "normal"
89 | OPENAI = "openai"
90 |
--------------------------------------------------------------------------------
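A quick illustration of how the `Enum.values()` helper and `HttpMethod.methods_with_body()` defined above behave; a minimal sketch, assuming the `enova` package is importable:

```python
from enova.common.constant import HttpMethod, ServingBackend

# Enum.values() flattens the members into their raw string values
print(ServingBackend.values())         # ['hf', 'vllm', 'sglang']

# only POST and PUT carry a request body
print(HttpMethod.methods_with_body())  # ['post', 'put']
```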
/front/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { fileURLToPath, URL } from 'node:url'
2 | import { defineConfig } from 'vite'
3 | import vue from '@vitejs/plugin-vue'
4 | import VueDevTools from 'vite-plugin-vue-devtools'
5 | import AutoImport from 'unplugin-auto-import/vite'
6 | import Components from 'unplugin-vue-components/vite'
7 | import { ElementPlusResolver } from 'unplugin-vue-components/resolvers'
8 | import { createSvgIconsPlugin } from 'vite-plugin-svg-icons'
9 | import path from 'path'
10 | // import { visualizer } from 'rollup-plugin-visualizer'
11 |
12 | export default defineConfig({
13 | build: {
14 | outDir: './dist',
15 | rollupOptions: {
16 | plugins: [
17 | // visualizer({
18 | // open: true,
19 | // gzipSize: true
20 | // })
21 | ],
22 | output: {
23 | chunkFileNames: 'static/js/[name]-[hash].js',
24 | entryFileNames: 'static/js/[name]-[hash].js',
25 | assetFileNames: 'static/[ext]/[name]-[hash].[ext]',
26 | manualChunks(id) {
27 | if (id.includes('node_modules')) {
28 | return id.toString().split('node_modules/')[1].split('/')[0].toString()
29 | }
30 | }
31 | }
32 | }
33 | },
34 | css: {
35 | preprocessorOptions: {
36 | scss: {
37 | additionalData: `@use "~/styles/element/index.scss" as *;`
38 | }
39 | }
40 | },
41 | plugins: [
42 | vue(),
43 | VueDevTools(),
44 | AutoImport({
45 | resolvers: [ElementPlusResolver({ importStyle: 'sass' })]
46 | }),
47 | Components({
48 | resolvers: [ElementPlusResolver({ importStyle: 'sass' })]
49 | }),
50 | createSvgIconsPlugin({
51 | iconDirs: [path.resolve(process.cwd(), 'src/assets/svg')],
52 | symbolId: 'icon-[name]'
53 | })
54 | ],
55 | resolve: {
56 | alias: {
57 | '@': fileURLToPath(new URL('./src', import.meta.url)),
58 | '~/': `${path.resolve(__dirname, 'src')}/`
59 | }
60 | }
61 | })
62 |
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from opentelemetry import metrics
4 | from opentelemetry import trace
5 | from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
6 | from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
7 | from opentelemetry.sdk.metrics import MeterProvider
8 | from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
9 | from opentelemetry.sdk.resources import Resource
10 | from opentelemetry.sdk.trace import TracerProvider
11 | from opentelemetry.sdk.trace.export import BatchSpanProcessor
12 | from opentelemetry.semconv.resource import ResourceAttributes
13 |
14 |
15 | def start(otlp_exporter_endpoint: str = "localhost:4317", service_name: str = __name__):
16 | otlp_exporter = OTLPSpanExporter(
17 | otlp_exporter_endpoint,
18 | insecure=True,
19 | )
20 | resource = Resource(
21 | attributes={
22 | ResourceAttributes.SERVICE_NAME: service_name,
23 | }
24 | )
25 | provider = TracerProvider(resource=resource)
26 | provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
27 | trace.set_tracer_provider(provider)
28 |
29 | exporter = OTLPMetricExporter(endpoint=otlp_exporter_endpoint, insecure=True)
30 | metric_reader = PeriodicExportingMetricReader(exporter, export_interval_millis=5000)
31 |
32 | provider = MeterProvider(metric_readers=[metric_reader], resource=resource)
33 |
34 | metrics.set_meter_provider(provider)
35 |
36 | from .instrumentation import EnovaFastAPIInstrumentor, EnovaVllmInstrumentor
37 |
38 | EnovaFastAPIInstrumentor().instrument()
39 | EnovaVllmInstrumentor().instrument(service_name)
40 |
41 | from .metrics_adapter import VLLMLogMetricsAdapter
42 |
43 | metrics_log_handler = VLLMLogMetricsAdapter()
44 | vllm_logger = logging.getLogger("vllm.engine.metrics")
45 | vllm_logger.addHandler(metrics_log_handler)
46 |
--------------------------------------------------------------------------------
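Once `start()` has installed the global tracer and meter providers, plain OpenTelemetry API calls pick them up; a minimal sketch (the span and counter names below are illustrative, not part of the library):

```python
from opentelemetry import metrics, trace

from enova.llmo import start

start(otlp_exporter_endpoint="localhost:4317", service_name="demo")

# spans created through the global tracer are exported in batches to the collector
tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("demo-span"):
    pass

# metrics are pushed every 5 s by the PeriodicExportingMetricReader configured above
counter = metrics.get_meter(__name__).create_counter("demo.requests")
counter.add(1)
```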
/enova/app/server.py:
--------------------------------------------------------------------------------
1 | from fastapi import HTTPException
2 | from pathlib import Path
3 |
4 | from fastapi import Request
5 | from fastapi.responses import HTMLResponse
6 | from fastapi.staticfiles import StaticFiles
7 | import sqlalchemy as sa
8 |
9 | from enova.common.config import CONFIG
10 | from enova.common.logger import LOGGER
11 | from enova.common.constant import ApiServerType
12 | from enova.common.utils import get_web_static_path
13 | from enova.database.relation.orm.base import BaseSqlite
14 | from enova.database.relation.transaction.session import get_session
15 | from enova.server.server import ApiServer
16 |
17 |
18 | WEB_STATIC_PATH = get_web_static_path()
19 |
20 |
21 | async def redirect_all_requests_to_frontend(request: Request, exc: HTTPException):
22 | # TODO: need to modify
23 | if WEB_STATIC_PATH:
24 |         return HTMLResponse((Path(WEB_STATIC_PATH) / "index.html").read_text())
25 | return "Welcome to enova"
26 |
27 |
28 | def init_db():
29 | with get_session() as session:
30 | # TODO: allow migrate new tables
31 | insp = sa.inspect(session.db_engine.engine)
32 | if not insp.get_table_names():
33 | BaseSqlite.metadata.create_all(bind=session.db_engine.engine)
34 | session.commit()
35 |
36 | insp = sa.inspect(session.db_engine.engine)
37 | LOGGER.info(insp.get_table_names())
38 |
39 |
40 | def get_app_api_server(api_server_type=ApiServerType.ENOVA_APP.value):
41 | api_config = getattr(CONFIG, api_server_type)
42 |
43 | CONFIG.api.update(api_config)
44 |
45 | api_server = ApiServer(api_config)
46 |
47 | # mount vuejs dist
48 | api_server.app.mount(
49 | f"{CONFIG.api['url_prefix']}/",
50 | StaticFiles(directory=WEB_STATIC_PATH, html=True),
51 | name="static",
52 | )
53 | api_server.app.add_exception_handler(404, redirect_all_requests_to_frontend)
54 |
55 |     # database init
56 | init_db()
57 |
58 | return api_server
59 |
--------------------------------------------------------------------------------
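For reference, a minimal way to serve the application built above, assuming the default `enova_app` config and that `ApiServer` exposes its FastAPI instance as `.app` (as the mounting code above does); host and port are illustrative:

```python
import uvicorn

from enova.app.server import get_app_api_server

# builds the FastAPI app, mounts the front-end dist and initializes the sqlite schema
api_server = get_app_api_server()
uvicorn.run(api_server.app, host="127.0.0.1", port=8182)
```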
/enova/template/deployment/docker-compose/webui-nginx/nginx.conf:
--------------------------------------------------------------------------------
1 | worker_processes 8;
2 | worker_rlimit_nofile 65535;
3 |
4 | events {
5 | worker_connections 20480;
6 | }
7 |
8 |
9 | http {
10 | server {
11 | listen 8501;
12 | listen [::]:8501;
13 |
14 |
15 | location /stream {
16 | proxy_pass http://enova-serving:8501;
17 | proxy_http_version 1.1;
18 | proxy_set_header Upgrade $http_upgrade;
19 | proxy_set_header Connection "Upgrade";
20 | proxy_set_header Host $host;
21 | }
22 |
23 |
24 | location ^~ /static {
25 | proxy_pass http://enova-serving:8501/static/;
26 | }
27 |
28 | location ^~ /healthz {
29 | proxy_pass http://enova-serving:8501/healthz;
30 | }
31 |
32 | location ^~ /vendor {
33 | proxy_pass http://enova-serving:8501/vendor;
34 | }
35 |
36 | location = /_stcore/health {
37 | proxy_pass http://enova-serving:8501/_stcore/health;
38 | }
39 |
40 | location = /_stcore/allowed-message-origins {
41 | proxy_pass http://enova-serving:8501/_stcore/allowed-message-origins;
42 | }
43 |
44 | location = /_stcore/stream {
45 | proxy_pass http://enova-serving:8501/_stcore/stream;
46 | proxy_http_version 1.1;
47 | proxy_redirect off;
48 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
49 | proxy_set_header Host $http_host;
50 | proxy_set_header Upgrade $http_upgrade;
51 | proxy_set_header Connection "upgrade";
52 | proxy_read_timeout 86400;
53 | }
54 |
55 |
56 | location / {
57 | proxy_pass http://enova-serving:8501;
58 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
59 | proxy_set_header Host $http_host;
60 | proxy_redirect off;
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/README.md:
--------------------------------------------------------------------------------
1 | ## Usage
2 | Install the whl package
3 | ```bash
4 | pip install enova_instrumentation_llmo-0.1.0-py3-none-any.whl
5 | ```
6 | Configure OpenTelemetry and enable the instrumentation in the vLLM application code
7 | ```python
8 | 
9 | # enable the instrumentation
10 | from enova.llmo import start
11 | # point at the OTel collector endpoint and set the service name
12 | start(otlp_exporter_endpoint="localhost:4317", service_name="service_name")
13 | 
14 | ####### original application code follows #######
15 | ```
16 | 
17 | ## Metrics
18 | - `avg_prompt_throughput` prompt ingestion rate, in tokens/s
19 | - `avg_generation_throughput` generation rate, in tokens/s
20 | - `running_requests` number of requests currently running
21 | - `swapped_requests` number of requests currently swapped
22 | - `pending_requests` number of requests currently pending
23 | - `gpu_kv_cache_usage` GPU KV cache utilization
24 | - `cpu_kv_cache_usage` CPU KV cache utilization
25 | - `generated_tokens` number of generated tokens
26 | - `llm_engine_init_config` engine startup parameters, with the following attributes
27 | - `model`
28 | - `tokenizer`
29 | - `tokenizer_mode`
30 | - `revision`
31 | - `tokenizer_revision`
32 | - `trust_remote_code`
33 | - `dtype`
34 | - `max_seq_len`
35 | - `download_dir`
36 | - `load_format`
37 | - `tensor_parallel_size`
38 | - `disable_custom_all_reduce`
39 | - `quantization`
40 | - `enforce_eager`
41 | - `kv_cache_dtype`
42 | - `seed`
43 | - `max_num_batched_tokens`
44 | - `max_num_seqs`
45 | - `max_paddings`
46 | - `pipeline_parallel_size`
47 | - `worker_use_ray`
48 | - `max_parallel_loading_workers`
49 | - `http.server.active_requests` number of HTTP requests FastAPI is currently handling
50 | - `http.server.duration` FastAPI server-side request processing time
51 | - `http.server.response.size` size of FastAPI HTTP response messages
52 | - `http.server.request.size` size of FastAPI HTTP request messages
53 |
54 |
55 | ## Trace spans
56 | - `POST /generate` the /generate request
57 | - `POST /generate prompt` carries the `prompt` attribute
58 | - `ModelRunner.execute_model` one model execution, i.e. one token-generation step
59 | - `CUDAGraphRunner.forward` the CUDA Graph forward pass, called inside `ModelRunner.execute_model`
60 | - `ChatGLMForCausalLM.forward` forward pass of the ChatGLM model
61 | - `LlamaForCausalLM.forward` forward pass of the Llama model
62 |
63 |
--------------------------------------------------------------------------------
/enova/common/encoder.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import numpy as np
3 | import json
4 |
5 |
6 | class NumpyEncoder(json.JSONEncoder):
7 | """Custom encoder for numpy data types"""
8 |
9 | def default(self, obj):
10 | if isinstance(
11 | obj,
12 | (
13 | np.int_,
14 | np.intc,
15 | np.intp,
16 | np.int8,
17 | np.int16,
18 | np.int32,
19 | np.int64,
20 | np.uint8,
21 | np.uint16,
22 | np.uint32,
23 | np.uint64,
24 | ),
25 | ):
26 | return int(obj)
27 |
28 | elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)):
29 | return float(obj)
30 |
31 | elif isinstance(obj, (np.complex_, np.complex64, np.complex128)):
32 | return {"real": obj.real, "imag": obj.imag}
33 |
34 | elif isinstance(obj, (np.ndarray,)):
35 | return obj.tolist()
36 |
37 | elif isinstance(obj, (np.bool_)):
38 | return bool(obj)
39 |
40 | elif isinstance(obj, (np.void)):
41 | return None
42 |
43 | return json.JSONEncoder.default(self, obj)
44 |
45 |
46 | def numpy_dumps(v, *, default):
47 | try:
48 | return json.dumps(v, cls=NumpyEncoder)
49 | except Exception:
50 | pass
51 |
52 |     # fall back to the caller-supplied default encoder when NumpyEncoder cannot handle the value
53 | return json.dumps(v, default=default)
54 |
55 |
56 | def json_numpy_obj_hook(dct):
57 | """
58 | Decodes a previously encoded numpy ndarray
59 | with proper shape and dtype
60 | :param dct: (dict) json encoded ndarray
61 | :return: (ndarray) if input was an encoded ndarray
62 | """
63 | if isinstance(dct, dict) and "__ndarray__" in dct:
64 | data = base64.b64decode(dct["__ndarray__"])
65 | return np.frombuffer(data, dct["dtype"]).reshape(dct["shape"])
66 | return dct
67 |
--------------------------------------------------------------------------------
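A short round trip through the helpers above. Note that `NumpyEncoder` serializes arrays via `tolist()`, while `json_numpy_obj_hook` expects the base64 `__ndarray__` envelope, so the two are not strict inverses; the envelope below is built by hand purely for illustration:

```python
import base64
import json

import numpy as np

from enova.common.encoder import NumpyEncoder, json_numpy_obj_hook

# numpy scalars and arrays become plain JSON types
payload = {"count": np.int64(3), "scores": np.array([0.1, 0.2])}
print(json.dumps(payload, cls=NumpyEncoder))  # {"count": 3, "scores": [0.1, 0.2]}

# the hook restores an ndarray from the __ndarray__ envelope
envelope = {
    "__ndarray__": base64.b64encode(np.arange(4, dtype=np.int32).tobytes()).decode(),
    "dtype": "int32",
    "shape": [4],
}
print(json_numpy_obj_hook(envelope))  # [0 1 2 3]
```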
/escaler/pkg/redis/redis.go:
--------------------------------------------------------------------------------
1 | package redis
2 |
3 | import (
4 | "context"
5 | "time"
6 |
7 | "github.com/go-redis/redis/v8"
8 | )
9 |
10 | type RedisClient struct {
11 | Ctx context.Context
12 | Redis *redis.Client
13 | }
14 |
15 | func (r *RedisClient) SetList(key string, values []string) error {
16 | _, err := r.DelList(key)
17 | if err != nil {
18 | return err
19 | }
20 | for _, value := range values {
21 | if err := r.Redis.RPush(r.Ctx, key, value).Err(); err != nil {
22 | return err
23 | }
24 | }
25 | return nil
26 | }
27 |
28 | func (r *RedisClient) GetList(key string) ([]string, error) {
29 | storedStringArray, err := r.Redis.LRange(r.Ctx, key, 0, -1).Result()
30 | if err != nil {
31 | return storedStringArray, err
32 | }
33 | return storedStringArray, nil
34 | }
35 |
36 | func (r *RedisClient) DelList(key string) (int64, error) {
37 | return r.Redis.Del(r.Ctx, key).Result()
38 | }
39 |
40 | func (r *RedisClient) AppendList(key string, value string) error {
41 | if err := r.Redis.LPush(r.Ctx, key, value).Err(); err != nil {
42 | return err
43 | }
44 | return nil
45 | }
46 |
47 | func (r *RedisClient) AppendListWithLimitSize(key string, value string, limit int64) error {
48 | if err := r.AppendList(key, value); err != nil {
49 | return err
50 | }
51 | 	if err := r.Redis.LTrim(r.Ctx, key, 0, limit-1).Err(); err != nil { // LTrim bounds are inclusive, so keep at most limit elements
52 | return err
53 | }
54 | return nil
55 | }
56 |
57 | func (r *RedisClient) Set(key string, value string, timeout int64) {
58 | 	r.Redis.Set(r.Ctx, key, value, time.Duration(timeout)*time.Microsecond)
59 | }
60 |
61 | func (r *RedisClient) Get(key string) string {
62 | result := r.Redis.Get(r.Ctx, key)
63 | if result.Err() != nil {
64 | return ""
65 | }
66 | return result.Val()
67 | }
68 |
69 | func NewRedisClient(addr string, passwd string, db int) *RedisClient {
70 | ctx := context.Background()
71 |
72 | rdb := redis.NewClient(&redis.Options{
73 | Addr: addr,
74 | Password: passwd,
75 | DB: db,
76 | })
77 |
78 | return &RedisClient{
79 | Ctx: ctx,
80 | Redis: rdb,
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | !front/src/components/instance
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # pyenv
77 | .python-version
78 |
79 | # celery beat schedule file
80 | celerybeat-schedule
81 |
82 | # SageMath parsed files
83 | *.sage.py
84 |
85 | # Environments
86 | .env
87 | .venv
88 | env/
89 | venv/
90 | ENV/
91 | env.bak/
92 | venv.bak/
93 |
94 | # Spyder project settings
95 | .spyderproject
96 | .spyproject
97 |
98 | # Rope project settings
99 | .ropeproject
100 |
101 | # mkdocs documentation
102 | /site
103 |
104 | # mypy
105 | .mypy_cache/
106 | .vscode
107 | .idea
108 | .history
109 |
110 | # macos
111 | .DS_Store
112 | src/golang/bin
113 | src/golang/pkg/mod
114 | src/golang/pkg/sumdb
115 | src/golang/dist
116 |
117 | nohup*
118 |
119 | # dependencies
120 | dependencies/
121 | enova/template/deployment/docker-compose/bin/
122 |
--------------------------------------------------------------------------------
/front/src/components/Language.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | {{ selectedLang }}
7 |
8 |
9 |
10 |
11 | {{ item.name }}
19 |
20 |
21 |
22 |
23 |
65 |
73 |
--------------------------------------------------------------------------------
/enova/serving/backend/transformers.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | from pydoc import locate
3 | from fastapi import FastAPI, APIRouter
4 | from fastapi.middleware.cors import CORSMiddleware
5 | from enova.common.config import CONFIG
6 | from enova.serving.middlewares.base import EnovaAIMultiMiddlewares
7 | from enova.serving.backend.base import BaseBackend
8 | from enova.serving.backend.hf.handler import HFText2TextHandler
9 |
10 |
11 | @dataclasses.dataclass
12 | class TransformersBackend(BaseBackend):
13 |
14 | def __post_init__(self):
15 | self.hf = HFText2TextHandler()
16 |
17 | def _create_apiserver(self):
18 | self._create_app()
19 | self._init_middlewares()
20 | self._init_routers()
21 |
22 | def _init_middlewares(self):
23 | """"""
24 | middlewares = EnovaAIMultiMiddlewares()
25 | for middleware_cls_name in CONFIG.api.get("middleware_names", []):
26 | middleware_ins = locate(middleware_cls_name)()
27 | middlewares.register(middleware_ins)
28 | self.app.middleware("http")(middlewares.process)
29 | self.app.add_middleware(
30 | CORSMiddleware,
31 | allow_origins=["*"],
32 | allow_credentials=True,
33 | allow_methods=["*"],
34 | allow_headers=["*"],
35 | )
36 |
37 | def _init_exception_handler(self):
38 | """"""
39 |
40 | def _init_routers(self):
41 | """
42 | according task to add route, such as openai
43 | """
44 | self.api_router = APIRouter(
45 | prefix="",
46 | dependencies=[],
47 | )
48 |
49 | @self.app.get("/healthz", include_in_schema=False)
50 | async def healthz():
51 | return {"status": "ok"}
52 |
53 | self.register_serving_api()
54 |
55 | def _create_app(self):
56 | """"""
57 | self.app = FastAPI(
58 | title=self.name,
59 | description=(self.__doc__ if self.__doc__ else f"Enova {self.name}"),
60 | )
61 |
62 | def register_serving_api(self):
63 | """
64 | register_api from serving
65 | """
66 | self.hf.register_api_router(self.api_router)
67 | self.app.include_router(self.api_router)
68 |
--------------------------------------------------------------------------------
/front/src/stores/instance.ts:
--------------------------------------------------------------------------------
1 | import { defineStore } from 'pinia'
2 | import type { InstanceType } from './config'
3 | import { getServing } from '@/api/instance'
4 | interface instanceStoreState {
5 | instanceList: InstanceType[]
6 | currentId: string
7 | chartTimeRange: string[]
8 | tableLoading: boolean
9 | searchTimePair: string[]
10 | }
11 | interface chartQueryParams {
12 | start: string | number
13 | end: string | number
14 | step: string | number
15 | }
16 |
17 | export const useInstanceStore = defineStore('instance', {
18 | state: (): instanceStoreState => ({
19 | instanceList: [],
20 | currentId: '',
21 | chartTimeRange: [],
22 | tableLoading: false,
23 | searchTimePair: []
24 | }),
25 | getters: {
26 | activeInstance(): InstanceType | undefined {
27 | return this.instanceList.find((item: InstanceType) => item.instance_id === this.currentId)
28 | },
29 |     instanceNameMap(): Map<string, string> {
30 | const res = new Map()
31 | this.instanceList.forEach((item: InstanceType) => {
32 | res.set(item.instance_id, item.instance_name)
33 | })
34 | return res
35 | },
36 | chartQuery(): chartQueryParams {
37 | const [start, end] = this.chartTimeRange
38 | const _start = start ? Math.floor(new Date(start).getTime() / 1000).toFixed(3) : ''
39 | const _end = end ? Math.floor(new Date(end).getTime() / 1000).toFixed(3) : ''
40 | return {
41 | start: _start,
42 | end: _end,
43 | step: '15s'
44 | }
45 | },
46 | activeServingId(): string {
47 | return this.activeInstance != null ? this.activeInstance.serving_id : this.instanceList[0]?.serving_id ?? ''
48 | },
49 | activeServingJob(): string {
50 | return this.activeInstance != null ? this.activeInstance.startup_args.exported_job : this.instanceList[0]?.startup_args.exported_job ?? ''
51 | },
52 | },
53 | actions: {
54 | getInstanceList(): void {
55 | this.tableLoading = true
56 | getServing().then((res) => {
57 | this.instanceList = res.data
58 | }).catch((err) => {
59 | console.error(err)
60 | }).finally(() => {
61 | this.tableLoading = false
62 | })
63 | }
64 | }
65 | })
66 |
--------------------------------------------------------------------------------
/enova/entry/command/mon.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import sys
3 | import click
4 |
5 | from enova.common.cli_helper import ArgumentHelper, DockerComposeHeler
6 | from enova.common.config import CONFIG
7 | from enova.common.logger import LOGGER
8 |
9 |
10 | class EnovaMonitor:
11 | def __init__(self) -> None:
12 | self.docker_services = [
13 | "dcgm-exporter",
14 | "grafana",
15 | "otel-collector",
16 | "prometheus",
17 | "tempo",
18 | "enova-escaler",
19 | "enova-algo",
20 | ] # start up by order
21 | self._docker_compose = DockerComposeHeler()
22 |
23 | def _run_by_compose(self):
24 | for service in self.docker_services:
25 | options = {}
26 | self._docker_compose.update_service_options(service, options)
27 | self._docker_compose.startup_service(service, is_daemon=True)
28 |
29 | def run(self, **kwargs):
30 | args_helper = ArgumentHelper(self, sys._getframe())
31 | CONFIG.update_config(args_helper.args_map)
32 |
33 | self._run_by_compose()
34 |
35 | def _stop_by_compose(self):
36 | pass
37 |
38 | def stop(self):
39 |         # tear down all the monitor services with `docker compose down`
40 | 
41 |
42 | result = subprocess.run(
43 | [self._docker_compose.excu, "-f", self._docker_compose.compose_file, "down"],
44 | capture_output=True,
45 | text=True,
46 | )
47 |         if result.returncode == 0:
48 |             LOGGER.info("llmo monitors stopped successfully")
49 |         else:
50 |             LOGGER.error(f"llmo monitors stop failed: {result.stderr}")
51 |
52 |
53 | pass_enova_monitor = click.make_pass_decorator(EnovaMonitor)
54 |
55 |
56 | @click.group(name="mon")
57 | @click.pass_context
58 | def mon_cli(ctx):
59 | """
60 | Run the monitors of LLM server
61 | """
62 | ctx.obj = EnovaMonitor()
63 |
64 |
65 | @mon_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"])
66 | @pass_enova_monitor
67 | @click.pass_context
68 | def mon_run(ctx, enova_monitor: EnovaMonitor):
69 | enova_monitor.run()
70 |
71 |
72 | @mon_cli.command(name="stop")
73 | @pass_enova_monitor
74 | @click.pass_context
75 | def mon_stop(ctx, enova_monitor: EnovaMonitor):
76 | enova_monitor.stop()
77 |
--------------------------------------------------------------------------------
/front/src/layout/sidebar/index.vue:
--------------------------------------------------------------------------------
1 |
2 |
7 |
25 |
26 |
27 |
28 |
66 |
67 |
80 |
--------------------------------------------------------------------------------
/enova/server/middleware/response.py:
--------------------------------------------------------------------------------
1 | import rapidjson
2 | from fastapi import Request, status
3 |
4 | from fastapi.responses import JSONResponse, StreamingResponse
5 | from enova.common.constant import JSON_RESPONSE_HEADER
6 | from enova.common.g_vars import get_traceid
7 | from enova.server.middleware.base import BaseMiddleware
8 |
9 |
10 | class ResponseMiddleware(BaseMiddleware):
11 |
12 | async def _process_response(self, request: Request, response):
13 | """"""
14 | if request.url.path in [
15 | self.api_config["url_prefix"] + "/docs",
16 | self.api_config["url_prefix"] + "/redoc",
17 | self.api_config["url_prefix"] + "/openapi.json",
18 | ] or request.url.path.startswith(f"{self.api_config['url_prefix']}/admin"):
19 | return response
20 | trace_id = get_traceid()
21 | if isinstance(response, StreamingResponse) and response.headers.get("content-type") == JSON_RESPONSE_HEADER:
22 | response_body = b""
23 | async for chunk in response.body_iterator:
24 | response_body += chunk
25 | resp = rapidjson.loads(response_body)
26 | if "code" in resp and "message" in resp:
27 | if "trace_id" not in resp:
28 | resp["trace_id"] = trace_id
29 | resp = JSONResponse(
30 | status_code=response.status_code,
31 | content=resp,
32 | )
33 | else:
34 | if response.status_code == status.HTTP_200_OK:
35 | code = 0
36 | else:
37 | code = response.status_code
38 | resp = JSONResponse(
39 | status_code=response.status_code,
40 | content={"message": "", "code": code, "result": resp, "trace_id": trace_id, "version": self.api_config["api_version"]},
41 | )
42 | for k, v in response.headers.items():
43 | if k not in resp.headers:
44 | resp.headers[k] = v
45 | return resp
46 | if isinstance(response, dict):
47 | return JSONResponse(
48 | status_code=200,
49 | content={"message": "", "code": 0, "result": response, "trace_id": trace_id, "version": self.api_config["api_version"]},
50 | )
51 | return response
52 |
--------------------------------------------------------------------------------
/enova/common/logger.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import sys
4 | 
5 | from logging import Formatter
6 | from logging import StreamHandler
7 | from logging import getLogger
8 | from logging.handlers import TimedRotatingFileHandler
9 | from enova.common.config import CONFIG
10 | from enova.common.g_vars import get_traceid
11 |
12 |
13 | LOGGER_MAP = {}
14 |
15 |
16 | class AddRequestIdFormatter(Formatter):
17 | def formatMessage(self, record):
18 | trace_id = get_traceid()
19 | if CONFIG.app_name:
20 | record.message = f"[{CONFIG.app_name}][trace_id: {trace_id}]|{record.message}"
21 | else:
22 | record.message = f"[trace_id: {trace_id}]|{record.message}"
23 | return super().formatMessage(record)
24 |
25 |
26 | def setup_logger(name=None, path=None, level=None, file_handler_backupCount=None):
27 | # sys.stdout = Unbuffered(sys.stdout)
28 | # sys.stderr = Unbuffered(sys.stderr)
29 | logger_conf = CONFIG.logger
30 | name = name or logger_conf["name"]
31 | path = path or logger_conf["path"]
32 | level = level or logger_conf["level"]
33 | file_handler_backupCount = file_handler_backupCount or logger_conf["file_handler_backupCount"]
34 |
35 | logger = getLogger(name)
36 | logger.setLevel(level.upper())
37 |
38 | formatter = AddRequestIdFormatter(datefmt=logger_conf["datefmt"], fmt=logger_conf["fmt"])
39 | stream_handler = StreamHandler(sys.stdout)
40 | stream_handler.setFormatter(formatter)
41 | logger.addHandler(stream_handler)
42 | os.makedirs(path, exist_ok=True)
43 | file_handler = TimedRotatingFileHandler(
44 | filename=logger_conf["file_handler_filename_format"].format(path=path, name=name),
45 | when=logger_conf["file_handler_when"],
46 | interval=logger_conf["file_handler_interval"],
47 | backupCount=file_handler_backupCount,
48 | )
49 | file_handler.suffix = logger_conf["file_handler_suffix"]
50 | file_handler.extMatch = re.compile(logger_conf["file_handler_extMatch_pattern"])
51 | file_handler.setFormatter(formatter)
52 | logger.addHandler(file_handler)
53 | return logger
54 |
55 |
56 | def get_logger_by_name(name="default"):
57 | if name not in LOGGER_MAP:
58 | logger_conf = {}
59 | logger = setup_logger(**logger_conf)
60 | LOGGER_MAP[name] = logger
61 | return LOGGER_MAP[name]
62 |
63 |
64 | LOGGER = get_logger_by_name()
65 |
--------------------------------------------------------------------------------
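Usage is just the module-level singleton; every record is prefixed with the app name and the current trace id by `AddRequestIdFormatter` and duplicated to stdout and the rotating file handler:

```python
from enova.common.logger import LOGGER

# emitted roughly as "[<app_name>][trace_id: <id>]|serving started",
# depending on the configured fmt/datefmt
LOGGER.info("serving started")
```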
/escaler/pkg/scaler/scaler.go:
--------------------------------------------------------------------------------
1 | package scaler
2 |
3 | import (
4 | "sync"
5 |
6 | "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
7 | "github.com/Emerging-AI/ENOVA/escaler/pkg/queue"
8 | "github.com/Emerging-AI/ENOVA/escaler/pkg/resource"
9 |
10 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
11 | "github.com/Emerging-AI/ENOVA/escaler/pkg/logger"
12 | )
13 |
14 | type EnovaServingScaler struct {
15 | Queue *queue.InnerChanTaskQueue
16 | Client resource.ClientInterface
17 | stopped bool
18 | }
19 |
20 | func NewServingScaler(ch chan meta.TaskSpecInterface) *EnovaServingScaler {
21 | if config.GetEConfig().ResourceBackend.Type == config.ResourceBackendTypeK8s {
22 | return NewK8sServingScaler(ch)
23 | }
24 | return NewLocalDockerServingScaler(ch)
25 | }
26 |
27 | func NewLocalDockerServingScaler(ch chan meta.TaskSpecInterface) *EnovaServingScaler {
28 | return &EnovaServingScaler{
29 | Queue: &queue.InnerChanTaskQueue{
30 | Ch: ch,
31 | },
32 | Client: resource.NewDockerResourceClient(),
33 | stopped: false,
34 | }
35 | }
36 |
37 | func NewK8sServingScaler(ch chan meta.TaskSpecInterface) *EnovaServingScaler {
38 | return &EnovaServingScaler{
39 | Queue: &queue.InnerChanTaskQueue{
40 | Ch: ch,
41 | },
42 | Client: resource.NewK8sResourceClient(),
43 | }
44 | }
45 |
46 | func (s *EnovaServingScaler) Run() {
47 | // if s.Subscriber == nil {
48 | // panic(errors.New("enovaServingScaler Subscriber is nil"))
49 | // }
50 | // defer s.Subscriber.Close()
51 |
52 | for {
53 | 		// receive a message
54 | logger.Infof("enovaServingScaler start Recv message")
55 | task, ok := s.Queue.Pop()
56 | if !ok {
57 | continue
58 | }
59 | // logger.Infof("enovaServingScaler Recv message: %s", msg)
60 | // if err != nil {
61 | // logger.Infof("enovaServingScaler Error receiving message: %s", err)
62 | // continue
63 | // }
64 | 		actualTask := task.(*meta.TaskSpec)
65 |
66 | // if err := json.Unmarshal([]byte(msg), &task); err != nil {
67 | // logger.Errorf("enovaServingScaler Error parsing JSON response: %v, msg: %s", err, msg)
68 | // continue
69 | // }
70 |
71 | 		if actualTask.Replica == 0 {
72 | 			s.Client.DeleteTask(*actualTask)
73 | 		} else {
74 | 			// run the local deploy
75 | 			s.Client.DeployTask(*actualTask)
76 | }
77 | }
78 | }
79 |
80 | func (s *EnovaServingScaler) Stop() {
81 |
82 | }
83 |
84 | func (s *EnovaServingScaler) RunInWaitGroup(wg *sync.WaitGroup) {
85 | defer wg.Done()
86 | s.Run()
87 | }
88 |
--------------------------------------------------------------------------------
/escaler/pkg/resource/utils/cmd.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "reflect"
7 | "strconv"
8 |
9 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
10 |
11 | "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
12 | )
13 |
14 | func shouldAppend(v interface{}) bool {
15 | switch v := v.(type) {
16 | case int, int32, int64:
17 | return v != 0
18 | case float32, float64:
19 | return v != 0.0
20 | case string:
21 | return v != ""
22 | case bool:
23 | return v // no need to check, because false is the zero value and means "not set"
24 | default:
25 | // This case is for types not explicitly checked above; assumes non-zero by default
26 | return !reflect.DeepEqual(v, reflect.Zero(reflect.TypeOf(v)).Interface())
27 | }
28 | }
29 |
30 | func BuildCmdFromTaskSpec(spec meta.TaskSpec) []string {
31 |
32 | cmd := []string{
33 | "enova", "serving", "run", "--model", spec.Model, "--port", strconv.Itoa(spec.Port), "--host", spec.Host,
34 | "--backend", spec.Backend,
35 | "--exporter_service_name", spec.ExporterServiceName,
36 | }
37 | if config.GetEConfig().ResourceBackend.Type == config.ResourceBackendTypeK8s {
38 | cmd = append(cmd, "--exporter_endpoint", spec.Name+"-collector."+spec.Namespace+".svc.cluster.local:4317")
39 | } else {
40 | cmd = append(cmd, "--exporter_endpoint", spec.ExporterEndpoint)
41 | }
42 |
43 | switch spec.Backend {
44 | case "vllm":
45 | cmd = UpdateCmdByBackendConfig[*meta.VllmBackendConfig](cmd, spec)
46 | case "sglang":
47 | cmd = UpdateCmdByBackendConfig[*meta.SglangBackendConfig](cmd, spec)
48 | }
49 | // Add extra serving params
50 | for k, v := range spec.BackendExtraConfig {
51 | cmd = append(cmd, []string{fmt.Sprintf("--%s", k), fmt.Sprintf("%v", v)}...)
52 | }
53 | return cmd
54 | }
55 |
56 | // UpdateCmdByBackendConfig flattens the typed backend config into "--key value"
57 | // CLI flags, skipping zero values; on a type mismatch or any (de)serialization
58 | // error the cmd is returned unchanged.
59 | func UpdateCmdByBackendConfig[B interface{}](cmd []string, spec meta.TaskSpec) []string {
60 | 	backendConfig, ok := spec.BackendConfig.(B)
61 | 	if !ok {
62 | 		return cmd
63 | 	}
64 | 	jsonBytes, err := json.Marshal(backendConfig)
65 | 	if err != nil {
66 | 		return cmd
67 | 	}
68 | 	var backendConfigMap map[string]interface{}
69 | 	if err = json.Unmarshal(jsonBytes, &backendConfigMap); err != nil {
70 | 		return cmd
71 | 	}
72 | 	// if there is not a valid value, don't append it to the cmd params
73 | 	for k, v := range backendConfigMap {
74 | 		if shouldAppend(v) {
75 | 			cmd = append(cmd, []string{fmt.Sprintf("--%s", k), fmt.Sprintf("%v", v)}...)
76 | 		}
77 | 	}
78 | 	return cmd
79 | }
--------------------------------------------------------------------------------
/enova/serving/backend/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
3 | from enova.common.logger import LOGGER
4 |
5 |
6 | def hf_model_params_size(model_name, hf_proxies=None):
7 | """
8 | TODO: implement special model
9 | """
10 |     LOGGER.debug(f"start parsing model's config: {model_name}")
11 | try:
12 | return specific_eval_hf_model_params_size(model_name, hf_proxies)
13 | except Exception as e:
14 | LOGGER.warning(f"specific_eval_hf_model_params_size error: {str(e)}")
15 | return estimate_hf_model_params_size(model_name, hf_proxies)
16 |
17 |
18 | def specific_eval_hf_model_params_size(model_name, hf_proxies=None):
19 | """ """
20 | config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, proxies=hf_proxies)
21 | if config.__class__.__name__ in ["BaichuanConfig", "QWenConfig"]:
22 | model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
23 | else:
24 | model = AutoModel.from_config(config, trust_remote_code=True)
25 | params_size = 0
26 | for w_name, p in list(model.named_parameters()):
27 | LOGGER.debug(f"w_name: {w_name}, shape: {p.shape}")
28 | params_size += np.prod(p.shape)
29 | return {"params_size": int(params_size), "model_type": config.model_type}
30 |
31 |
32 | def estimate_hf_model_params_size(model_name, hf_proxies=None):
33 |     """Fast estimate of an HF model's params_size."""
34 | config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, proxies=hf_proxies)
35 | if config.model_type == "chatglm":
36 | return chatglm_estimate_hf_model_params_size(config)
37 | num_layers = config.num_hidden_layers
38 | hidden_size = config.hidden_size
39 | vocab_size = config.vocab_size
40 | params_size = (
41 | vocab_size * hidden_size
42 | + num_layers * (4 * hidden_size**2 + 4 * hidden_size)
43 | + num_layers * (8 * hidden_size**2 + 5 * hidden_size)
44 | + 4 * num_layers * hidden_size
45 | )
46 | return {"params_size": int(params_size), "model_type": config.model_type}
47 |
48 |
49 | def chatglm_estimate_hf_model_params_size(config):
50 | num_layers = config.num_layers
51 | hidden_size = config.hidden_size
52 | vocab_size = config.vocab_size
53 | params_size = (
54 | vocab_size * hidden_size
55 | + num_layers * (4 * hidden_size**2 + 4 * hidden_size)
56 | + num_layers * (8 * hidden_size**2 + 5 * hidden_size)
57 | + 4 * num_layers * hidden_size
58 | )
59 | return {"params_size": int(params_size), "model_type": config.model_type}
60 |
--------------------------------------------------------------------------------
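Calling the sizer is a one-liner; it needs network access to Hugging Face (or a local cache) to fetch the model config, and the model name below is only an example:

```python
from enova.serving.backend.utils import hf_model_params_size

# tries the exact parameter count first, falls back to the closed-form estimate
info = hf_model_params_size("gpt2")
print(info)  # e.g. {"params_size": ..., "model_type": "gpt2"}
```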
/enova/server/restful/router.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | from fastapi import APIRouter
3 | from fastapi.responses import JSONResponse
4 | from enova.common.config import CONFIG
5 | from enova.common.constant import HttpMethod
6 |
7 |
8 | class BaseResource:
9 | PATH = NotImplemented
10 |     DEPENDENCIES = NotImplemented
11 | GET_RESPONSE_MODEL = None
12 | PUT_RESPONSE_MODEL = None
13 | DELETE_RESPONSE_MODEL = None
14 | POST_RESPONSE_MODEL = None
15 | GET_RESPONSE_CLASS = JSONResponse
16 | PUT_RESPONSE_CLASS = JSONResponse
17 | DELETE_RESPONSE_CLASS = JSONResponse
18 | POST_RESPONSE_CLASS = JSONResponse
19 | GET_INCLUDE_IN_SCHEMA = True
20 | PUT_INCLUDE_IN_SCHEMA = True
21 | DELETE_INCLUDE_IN_SCHEMA = True
22 | POST_INCLUDE_IN_SCHEMA = True
23 | TAGS = None
24 |
25 |
26 | class WebSocketResource:
27 | PATH = NotImplemented
28 |
29 |
30 | @dataclasses.dataclass
31 | class ApiRouter:
32 | prefix: str = None
33 |
34 | def __post_init__(self) -> None:
35 | """
36 | Dynamically convert GET, POST, DELETE, PUT into interfaces. just for fastapi
37 | """
38 | self.router = APIRouter(
39 | prefix=self.prefix,
40 | dependencies=[],
41 | )
42 |
43 | def register(self, resource_cls):
44 | """"""
45 | if issubclass(resource_cls, BaseResource) and resource_cls != BaseResource:
46 | self._register_http(resource_cls)
47 |
48 | if issubclass(resource_cls, WebSocketResource) and resource_cls != WebSocketResource:
49 | self._register_ws(resource_cls)
50 |
51 | def _register_http(self, resource_cls):
52 | resource_ins = resource_cls()
53 | for method in HttpMethod.values():
54 | if hasattr(resource_ins, method):
55 | response_model = getattr(resource_ins, f"{method.upper()}_RESPONSE_MODEL")
56 | response_class = getattr(resource_ins, f"{method.upper()}_RESPONSE_CLASS")
57 | include_in_schema = getattr(resource_ins, f"{method.upper()}_INCLUDE_IN_SCHEMA")
58 | actual_path = f"/{CONFIG.api['api_version']}{resource_ins.PATH}"
59 | tags = getattr(resource_ins, "TAGS") or []
60 | getattr(self.router, method)(
61 | actual_path,
62 | response_model=response_model,
63 | response_class=response_class,
64 | include_in_schema=include_in_schema,
65 | tags=tags,
66 | )(getattr(resource_ins, method))
67 |
68 | def _register_ws(self, resource_cls):
69 | resource_ins = resource_cls()
70 | if resource_ins.PATH is not NotImplemented:
71 | actual_path = f"/{CONFIG.api['api_version']}{resource_ins.PATH}"
72 | self.router.add_api_websocket_route(actual_path, getattr(resource_ins, "get"))
73 |
--------------------------------------------------------------------------------
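A minimal sketch of registering a resource with `ApiRouter`, assuming `CONFIG.api['api_version']` is set (e.g. `v1`); the resource name and path are illustrative:

```python
from enova.server.restful.router import ApiRouter, BaseResource


class PingResource(BaseResource):
    PATH = "/ping"
    TAGS = ["demo"]

    async def get(self):
        return {"pong": True}


# register() walks HttpMethod.values() and wires every verb the resource
# defines, so this exposes GET /v1/ping on router.router
router = ApiRouter(prefix="")
router.register(PingResource)
```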
/enova/app/resource.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated, Dict
2 | from fastapi import Body, Depends
3 |
4 | from enova.app.serializer import (
5 | ServingCreateSLZ,
6 | QueryServingParameterSLZ,
7 | QueryServingResponseSLZ,
8 | SingleQueryServingResponseSLZ,
9 | SingleQueryTestResponseSLZ,
10 | ListTestResponseSLZ,
11 | TestCreateSLZ,
12 | QueryTestParameterSLZ,
13 | )
14 | from enova.server.restful.router import BaseResource
15 | from enova.app.service import AppService
16 |
17 |
18 | class AppBaseResource(BaseResource):
19 | def __init__(self) -> None:
20 | self.service = AppService()
21 |
22 |
23 | class HealthzResource(AppBaseResource):
24 | PATH = "/healthz"
25 | TAGS = ["monitor"]
26 |
27 | async def get(self) -> Dict:
28 | """"""
29 | return {"status": "running"}
30 |
31 |
32 | class ServingResource(AppBaseResource):
33 | PATH = "/serving"
34 | GET_INCLUDE_IN_SCHEMA = True
35 | GET_RESPONSE_MODEL = QueryServingResponseSLZ
36 | POST_RESPONSE_MODEL = SingleQueryServingResponseSLZ
37 | TAGS = ["serving serve"]
38 |
39 | async def post(self, params: Annotated[ServingCreateSLZ, Body(openapi_examples=ServingCreateSLZ.Extra.openapi_examples)]) -> Dict:
40 | """"""
41 | return await self.service.create_instance(params.dict())
42 |
43 | async def get(self, params: Annotated[QueryServingParameterSLZ, Depends(QueryServingParameterSLZ)]):
44 | """"""
45 | return await self.service.list_instance(params.dict())
46 |
47 |
48 | class SingleServingResource(AppBaseResource):
49 | PATH = "/serving/{instance_id}"
50 | TAGS = ["serving serve"]
51 |
52 | async def delete(self, instance_id: str):
53 | """"""
54 | return await self.service.delete_instance(instance_id)
55 |
56 | async def get(self, instance_id: str):
57 | """"""
58 | return await self.service.get_instance(instance_id)
59 |
60 |
61 | class TestResource(AppBaseResource):
62 | PATH = "/serving/instance/test"
63 | GET_RESPONSE_MODEL = ListTestResponseSLZ
64 | POST_RESPONSE_MODEL = SingleQueryTestResponseSLZ
65 | TAGS = ["test inject"]
66 |
67 | async def post(self, params: Annotated[TestCreateSLZ, Body(openapi_examples=TestCreateSLZ.Extra.openapi_examples)]):
68 | return await self.service.create_test(params.dict())
69 |
70 | async def get(self, params: Annotated[QueryTestParameterSLZ, Depends(QueryTestParameterSLZ)]):
71 | return await self.service.list_test(params.dict())
72 |
73 |
74 | class SingleTestResource(AppBaseResource):
75 | PATH = "/serving/instance/test/{test_id}"
76 | GET_RESPONSE_MODEL = SingleQueryTestResponseSLZ
77 | TAGS = ["test inject"]
78 |
79 | async def get(self, test_id: str):
80 | return await self.service.get_test(test_id)
81 |
82 | async def delete(self, test_id: str):
83 | return await self.service.delete_test(test_id)
84 |
--------------------------------------------------------------------------------
/enova/app/db_modles.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | from sqlalchemy import (
4 | Column,
5 | Float,
6 | Integer,
7 | String,
8 | )
9 | from sqlalchemy.orm import declared_attr
10 |
11 | from enova.common.constant import DeployStatus, TestStatus
12 | from enova.common.utils import gen_ulid
13 | from enova.database.relation.orm.base import DBModelBase, table_args, JSON, DateTime
14 |
15 |
16 | class DeploymentInstanceInfoTable(DBModelBase):
17 | __tablename__ = "deployment_instance_info"
18 |
19 | @declared_attr
20 | def __table_args__(cls):
21 | return table_args(cls, {"comment": "table of serving's deployment instance"})
22 |
23 | instance_id = Column(String(256), primary_key=True, nullable=False, comment="instance id", default=gen_ulid)
24 | instance_name = Column(String(64), nullable=False, comment="instance name")
25 | instance_spec = Column(JSON, comment="instance specification")
26 | startup_args = Column(JSON, comment="the arguments of starting up of model serve by serving")
27 | mdl_cfg = Column(JSON, comment="the config of llm model")
28 |     serving_id = Column(String(256), nullable=False, comment="serving's unique id; it can be used to fetch the serving status from the pilot api")
29 | deploy_status = Column(String(32), nullable=False, default=DeployStatus.UNKNOWN.value, comment="status of deployment")
30 | extra = Column(JSON)
31 | create_time = Column(DateTime, default=datetime.datetime.now)
32 | update_time = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
33 | is_deleted = Column(Integer, default=0)
34 | creator = Column(String(64))
35 | updater = Column(String(64))
36 |
37 |
38 | class TestInfoTable(DBModelBase):
39 | __tablename__ = "test_info"
40 |
41 | @declared_attr
42 | def __table_args__(cls):
43 | return table_args(cls, {"comment": "Inject Test record"})
44 |
45 | test_id = Column(String(256), primary_key=True, nullable=False, comment="test ID", default=gen_ulid)
46 | instance_id = Column(String(256), nullable=False, comment="instance_id in serving's deployment")
47 | data_set = Column(String(64), nullable=False, comment="name of dataset")
48 | param_spec = Column(JSON, comment="serving's startup parameters")
49 | test_spec = Column(JSON, comment="test specification")
50 | test_status = Column(String(32), nullable=False, default=TestStatus.UNKNOWN.value)
51 | prompt_tps = Column(Float, default=0, comment="throughput of prompt tokens")
52 | generation_tps = Column(Float, default=0, comment="throughput of generation tokens")
53 | result = Column(JSON, comment="result of inject test")
54 | create_time = Column(DateTime, default=datetime.datetime.now)
55 | update_time = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
56 | is_deleted = Column(Integer, default=0)
57 | creator = Column(String(64))
58 | updater = Column(String(64))
59 |
--------------------------------------------------------------------------------
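A hedged sketch of writing one row with these models, assuming the sqlite schema was already created via `init_db()` and that the session wrapper proxies the usual SQLAlchemy `add`/`commit` calls (as its use elsewhere in this dump suggests); the field values are illustrative:

```python
from enova.app.db_modles import DeploymentInstanceInfoTable
from enova.database.relation.transaction.session import get_session

with get_session() as session:
    # instance_id defaults to a fresh ULID via gen_ulid
    row = DeploymentInstanceInfoTable(
        instance_name="demo-instance",
        serving_id="serving-0001",
        startup_args={"model": "demo-model"},
    )
    session.add(row)
    session.commit()
```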
/tests/enova/test_eapp.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from httpx import AsyncClient
3 | from enova.app.server import get_app_api_server
4 | from enova.common.config import CONFIG
5 |
6 |
7 | @pytest.fixture
8 | def eapp():
9 | api_server = get_app_api_server()
10 | return api_server.app
11 |
12 |
13 | @pytest.mark.asyncio
14 | class TestEApp:
15 | async def test_healthz(self, eapp):
16 | async with AsyncClient(app=eapp, base_url="http://test") as ac:
17 | response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/healthz")
18 | assert response.status_code == 200
19 |
20 |
21 | @pytest.mark.asyncio
22 | class TestEServe:
23 | async def test_list_serving(self, eapp):
24 | async with AsyncClient(app=eapp, base_url="http://test") as ac:
25 | response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/v1/serving")
26 | assert response.status_code == 200
27 | # TODO: some test of biz flow
28 |
29 | # async def test_create_serving_with_escalar(self, eapp):
30 | # post_params = {}
31 | # async with AsyncClient(app=eapp, base_url="http://test") as ac:
32 | # response = await ac.post(
33 | # f"/{CONFIG.enova_app['url_prefix']}/v1/serving",
34 | # json=post_params,
35 | # )
36 | # assert response.status_code == 200
37 | # # TODO: some test of biz flow
38 |
39 | # async def test_create_serving_missing_escalar(self, eapp):
40 | # post_params = {}
41 | # async with AsyncClient(app=eapp, base_url="http://test") as ac:
42 | # response = await ac.post(
43 | # f"/{CONFIG.enova_app['url_prefix']}/v1/serving",
44 | # json=post_params,
45 | # )
46 | # assert response.status_code == 200
47 | # # TODO: some test of biz flow
48 |
49 | # async def test_get_serving(self, eapp):
50 | # eserve_id = ""
51 | # async with AsyncClient(app=eapp, base_url="http://test") as ac:
52 | # response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/v1/serving/{eserve_id}")
53 | # assert response.status_code == 200
54 | # # TODO: some test of biz flow
55 |
56 | # async def test_delete_serving(self, eapp):
57 | # eserve_id = ""
58 | # async with AsyncClient(app=eapp, base_url="http://test") as ac:
59 | # response = await ac.delete(f"/{CONFIG.enova_app['url_prefix']}/v1/serving/{eserve_id}")
60 | # assert response.status_code == 200
61 | # # TODO: some test of biz flow
62 |
63 |
64 | @pytest.mark.asyncio
65 | class TestTInject:
66 | async def test_list_injector(self, eapp):
67 | async with AsyncClient(app=eapp, base_url="http://test") as ac:
68 | response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/v1/instance/test")
69 | assert response.status_code == 200
70 | # TODO: some test of biz flow
71 |
--------------------------------------------------------------------------------
/enova/entry/command/webui.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import sys
4 | import click
5 |
6 | from enova.common.cli_helper import ArgumentHelper, parse_extra_args
7 | from enova.common.config import CONFIG
8 | from enova.common.utils import get_enova_path
9 |
10 |
11 | class Webui:
12 | def __init__(self):
13 | self.streamlit_process = None
14 |
15 | def start(self, serving_host, serving_port, host, port):
16 | args_helper = ArgumentHelper(self, sys._getframe())
17 | CONFIG.update_config(args_helper.args_map)
18 |
19 | os.environ["SERVING_URL"] = f"http://{serving_host}:{serving_port}"
20 |
21 | base_enova_path = get_enova_path()
22 | streamlit_script = os.path.join(base_enova_path, CONFIG.webui["script"])
23 | self.streamlit_process = subprocess.Popen(
24 | ["streamlit", "run", streamlit_script, "--server.port", str(port), "--server.address", host, "--server.enableCORS", "false"]
25 | )
26 |
27 | def run(
28 | self,
29 | serving_host=CONFIG.serving["host"],
30 | serving_port=CONFIG.serving["port"],
31 | host=CONFIG.webui["host"],
32 | port=CONFIG.webui["port"],
33 | daemon=CONFIG.webui["daemon"],
34 | **kwargs,
35 | ):
36 | """"""
37 | self.start(serving_host, serving_port, host, port)
38 | if daemon:
39 | self.streamlit_process.wait()
40 |
41 | def stop(self):
42 | self.streamlit_process.terminate()
43 | self.streamlit_process.wait()
44 |
45 |
46 | pass_enova_webui = click.make_pass_decorator(Webui)
47 |
48 |
49 | @click.group(name="webui")
50 | @click.pass_context
51 | def webui_cli(ctx):
52 | """
53 | Build agent at this page based on the launched LLM API service.
54 | """
55 | pass
56 |
57 |
58 | @webui_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"])
59 | @click.option("--serving-host", type=str, default=CONFIG.serving["host"])
60 | @click.option("--serving-port", type=int, default=CONFIG.serving["port"])
61 | @click.option("--host", type=str, default=CONFIG.webui["host"])
62 | @click.option("--port", type=int, default=CONFIG.webui["port"])
63 | @click.option("--daemon", type=bool, default=CONFIG.webui["daemon"])
64 | @pass_enova_webui
65 | @click.pass_context
66 | def webui_run(
67 | ctx,
68 | enova_webui: Webui,
69 | serving_host,
70 | serving_port,
71 | host,
72 | port,
73 | daemon,
74 | ):
75 | enova_webui.run(
76 | serving_host=serving_host,
77 | serving_port=serving_port,
78 | host=host,
79 | port=port,
80 | daemon=daemon,
81 | **parse_extra_args(ctx),
82 | )
83 | 
84 |
85 |
86 | @webui_cli.command(
87 | name="stop",
88 | context_settings=dict(help_option_names=["-h", "--help"], ignore_unknown_options=True, allow_extra_args=True),
89 | )
90 | @pass_enova_webui
91 | @click.pass_context
92 | def webui_stop(ctx, enova_webui: Webui):
93 | enova_webui.stop()
94 |
--------------------------------------------------------------------------------
/escaler/pkg/api/api.go:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import (
4 | "encoding/json"
5 | "errors"
6 | "fmt"
7 | "io"
8 | "net/http"
9 | "net/url"
10 | "strings"
11 |
12 | "github.com/Emerging-AI/ENOVA/escaler/pkg/logger"
13 | )
14 |
15 | type HttpResponse interface{}
16 |
17 | type HeaderBuilderInterface interface {
18 | Build() (map[string]string, error)
19 | }
20 |
21 | type EmptyHeaderBuilder struct {
22 | }
23 |
24 | func (hb *EmptyHeaderBuilder) Build() (map[string]string, error) {
25 | return make(map[string]string), nil
26 | }
27 |
28 | type HttpApi[T HttpResponse] struct {
29 | Method string
30 | Url string
31 | HeaderBuilder HeaderBuilderInterface
32 | }
33 |
34 | func (api *HttpApi[T]) GetRequest(Params interface{}, Headers map[string]string) (*http.Request, error) {
35 | newHeader, err := api.HeaderBuilder.Build()
36 | if err != nil {
37 | logger.Errorf("HeaderBuilder get error: %v", err)
38 | return nil, err
39 | }
40 |
41 | for key, value := range Headers {
42 | newHeader[key] = value
43 | }
44 |
45 | logger.Infof("make http request")
46 |
47 | actualMethod := strings.ToUpper(api.Method)
48 | var requestData io.Reader
49 | actualUrl := api.Url
50 | switch actualMethod {
51 | case "POST", "PUT":
52 | bytesData, _ := json.Marshal(Params)
53 | reqBody := string(bytesData)
54 | logger.Infof("api %s, request body: %s", api.Url, reqBody)
55 | newHeader["Content-Type"] = "application/json"
56 | requestData = strings.NewReader(reqBody)
57 | case "GET", "DELETE":
58 | 		Url, _ := url.Parse(api.Url) // TODO: handle err
59 | urlValues := url.Values{}
60 | if pm, ok := Params.(map[string]string); ok {
61 | for key, value := range pm {
62 | urlValues.Set(key, value)
63 | }
64 | Url.RawQuery = urlValues.Encode()
65 | actualUrl = Url.String()
66 | }
67 |
68 | }
69 |
70 | req, err := http.NewRequest(actualMethod, actualUrl, requestData)
71 | if err != nil {
72 | return nil, err
73 | }
74 | for key, value := range newHeader {
75 | req.Header.Add(key, value)
76 | }
77 | return req, nil
78 | }
79 |
80 | func (api *HttpApi[T]) Call(Params interface{}, Headers map[string]string) (T, error) {
81 | client := &http.Client{}
82 | req, err := api.GetRequest(Params, Headers)
83 | var resp T
84 | if err != nil {
85 | return resp, err
86 | }
87 | 	res, err := client.Do(req)
88 | if err != nil {
89 | return resp, err
90 | }
91 | return api.processResponse(res)
92 | }
93 |
94 | func (api *HttpApi[T]) processResponse(res *http.Response) (T, error) {
95 | defer res.Body.Close()
96 | var httpResp T
97 | if res.StatusCode != http.StatusOK {
98 | resBody, _ := io.ReadAll(res.Body)
99 | msg := fmt.Sprintf("HttpApi get StatusOK not ok: status code: %d, resBody: %s", res.StatusCode, resBody)
100 | logger.Info(msg)
101 | return httpResp, errors.New(msg)
102 | }
103 | resBody, _ := io.ReadAll(res.Body)
104 | if err := json.Unmarshal(resBody, &httpResp); err != nil {
105 | 		logger.Errorf("Error parsing JSON response: %v", err)
106 | return httpResp, err
107 | }
108 | return httpResp, nil
109 | }
110 |
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/jmeter-config-template.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | @LOAD_PROFILE@
12 |
13 |
14 |
15 |
16 |
17 | continue
18 | ${__tstFeedback(tst,100, 1000,10)}
19 |
20 |
21 | @DURATION@
22 |
23 |
24 | S
25 |
26 |
27 |
28 | @ELEMENT_PROP@
29 |
30 |
31 |
32 |
33 | true
34 |
35 |
36 |
37 | @BODY@
38 | =
39 |
40 |
41 |
42 | @HOST@
43 | @PORT@
44 | @PATH@
45 | @METHOD@
46 | true
47 | true
48 |
49 | @DATA@
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/front/src/components/TimeRangePicker.vue:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
123 |
--------------------------------------------------------------------------------
/enova/common/error.py:
--------------------------------------------------------------------------------
1 | from enova.common.config import CONFIG
2 |
3 |
4 | class EmergingAIBaseError(Exception):
5 | BASE_ERROR_CODE: str = CONFIG.BASIC_ERROR_CODE or "100"
6 | MODULE_CODE: str = CONFIG.MODULE_CODE or "001"
7 | ERROR_CODE: str = "000"
8 | ERROR_MESSAGE: str = ""
9 |
10 | def __init__(self, error_message=None, error_code=None, *args, **kwargs):
11 | self.error_code = error_code if error_code is not None else self.ERROR_CODE
12 | self.error_code = f"{self.BASE_ERROR_CODE}{self.MODULE_CODE}{self.error_code}"
13 |
14 | self.error_message = error_message if error_message is not None else self.ERROR_MESSAGE
15 | self.message = self.error_message
16 | self.code = int(self.error_code)
17 | errors = []
18 | if kwargs.get("errors", None):
19 | errors = kwargs["errors"] if isinstance(kwargs["errors"], list) else [kwargs["errors"]]
20 | del kwargs["errors"]
21 | self.errors = errors
22 | kwargs["args"] = args
23 |
24 | super(EmergingAIBaseError, self).__init__(self.error_message, self.error_code, kwargs, errors)
25 |
26 |
27 | class ArgsError(EmergingAIBaseError):
28 | ERROR_CODE: str = "001"
29 | ERROR_MESSAGE: str = "args error"
30 |
31 |
32 | class TranslationError(EmergingAIBaseError):
33 | ERROR_CODE: str = "091"
34 | ERROR_MESSAGE: str = "translation error"
35 |
36 |
37 | # --
38 | class EmergingaiAPIResponseError(EmergingAIBaseError):
39 | ERROR_CODE: str = "010"
40 | ERROR_MESSAGE: str = "response error"
41 |
42 |
43 | class APIParamsError(EmergingAIBaseError):
44 | ERROR_CODE: str = "011"
45 | ERROR_MESSAGE: str = "api params error"
46 |
47 |
48 | # --- serving backend api ---
49 | class EScalerApiResponseError(EmergingAIBaseError):
50 | ERROR_CODE: str = "101"
51 | ERROR_MESSAGE: str = "node api response error"
52 |
53 |
54 | class DeploymentInstanceExistError(EmergingAIBaseError):
55 | ERROR_CODE: str = "401"
56 | ERROR_MESSAGE: str = "deployment workload already exists"
57 |
58 |
59 | class DeploymentInstanceNotExistError(EmergingAIBaseError):
60 | ERROR_CODE: str = "402"
61 | ERROR_MESSAGE: str = "deployment workload does not exist"
62 |
63 |
64 | class DeploymentInstanceCreateFailedError(EmergingAIBaseError):
65 | ERROR_CODE: str = "403"
66 | ERROR_MESSAGE: str = "deployment workload creation failed"
67 |
68 |
69 | class TestNotExistError(EmergingAIBaseError):
70 | ERROR_CODE: str = "405"
71 | ERROR_MESSAGE: str = "test record does not exist"
72 |
73 |
74 | class JmeterContainerLaunchError(EmergingAIBaseError):
75 | ERROR_CODE: str = "404"
76 | ERROR_MESSAGE: str = "failed to launch jmeter container"
77 |
78 |
79 | class TestStartError(EmergingAIBaseError):
80 | ERROR_CODE: str = "406"
81 | ERROR_MESSAGE: str = "test start failed"
82 |
83 |
84 | class DataFileNotExistError(EmergingAIBaseError):
85 | ERROR_CODE: str = "407"
86 | ERROR_MESSAGE: str = "data file does not exist"
87 |
88 |
89 | # ----
90 |
91 |
92 | class NotReadyError(EmergingAIBaseError):
93 | ERROR_CODE: str = "101"
94 | ERROR_MESSAGE: str = "support service not ready"
95 |
96 |
97 | class BackendConfigMissingError(EmergingAIBaseError):
98 | ERROR_CODE: str = "102"
99 | ERROR_MESSAGE: str = "backend default config missing"
100 |
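A minimal usage sketch (not from the source tree), assuming the default BASIC_ERROR_CODE "100" and MODULE_CODE "001": the numeric code is the concatenation BASE + MODULE + ERROR.

    try:
        raise DeploymentInstanceExistError()
    except EmergingAIBaseError as e:
        # prints: 100001401 deployment workload already exists
        print(e.code, e.message)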
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/instrumentation/fastapi/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Collection
2 | from opentelemetry import trace, metrics
3 | from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
4 | from opentelemetry.instrumentation.utils import unwrap
5 | from opentelemetry.instrumentation.asgi import collect_request_attributes
6 | from opentelemetry.util.http import _parse_active_request_count_attrs
7 | from wrapt import wrap_function_wrapper
8 | from starlette.types import ASGIApp, Scope, Receive, Send
9 | from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
10 |
11 | import json
12 |
13 | _instruments = ("fastapi >= 0.1",)
14 |
15 |
16 | class EnovaMiddleware:
17 | def __init__(self, app: ASGIApp) -> None:
18 | self.app = app
19 | self.meter = metrics.get_meter(__name__)
20 | self.tracer = trace.get_tracer(__name__)
21 | self.requests_counter = self.meter.create_counter(
22 | name="http.server.requests",
23 | unit="requests",
24 | description="measures the number of HTTP requests received",
25 | )
26 |
27 | async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
28 | if scope["type"] != "http":
29 | await self.app(scope, receive, send)
30 | return
31 |
32 | attrs = collect_request_attributes(scope)
33 | _request_count_attrs = _parse_active_request_count_attrs(attrs)
34 | self.requests_counter.add(1, _request_count_attrs)
35 | messages = []
36 |
37 | if scope["method"] == "POST" and scope["path"] in ["/generate", "/v1/completions", "/v1/chat/completions"]:
38 | span_name = f"POST {scope['path']} params"
39 | more_body = True
40 |
41 | try:
42 | while more_body:
43 | message = await receive()
44 | messages.append(message)
45 | more_body = message.get("more_body", False)
46 | body = b"".join([message.get("body", b"") for message in messages if message.get("body")])
47 | if body:
48 | with self.tracer.start_as_current_span(span_name) as generate_span:
49 | body_json = json.loads(body)
50 | for key in ["prompt", "messages", "model"]:
51 | if key in body_json:
52 | generate_span.set_attribute(key, str(body_json[key]))
53 | except Exception:
54 | # body capture is best-effort; never let it break the request path
55 | pass
56 | async def wrapped_receive():
57 | if messages:
58 | return messages.pop(0)
59 | return await receive()
60 |
61 | await self.app(scope, wrapped_receive, send)
62 |
63 |
64 | class EnovaFastAPIInstrumentor(BaseInstrumentor):
65 | def instrumentation_dependencies(self) -> Collection[str]:
66 | return _instruments
67 |
68 | def _instrument(self, **kwargs):
69 | def fastapi_init_wrapper(wrapped, instance, args, kwargs):
70 | result = wrapped(*args, **kwargs)
71 | instance.add_middleware(EnovaMiddleware)
72 | FastAPIInstrumentor.instrument_app(instance)
73 | return result
74 |
75 | wrap_function_wrapper("fastapi", "FastAPI.__init__", fastapi_init_wrapper)
76 |
77 | def _uninstrument(self, **kwargs):
78 | unwrap("fastapi", "FastAPI.__init__")
79 |
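A minimal usage sketch, assuming an OpenTelemetry SDK (tracer and meter providers plus an exporter) has already been configured elsewhere:

    from fastapi import FastAPI

    # patch FastAPI.__init__ so every app created afterwards is instrumented
    EnovaFastAPIInstrumentor().instrument()

    # this app now carries EnovaMiddleware plus the stock FastAPIInstrumentor
    app = FastAPI()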
--------------------------------------------------------------------------------
/front/src/assets/svg/earth.svg:
--------------------------------------------------------------------------------
1 |
3 |
9 |
--------------------------------------------------------------------------------
/enova/serving/backend/vllm.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import dataclasses
3 | from enova.common.logger import LOGGER
4 | from enova.common.config import CONFIG
5 | from enova.common.constant import VllmMode
6 | from enova.serving.backend.base import BaseBackend
7 |
8 |
9 | class CustomDict(dict):
10 |
11 | def __getattr__(self, name: str):
12 | # called only for names that are not real attributes, so dict methods
13 | # keep working; missing keys simply read as None
14 | return self.get(name)
15 |
16 |
17 | @dataclasses.dataclass
18 | class VllmBackend(BaseBackend):
19 | def __post_init__(self):
20 | """"""
21 |
22 | def _create_app(self):
23 | vllm_mode = CONFIG.vllm.pop("vllm_mode", VllmMode.NORMAL.value)
24 | from vllm.engine.arg_utils import AsyncEngineArgs
25 | from vllm.engine.async_llm_engine import AsyncLLMEngine
26 | from vllm.transformers_utils.tokenizer import get_tokenizer
27 | import torch
28 |
29 | if not torch.cuda.is_available():
30 | raise RuntimeError("The vLLM backend requires a CUDA runtime")
31 |
32 | if vllm_mode == VllmMode.NORMAL.value:
33 | from vllm.entrypoints import api_server
34 |
35 | engine_args = AsyncEngineArgs(model=self.model, **CONFIG.vllm)
36 | engine = AsyncLLMEngine.from_engine_args(engine_args)
37 | engine_model_config = asyncio.run(engine.get_model_config())
38 | max_model_len = engine_model_config.max_model_len
39 |
40 | api_server.served_model = self.model
41 | api_server.engine = engine
42 | api_server.max_model_len = max_model_len
43 | api_server.tokenizer = get_tokenizer(
44 | engine_args.tokenizer,
45 | tokenizer_mode=engine_args.tokenizer_mode,
46 | trust_remote_code=engine_args.trust_remote_code,
47 | )
48 | elif vllm_mode == VllmMode.OPENAI.value:
49 | from vllm.entrypoints.openai import api_server
50 |
51 | class CustomArgParser(api_server.FlexibleArgumentParser):
52 |
53 | def parse_args(self, args=None, namespace=None):
54 | args, _ = self.parse_known_args(args, namespace)
55 | return args
56 |
57 | parser = CustomArgParser(description="vLLM OpenAI-Compatible RESTful API server.")
58 | parser = api_server.make_arg_parser(parser)
59 | args = parser.parse_args()
60 |
61 | current_engine_args = {k: v for k, v in CONFIG.vllm.items() if k in AsyncEngineArgs.__dataclass_fields__}
62 | engine_args = AsyncEngineArgs(model=self.model, **current_engine_args)
63 | engine = AsyncLLMEngine.from_engine_args(engine_args, usage_context=api_server.UsageContext.OPENAI_API_SERVER)
64 | engine_model_config = asyncio.run(engine.get_vllm_config())
65 | api_server.engine = engine
66 | api_server.async_engine_client = engine
67 | api_server.engine_args = engine_args
68 | api_server.app = api_server.build_app(args)
69 | asyncio.run(api_server.init_app_state(api_server.async_engine_client, engine_model_config, api_server.app.state, args))
70 |
71 | else:
72 | raise ValueError(f"vllm_mode: {vllm_mode} is not supported")
73 | LOGGER.info(f"CONFIG.vllm: {CONFIG.vllm}")
74 |
75 | self.app = api_server.app
76 | cur_app = api_server.app
77 |
78 | @cur_app.get("/v1/model/info/args")
79 | async def get_engine_args():
80 | return {"code": 0, "result": engine_args}
81 |
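A hedged sketch of querying the extra route registered above; the host and port are assumptions (9199 matches the webui's default SERVING_URL):

    import requests

    # the route answers {"code": 0, "result": <engine args>}
    resp = requests.get("http://127.0.0.1:9199/v1/model/info/args")
    print(resp.json()["result"])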
--------------------------------------------------------------------------------
/escaler/pkg/api/enovaalgo.go:
--------------------------------------------------------------------------------
1 | package api
2 |
3 | import (
4 | "fmt"
5 | "sync"
6 |
7 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
8 | )
9 |
10 | var enovaAlgoInitOnce sync.Once
11 |
12 | type enovaAlgoClient struct {
13 | ConfigRecommend HttpApi[EnvoaResponse]
14 | AnomalyDetect HttpApi[EnvoaResponse]
15 | AnomalyRecover HttpApi[EnvoaResponse]
16 | }
17 |
18 | type ConfigRecommendRequest struct {
19 | Llm struct {
20 | Framework string `json:"framework"`
21 | Param float32 `json:"param"`
22 | } `json:"llm"`
23 | Gpu struct {
24 | Name string `json:"name"`
25 | Spec int `json:"spec"`
26 | Num int `json:"num"`
27 | } `json:"gpu"`
28 | }
29 |
30 | type ConfigRecommendResult struct {
31 | MaxNumSeqs int `json:"max_num_seqs"`
32 | TensorParallelSize int `json:"tensor_parallel_size"`
33 | GpuMemoryUtilization float32 `json:"gpu_memory_utilization"`
34 | Replicas int `json:"replicas"`
35 | }
36 |
37 | type Llm struct {
38 | Framework string `json:"framework"`
39 | Param float32 `json:"param"`
40 | }
41 |
42 | type Gpu struct {
43 | Name string `json:"name"`
44 | Spec int `json:"spec"`
45 | Num int `json:"num"`
46 | }
47 |
48 | type MetricValue [2]float64
49 |
50 | type Metrics struct {
51 | ActiveRequests []MetricValue `json:"active_requests"`
52 | RunningRequests []MetricValue `json:"running_requests"`
53 | PendingRequests []MetricValue `json:"pending_requests"`
54 | GPUKVCacheUsage []MetricValue `json:"gpu_kv_cache_usage"`
55 | ServerNewRequests []MetricValue `json:"server_new_requests"`
56 | ServerSuccessRequests []MetricValue `json:"server_success_requests"`
57 | }
58 |
59 | type Configurations struct {
60 | MaxNumSeqs int `json:"max_num_seqs"`
61 | TensorParallelSize int `json:"tensor_parallel_size"`
62 | GPUMemoryUtilization float32 `json:"gpu_memory_utilization"`
63 | Replicas int `json:"replicas"`
64 | }
65 |
66 | type AnomalyRecoverRequest struct {
67 | Metrics []Metrics `json:"metrics"`
68 | Configurations Configurations `json:"configurations"`
69 | Llm Llm `json:"llm"`
70 | Gpu Gpu `json:"gpu"`
71 | }
72 |
73 | type AnomalyDetectRequest struct {
74 | Metrics []Metrics `json:"metrics"`
75 | Configurations Configurations `json:"configurations"`
76 | }
77 |
78 | type AnomalyDetectResponse struct {
79 | IsAnomaly int `json:"is_anomaly"`
80 | }
81 |
82 | var EnovaAlgoClient *enovaAlgoClient
83 |
84 | func GetEnovaAlgoClient() *enovaAlgoClient {
85 | enovaAlgoInitOnce.Do(func() {
86 | EnovaAlgoClient = &enovaAlgoClient{
87 | ConfigRecommend: HttpApi[EnvoaResponse]{
88 | Method: "POST",
89 | Url: fmt.Sprintf("http://%s/api/enovaalgo/v1/config_recommend", config.GetEConfig().EnovaAlgo.Host),
90 | HeaderBuilder: &EmptyHeaderBuilder{},
91 | },
92 | AnomalyDetect: HttpApi[EnvoaResponse]{
93 | Method: "POST",
94 | Url: fmt.Sprintf("http://%s/api/enovaalgo/v1/anomaly_detect", config.GetEConfig().EnovaAlgo.Host),
95 | HeaderBuilder: &EmptyHeaderBuilder{},
96 | },
97 | AnomalyRecover: HttpApi[EnvoaResponse]{
98 | Method: "POST",
99 | Url: fmt.Sprintf("http://%s/api/enovaalgo/v1/anomaly_recover", config.GetEConfig().EnovaAlgo.Host),
100 | HeaderBuilder: &EmptyHeaderBuilder{},
101 | },
102 | }
103 | })
104 | return EnovaAlgoClient
105 | }
106 |
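A hedged Python sketch of the payload ConfigRecommendRequest describes, built from its JSON tags; the host "enova-algo:8181" only stands in for the configured EnovaAlgo.Host value, and the field semantics are assumptions:

    import requests

    payload = {
        "llm": {"framework": "vllm", "param": 7.0},    # param: model size in billions (assumed)
        "gpu": {"name": "A100", "spec": 80, "num": 1},
    }
    # mirrors the ConfigRecommend HttpApi endpoint defined above
    resp = requests.post("http://enova-algo:8181/api/enovaalgo/v1/config_recommend", json=payload)
    print(resp.json())  # expected to carry max_num_seqs, tensor_parallel_size, ...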
--------------------------------------------------------------------------------
/enova/entry/command/serving.py:
--------------------------------------------------------------------------------
2 | import sys
3 | import click
4 |
5 | from enova.common.cli_helper import ArgumentHelper, parse_extra_args
6 | from enova.common.config import CONFIG
7 | from enova.entry.command.webui import Webui
8 | from enova.serving.apiserver import EApiServer
9 |
10 |
11 | class ServingHandler:
12 | """
13 | serving handler
14 | """
15 |
16 | def __init__(self, host, port, model, backend):
17 | self.host = host
18 | self.port = port
19 | self.model = model
20 | self.apiserver = EApiServer(host, port, self.model, backend)
21 |
22 | def start(self, **kwargs):
23 | self.apiserver.local_run(**kwargs)
24 |
25 | def stop(self, *args):
26 | """"""
27 |
28 |
29 | class EnovaServing:
30 | def run(
31 | self,
32 | model,
33 | host=CONFIG.serving["host"],
34 | port=CONFIG.serving["port"],
35 | backend=CONFIG.serving["backend"],
36 | exporter_endpoint=CONFIG.llmo["eai_exporter_endpoint"],
37 | exporter_service_name=CONFIG.llmo["eai_exporter_service_name"],
38 | include_webui=True,
39 | hf_proxy=None,
40 | **kwargs,
41 | ):
42 | args_helper = ArgumentHelper(self, sys._getframe())
43 | CONFIG.update_config(args_helper.args_map)
44 |
45 | from enova.llmo import start as llmo_start
46 |
47 | CONFIG.update_config({backend: kwargs})
48 | CONFIG.print_config()
49 | if kwargs.get("llmo"):
50 | llmo_start(otlp_exporter_endpoint=exporter_endpoint, service_name=exporter_service_name)
51 | if include_webui:
52 | Webui().run(daemon=False)
53 | from vllm.entrypoints.cli.main import main
54 |
55 | sys.argv = ["vllm", "serve"] + sys.argv[3:]
56 | sys.exit(main())
57 |
58 |
59 | pass_enova_serving = click.make_pass_decorator(EnovaServing)
60 |
61 |
62 | @click.group(name="serving")
63 | @click.pass_context
64 | def serving_cli(ctx):
65 | """
66 | Deploy the target LLM and launch the LLM API service.
67 | """
68 | ctx.obj = EnovaServing()
69 |
70 |
71 | @serving_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"])
72 | @click.option("--model", type=str)
73 | @click.option("--host", type=str, default=CONFIG.serving["host"])
74 | @click.option("--port", type=int, default=CONFIG.serving["port"])
75 | @click.option("--backend", type=str, default=CONFIG.serving["backend"])
76 | @click.option(
77 | "--exporter-endpoint",
78 | "--exporter_endpoint",
79 | "exporter_endpoint",
80 | type=str,
81 | default=CONFIG.llmo["eai_exporter_endpoint"],
82 | )
83 | @click.option(
84 | "--exporter-service-name",
85 | "--exporter_service_name",
86 | "exporter_service_name",
87 | type=str,
88 | default=CONFIG.llmo["eai_exporter_service_name"],
89 | )
90 | @click.option("--include-webui", "--include_webui", "include_webui", type=bool, default=True)
91 | @click.option("--hf-proxy", "--hf_proxy", "hf_proxy", type=str, default=None)
92 | @pass_enova_serving
93 | @click.pass_context
94 | def serving_run(
95 | ctx,
96 | enova_serving,
97 | model,
98 | host,
99 | port,
100 | backend,
101 | exporter_endpoint,
102 | exporter_service_name,
103 | include_webui,
104 | hf_proxy,
105 | ):
106 | enova_serving.run(
107 | model=model,
108 | host=host,
109 | port=port,
110 | backend=backend,
111 | exporter_endpoint=exporter_endpoint,
112 | exporter_service_name=exporter_service_name,
113 | include_webui=include_webui,
114 | hf_proxy=hf_proxy,
115 | **parse_extra_args(ctx),
116 | )
117 |
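A hedged sketch of driving the command in-process with click's test runner; the model name is a placeholder, and note that a successful parse proceeds to actually launch the backend:

    from click.testing import CliRunner

    from enova.entry.command.serving import serving_cli

    runner = CliRunner()
    result = runner.invoke(serving_cli, ["run", "--model", "facebook/opt-125m", "--port", "9199"])
    print(result.exit_code, result.output)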
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/metrics_adapter/vllm_logging_metrics.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import re
3 | import threading
4 | import time
5 | from typing import Iterable
6 |
7 | from opentelemetry import metrics
8 | from opentelemetry.metrics import CallbackOptions, Observation
9 |
10 | meter = metrics.get_meter(__name__)
11 |
12 | metric_info = {
13 | "avg_prompt_throughput": {"value": 0.0, "unit": "tokens/s", "last_update": time.time()},
14 | "avg_generation_throughput": {"value": 0.0, "unit": "tokens/s", "last_update": time.time()},
15 | "running_requests": {"value": 0.0, "unit": "requests", "last_update": time.time()},
16 | "swapped_requests": {"value": 0.0, "unit": "requests", "last_update": time.time()},
17 | "pending_requests": {"value": 0.0, "unit": "requests", "last_update": time.time()},
18 | "gpu_kv_cache_usage": {"value": 0.0, "unit": "%", "last_update": time.time()},
19 | "cpu_kv_cache_usage": {"value": 0.0, "unit": "%", "last_update": time.time()},
20 | }
21 |
22 | timeout_seconds = 15
23 |
24 | for metric_name, info in metric_info.items():
25 | def create_scrape_metric_callback(metric_name):
26 | def scrape_metric_callback(options: CallbackOptions) -> Iterable[Observation]:
27 | value = metric_info[metric_name]["value"]
28 | yield Observation(value, attributes={})
29 |
30 | return scrape_metric_callback
31 |
32 | callback = create_scrape_metric_callback(metric_name)
33 | unit = info["unit"]
34 |
35 | meter.create_observable_gauge(
36 | name=metric_name,
37 | callbacks=[callback],
38 | description=f"The value of {metric_name}",
39 | unit=unit
40 | )
41 |
42 |
43 | def update_metric(name, value, current_time):
44 | metric_info[name]["value"] = value
45 | metric_info[name]["last_update"] = current_time
46 |
47 |
48 | class VLLMLogMetricsAdapter(logging.Handler):
49 | def __init__(self):
50 | super().__init__()
51 | self.pattern = re.compile(
52 | r".*?"
53 | r"Avg prompt throughput: (?P\d+\.\d+) tokens/s, "
54 | r"Avg generation throughput: (?P\d+\.\d+) tokens/s, "
55 | r"Running: (?P\d+) reqs, "
56 | r"Swapped: (?P\d+) reqs, "
57 | r"Pending: (?P\d+) reqs, "
58 | r"GPU KV cache usage: (?P\d+\.\d+)%, "
59 | r"CPU KV cache usage: (?P\d+\.\d+)%"
60 | )
61 |
62 | def emit(self, record):
63 | log_message = record.getMessage()
64 | match = self.pattern.search(log_message)
65 | if match:
66 | current_time = time.time()
67 | update_metric("avg_prompt_throughput", float(match.group("avg_prompt")), current_time)
68 | update_metric("avg_generation_throughput", float(match.group("avg_gen")), current_time)
69 | update_metric("running_requests", float(match.group("running")), current_time)
70 | update_metric("swapped_requests", float(match.group("swapped")), current_time)
71 | update_metric("pending_requests", float(match.group("pending")), current_time)
72 | update_metric("gpu_kv_cache_usage", float(match.group("gpu_cache")), current_time)
73 | update_metric("cpu_kv_cache_usage", float(match.group("cpu_cache")), current_time)
74 |
75 |
76 | def update_metrics_periodically():
77 | while True:
78 | for metric_name, info in metric_info.items():
79 | current_time = time.time()
80 | if current_time - info["last_update"] > timeout_seconds:
81 | metric_info[metric_name]["value"] = 0.0 # Reset the value if the data is stale
82 | time.sleep(5) # Update every 5 seconds
83 |
84 |
85 | # Start the background thread to update metrics periodically
86 | threading.Thread(target=update_metrics_periodically, daemon=True).start()
87 |
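A wiring sketch: attach the handler so vLLM's periodic stats line feeds the gauges above. That the stats are emitted under the "vllm" logger name is an assumption about vLLM's logging setup:

    import logging

    # route vLLM's log records through the regex-scraping handler
    logging.getLogger("vllm").addHandler(VLLMLogMetricsAdapter())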
--------------------------------------------------------------------------------
/enova/webui/chat.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import requests
4 | import streamlit as st
5 | from openai import OpenAI, InternalServerError
6 |
7 | st.title('🤖ENOVA AI WebUI')
8 |
9 | MAX_TURNS = 20
10 | MAX_BOXES = MAX_TURNS * 2
11 |
12 | vllm_mode = os.getenv("VLLM_MODE", "openai")
13 | serving_url = os.getenv("SERVING_URL", "http://127.0.0.1:9199")
14 | openai_api_base = serving_url + "/v1"
15 | openai_api_key = "xxx"
16 |
17 | client = None
18 | model = None
19 | if vllm_mode == "openai":
20 | try:
21 | client = OpenAI(
22 | api_key=openai_api_key,
23 | base_url=openai_api_base,
24 | )
25 | models = client.models.list()
26 | model = models.data[0].id
27 |
28 | except InternalServerError:
29 | st.warning("Server not ready. Please wait a moment and refresh the page.")
30 |
31 | except Exception as e:
32 | st.error(f"An unexpected error occurred: {e}")
33 | st.info("Please check the server status and try again.")
34 |
35 | system_prompt = st.sidebar.text_area(
36 | label="System Prompt",
37 | value="You are a helpful AI assistant who answers questions in short sentences."
38 | )
39 |
40 | max_tokens = st.sidebar.slider('max_tokens', 0, 4096, 2048, step=1)
41 | temperature = st.sidebar.slider('temperature', 0.0, 1.0, 0.1, step=0.01)
42 | top_p = st.sidebar.slider('top_p', 0.0, 1.0, 0.5, step=0.01) if vllm_mode == "normal" else None
43 |
44 |
45 | if 'messages' not in st.session_state:
46 | st.session_state.messages = []
47 |
48 | messages = st.session_state.messages
49 |
50 | for message in st.session_state.messages:
51 | with st.chat_message(message['role']):
52 | st.markdown(message['content'])
53 |
54 | if user_input := st.chat_input(''):
55 |
56 | with st.chat_message('user'):
57 | st.markdown(user_input)
58 | messages.append({'role': 'user', 'content': user_input})
59 |
60 | with st.chat_message('assistant'):
61 |
62 | if vllm_mode == "normal":
63 | placeholder = st.empty()
64 |
65 | response = requests.post(
66 | url=f"{serving_url}/generate",
67 | headers={'Content-type': 'application/json; charset=utf-8'},
68 | data=json.dumps({
69 | "prompt": user_input,
70 | "max_tokens": max_tokens,
71 | "top_p": top_p,
72 | "temperature": temperature,
73 | "stream": True
74 | }),
75 | stream=True
76 | )
77 |
78 | full_content = ''
79 | for line in response.iter_lines(delimiter=b'\00'):
80 | line = line.decode(encoding='utf-8')
81 | if line.strip() == '':
82 | continue
83 | response_json = json.loads(line)
84 | full_content = response_json['text'][0]
85 | placeholder.markdown(full_content)
86 |
87 | st.session_state.messages.append({'role': 'assistant', 'content': full_content})
88 |
89 | elif vllm_mode == "openai" and model:
90 | placeholder = st.empty()
91 | openai_messages = [
92 | {"role": message["role"], "content": message["content"]}
93 | for message in st.session_state.messages[-5:]
94 | ]
95 |
96 | chat_completion = client.chat.completions.create(
97 | messages=openai_messages,
98 | model=model,
99 | temperature=temperature,
100 | max_tokens=max_tokens,
101 | stream=True
102 | )
103 |
104 | full_content = ''
105 | for chunk in chat_completion:
106 | if chunk.choices[0].delta.content is not None:
107 | full_content += str(chunk.choices[0].delta.content)
108 | placeholder.markdown(full_content)
109 |
110 | st.session_state.messages.append({'role': 'assistant', 'content': full_content})
111 |
--------------------------------------------------------------------------------
/escaler/pkg/resource/k8s.go:
--------------------------------------------------------------------------------
1 | package resource
2 |
3 | import (
4 | "context"
5 |
6 | v1 "k8s.io/api/apps/v1"
7 | corev1 "k8s.io/api/core/v1"
8 |
9 | apierrors "k8s.io/apimachinery/pkg/api/errors"
10 |
11 | "k8s.io/client-go/dynamic"
12 | "k8s.io/client-go/kubernetes"
13 | "k8s.io/client-go/rest"
14 | "k8s.io/client-go/tools/clientcmd"
15 |
16 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
17 | "github.com/Emerging-AI/ENOVA/escaler/pkg/logger"
18 | "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
19 | "github.com/Emerging-AI/ENOVA/escaler/pkg/resource/k8s"
20 | )
21 |
22 | type K8sResourceClient struct {
23 | K8sCli *k8s.K8sCli
24 | }
25 |
26 | // DeployTask first check whether deployment existed or not
27 | // start task or scale task
28 | func (c *K8sResourceClient) DeployTask(spec meta.TaskSpec) {
29 | // use deployment to deploy
30 | workload := k8s.Workload{
31 | K8sCli: c.K8sCli,
32 | Spec: &spec,
33 | }
34 |
35 | workload.CreateOrUpdate()
36 | }
37 |
38 | func (c *K8sResourceClient) DeleteTask(spec meta.TaskSpec) {
39 | workload := k8s.Workload{
40 | K8sCli: c.K8sCli,
41 | Spec: &spec,
42 | }
43 | workload.Delete()
44 | }
45 |
46 | func (c *K8sResourceClient) IsTaskExist(spec meta.TaskSpec) bool {
47 | workload := k8s.Workload{
48 | K8sCli: c.K8sCli,
49 | Spec: &spec,
50 | }
51 | _, err := workload.GetDeployment()
52 | if err != nil {
53 | // not-found simply means the workload is absent; log anything else
54 | if !apierrors.IsNotFound(err) {
55 | logger.Errorf("K8sResourceClient get deployment error: %v", err)
56 | }
57 | return false
58 | }
59 |
60 | return true
61 | }
62 |
63 | func (c *K8sResourceClient) IsTaskRunning(spec meta.TaskSpec) bool {
64 | workload := k8s.Workload{
65 | K8sCli: c.K8sCli,
66 | Spec: &spec,
67 | }
68 | podList, err := workload.GetPodsList()
69 | if err != nil {
70 | logger.Errorf("K8sResourceClient IsTaskRunning error: %v", err)
71 | return false
72 | }
73 | if len(podList.Items) == 0 {
74 | return false
75 | }
76 | return podList.Items[0].Status.Phase == corev1.PodRunning
77 | }
78 |
79 | func (c *K8sResourceClient) GetRuntimeInfos(spec meta.TaskSpec) *meta.RuntimeInfo {
80 | workload := k8s.Workload{
81 | K8sCli: c.K8sCli,
82 | Spec: &spec,
83 | }
84 | ret := &meta.RuntimeInfo{Source: meta.K8sSource, Deployment: &v1.Deployment{}, PodList: &corev1.PodList{}}
85 | dp, err := workload.GetDeployment()
86 | if err != nil {
87 | if !apierrors.IsNotFound(err) {
88 | logger.Errorf("GetRuntimeInfos GetPodsList error: %v", err)
89 | }
90 | return ret
91 | }
92 | ret.Deployment = dp
93 | podList, err := workload.GetPodsList()
94 | if err != nil {
95 | if !apierrors.IsNotFound(err) {
96 | logger.Errorf("GetRuntimeInfos GetPodsList error: %v", err)
97 | }
98 | return ret
99 | }
100 | ret.PodList = podList
101 | return ret
102 | }
103 |
104 | func NewK8sClient() (*kubernetes.Clientset, error) {
105 | if config.GetEConfig().K8s.InCluster {
106 | conf, err := rest.InClusterConfig()
107 | if err != nil { return nil, err }
108 |
109 | return kubernetes.NewForConfig(conf)
110 | }
111 | conf, err := clientcmd.BuildConfigFromFlags("", config.GetEConfig().K8s.KubeConfigPath)
112 | if err != nil { return nil, err }
113 |
114 |
115 | return kubernetes.NewForConfig(conf)
116 | }
117 |
118 | func NewK8sDynamicClient() (*dynamic.DynamicClient, error) {
119 | if config.GetEConfig().K8s.InCluster {
120 | conf, err := rest.InClusterConfig()
121 | if err != nil { return nil, err }
122 |
123 | return dynamic.NewForConfig(conf)
124 | }
125 | conf, err := clientcmd.BuildConfigFromFlags("", config.GetEConfig().K8s.KubeConfigPath)
126 | if err != nil { return nil, err }
127 |
128 |
129 | return dynamic.NewForConfig(conf)
130 | }
131 |
132 | func NewK8sResourceClient() *K8sResourceClient {
133 | cli, err := NewK8sClient()
134 | if err != nil {
135 | panic(err)
136 | }
137 |
138 | dynamicCli, err := NewK8sDynamicClient()
139 | if err != nil {
140 | panic(err)
141 | }
142 |
143 | return &K8sResourceClient{
144 | K8sCli: &k8s.K8sCli{
145 | K8sClient: cli,
146 | DynamicClient: dynamicCli,
147 | Ctx: context.Background(),
148 | },
149 | }
150 | }
151 |
--------------------------------------------------------------------------------