├── .dockerignore
├── .gitattributes
├── .github
    └── assets
    │   ├── ENOVA.png
    │   ├── gpu_metrics.png
    │   ├── llm_instance.png
    │   ├── monitoring_metrics.png
    │   ├── request_inject.png
    │   ├── test_results.png
    │   ├── trace.png
    │   └── webui.png
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── MANIFEST.in
├── README.md
├── README_ZH.md
├── VERSION
├── docker
    ├── Dockerfile
    ├── Dockerfile.enova
    ├── Dockerfile.enova.base
    ├── Dockerfile.escaler
    ├── Dockerfile.jmeter
    ├── Dockerfile.requirements
    ├── build_image.enova.base.sh
    ├── build_image.enova.sh
    ├── build_image.escaler.sh
    └── build_image.jmeter.sh
├── enova
    ├── .gitignore
    ├── algo
    │   ├── __init__.py
    │   ├── resource.py
    │   ├── serializer.py
    │   ├── server.py
    │   └── service.py
    ├── api
    │   ├── __init__.py
    │   ├── app_api.py
    │   ├── base.py
    │   ├── escaler_api.py
    │   ├── prom_api.py
    │   └── serving_api.py
    ├── app
    │   ├── __init__.py
    │   ├── db_modles.py
    │   ├── resource.py
    │   ├── serializer.py
    │   ├── server.py
    │   ├── service.py
    │   └── utils.py
    ├── common
    │   ├── __init__.py
    │   ├── cli_helper.py
    │   ├── config.py
    │   ├── constant.py
    │   ├── encoder.py
    │   ├── error.py
    │   ├── g_vars.py
    │   ├── local.py
    │   ├── logger.py
    │   └── utils.py
    ├── database
    │   ├── __init__.py
    │   └── relation
    │   │   ├── __init__.py
    │   │   ├── orm
    │   │       ├── __init__.py
    │   │       └── base.py
    │   │   └── transaction
    │   │       ├── __init__.py
    │   │       └── session.py
    ├── entry
    │   ├── cli.py
    │   └── command
    │   │   ├── __init__.py
    │   │   ├── algo.py
    │   │   ├── app.py
    │   │   ├── injector.py
    │   │   ├── mon.py
    │   │   ├── pilot.py
    │   │   ├── serving.py
    │   │   └── webui.py
    ├── job
    │   ├── __init__.py
    │   └── job_manager.py
    ├── server
    │   ├── __init__.py
    │   ├── exception
    │   │   ├── __init__.py
    │   │   └── handler.py
    │   ├── middleware
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── response.py
    │   │   └── trace.py
    │   ├── restful
    │   │   ├── __init__.py
    │   │   ├── router.py
    │   │   ├── serializer.py
    │   │   └── service.py
    │   └── server.py
    ├── serving
    │   ├── __init__.py
    │   ├── apiserver.py
    │   ├── backend
    │   │   ├── base.py
    │   │   ├── hf
    │   │   │   ├── __init__.py
    │   │   │   ├── handler.py
    │   │   │   └── hf.py
    │   │   ├── injector.py
    │   │   ├── sglang.py
    │   │   ├── transformers.py
    │   │   ├── utils.py
    │   │   └── vllm.py
    │   └── middlewares
    │   │   ├── auth.py
    │   │   ├── base.py
    │   │   └── cors.py
    ├── template
    │   └── deployment
    │   │   └── docker-compose
    │   │       ├── .gitignore
    │   │       ├── compose-build.yaml
    │   │       ├── compose-dev.yaml
    │   │       ├── compose.yaml
    │   │       ├── dcgm-exporter
    │   │           ├── dcp-metrics-included.csv
    │   │           └── default-counters.csv
    │   │       ├── escaler
    │   │           └── conf
    │   │           │   └── settings.json
    │   │       ├── grafana
    │   │           ├── grafana_dashboards
    │   │           │   ├── enova-dashboard.json
    │   │           │   └── enova-dcgm-metrics.json
    │   │           └── grafana_provisioning
    │   │           │   ├── dashboards
    │   │           │       └── enova-dashboards.yaml
    │   │           │   └── datasources
    │   │           │       └── enova-datasource.yaml
    │   │       ├── haproxy
    │   │           └── haproxy.cfg
    │   │       ├── nginx
    │   │           └── nginx.conf
    │   │       ├── otel-collector
    │   │           └── collector-config.yaml
    │   │       ├── prometheus
    │   │           └── prometheus.yml
    │   │       ├── tempo
    │   │           └── tempo.yaml
    │   │       ├── traffic-injector
    │   │           ├── compose.yaml
    │   │           ├── data.csv
    │   │           ├── data
    │   │           │   ├── arc.csv
    │   │           │   ├── gsm8k.csv
    │   │           │   ├── mbpp.csv
    │   │           │   └── mc_test.csv
    │   │           ├── jmeter-config-template.xml
    │   │           └── jmeter.Dockerfile
    │   │       ├── webui-nginx
    │   │           └── nginx.conf
    │   │       └── webui
    │   │           └── webui.yaml
    └── webui
    │   ├── __init__.py
    │   └── chat.py
├── escaler
    ├── build.sh
    ├── cmd
    │   └── escaler
    │   │   ├── docs
    │   │       ├── docs.go
    │   │       ├── swagger.json
    │   │       └── swagger.yaml
    │   │   ├── main.go
    │   │   ├── main_test.go
    │   │   └── mock_enovaalgo.go
    ├── conf
    │   └── settings.json
    ├── go.mod
    ├── go.sum
    ├── pkg
    │   ├── api
    │   │   ├── api.go
    │   │   ├── enovaalgo.go
    │   │   ├── prom.go
    │   │   └── types.go
    │   ├── config
    │   │   └── config.go
    │   ├── detector
    │   │   ├── detector.go
    │   │   ├── performance.go
    │   │   └── server.go
    │   ├── httpserver
    │   │   ├── middleware
    │   │   │   ├── logger.go
    │   │   │   ├── response.go
    │   │   │   └── trace.go
    │   │   ├── server
    │   │   │   ├── router.go
    │   │   │   └── server.go
    │   │   └── utils
    │   │   │   └── utils.go
    │   ├── logger
    │   │   └── logger.go
    │   ├── meta
    │   │   ├── meta.go
    │   │   └── task.go
    │   ├── queue
    │   │   └── queue.go
    │   ├── redis
    │   │   └── redis.go
    │   ├── resource
    │   │   ├── clients.go
    │   │   ├── docker.go
    │   │   ├── docker
    │   │   │   └── docker.go
    │   │   ├── k8s.go
    │   │   ├── k8s
    │   │   │   └── k8s.go
    │   │   └── utils
    │   │   │   └── cmd.go
    │   ├── scaler
    │   │   └── scaler.go
    │   ├── utils
    │   │   ├── cache.go
    │   │   └── utils.go
    │   └── zmq
    │   │   └── zmq.go
    └── scripts
    │   ├── build_swagger.sh
    │   ├── generate_mock_files.sh
    │   ├── generate_ot_clientset.sh
    │   └── local_docker_run.sh
├── front
    ├── .dockerignore
    ├── .env.development
    ├── .env.production
    ├── .eslintrc.cjs
    ├── .gitignore
    ├── .prettierrc.json
    ├── README.md
    ├── auto-imports.d.ts
    ├── components.d.ts
    ├── env.d.ts
    ├── index.html
    ├── package-lock.json
    ├── package.json
    ├── postcss.config.js
    ├── public
    │   └── favicon.ico
    ├── src
    │   ├── App.vue
    │   ├── api
    │   │   └── instance.ts
    │   ├── assets
    │   │   ├── empty.png
    │   │   ├── filter.png
    │   │   ├── logo
    │   │   │   ├── emergingai_b.png
    │   │   │   └── emergingai_w.png
    │   │   └── svg
    │   │   │   ├── auto.svg
    │   │   │   ├── autoRefresh.svg
    │   │   │   ├── cross.svg
    │   │   │   ├── docker.svg
    │   │   │   ├── earth.svg
    │   │   │   ├── home.svg
    │   │   │   ├── info.svg
    │   │   │   ├── log.svg
    │   │   │   ├── setup.svg
    │   │   │   ├── toggle.svg
    │   │   │   └── user.svg
    │   ├── components
    │   │   ├── Drawer.vue
    │   │   ├── Language.vue
    │   │   ├── Pagination.vue
    │   │   ├── SearchInput.vue
    │   │   ├── SummaryTip.vue
    │   │   ├── SvgIcon.vue
    │   │   ├── TimeRangePicker.vue
    │   │   ├── chart
    │   │   │   ├── LineChart.vue
    │   │   │   └── ToolBar.vue
    │   │   ├── experiment
    │   │   │   ├── TestDetail.vue
    │   │   │   └── TestInfo.vue
    │   │   └── instance
    │   │   │   ├── BaseInfo.vue
    │   │   │   ├── GpuInfo.vue
    │   │   │   ├── InstanceDetail.vue
    │   │   │   └── TestConfig.vue
    │   ├── hooks
    │   │   └── useInitQueryRange.ts
    │   ├── layout
    │   │   ├── header
    │   │   │   └── index.vue
    │   │   ├── index.vue
    │   │   └── sidebar
    │   │   │   └── index.vue
    │   ├── locales
    │   │   ├── index.ts
    │   │   └── lang
    │   │   │   ├── en.ts
    │   │   │   └── zh.ts
    │   ├── main.ts
    │   ├── router
    │   │   └── index.ts
    │   ├── stores
    │   │   ├── app.ts
    │   │   ├── config.ts
    │   │   ├── experiment.ts
    │   │   └── instance.ts
    │   ├── styles
    │   │   ├── element-ui.scss
    │   │   ├── element
    │   │   │   └── index.scss
    │   │   ├── index.css
    │   │   └── index.scss
    │   ├── utils
    │   │   └── request.ts
    │   └── views
    │   │   ├── Instance.vue
    │   │   └── TestRecord.vue
    ├── tailwind.config.js
    ├── tsconfig.app.json
    ├── tsconfig.json
    ├── tsconfig.node.json
    └── vite.config.ts
├── llmo
    └── enova-instrumentation-llmo
    │   ├── README.md
    │   ├── enova
    │       └── llmo
    │       │   ├── __init__.py
    │       │   ├── instrumentation
    │       │       ├── __init__.py
    │       │       ├── fastapi
    │       │       │   └── __init__.py
    │       │       └── vllm
    │       │       │   ├── __init__.py
    │       │       │   └── wrappers.py
    │       │   └── metrics_adapter
    │       │       ├── __init__.py
    │       │       └── vllm_logging_metrics.py
    │   └── pyproject.toml
├── pyproject.toml
├── requirements-docker-no-deps.txt
├── requirements-docker.txt
├── requirements.txt
├── scripts
    ├── pack_whl.enova.sh
    └── pack_whl.llmo.sh
└── tests
    └── enova
        ├── conftest.py
        ├── test_eapp.py
        └── test_requirements.txt


/.dockerignore:
--------------------------------------------------------------------------------
 1 | .vscode
 2 | build
 3 | dist
 4 | enova.egg-info
 5 | *.log
 6 | .gitignore
 7 | var
 8 | .pre-commit-config.yaml
 9 | tests
10 | front/node_modules
11 | front/package-lock.json
12 | front/yarn.lock


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *whl filter=lfs diff=lfs merge=lfs -text
2 | docker-compose-* filter=lfs diff=lfs merge=lfs -text
3 | *tgz filter=lfs diff=lfs merge=lfs -text
4 | *tar.gz filter=lfs diff=lfs merge=lfs -text
5 | 


--------------------------------------------------------------------------------
/.github/assets/ENOVA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/ENOVA.png


--------------------------------------------------------------------------------
/.github/assets/gpu_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/gpu_metrics.png


--------------------------------------------------------------------------------
/.github/assets/llm_instance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/llm_instance.png


--------------------------------------------------------------------------------
/.github/assets/monitoring_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/monitoring_metrics.png


--------------------------------------------------------------------------------
/.github/assets/request_inject.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/request_inject.png


--------------------------------------------------------------------------------
/.github/assets/test_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/test_results.png


--------------------------------------------------------------------------------
/.github/assets/trace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/trace.png


--------------------------------------------------------------------------------
/.github/assets/webui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/webui.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | !front/src/components/instance
 62 | .webassets-cache
 63 | 
 64 | # Scrapy stuff:
 65 | .scrapy
 66 | 
 67 | # Sphinx documentation
 68 | docs/_build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # celery beat schedule file
 80 | celerybeat-schedule
 81 | 
 82 | # SageMath parsed files
 83 | *.sage.py
 84 | 
 85 | # Environments
 86 | .env
 87 | .venv
 88 | env/
 89 | venv/
 90 | ENV/
 91 | env.bak/
 92 | venv.bak/
 93 | 
 94 | # Spyder project settings
 95 | .spyderproject
 96 | .spyproject
 97 | 
 98 | # Rope project settings
 99 | .ropeproject
100 | 
101 | # mkdocs documentation
102 | /site
103 | 
104 | # mypy
105 | .mypy_cache/
106 | .vscode
107 | .idea
108 | .history
109 | 
110 | # macos
111 | .DS_Store
112 | src/golang/bin
113 | src/golang/pkg/mod
114 | src/golang/pkg/sumdb
115 | src/golang/dist
116 | 
117 | nohup*
118 | 
119 | # dependencies
120 | dependencies/
121 | enova/template/deployment/docker-compose/bin/
122 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://github.com/pycqa/flake8
 3 |   rev: 3.9.2
 4 |   hooks:
 5 |     - id: black
 6 |       args:
 7 |       - --max-line-length=150
 8 |       - --max-complexity=60
 9 | 
10 | repos:
11 | - repo: https://github.com/psf/black
12 |   rev: stable  # Use the specific revision or tag you want to pin to
13 |   hooks:
14 |     - id: black
15 |       args:
16 |       - --line-length=150
17 | 
18 | - repo: https://github.com/pre-commit/pre-commit-hooks
19 |   rev: v4.0.1
20 |   hooks:
21 |     - id: check-merge-conflict
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include enova/web_statics/static *
2 | 


--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 0.0.8


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | # FROM nvcr.io/nvidia/pytorch:24.03-py3
 2 | FROM mergingai/enova:base
 3 | 
 4 | RUN apt update && apt install net-tools -y
 5 | 
 6 | COPY ./dist/enova-0.0.8-py3-none-any.whl /tmp/
 7 | COPY ./llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-0.0.8-py3-none-any.whl /tmp/
 8 | 
 9 | RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
10 |     pip uninstall enova enova-instrumentation-llmo -y && \
11 |     pip install --no-cache-dir /tmp/enova_instrumentation_llmo-0.0.8-py3-none-any.whl && \
12 |     pip install --no-cache-dir /tmp/enova-0.0.8-py3-none-any.whl
13 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.enova:
--------------------------------------------------------------------------------
 1 | FROM emergingai/enova:base
 2 | 
 3 | COPY ./llmo /opt/enova/llmo
 4 | 
 5 | COPY ./scripts /opt/enova/scripts
 6 | 
 7 | RUN bash /opt/enova/scripts/pack_whl.llmo.sh 
 8 | 
 9 | ARG LLMO_VERSION=0.0.8
10 | RUN pip install /opt/enova/llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-${LLMO_VERSION}-py3-none-any.whl --no-deps --no-cache-dir
11 | 
12 | ARG CACHEBUST=1
13 | 
14 | COPY . /opt/enova
15 | 
16 | RUN cd /opt/enova && bash ./scripts/pack_whl.enova.sh
17 | ARG ENOVA_VERSION=0.0.8
18 | 
19 | RUN pip install -r /opt/enova/requirements.txt --no-deps  --no-cache-dir && \
20 |     pip install /opt/enova/dist/enova-${ENOVA_VERSION}-py3-none-any.whl --no-deps --no-cache-dir && \
21 |     pip uninstall -y transformer-engine && mkdir -p /workspace/model
22 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.enova.base:
--------------------------------------------------------------------------------
 1 | # image enova:base
 2 | FROM nvcr.io/nvidia/pytorch:24.07-py3
 3 | 
 4 | RUN apt update && apt install -y \
 5 |     net-tools \
 6 |     ocl-icd-libopencl1 \
 7 |     opencl-headers \
 8 |     clinfo
 9 | 
10 | RUN mkdir -p /etc/OpenCL/vendors && \
11 |     echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd && \
12 |     mkdir -p /opt/enova
13 | 
14 | COPY ./requirements-docker.txt /opt/enova/requirements.txt
15 | COPY ./requirements-docker-no-deps.txt /opt/enova/requirements-docker-no-deps.txt
16 | 
17 | # RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
18 | RUN pip install build --no-cache-dir && \
19 |     pip install pip setuptools setuptools_scm[toml]==7.1.0 toml poetry && \
20 |     pip install -r /opt/enova/requirements.txt --no-cache-dir && \
21 |     pip install flashinfer -i https://flashinfer.ai/whl/cu124/torch2.4 --no-deps --no-cache-dir && \
22 |     pip install -r /opt/enova/requirements-docker-no-deps.txt --no-deps --no-cache-dir
23 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.escaler:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | 
 3 | RUN apt update && apt install build-essential redis libzmq3-dev ca-certificates pkg-config net-tools iputils-ping -y 
 4 | COPY dependencies/go1.22.2.linux-amd64.tar.gz /tmp/go1.22.2.linux-amd64.tar.gz
 5 | RUN cd /tmp && tar -xf go1.22.2.linux-amd64.tar.gz && cp -r go /usr/local/go
 6 | ENV PATH=/usr/local/go/bin:$PATH
 7 | ENV GO111MODULE="on"
 8 | ENV APK_REP="mirrors.ustc.edu.cn"
 9 | 
10 | #ENV GOPROXY="https://goproxy.io,direct"
11 | #ENV GOPROXY=https://proxy.golang.org,direct
12 | ENV GOPROXY=https://goproxy.cn,direct
13 | #ENV GOPROXY=https://mirrors.aliyun.com/goproxy/,direct
14 | #ENV GOCACHE=/go-cache
15 | 
16 | # create and set cache directory permissions
17 | RUN mkdir /go-cache && chmod -R 777 /go-cache
18 | 
19 | WORKDIR /app
20 | 
21 | COPY escaler .
22 | 
23 | # copy go module file to workdir
24 | COPY escaler/go.mod escaler/go.sum ./
25 | 
26 | # download dependencies on go module 
27 | RUN go mod download
28 | 
29 | # download swagger toolset
30 | RUN go install github.com/swaggo/swag/cmd/swag@latest
31 | 
32 | # compile and install
33 | RUN go env \
34 |     && CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o escaler cmd/escaler/main.go && \
35 |     cp escaler /usr/local/bin/escaler
36 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.jmeter:
--------------------------------------------------------------------------------
 1 | FROM centos:centos7
 2 | 
 3 | WORKDIR /data
 4 | 
 5 | # TODO: add jdk and jmeter form url
 6 | ADD ./docker/jdk-8u401-linux-x64.tar.gz /usr/local/
 7 | ADD ./docker/apache-jmeter-5.6.3.tgz /opt/
 8 | 
 9 | RUN mv /usr/local/jdk1.8.0_401 /usr/local/jdk && \
10 |     mv /opt/apache-jmeter-5.6.3 /opt/apache-jmeter
11 | 
12 | ENV JAVA_HOME=/usr/local/jdk \
13 |     PATH=/usr/local/jdk/bin:/opt/apache-jmeter/bin:$PATH
14 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.requirements:
--------------------------------------------------------------------------------
 1 | ARG HARBOR_PATH=emergingai
 2 | 
 3 | FROM ${HARBOR_PATH}/python:base
 4 | 
 5 | RUN apt-get install -y \
 6 |     ocl-icd-libopencl1 \
 7 |     opencl-headers \
 8 |     clinfo
 9 | 
10 | RUN mkdir -p /etc/OpenCL/vendors && \
11 |     echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
12 | 
13 | COPY ./dist/enova-0.0.8-py3-none-any.whl .
14 | COPY ./llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-0.0.8-py3-none-any.whl .
15 | 
16 | RUN pip install enova_instrumentation_llmo-0.0.8-py3-none-any.whl enova-0.0.8-py3-none-any.whl 
17 | 
18 | RUN pip install vllm


--------------------------------------------------------------------------------
/docker/build_image.enova.base.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | echo "Runing build image enova:base using ${PWD}"
 5 | 
 6 | SCRIPT=$(realpath "$0")
 7 | BASEDIR=$(dirname "$SCRIPT")
 8 | BASEDIR=$(dirname "$BASEDIR")
 9 | 
10 | 
11 | export HARBOR_PATH=emergingai
12 | 
13 | # build enova
14 | cd $BASEDIR
15 | docker build -f $BASEDIR/docker/Dockerfile.enova.base -t $HARBOR_PATH/enova:base --build-arg HARBOR_PATH="$HARBOR_PATH" $BASEDIR
16 | 


--------------------------------------------------------------------------------
/docker/build_image.enova.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | IMAGE_VERSION=v`cat VERSION`
 5 | ENOVA_VERSION=`cat VERSION`
 6 | LLMO_VERSION="0.0.8"
 7 | 
 8 | echo "Runing build image enova:${IMAGE_VERSION} using ${PWD}"
 9 | 
10 | SCRIPT=$(realpath "$0")
11 | BASEDIR=$(dirname "$SCRIPT")
12 | BASEDIR=$(dirname "$BASEDIR")
13 | echo "BASEDIR: " ${BASEDIR}
14 | 
15 | # build front
16 | cd $BASEDIR/front
17 | rm $BASEDIR/enova/web_statics -rf
18 | npm install
19 | npm run build
20 | # yarn
21 | # yarn build
22 | 
23 | echo $BASEDIR/front/dist $BASEDIR/enova/web_statics
24 | mv $BASEDIR/front/dist $BASEDIR/enova/web_statics
25 | 
26 | export HARBOR_PATH=emergingai
27 | 
28 | # build enova
29 | cd $BASEDIR
30 | docker build -f $BASEDIR/docker/Dockerfile.enova -t $HARBOR_PATH/enova:$IMAGE_VERSION \
31 |     --build-arg ENOVA_VERSION="${ENOVA_VERSION}" \
32 |     --build-arg LLMO_VERSION="${LLMO_VERSION}" \
33 |     --build-arg HARBOR_PATH="$HARBOR_PATH" \
34 |     --build-arg CACHEBUST=$(date +%s) \
35 |     $BASEDIR
36 | 


--------------------------------------------------------------------------------
/docker/build_image.escaler.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | IMAGE_VERSION=v`cat VERSION`
 5 | 
 6 | echo "Runing build image enova:${IMAGE_VERSION} using ${PWD}"
 7 | 
 8 | SCRIPT=$(realpath "$0")
 9 | BASEDIR=$(dirname "$SCRIPT")
10 | BASEDIR=$(dirname "$BASEDIR")
11 | echo "BASEDIR: " ${BASEDIR}
12 | 
13 | 
14 | export MIRROR_PATH=emergingai
15 | 
16 | # check golang tar.gz
17 | GOLANG_TAR=dependencies/go1.22.2.linux-amd64.tar.gz
18 | DOWNLOAD_URL=https://go.dev/dl/go1.22.2.linux-amd64.tar.gz
19 | 
20 | if [ ! -f "$GOLANG_TAR" ]; then
21 |     mkdir -p dependencies
22 | 
23 |     echo "golang tar $GOLANG_TAR is not existed, start to download..."
24 |     cd dependencies
25 |     wget "$DOWNLOAD_URL"
26 |     cd ../
27 |     if [ $? -eq 0 ]; then
28 |         echo "download sucessfully"
29 |     else
30 |         echo "failed to download"
31 |     fi
32 | fi
33 | 
34 | # build enova
35 | cd $BASEDIR
36 | docker build -f $BASEDIR/docker/Dockerfile.escaler -t $MIRROR_PATH/enova-escaler:$IMAGE_VERSION $BASEDIR
37 | 


--------------------------------------------------------------------------------
/docker/build_image.jmeter.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | IMAGE_VERSION=v`cat VERSION`
 5 | 
 6 | echo "Runing build image enova-jmoter:${IMAGE_VERSION} using ${PWD}"
 7 | 
 8 | SCRIPT=$(realpath "$0")
 9 | BASEDIR=$(dirname "$SCRIPT")
10 | BASEDIR=$(dirname "$BASEDIR")
11 | echo "BASEDIR: " ${BASEDIR}
12 | 
13 | 
14 | export HARBOR_PATH=emergingai
15 | 
16 | # build enova
17 | cd $BASEDIR
18 | docker build -f $BASEDIR/docker/Dockerfile.jmeter -t $HARBOR_PATH/enova-jmeter:$IMAGE_VERSION  $BASEDIR
19 | 


--------------------------------------------------------------------------------
/enova/.gitignore:
--------------------------------------------------------------------------------
1 | web_statics/*
2 | !web_statics/.gitkeep


--------------------------------------------------------------------------------
/enova/algo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/algo/__init__.py


--------------------------------------------------------------------------------
/enova/algo/resource.py:
--------------------------------------------------------------------------------
 1 | from enova.server.restful.router import BaseResource
 2 | from enova.algo.serializer import (
 3 |     ConfigRecommendRequestSLZ,
 4 |     ConfigRecommendResponseSLZ,
 5 |     AnomalyDetectRequestSLZ,
 6 |     AnomalyDetectResponseSLZ,
 7 |     AnomalyRecoverRequestSLZ,
 8 |     AnomalyRecoverResponseSLZ,
 9 | )
10 | from enova.algo.service import AlgoService
11 | 
12 | 
13 | class BaseResource(BaseResource):
14 |     def __init__(self) -> None:
15 |         self.service = AlgoService()
16 | 
17 | 
18 | class ConfigRecommendResource(BaseResource):
19 |     PATH = "/config_recommend"
20 |     TAGS = ["Algo"]
21 |     GET_INCLUDE_IN_SCHEMA = False
22 |     POST_INCLUDE_IN_SCHEMA = False
23 | 
24 |     async def post(self, params: ConfigRecommendRequestSLZ) -> ConfigRecommendResponseSLZ:
25 |         return await self.service.config_recommend(params.dict())
26 | 
27 | 
28 | class AnomalyDetectResource(BaseResource):
29 |     PATH = "/anomaly_detect"
30 |     TAGS = ["Algo"]
31 |     GET_INCLUDE_IN_SCHEMA = False
32 |     POST_INCLUDE_IN_SCHEMA = False
33 | 
34 |     async def post(self, params: AnomalyDetectRequestSLZ) -> AnomalyDetectResponseSLZ:
35 |         return await self.service.anomaly_detect(params.dict())
36 | 
37 | 
38 | class AnomalyRecoverResource(BaseResource):
39 |     PATH = "/anomaly_recover"
40 |     TAGS = ["Algo"]
41 |     GET_INCLUDE_IN_SCHEMA = False
42 |     POST_INCLUDE_IN_SCHEMA = False
43 | 
44 |     async def post(self, params: AnomalyRecoverRequestSLZ) -> AnomalyRecoverResponseSLZ:
45 |         return await self.service.anomaly_recover(params.dict())
46 | 


--------------------------------------------------------------------------------
/enova/algo/serializer.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Tuple
  2 | from enova.server.restful.serializer import EmergingAIBaseModel
  3 | 
  4 | 
  5 | class LlmModel(EmergingAIBaseModel):
  6 |     framework: str
  7 |     param: float
  8 | 
  9 | 
 10 | class GpuModel(EmergingAIBaseModel):
 11 |     name: str
 12 |     spec: int
 13 |     num: int
 14 | 
 15 | 
 16 | TimeSeriesData = List[Tuple[float, float]]
 17 | 
 18 | 
 19 | # metric of serving llm model
 20 | class Metrics(EmergingAIBaseModel):
 21 |     active_requests: TimeSeriesData
 22 |     running_requests: TimeSeriesData
 23 |     pending_requests: TimeSeriesData
 24 |     server_new_requests: TimeSeriesData
 25 |     server_success_requests: TimeSeriesData
 26 |     gpu_kv_cache_usage: TimeSeriesData
 27 | 
 28 | 
 29 | # Configurations of llm model
 30 | class Configurations(EmergingAIBaseModel):
 31 |     max_num_seqs: int
 32 |     tensor_parallel_size: int
 33 |     gpu_memory_utilization: float
 34 |     replicas: int
 35 | 
 36 | 
 37 | class ConfigRecommendRequestSLZ(EmergingAIBaseModel):
 38 |     llm: LlmModel
 39 |     gpu: GpuModel
 40 | 
 41 |     class Config:
 42 |         schema_extra = {
 43 |             "llm": {
 44 |                 "framework": "llama",
 45 |                 "param": 13.0,
 46 |             },
 47 |             "gpu": {
 48 |                 "name": "4090",
 49 |                 "spec": 24,
 50 |                 "num": 2,
 51 |             },
 52 |         }
 53 | 
 54 | 
 55 | class ConfigRecommendResponseSLZ(EmergingAIBaseModel):
 56 |     max_num_seqs: int
 57 |     tensor_parallel_size: int
 58 |     gpu_memory_utilization: float
 59 |     replicas: int
 60 | 
 61 | 
 62 | class AnomalyDetectRequestSLZ(EmergingAIBaseModel):
 63 |     metrics: List[Metrics]
 64 |     configurations: Configurations
 65 | 
 66 |     class Config:
 67 |         schema_extra = {
 68 |             "metrics": [
 69 |                 {
 70 |                     "active_requests": [[1000000000, 10.0], [1000000000, 20.0]],
 71 |                     "running_requests": [[1000000000, 5.0], [1000000000, 15.0]],
 72 |                     "pending_requests": [[1000000000, 2.0], [1000000000, 4.0]],
 73 |                     "server_new_requests": [[1000000000, 30.0], [1000000000, 40.0]],
 74 |                     "server_success_requests": [[1000000000, 30.0], [1000000000, 40.0]],
 75 |                     "gpu_kv_cache_usage": [[1000000000, 30.0], [1000000000, 40.0]],
 76 |                 }
 77 |             ],
 78 |             "configurations": {
 79 |                 "max_num_seqs": 100,
 80 |                 "tensor_parallel_size": 8,
 81 |                 "gpu_memory_utilization": 0.75,
 82 |                 "replicas": 1,
 83 |             },
 84 |         }
 85 | 
 86 | 
 87 | class AnomalyDetectResponseSLZ(EmergingAIBaseModel):
 88 |     is_anomaly: int
 89 | 
 90 | 
 91 | class AnomalyRecoverRequestSLZ(EmergingAIBaseModel):
 92 |     metrics: List[Metrics]
 93 |     configurations: Configurations
 94 |     llm: LlmModel
 95 |     gpu: GpuModel
 96 | 
 97 |     class Config:
 98 |         schema_extra = {
 99 |             "metrics": [
100 |                 {
101 |                     "active_requests": [[1000000000, 10.0], [1000000000, 20.0]],
102 |                     "running_requests": [[1000000000, 5.0], [1000000000, 15.0]],
103 |                     "pending_requests": [[1000000000, 2.0], [1000000000, 4.0]],
104 |                     "server_new_requests": [[1000000000, 30.0], [1000000000, 40.0]],
105 |                     "server_success_requests": [[1000000000, 30.0], [1000000000, 40.0]],
106 |                     "gpu_kv_cache_usage": [[1000000000, 30.0], [1000000000, 40.0]],
107 |                 }
108 |             ],
109 |             "configurations": {
110 |                 "max_num_seqs": 100,
111 |                 "tensor_parallel_size": 8,
112 |                 "gpu_memory_utilization": 0.75,
113 |                 "replicas": 1,
114 |             },
115 |             "llm": {
116 |                 "framework": "llama",
117 |                 "param": 13.0,
118 |             },
119 |             "gpu": {
120 |                 "name": "4090",
121 |                 "spec": 24,
122 |                 "num": 2,
123 |             },
124 |         }
125 | 
126 | 
127 | class AnomalyRecoverResponseSLZ(ConfigRecommendResponseSLZ):
128 |     pass
129 | 


--------------------------------------------------------------------------------
/enova/algo/server.py:
--------------------------------------------------------------------------------
 1 | from enova.common.config import CONFIG
 2 | from enova.server.server import ApiServer
 3 | from enova.common.constant import ApiServerType
 4 | 
 5 | 
 6 | def get_algo_api_server(api_server_type=ApiServerType.ENOVA_ALGO.value):
 7 |     api_config = getattr(CONFIG, api_server_type)
 8 |     CONFIG.api.update(api_config)
 9 | 
10 |     api_server = ApiServer(api_config)
11 | 
12 |     return api_server
13 | 


--------------------------------------------------------------------------------
/enova/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/api/__init__.py


--------------------------------------------------------------------------------
/enova/api/app_api.py:
--------------------------------------------------------------------------------
 1 | from enova.common.config import CONFIG
 2 | from enova.api.base import ASyncRestfulEmergingaiAPI, ASyncEmergingaiAPI
 3 | from enova.common.constant import HttpMethod
 4 | 
 5 | 
 6 | APP_API_HOST = CONFIG.enova_app["app_api_host"]
 7 | 
 8 | 
 9 | class _EnovaAppApi:
10 |     def __init__(self) -> None:
11 |         self.healthz = ASyncEmergingaiAPI(method=HttpMethod.GET.value, url=APP_API_HOST + "/v1/healthz")
12 | 
13 |         self.serving = ASyncRestfulEmergingaiAPI(
14 |             url=APP_API_HOST + "/v1/serving",
15 |             resource_key="instance_id",
16 |         )
17 | 
18 |         self.delete_serving_by_name = ASyncEmergingaiAPI(method=HttpMethod.DELETE.value, url=APP_API_HOST + "/v1/serving/name")
19 | 
20 | 
21 | EnovaAppApi = _EnovaAppApi()
22 | 


--------------------------------------------------------------------------------
/enova/api/prom_api.py:
--------------------------------------------------------------------------------
 1 | from enova.common.config import CONFIG
 2 | from enova.api.base import ASyncAPI
 3 | from enova.common.constant import HttpMethod
 4 | 
 5 | 
 6 | PROM_API_HOST = CONFIG.enova_app["prom_api_host"]
 7 | 
 8 | 
 9 | class _PromApi:
10 |     def __init__(self) -> None:
11 |         self.query_range = ASyncAPI(method=HttpMethod.GET.value, url=PROM_API_HOST + "/api/v1/query_range")
12 | 
13 | 
14 | PromApi = _PromApi()
15 | 


--------------------------------------------------------------------------------
/enova/api/serving_api.py:
--------------------------------------------------------------------------------
 1 | from enova.common.config import CONFIG
 2 | from enova.api.base import ASyncEmergingaiAPI
 3 | from enova.common.constant import HttpMethod
 4 | 
 5 | 
 6 | SERVING_API_HOST = CONFIG.enova_app["serving_api_host"]
 7 | 
 8 | 
 9 | class _ServingApi:
10 |     def __init__(self) -> None:
11 |         self.engine_args = ASyncEmergingaiAPI(method=HttpMethod.GET.value, url=SERVING_API_HOST + "/v1/model/info/args")
12 | 
13 | 
14 | ServingApi = _ServingApi()
15 | 


--------------------------------------------------------------------------------
/enova/app/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/app/__init__.py


--------------------------------------------------------------------------------
/enova/app/db_modles.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | 
 3 | from sqlalchemy import (
 4 |     Column,
 5 |     Float,
 6 |     Integer,
 7 |     String,
 8 | )
 9 | from sqlalchemy.orm import declared_attr
10 | 
11 | from enova.common.constant import DeployStatus, TestStatus
12 | from enova.common.utils import gen_ulid
13 | from enova.database.relation.orm.base import DBModelBase, table_args, JSON, DateTime
14 | 
15 | 
16 | class DeploymentInstanceInfoTable(DBModelBase):
17 |     __tablename__ = "deployment_instance_info"
18 | 
19 |     @declared_attr
20 |     def __table_args__(cls):
21 |         return table_args(cls, {"comment": "table of serving's deployment instance"})
22 | 
23 |     instance_id = Column(String(256), primary_key=True, nullable=False, comment="instance id", default=gen_ulid)
24 |     instance_name = Column(String(64), nullable=False, comment="instance name")
25 |     instance_spec = Column(JSON, comment="instance specification")
26 |     startup_args = Column(JSON, comment="the arguments of starting up of model serve by serving")
27 |     mdl_cfg = Column(JSON, comment="the config of llm model")
28 |     serving_id = Column(String(256), nullable=False, comment="serving's unique id, allow use it get the status by polit api")
29 |     deploy_status = Column(String(32), nullable=False, default=DeployStatus.UNKNOWN.value, comment="status of deployment")
30 |     extra = Column(JSON)
31 |     create_time = Column(DateTime, default=datetime.datetime.now)
32 |     update_time = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
33 |     is_deleted = Column(Integer, default=0)
34 |     creator = Column(String(64))
35 |     updater = Column(String(64))
36 | 
37 | 
38 | class TestInfoTable(DBModelBase):
39 |     __tablename__ = "test_info"
40 | 
41 |     @declared_attr
42 |     def __table_args__(cls):
43 |         return table_args(cls, {"comment": "Inject Test record"})
44 | 
45 |     test_id = Column(String(256), primary_key=True, nullable=False, comment="test ID", default=gen_ulid)
46 |     instance_id = Column(String(256), nullable=False, comment="instance_id in serving's deployment")
47 |     data_set = Column(String(64), nullable=False, comment="name of dataset")
48 |     param_spec = Column(JSON, comment="serving's startup parameters")
49 |     test_spec = Column(JSON, comment="test specification")
50 |     test_status = Column(String(32), nullable=False, default=TestStatus.UNKNOWN.value)
51 |     prompt_tps = Column(Float, default=0, comment="throughput of prompt tokens")
52 |     generation_tps = Column(Float, default=0, comment="throughput of generation tokens")
53 |     result = Column(JSON, comment="result of inject test")
54 |     create_time = Column(DateTime, default=datetime.datetime.now)
55 |     update_time = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now)
56 |     is_deleted = Column(Integer, default=0)
57 |     creator = Column(String(64))
58 |     updater = Column(String(64))
59 | 


--------------------------------------------------------------------------------
/enova/app/resource.py:
--------------------------------------------------------------------------------
 1 | from typing import Annotated, Dict
 2 | from fastapi import Body, Depends
 3 | 
 4 | from enova.app.serializer import (
 5 |     ServingCreateSLZ,
 6 |     QueryServingParameterSLZ,
 7 |     QueryServingResponseSLZ,
 8 |     SingleQueryServingResponseSLZ,
 9 |     SingleQueryTestResponseSLZ,
10 |     ListTestResponseSLZ,
11 |     TestCreateSLZ,
12 |     QueryTestParameterSLZ,
13 | )
14 | from enova.server.restful.router import BaseResource
15 | from enova.app.service import AppService
16 | 
17 | 
18 | class BaseResource(BaseResource):
19 |     def __init__(self) -> None:
20 |         self.service = AppService()
21 | 
22 | 
23 | class HealthzResource(BaseResource):
24 |     PATH = "/healthz"
25 |     TAGS = ["monitor"]
26 | 
27 |     async def get(self) -> Dict:
28 |         """"""
29 |         return {"status": "running"}
30 | 
31 | 
32 | class ServingResource(BaseResource):
33 |     PATH = "/serving"
34 |     GET_INCLUDE_IN_SCHEMA = True
35 |     GET_RESPONSE_MODEL = QueryServingResponseSLZ
36 |     POST_RESPONSE_MODEL = SingleQueryServingResponseSLZ
37 |     TAGS = ["serving serve"]
38 | 
39 |     async def post(self, params: Annotated[ServingCreateSLZ, Body(openapi_examples=ServingCreateSLZ.Extra.openapi_examples)]) -> Dict:
40 |         """"""
41 |         return await self.service.create_instance(params.dict())
42 | 
43 |     async def get(self, params: Annotated[QueryServingParameterSLZ, Depends(QueryServingParameterSLZ)]):
44 |         """"""
45 |         return await self.service.list_instance(params.dict())
46 | 
47 | 
48 | class SingleServingResource(BaseResource):
49 |     PATH = "/serving/{instance_id}"
50 |     TAGS = ["serving serve"]
51 | 
52 |     async def delete(self, instance_id: str):
53 |         """"""
54 |         return await self.service.delete_instance(instance_id)
55 | 
56 |     async def get(self, instance_id: str):
57 |         """"""
58 |         return await self.service.get_instance(instance_id)
59 | 
60 | 
61 | class TestResource(BaseResource):
62 |     PATH = "/serving/instance/test"
63 |     GET_RESPONSE_MODEL = ListTestResponseSLZ
64 |     POST_RESPONSE_MODEL = SingleQueryTestResponseSLZ
65 |     TAGS = ["test inject"]
66 | 
67 |     async def post(self, params: Annotated[TestCreateSLZ, Body(openapi_examples=TestCreateSLZ.Extra.openapi_examples)]):
68 |         return await self.service.create_test(params.dict())
69 | 
70 |     async def get(self, params: Annotated[QueryTestParameterSLZ, Depends(QueryTestParameterSLZ)]):
71 |         return await self.service.list_test(params.dict())
72 | 
73 | 
74 | class SingleTestResource(BaseResource):
75 |     PATH = "/serving/instance/test/{test_id}"
76 |     GET_RESPONSE_MODEL = SingleQueryTestResponseSLZ
77 |     TAGS = ["test inject"]
78 | 
79 |     async def get(self, test_id: str):
80 |         return await self.service.get_test(test_id)
81 | 
82 |     async def delete(self, test_id: str):
83 |         return await self.service.delete_test(test_id)
84 | 


--------------------------------------------------------------------------------
/enova/app/server.py:
--------------------------------------------------------------------------------
 1 | from http.client import HTTPException
 2 | from pathlib import Path
 3 | 
 4 | from fastapi import Request
 5 | from fastapi.responses import HTMLResponse
 6 | from fastapi.staticfiles import StaticFiles
 7 | import sqlalchemy as sa
 8 | 
 9 | from enova.common.config import CONFIG
10 | from enova.common.logger import LOGGER
11 | from enova.common.constant import ApiServerType
12 | from enova.common.utils import get_web_static_path
13 | from enova.database.relation.orm.base import BaseSqlite
14 | from enova.database.relation.transaction.session import get_session
15 | from enova.server.server import ApiServer
16 | 
17 | 
18 | WEB_STATIC_PATH = get_web_static_path()
19 | 
20 | 
21 | async def redirect_all_requests_to_frontend(request: Request, exc: HTTPException):
22 |     # TODO: need to modify
23 |     if WEB_STATIC_PATH:
24 |         return HTMLResponse(open(Path(WEB_STATIC_PATH) / "index.html").read())
25 |     return "Welcome to enova"
26 | 
27 | 
28 | def init_db():
29 |     with get_session() as session:
30 |         # TODO: allow migrate new tables
31 |         insp = sa.inspect(session.db_engine.engine)
32 |         if not insp.get_table_names():
33 |             BaseSqlite.metadata.create_all(bind=session.db_engine.engine)
34 |             session.commit()
35 | 
36 |         insp = sa.inspect(session.db_engine.engine)
37 |         LOGGER.info(insp.get_table_names())
38 | 
39 | 
40 | def get_app_api_server(api_server_type=ApiServerType.ENOVA_APP.value):
41 |     api_config = getattr(CONFIG, api_server_type)
42 | 
43 |     CONFIG.api.update(api_config)
44 | 
45 |     api_server = ApiServer(api_config)
46 | 
47 |     # mount vuejs dist
48 |     api_server.app.mount(
49 |         f"{CONFIG.api['url_prefix']}/",
50 |         StaticFiles(directory=WEB_STATIC_PATH, html=True),
51 |         name="static",
52 |     )
53 |     api_server.app.add_exception_handler(404, redirect_all_requests_to_frontend)
54 | 
55 |     # datebase init
56 |     init_db()
57 | 
58 |     return api_server
59 | 


--------------------------------------------------------------------------------
/enova/app/utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | 
4 | def compute_actual_duration(value, unit):
5 |     return int(pd.Timedelta(f"{value}{unit}").total_seconds())
6 | 


--------------------------------------------------------------------------------
/enova/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/common/__init__.py


--------------------------------------------------------------------------------
/enova/common/constant.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum as BaseEnum
 2 | 
 3 | 
 4 | class Enum(BaseEnum):
 5 |     @classmethod
 6 |     def values(cls):
 7 |         return list(e.value for e in cls.__members__.values())
 8 | 
 9 | 
10 | class ServingBackend(Enum):
11 |     HF = "hf"
12 |     VLLM = "vllm"
13 |     SGLANG = "sglang"
14 | 
15 | 
16 | class HttpMethod(Enum):
17 |     GET = "get"
18 |     POST = "post"
19 |     PUT = "put"
20 |     DELETE = "delete"
21 | 
22 |     @classmethod
23 |     def methods_with_body(cls):
24 |         return [cls.POST.value, cls.PUT.value]
25 | 
26 | 
27 | class OrderBy(Enum):
28 |     ASC = "asc"
29 |     DESC = "desc"
30 | 
31 | 
32 | JSON_RESPONSE_HEADER = "application/json"
33 | 
34 | 
35 | # --- server scope ---
36 | class ApiServerType(Enum):
37 |     ENOVA_ALGO = "enova_algo"
38 |     ENOVA_APP = "enova_app"
39 | 
40 | 
41 | class DeployMode(Enum):
42 |     COMPOSE = "compose"
43 |     LOCAL = "local"
44 | 
45 | 
46 | class TrafficDistributionType(Enum):
47 |     GAUSSIAN = "gaussian"
48 |     POISSON = "poisson"
49 | 
50 | 
51 | class DurationUnitType(Enum):
52 |     SECOND = "sec"
53 |     MINUTE = "min"
54 |     HOUR = "hour"
55 | 
56 | 
57 | # --- db_model scope ---
58 | class DeployStatus(Enum):
59 |     UNKNOWN = "unknown"
60 |     PENDING = "pending"
61 |     RUNNING = "running"
62 |     FAILED = "failed"
63 |     FINISHED = "finsihed"
64 | 
65 | 
66 | class TestStatus(Enum):
67 |     UNKNOWN = "unknown"
68 |     INIT = "init"
69 |     SUCCESS = "success"
70 |     FAILED = "failed"
71 |     RUNNING = "running"
72 |     FINISHED = "finished"
73 | 
74 | 
75 | class ServeStatus(Enum):
76 |     UNKNOWN = "unknown"
77 |     OFF_LINE = "off_line"
78 |     NORMAL = "normal"
79 |     ABNORMAL = "abnormal"
80 | 
81 | 
82 | class Distribution(Enum):
83 |     NORMAL = "normal"
84 |     POISSON = "poisson"
85 | 
86 | 
87 | class VllmMode(Enum):
88 |     NORMAL = "normal"
89 |     OPENAI = "openai"
90 | 


--------------------------------------------------------------------------------
/enova/common/encoder.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import numpy as np
 3 | import json
 4 | 
 5 | 
 6 | class NumpyEncoder(json.JSONEncoder):
 7 |     """Custom encoder for numpy data types"""
 8 | 
 9 |     def default(self, obj):
10 |         if isinstance(
11 |             obj,
12 |             (
13 |                 np.int_,
14 |                 np.intc,
15 |                 np.intp,
16 |                 np.int8,
17 |                 np.int16,
18 |                 np.int32,
19 |                 np.int64,
20 |                 np.uint8,
21 |                 np.uint16,
22 |                 np.uint32,
23 |                 np.uint64,
24 |             ),
25 |         ):
26 |             return int(obj)
27 | 
28 |         elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)):
29 |             return float(obj)
30 | 
31 |         elif isinstance(obj, (np.complex_, np.complex64, np.complex128)):
32 |             return {"real": obj.real, "imag": obj.imag}
33 | 
34 |         elif isinstance(obj, (np.ndarray,)):
35 |             return obj.tolist()
36 | 
37 |         elif isinstance(obj, (np.bool_)):
38 |             return bool(obj)
39 | 
40 |         elif isinstance(obj, (np.void)):
41 |             return None
42 | 
43 |         return json.JSONEncoder.default(self, obj)
44 | 
45 | 
46 | def numpy_dumps(v, *, default):
47 |     try:
48 |         return json.dumps(v, cls=NumpyEncoder)
49 |     except Exception:
50 |         pass
51 | 
52 |     # orjson.dumps returns bytes, to match standard json.dumps we need to decode
53 |     return json.dumps(v, default=default)
54 | 
55 | 
56 | def json_numpy_obj_hook(dct):
57 |     """
58 |     Decodes a previously encoded numpy ndarray
59 |     with proper shape and dtype
60 |     :param dct: (dict) json encoded ndarray
61 |     :return: (ndarray) if input was an encoded ndarray
62 |     """
63 |     if isinstance(dct, dict) and "__ndarray__" in dct:
64 |         data = base64.b64decode(dct["__ndarray__"])
65 |         return np.frombuffer(data, dct["dtype"]).reshape(dct["shape"])
66 |     return dct
67 | 


--------------------------------------------------------------------------------
/enova/common/error.py:
--------------------------------------------------------------------------------
  1 | from enova.common.config import CONFIG
  2 | 
  3 | 
  4 | class EmergingAIBaseError(Exception):
  5 |     BASE_ERROR_CODE: str = CONFIG.BASIC_ERROR_CODE or "100"
  6 |     MODULE_CODE: str = CONFIG.MODULE_CODE or "001"
  7 |     ERROR_CODE: str = "000"
  8 |     ERROR_MESSAGE: str = ""
  9 | 
 10 |     def __init__(self, error_message=None, error_code=None, *args, **kwargs):
 11 |         self.error_code = error_code if error_code is not None else self.ERROR_CODE
 12 |         self.error_code = f"{self.BASE_ERROR_CODE}{self.MODULE_CODE}{self.error_code}"
 13 | 
 14 |         self.error_message = error_message if error_message is not None else self.ERROR_MESSAGE
 15 |         self.message = self.error_message
 16 |         self.code = int(self.error_code)
 17 |         errors = []
 18 |         if kwargs.get("errors", None):
 19 |             errors = kwargs["errors"] if isinstance(kwargs["errors"], list) else [kwargs["errors"]]
 20 |             del kwargs["errors"]
 21 |         self.errors = errors
 22 |         kwargs["args"] = args
 23 | 
 24 |         super(EmergingAIBaseError, self).__init__(self.error_message, self.error_code, kwargs, errors)
 25 | 
 26 | 
 27 | class ArgsError(EmergingAIBaseError):
 28 |     ERROR_CODE: str = "001"
 29 |     ERROR_MESSAGE: str = "args error"
 30 | 
 31 | 
 32 | class TranslationError(EmergingAIBaseError):
 33 |     ERROR_CODE: str = "091"
 34 |     ERROR_MESSAGE: str = "translation error"
 35 | 
 36 | 
 37 | # --
 38 | class EmergingaiAPIResponseError(EmergingAIBaseError):
 39 |     ERROR_CODE: str = "010"
 40 |     ERROR_MESSAGE: str = "response error"
 41 | 
 42 | 
 43 | class APIParamsError(EmergingAIBaseError):
 44 |     ERROR_CODE: str = "011"
 45 |     ERROR_MESSAGE: str = "response error"
 46 | 
 47 | 
 48 | # --- serving backend api ---
 49 | class EScalerApiResponseError(EmergingAIBaseError):
 50 |     ERROR_CODE: str = "101"
 51 |     ERROR_MESSAGE: str = "node api response error"
 52 | 
 53 | 
 54 | class DeploymentInstanceExistError(EmergingAIBaseError):
 55 |     ERROR_CODE: str = "401"
 56 |     ERROR_MESSAGE: str = "deployment workload had existed"
 57 | 
 58 | 
 59 | class DeploymentInstanceNotExistError(EmergingAIBaseError):
 60 |     ERROR_CODE: str = "402"
 61 |     ERROR_MESSAGE: str = "deployment workload is not exist"
 62 | 
 63 | 
 64 | class DeploymentInstanceCreateFailedError(EmergingAIBaseError):
 65 |     ERROR_CODE: str = "403"
 66 |     ERROR_MESSAGE: str = "deployment workload create failed"
 67 | 
 68 | 
 69 | class TestNotExistError(EmergingAIBaseError):
 70 |     ERROR_CODE: str = "403"
 71 |     ERROR_MESSAGE: str = "test record is not exist"
 72 | 
 73 | 
 74 | class JmeterContainerLaunchError(EmergingAIBaseError):
 75 |     ERROR_CODE: str = "404"
 76 |     ERROR_MESSAGE: str = "fail to launch jmeter container"
 77 | 
 78 | 
 79 | class TestStartError(EmergingAIBaseError):
 80 |     ERROR_CODE: str = "406"
 81 |     ERROR_MESSAGE: str = "test start failed"
 82 | 
 83 | 
 84 | class DataFileNotExistError(EmergingAIBaseError):
 85 |     ERROR_CODE: str = "407"
 86 |     ERROR_MESSAGE: str = "data file not existed"
 87 | 
 88 | 
 89 | # ----
 90 | 
 91 | 
 92 | class NotReadyError(EmergingAIBaseError):
 93 |     ERROR_CODE: str = "101"
 94 |     ERROR_MESSAGE: str = "support service not ready"
 95 | 
 96 | 
 97 | class BackendConfigMissingError(EmergingAIBaseError):
 98 |     ERROR_CODE: str = "102"
 99 |     ERROR_MESSAGE: str = "backend default config missing"
100 | 


--------------------------------------------------------------------------------
/enova/common/g_vars.py:
--------------------------------------------------------------------------------
 1 | import uuid
 2 | from typing import Union
 3 | from enova.common.local import get_contextvars, set_contextvars
 4 | 
 5 | 
 6 | def get_traceid() -> Union[str, None]:
 7 |     trace_id = get_contextvars("trace_id")
 8 |     if trace_id is None:
 9 |         trace_id = uuid.uuid4().hex
10 |         set_contextvars("trace_id", trace_id)
11 |     return trace_id
12 | 
13 | 
14 | def get_realip() -> Union[str, None]:
15 |     real_ip = get_contextvars("real_ip")
16 |     # TODO: LOGGER will case cyclic reference
17 |     # if real_ip is None:
18 |     #     LOGGER.warn("RealIPMiddleware maybe not Setup.")
19 |     return real_ip
20 | 


--------------------------------------------------------------------------------
/enova/common/local.py:
--------------------------------------------------------------------------------
 1 | import contextvars
 2 | import functools
 3 | import threading
 4 | 
 5 | 
 6 | context_vars_dict = {}
 7 | 
 8 | 
 9 | def set_contextvars(key, value):
10 |     """"""
11 |     if key not in context_vars_dict:
12 |         context_vars_dict[key] = contextvars.ContextVar(key)
13 |     context_vars_dict[key].set(value)
14 | 
15 | 
16 | def del_contextvars(key):
17 |     """
18 |     mainly delete the thread vars
19 |     """
20 |     if key in context_vars_dict:
21 |         context_vars_dict[key].clear()
22 | 
23 | 
24 | def get_contextvars(key, default=None):
25 |     """
26 |     mainly get the thread vars
27 |     """
28 |     if key not in context_vars_dict:
29 |         return default
30 |     try:
31 |         return context_vars_dict[key].get()
32 |     except LookupError:
33 |         return default
34 | 
35 | 
36 | def has_contextvars(key):
37 |     """TODO:"""
38 |     return False
39 | 
40 | 
41 | _local = threading.local()
42 | 
43 | 
44 | def set_local_param(key, value):
45 |     """
46 |     mainly setup the custom vars of threads
47 |     """
48 |     setattr(_local, key, value)
49 | 
50 | 
51 | def del_local_param(key):
52 |     """
53 |     mainly delete the custom vars of threads
54 |     """
55 |     if hasattr(_local, key):
56 |         delattr(_local, key)
57 | 
58 | 
59 | def get_local_param(key, default=None):
60 |     return getattr(_local, key, default)
61 | 
62 | 
63 | def contextlocal_cache(func):
64 |     @functools.wraps(func)
65 |     def wrapper(*args, **kwargs):
66 |         key = functools._make_key(args, kwargs, False)
67 |         key = f"{func.__name__}_{key}"
68 |         if has_contextvars(key):
69 |             return get_local_param(key)
70 |         ret = func(*args, **kwargs)
71 |         set_contextvars(key, ret)
72 |         return ret
73 | 
74 |     return wrapper
75 | 


--------------------------------------------------------------------------------
/enova/common/logger.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import sys
 4 | import uuid
 5 | from logging import Formatter
 6 | from logging import StreamHandler
 7 | from logging import getLogger
 8 | from logging.handlers import TimedRotatingFileHandler
 9 | from enova.common.config import CONFIG
10 | from enova.common.g_vars import get_traceid
11 | 
12 | 
13 | LOGGER_MAP = {}
14 | 
15 | 
16 | class AddRequestIdFormatter(Formatter):
17 |     def formatMessage(self, record):
18 |         trace_id = get_traceid()
19 |         if CONFIG.app_name:
20 |             record.message = f"[{CONFIG.app_name}][trace_id: {trace_id}]|{record.message}"
21 |         else:
22 |             record.message = f"[trace_id: {trace_id}]|{record.message}"
23 |         return super().formatMessage(record)
24 | 
25 | 
26 | def setup_logger(name=None, path=None, level=None, file_handler_backupCount=None):
27 |     # sys.stdout = Unbuffered(sys.stdout)
28 |     # sys.stderr = Unbuffered(sys.stderr)
29 |     logger_conf = CONFIG.logger
30 |     name = name or logger_conf["name"]
31 |     path = path or logger_conf["path"]
32 |     level = level or logger_conf["level"]
33 |     file_handler_backupCount = file_handler_backupCount or logger_conf["file_handler_backupCount"]
34 | 
35 |     logger = getLogger(name)
36 |     logger.setLevel(level.upper())
37 | 
38 |     formatter = AddRequestIdFormatter(datefmt=logger_conf["datefmt"], fmt=logger_conf["fmt"])
39 |     stream_handler = StreamHandler(sys.stdout)
40 |     stream_handler.setFormatter(formatter)
41 |     logger.addHandler(stream_handler)
42 |     os.makedirs(path, exist_ok=True)
43 |     file_handler = TimedRotatingFileHandler(
44 |         filename=logger_conf["file_handler_filename_format"].format(path=path, name=name),
45 |         when=logger_conf["file_handler_when"],
46 |         interval=logger_conf["file_handler_interval"],
47 |         backupCount=file_handler_backupCount,
48 |     )
49 |     file_handler.suffix = logger_conf["file_handler_suffix"]
50 |     file_handler.extMatch = re.compile(logger_conf["file_handler_extMatch_pattern"])
51 |     file_handler.setFormatter(formatter)
52 |     logger.addHandler(file_handler)
53 |     return logger
54 | 
55 | 
56 | def get_logger_by_name(name="default"):
57 |     if name not in LOGGER_MAP:
58 |         logger_conf = {}
59 |         logger = setup_logger(**logger_conf)
60 |         LOGGER_MAP[name] = logger
61 |     return LOGGER_MAP[name]
62 | 
63 | 
64 | LOGGER = get_logger_by_name()
65 | 


--------------------------------------------------------------------------------
/enova/database/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/database/__init__.py


--------------------------------------------------------------------------------
/enova/database/relation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/database/relation/__init__.py


--------------------------------------------------------------------------------
/enova/database/relation/orm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/database/relation/orm/__init__.py


--------------------------------------------------------------------------------
/enova/database/relation/transaction/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/database/relation/transaction/__init__.py


--------------------------------------------------------------------------------
/enova/entry/cli.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | 
 3 | from enova.common.config import _get_pkg_version, CONFIG
 4 | from enova.entry.command.algo import algo_cli
 5 | from enova.entry.command.app import app_cli
 6 | from enova.entry.command.serving import serving_cli
 7 | from enova.entry.command.injector import injector_cli
 8 | from enova.entry.command.mon import mon_cli
 9 | from enova.entry.command.pilot import pilot_cli
10 | from enova.entry.command.webui import webui_cli
11 | 
12 | 
13 | @click.version_option(_get_pkg_version(), "--version", "-v")
14 | @click.group(context_settings=CONFIG.cli["context_settings"])
15 | def cli():
16 |     """
17 |     \b
18 |     ███████╗███╗   ██╗ ██████╗ ██╗   ██╗ █████╗
19 |     ██╔════╝████╗  ██║██╔═══██╗██║   ██║██╔══██╗
20 |     █████╗  ██╔██╗ ██║██║   ██║██║   ██║███████║
21 |     ██╔══╝  ██║╚██╗██║██║   ██║╚██╗ ██╔╝██╔══██║
22 |     ███████╗██║ ╚████║╚██████╔╝ ╚████╔╝ ██║  ██║
23 |     ╚══════╝╚═╝  ╚═══╝ ╚═════╝   ╚═══╝  ╚═╝  ╚═╝
24 | 
25 |     \b
26 |     ENOVA is an open-source llm deployment, monitoring, injection and auto-scaling service.
27 |     It provides a set of commands to deploy stable serverless serving of LLM on GPU clusters with auto-scaling.
28 |     """
29 |     pass
30 | 
31 | 
32 | def main():
33 |     cli.add_command(serving_cli)
34 |     cli.add_command(app_cli)
35 |     cli.add_command(webui_cli)
36 |     cli.add_command(mon_cli)
37 |     cli.add_command(algo_cli)
38 |     cli.add_command(injector_cli)
39 | 
40 |     cli.add_command(pilot_cli)  # all in one
41 | 
42 |     cli()
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     main()
47 | 


--------------------------------------------------------------------------------
/enova/entry/command/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/entry/command/__init__.py


--------------------------------------------------------------------------------
/enova/entry/command/algo.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import click
 3 | 
 4 | from enova.common.cli_helper import ArgumentHelper
 5 | from enova.common.config import CONFIG
 6 | 
 7 | 
 8 | class EnovaAlgo:
 9 |     # TODO: support run compose
10 |     def run(self):
11 |         args_helper = ArgumentHelper(self, sys._getframe())
12 |         CONFIG.update_config(args_helper.args_map)
13 | 
14 |         import uvicorn
15 | 
16 |         from enova.algo.server import get_algo_api_server
17 | 
18 |         api_server = get_algo_api_server()
19 |         uvicorn.run(api_server.app, host=CONFIG.enova_algo["host"], port=CONFIG.enova_algo["port"])
20 | 
21 | 
22 | pass_enova_algo = click.make_pass_decorator(EnovaAlgo)
23 | 
24 | 
25 | @click.group(name="algo")
26 | @click.pass_context
27 | def algo_cli(ctx):
28 |     """
29 |     Run the autoscaling service.
30 |     """
31 |     ctx.obj = EnovaAlgo()
32 | 
33 | 
34 | @algo_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"])
35 | @pass_enova_algo
36 | @click.pass_context
37 | def mon_run(ctx, enova_algo: EnovaAlgo):
38 |     enova_algo.run()
39 | 
40 | 
41 | @algo_cli.command(name="stop")
42 | @pass_enova_algo
43 | @click.pass_context
44 | def mon_stop(ctx, enova_algo: EnovaAlgo):
45 |     enova_algo.stop()
46 | 


--------------------------------------------------------------------------------
/enova/entry/command/mon.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import sys
 3 | import click
 4 | 
 5 | from enova.common.cli_helper import ArgumentHelper, DockerComposeHeler
 6 | from enova.common.config import CONFIG
 7 | from enova.common.logger import LOGGER
 8 | 
 9 | 
10 | class EnovaMonitor:
11 |     def __init__(self) -> None:
12 |         self.docker_services = [
13 |             "dcgm-exporter",
14 |             "grafana",
15 |             "otel-collector",
16 |             "prometheus",
17 |             "tempo",
18 |             "enova-escaler",
19 |             "enova-algo",
20 |         ]  # start up by order
21 |         self._docker_compose = DockerComposeHeler()
22 | 
23 |     def _run_by_compose(self):
24 |         for service in self.docker_services:
25 |             options = {}
26 |             self._docker_compose.update_service_options(service, options)
27 |             self._docker_compose.startup_service(service, is_daemon=True)
28 | 
29 |     def run(self, **kwargs):
30 |         args_helper = ArgumentHelper(self, sys._getframe())
31 |         CONFIG.update_config(args_helper.args_map)
32 | 
33 |         self._run_by_compose()
34 | 
35 |     def _stop_by_compose(self):
36 |         pass
37 | 
38 |     def stop(self):
39 |         cmd_params = self._docker_compose.base_cmd
40 |         cmd_params += ["down"]
41 | 
42 |         result = subprocess.run(
43 |             [self._docker_compose.excu, "-f", self._docker_compose.compose_file, "down"],
44 |             capture_output=True,
45 |             text=True,
46 |         )
47 |         if result.returncode == 0:
48 |             LOGGER.info("llmo monitors stop successfully")
49 |         else:
50 |             LOGGER.error(f"llmo monitors stop failed, {result.stderr}")
51 | 
52 | 
53 | pass_enova_monitor = click.make_pass_decorator(EnovaMonitor)
54 | 
55 | 
56 | @click.group(name="mon")
57 | @click.pass_context
58 | def mon_cli(ctx):
59 |     """
60 |     Run the monitors of LLM server
61 |     """
62 |     ctx.obj = EnovaMonitor()
63 | 
64 | 
65 | @mon_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"])
66 | @pass_enova_monitor
67 | @click.pass_context
68 | def mon_run(ctx, enova_monitor: EnovaMonitor):
69 |     enova_monitor.run()
70 | 
71 | 
72 | @mon_cli.command(name="stop")
73 | @pass_enova_monitor
74 | @click.pass_context
75 | def mon_stop(ctx, enova_monitor: EnovaMonitor):
76 |     enova_monitor.stop()
77 | 


--------------------------------------------------------------------------------
/enova/entry/command/serving.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import click
  3 | 
  4 | from enova.common.cli_helper import ArgumentHelper, parse_extra_args
  5 | from enova.common.config import CONFIG
  6 | from enova.common.constant import ServingBackend
  7 | from enova.entry.command.webui import Webui
  8 | from enova.serving.apiserver import EApiServer
  9 | 
 10 | 
 11 | class ServingHandler:
 12 |     """
 13 |     serving handler
 14 |     """
 15 | 
 16 |     def __init__(self, host, port, model, backend):
 17 |         self.host = host
 18 |         self.port = port
 19 |         self.model = model
 20 |         self.apiserver = EApiServer(host, port, self.model, backend)
 21 | 
 22 |     def start(self, **kwargs):
 23 |         self.apiserver.local_run(**kwargs)
 24 | 
 25 |     def stop(self, *args):
 26 |         """"""
 27 | 
 28 | 
 29 | class EnovaServing:
 30 |     def run(
 31 |         self,
 32 |         model,
 33 |         host=CONFIG.serving["host"],
 34 |         port=CONFIG.serving["port"],
 35 |         backend=CONFIG.serving["backend"],
 36 |         exporter_endpoint=CONFIG.llmo["eai_exporter_endpoint"],
 37 |         exporter_service_name=CONFIG.llmo["eai_exporter_service_name"],
 38 |         include_webui=True,
 39 |         hf_proxy=None,
 40 |         **kwargs,
 41 |     ):
 42 |         args_helper = ArgumentHelper(self, sys._getframe())
 43 |         CONFIG.update_config(args_helper.args_map)
 44 | 
 45 |         from enova.llmo import start as llmo_start
 46 | 
 47 |         CONFIG.update_config({backend: kwargs})
 48 |         CONFIG.print_config()
 49 |         llmo_start(otlp_exporter_endpoint=exporter_endpoint, service_name=exporter_service_name)
 50 |         if include_webui:
 51 |             Webui().run(daemon=False)
 52 |         ServingHandler(host, port, model, backend).start()
 53 | 
 54 | 
 55 | pass_enova_serving = click.make_pass_decorator(EnovaServing)
 56 | 
 57 | 
 58 | @click.group(name="serving")
 59 | @click.pass_context
 60 | def serving_cli(ctx):
 61 |     """
 62 |     Deploy the target LLM and launch the LLM API service.
 63 |     """
 64 |     ctx.obj = EnovaServing()
 65 | 
 66 | 
 67 | @serving_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"])
 68 | @click.option("--model", type=str)
 69 | @click.option("--host", type=str, default=CONFIG.serving["host"])
 70 | @click.option("--port", type=int, default=CONFIG.serving["port"])
 71 | @click.option("--backend", type=str, default=CONFIG.serving["backend"])
 72 | @click.option(
 73 |     "--exporter-endpoint",
 74 |     "--exporter_endpoint",
 75 |     "exporter_endpoint",
 76 |     type=str,
 77 |     default=CONFIG.llmo["eai_exporter_endpoint"],
 78 | )
 79 | @click.option(
 80 |     "--exporter-service-name",
 81 |     "--exporter_service_name",
 82 |     "exporter_service_name",
 83 |     type=str,
 84 |     default=CONFIG.llmo["eai_exporter_service_name"],
 85 | )
 86 | @click.option("--include-webui", "--include_webui", "include_webui", type=bool, default=True)
 87 | @click.option("--hf-proxy", "--hf_proxy", "hf_proxy", type=str, default=None)
 88 | @pass_enova_serving
 89 | @click.pass_context
 90 | def serving_run(
 91 |     ctx,
 92 |     enova_serving,
 93 |     model,
 94 |     host,
 95 |     port,
 96 |     backend,
 97 |     exporter_endpoint,
 98 |     exporter_service_name,
 99 |     include_webui,
100 |     hf_proxy,
101 | ):
102 |     enova_serving.run(
103 |         model=model,
104 |         host=host,
105 |         port=port,
106 |         backend=backend,
107 |         exporter_endpoint=exporter_endpoint,
108 |         exporter_service_name=exporter_service_name,
109 |         include_webui=include_webui,
110 |         hf_proxy=hf_proxy,
111 |         **parse_extra_args(ctx),
112 |     )
113 | 


--------------------------------------------------------------------------------
/enova/entry/command/webui.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import subprocess
 3 | import sys
 4 | import click
 5 | 
 6 | from enova.common.cli_helper import ArgumentHelper, parse_extra_args
 7 | from enova.common.config import CONFIG
 8 | from enova.common.utils import get_enova_path
 9 | 
10 | 
11 | class Webui:
12 |     def __init__(self):
13 |         self.streamlit_process = None
14 | 
15 |     def start(self, serving_host, serving_port, host, port):
16 |         args_helper = ArgumentHelper(self, sys._getframe())
17 |         CONFIG.update_config(args_helper.args_map)
18 | 
19 |         os.environ['SERVING_URL'] = f"http://{serving_host}:{serving_port}"
20 | 
21 |         base_enova_path = get_enova_path()
22 |         streamlit_script = os.path.join(base_enova_path, CONFIG.webui["script"])
23 |         self.streamlit_process = subprocess.Popen(
24 |             ["streamlit", "run", streamlit_script, "--server.port", str(port), "--server.address", host]
25 |         )
26 | 
27 |     def run(
28 |         self,
29 |         serving_host=CONFIG.serving["host"],
30 |         serving_port=CONFIG.serving["port"],
31 |         host=CONFIG.webui["host"],
32 |         port=CONFIG.webui["port"],
33 |         daemon=CONFIG.webui["daemon"],
34 |         **kwargs,
35 |     ):
36 |         """"""
37 |         self.start(serving_host, serving_port, host, port)
38 |         if daemon:
39 |             self.streamlit_process.wait()
40 | 
41 |     def stop(self):
42 |         self.streamlit_process.terminate()
43 |         self.streamlit_process.wait()
44 | 
45 | 
46 | pass_enova_webui = click.make_pass_decorator(Webui)
47 | 
48 | 
49 | @click.group(name="webui")
50 | @click.pass_context
51 | def webui_cli(ctx):
52 |     """
53 |     Build agent at this page based on the launched LLM API service.
54 |     """
55 |     pass
56 | 
57 | 
58 | @webui_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"])
59 | @click.option("--serving-host", type=str, default=CONFIG.serving["host"])
60 | @click.option("--serving-port", type=int, default=CONFIG.serving["port"])
61 | @click.option("--host", type=str, default=CONFIG.webui["host"])
62 | @click.option("--port", type=int, default=CONFIG.webui["port"])
63 | @click.option("--daemon", type=bool, default=CONFIG.webui["daemon"])
64 | @pass_enova_webui
65 | @click.pass_context
66 | def webui_run(
67 |     ctx,
68 |     enova_webui: Webui,
69 |     serving_host,
70 |     serving_port,
71 |     host,
72 |     port,
73 |     daemon,
74 | ):
75 |     enova_webui.run(
76 |         serving_host=serving_host,
77 |         serving_port=serving_port,
78 |         host=host,
79 |         port=port,
80 |         daemon=daemon,
81 |         **parse_extra_args(ctx),
82 |     )
83 |     pass
84 | 
85 | 
86 | @webui_cli.command(
87 |     name="stop",
88 |     context_settings=dict(help_option_names=["-h", "--help"], ignore_unknown_options=True, allow_extra_args=True),
89 | )
90 | @pass_enova_webui
91 | @click.pass_context
92 | def webui_stop(ctx, enova_webui: Webui):
93 |     enova_webui.stop()
94 | 


--------------------------------------------------------------------------------
/enova/job/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/job/__init__.py


--------------------------------------------------------------------------------
/enova/job/job_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/job/job_manager.py


--------------------------------------------------------------------------------
/enova/server/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/server/__init__.py


--------------------------------------------------------------------------------
/enova/server/exception/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/server/exception/__init__.py


--------------------------------------------------------------------------------
/enova/server/exception/handler.py:
--------------------------------------------------------------------------------
 1 | import abc
 2 | from fastapi import Request
 3 | 
 4 | 
 5 | class BaseExceptionHandler(metaclass=abc.ABCMeta):
 6 | 
 7 |     @abc.abstractmethod
 8 |     def get_exception_class(self):
 9 |         """"""
10 | 
11 |     @abc.abstractmethod
12 |     def exception_handler(self, request: Request, exc):
13 |         """"""
14 | 


--------------------------------------------------------------------------------
/enova/server/middleware/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/server/middleware/__init__.py


--------------------------------------------------------------------------------
/enova/server/middleware/base.py:
--------------------------------------------------------------------------------
 1 | import abc
 2 | import dataclasses
 3 | from typing import List
 4 | from fastapi import Request
 5 | 
 6 | 
 7 | def get_dependencies() -> List:
 8 |     return
 9 | 
10 | 
11 | @dataclasses.dataclass
12 | class BaseMiddleware(metaclass=abc.ABCMeta):
13 |     """"""
14 | 
15 |     api_config: dict
16 | 
17 | 
18 | class EmergingAIMultiMiddlewares:
19 | 
20 |     def __init__(self) -> None:
21 |         self.middewares: List[BaseMiddleware] = []
22 |         self.request_middlewares: List[BaseMiddleware] = []
23 |         self.response_middlewares: List[BaseMiddleware] = []
24 | 
25 |     def register(self, middleware: BaseMiddleware):
26 |         self.middewares.append(middleware)
27 |         if hasattr(middleware, "_process_request"):
28 |             self.request_middlewares.append(middleware)
29 |         if hasattr(middleware, "_process_response"):
30 |             self.response_middlewares.append(middleware)
31 | 
32 |     async def process(self, request: Request, call_next):
33 |         # request
34 |         for middleware in self.request_middlewares:
35 |             if hasattr(middleware, "_process_request"):
36 |                 await middleware._process_request(request)
37 |         response = await call_next(request)
38 |         # response
39 |         for middleware in self.response_middlewares:
40 |             response = await middleware._process_response(request, response)
41 |         return response
42 | 


--------------------------------------------------------------------------------
/enova/server/middleware/response.py:
--------------------------------------------------------------------------------
 1 | import rapidjson
 2 | from fastapi import Request, status
 3 | 
 4 | from fastapi.responses import JSONResponse, StreamingResponse
 5 | from enova.common.constant import JSON_RESPONSE_HEADER
 6 | from enova.common.g_vars import get_traceid
 7 | from enova.server.middleware.base import BaseMiddleware
 8 | 
 9 | 
10 | class ResponseMiddleware(BaseMiddleware):
11 | 
12 |     async def _process_response(self, request: Request, response):
13 |         """"""
14 |         if request.url.path in [
15 |             self.api_config["url_prefix"] + "/docs",
16 |             self.api_config["url_prefix"] + "/redoc",
17 |             self.api_config["url_prefix"] + "/openapi.json",
18 |         ] or request.url.path.startswith(f"{self.api_config['url_prefix']}/admin"):
19 |             return response
20 |         trace_id = get_traceid()
21 |         if isinstance(response, StreamingResponse) and response.headers.get("content-type") == JSON_RESPONSE_HEADER:
22 |             response_body = b""
23 |             async for chunk in response.body_iterator:
24 |                 response_body += chunk
25 |             resp = rapidjson.loads(response_body)
26 |             if "code" in resp and "message" in resp:
27 |                 if "trace_id" not in resp:
28 |                     resp["trace_id"] = trace_id
29 |                 resp = JSONResponse(
30 |                     status_code=response.status_code,
31 |                     content=resp,
32 |                 )
33 |             else:
34 |                 if response.status_code == status.HTTP_200_OK:
35 |                     code = 0
36 |                 else:
37 |                     code = response.status_code
38 |                 resp = JSONResponse(
39 |                     status_code=response.status_code,
40 |                     content={"message": "", "code": code, "result": resp, "trace_id": trace_id, "version": self.api_config["api_version"]},
41 |                 )
42 |             for k, v in response.headers.items():
43 |                 if k not in resp.headers:
44 |                     resp.headers[k] = v
45 |             return resp
46 |         if isinstance(response, dict):
47 |             return JSONResponse(
48 |                 status_code=200,
49 |                 content={"message": "", "code": 0, "result": response, "trace_id": trace_id, "version": self.api_config["api_version"]},
50 |             )
51 |         return response
52 | 


--------------------------------------------------------------------------------
/enova/server/middleware/trace.py:
--------------------------------------------------------------------------------
 1 | import uuid
 2 | from fastapi import Request
 3 | from enova.server.middleware.base import BaseMiddleware
 4 | from enova.common.local import set_contextvars
 5 | 
 6 | 
 7 | class TraceMiddleware(BaseMiddleware):
 8 | 
 9 |     async def _process_request(self, request: Request):
10 |         """get header trace_id"""
11 |         trace_id = request.headers.get('trace_id') or uuid.uuid4().hex
12 |         set_contextvars('trace_id', trace_id)
13 | 


--------------------------------------------------------------------------------
/enova/server/restful/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/server/restful/__init__.py


--------------------------------------------------------------------------------
/enova/server/restful/router.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | from fastapi import APIRouter
 3 | from fastapi.responses import JSONResponse
 4 | from enova.common.config import CONFIG
 5 | from enova.common.constant import HttpMethod
 6 | 
 7 | 
 8 | class BaseResource:
 9 |     PATH = NotImplemented
10 |     DEPENDENCIS = NotImplemented
11 |     GET_RESPONSE_MODEL = None
12 |     PUT_RESPONSE_MODEL = None
13 |     DELETE_RESPONSE_MODEL = None
14 |     POST_RESPONSE_MODEL = None
15 |     GET_RESPONSE_CLASS = JSONResponse
16 |     PUT_RESPONSE_CLASS = JSONResponse
17 |     DELETE_RESPONSE_CLASS = JSONResponse
18 |     POST_RESPONSE_CLASS = JSONResponse
19 |     GET_INCLUDE_IN_SCHEMA = True
20 |     PUT_INCLUDE_IN_SCHEMA = True
21 |     DELETE_INCLUDE_IN_SCHEMA = True
22 |     POST_INCLUDE_IN_SCHEMA = True
23 |     TAGS = None
24 | 
25 | 
26 | class WebSocketResource:
27 |     PATH = NotImplemented
28 | 
29 | 
30 | @dataclasses.dataclass
31 | class ApiRouter:
32 |     prefix: str = None
33 | 
34 |     def __post_init__(self) -> None:
35 |         """
36 |         Dynamically convert GET, POST, DELETE, PUT into interfaces. just for fastapi
37 |         """
38 |         self.router = APIRouter(
39 |             prefix=self.prefix,
40 |             dependencies=[],
41 |         )
42 | 
43 |     def register(self, resource_cls):
44 |         """"""
45 |         if issubclass(resource_cls, BaseResource) and resource_cls != BaseResource:
46 |             self._register_http(resource_cls)
47 | 
48 |         if issubclass(resource_cls, WebSocketResource) and resource_cls != WebSocketResource:
49 |             self._register_ws(resource_cls)
50 | 
51 |     def _register_http(self, resource_cls):
52 |         resource_ins = resource_cls()
53 |         for method in HttpMethod.values():
54 |             if hasattr(resource_ins, method):
55 |                 response_model = getattr(resource_ins, f"{method.upper()}_RESPONSE_MODEL")
56 |                 response_class = getattr(resource_ins, f"{method.upper()}_RESPONSE_CLASS")
57 |                 include_in_schema = getattr(resource_ins, f"{method.upper()}_INCLUDE_IN_SCHEMA")
58 |                 actual_path = f"/{CONFIG.api['api_version']}{resource_ins.PATH}"
59 |                 tags = getattr(resource_ins, "TAGS") or []
60 |                 getattr(self.router, method)(
61 |                     actual_path,
62 |                     response_model=response_model,
63 |                     response_class=response_class,
64 |                     include_in_schema=include_in_schema,
65 |                     tags=tags,
66 |                 )(getattr(resource_ins, method))
67 | 
68 |     def _register_ws(self, resource_cls):
69 |         resource_ins = resource_cls()
70 |         if resource_ins.PATH is not NotImplemented:
71 |             actual_path = f"/{CONFIG.api['api_version']}{resource_ins.PATH}"
72 |             self.router.add_api_websocket_route(actual_path, getattr(resource_ins, "get"))
73 | 


--------------------------------------------------------------------------------
/enova/server/restful/serializer.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from typing import Dict, List
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | from pydantic.version import VERSION as PYDANTIC_VERSION
 6 | 
 7 | 
 8 | PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")
 9 | if PYDANTIC_V2:
10 |     from pydantic._internal._model_construction import ModelMetaclass
11 | else:
12 |     from pydantic.main import ModelMetaclass
13 | 
14 | from enova.common.config import CONFIG  # noqa
15 | from enova.common.constant import OrderBy  # noqa
16 | 
17 | 
18 | class AllFields(ModelMetaclass):
19 |     def __new__(self, name, bases, namespaces, **kwargs):
20 |         for field in namespaces:
21 |             if not field.startswith("__"):
22 |                 namespaces[field] = Field(namespaces[field])
23 |         return super().__new__(self, name, bases, namespaces, **kwargs)
24 | 
25 | 
26 | class EmergingAIBaseModel(BaseModel):
27 |     def dict(self, *args, **kwargs):
28 |         return json.loads(self.model_dump_json())
29 | 
30 | 
31 | class EmergingAIQueryRequestBaseModel(EmergingAIBaseModel):
32 |     page: int = Field(default=1, ge=CONFIG.api["default_min_page"], le=CONFIG.api["default_max_page"])
33 |     size: int = Field(default=10, ge=CONFIG.api["default_min_size"], le=CONFIG.api["default_max_size"])
34 |     order_by: str | None = None
35 |     order_type: OrderBy | None = None
36 |     fuzzy: str | None = None
37 |     start_time: str | None = None
38 |     end_time: str | None = None
39 | 
40 | 
41 | class EmergingAIQueryResponseBaseModel(EmergingAIBaseModel):
42 |     page: int
43 |     size: int
44 |     total_num: int
45 |     total_page: int
46 |     num: int
47 |     data: List[Dict]
48 | 


--------------------------------------------------------------------------------
/enova/serving/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/serving/__init__.py


--------------------------------------------------------------------------------
/enova/serving/apiserver.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | from enova.common.constant import ServingBackend
 3 | from enova.common.config import CONFIG
 4 | from enova.serving.backend.transformers import TransformersBackend
 5 | from enova.serving.backend.vllm import VllmBackend
 6 | from enova.serving.backend.sglang import SglangBackend
 7 | 
 8 | 
 9 | @dataclasses.dataclass
10 | class EApiServer:
11 |     """
12 |     Need to adapt to multiple task, text2text, text2image, image2image
13 |     support multiple api according to different task
14 |     """
15 | 
16 |     host: str
17 |     port: int
18 |     model: str
19 |     backend: str
20 | 
21 |     def __post_init__(self):
22 |         self.backend_ins = None
23 | 
24 |     def get_backend_ins(self):
25 |         engine_map = {
26 |             ServingBackend.HF.value: TransformersBackend,
27 |             ServingBackend.VLLM.value: VllmBackend,
28 |             ServingBackend.SGLANG.value: SglangBackend}
29 |         if self.backend not in engine_map:
30 |             raise ValueError(f"serving.backend: {CONFIG.serving['backend']} is not in {ServingBackend.values()}")
31 |         return engine_map[self.backend](self.backend, self.model)
32 | 
33 |     def local_run(self):
34 |         """"""
35 |         self.backend_ins = self.get_backend_ins()
36 |         self.backend_ins.local_run(host=self.host, port=self.port)
37 | 


--------------------------------------------------------------------------------
/enova/serving/backend/hf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/serving/backend/hf/__init__.py


--------------------------------------------------------------------------------
/enova/serving/backend/hf/handler.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | import functools
 3 | from typing import Callable, Dict
 4 | 
 5 | 
 6 | @dataclasses.dataclass
 7 | class RemoteFunc:
 8 |     method: str
 9 |     path: str
10 |     func: Callable
11 |     kwarg: Dict
12 | 
13 | 
14 | REMOTE_FUNC_TAG = "__remote_func__"
15 | 
16 | 
17 | @dataclasses.dataclass
18 | class HuggingFaceHandler:
19 |     """"""
20 | 
21 |     model: str
22 |     name: str = "serving"
23 | 
24 |     @classmethod
25 |     def remote_func(cls, method, path=None, **kwarg):
26 |         def decorator(func):
27 |             actual_path = f"/{func.__name__}" if path is None else path
28 | 
29 |             @functools.wraps(func)
30 |             def wrapped_func(self, *args, **kwargs):
31 |                 return func(self, *args, **kwargs)
32 | 
33 |             setattr(wrapped_func, REMOTE_FUNC_TAG, (RemoteFunc(method, actual_path, func, kwarg)))
34 |             return wrapped_func
35 | 
36 |         return decorator
37 | 


--------------------------------------------------------------------------------
/enova/serving/backend/sglang.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | import os
 3 | from enova.common.logger import LOGGER
 4 | from enova.common.config import CONFIG
 5 | from enova.serving.backend.base import BaseBackend
 6 | 
 7 | 
 8 | @dataclasses.dataclass
 9 | class SglangBackend(BaseBackend):
10 |     def __post_init__(self):
11 |         """Initialize the SglangBackend specific components."""
12 | 
13 |     def _create_app(self):
14 |         from sglang.srt.server import app as sglang_app, launch_engine
15 |         from sglang.srt.server_args import ServerArgs
16 |         from sglang.srt.utils import add_prometheus_middleware, set_prometheus_multiproc_dir
17 |         from sglang.srt.metrics.func_timer import enable_func_timer
18 | 
19 |         if not hasattr(self, "model"):
20 |             raise RuntimeError("Model path must be specified")
21 | 
22 |         if "tensor_parallel_size" in CONFIG.sglang:
23 |             CONFIG.sglang["tp_size"] = CONFIG.sglang.pop("tensor_parallel_size")
24 |         server_args = ServerArgs(host=CONFIG.serving["host"], port=CONFIG.serving["port"], model_path=self.model, **CONFIG.sglang)
25 |         launch_engine(server_args)
26 |         set_prometheus_multiproc_dir()
27 |         os.makedirs(os.environ["PROMETHEUS_MULTIPROC_DIR"])
28 |         add_prometheus_middleware(sglang_app)
29 |         enable_func_timer()
30 | 
31 |         self.app = sglang_app
32 | 
33 |         @self.app.get("/v1/model/info/args")
34 |         async def get_engine_args():
35 |             return {"code": 0, "result": server_args}
36 | 
37 |         LOGGER.info("SGLangBackend FastAPI app created and routes defined.")
38 | 


--------------------------------------------------------------------------------
/enova/serving/backend/transformers.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | import locate
 3 | from fastapi import FastAPI, APIRouter
 4 | from fastapi.middleware.cors import CORSMiddleware
 5 | from enova.common.config import CONFIG
 6 | from enova.serving.middlewares.base import EnovaAIMultiMiddlewares
 7 | from enova.serving.backend.base import BaseBackend
 8 | from enova.serving.backend.hf.hf import HFText2TextHandler
 9 | 
10 | 
11 | @dataclasses.dataclass
12 | class TransformersBackend(BaseBackend):
13 | 
14 |     def __post_init__(self):
15 |         self.hf = HFText2TextHandler()
16 | 
17 |     def _create_apiserver(self):
18 |         self._create_app()
19 |         self._init_middlewares()
20 |         self._init_routers()
21 | 
22 |     def _init_middlewares(self):
23 |         """"""
24 |         middlewares = EnovaAIMultiMiddlewares()
25 |         for middleware_cls_name in CONFIG.api.get("middleware_names", []):
26 |             middleware_ins = locate(middleware_cls_name)()
27 |             middlewares.register(middleware_ins)
28 |         self.app.middleware("http")(middlewares.process)
29 |         self.app.add_middleware(
30 |             CORSMiddleware,
31 |             allow_origins=["*"],
32 |             allow_credentials=True,
33 |             allow_methods=["*"],
34 |             allow_headers=["*"],
35 |         )
36 | 
37 |     def _init_exception_handler(self):
38 |         """"""
39 | 
40 |     def _init_routers(self):
41 |         """
42 |         according task to add route, such as openai
43 |         """
44 |         self.api_router = APIRouter(
45 |             prefix="",
46 |             dependencies=[],
47 |         )
48 | 
49 |         @self.app.get("/healthz", include_in_schema=False)
50 |         async def healthz():
51 |             return {"status": "ok"}
52 | 
53 |         self.register_serving_api()
54 | 
55 |     def _create_app(self):
56 |         """"""
57 |         self.app = FastAPI(
58 |             title=self.name,
59 |             description=(self.__doc__ if self.__doc__ else f"Enova {self.name}"),
60 |         )
61 | 
62 |     def register_serving_api(self):
63 |         """
64 |         register_api from serving
65 |         """
66 |         self.hf.register_api_router(self.api_router)
67 |         self.app.include_router(self.api_router)
68 | 


--------------------------------------------------------------------------------
/enova/serving/backend/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
 3 | from enova.common.logger import LOGGER
 4 | 
 5 | 
 6 | def hf_model_params_size(model_name, hf_proxies=None):
 7 |     """
 8 |     TODO: implement special model
 9 |     """
10 |     LOGGER.debug(f"starg parse model's config: {model_name}")
11 |     try:
12 |         return specific_eval_hf_model_params_size(model_name, hf_proxies)
13 |     except Exception as e:
14 |         LOGGER.warning(f"specific_eval_hf_model_params_size error: {str(e)}")
15 |         return estimate_hf_model_params_size(model_name, hf_proxies)
16 | 
17 | 
18 | def specific_eval_hf_model_params_size(model_name, hf_proxies=None):
19 |     """ """
20 |     config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, proxies=hf_proxies)
21 |     if config.__class__.__name__ in ["BaichuanConfig", "QWenConfig"]:
22 |         model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
23 |     else:
24 |         model = AutoModel.from_config(config, trust_remote_code=True)
25 |     params_size = 0
26 |     for w_name, p in list(model.named_parameters()):
27 |         LOGGER.debug(f"w_name: {w_name}, shape: {p.shape}")
28 |         params_size += np.prod(p.shape)
29 |     return {"params_size": int(params_size), "model_type": config.model_type}
30 | 
31 | 
32 | def estimate_hf_model_params_size(model_name, hf_proxies=None):
33 |     """fast estimate hf model params_szie"""
34 |     config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, proxies=hf_proxies)
35 |     if config.model_type == "chatglm":
36 |         return chatglm_estimate_hf_model_params_size(config)
37 |     num_layers = config.num_hidden_layers
38 |     hidden_size = config.hidden_size
39 |     vocab_size = config.vocab_size
40 |     params_size = (
41 |         vocab_size * hidden_size
42 |         + num_layers * (4 * hidden_size**2 + 4 * hidden_size)
43 |         + num_layers * (8 * hidden_size**2 + 5 * hidden_size)
44 |         + 4 * num_layers * hidden_size
45 |     )
46 |     return {"params_size": int(params_size), "model_type": config.model_type}
47 | 
48 | 
49 | def chatglm_estimate_hf_model_params_size(config):
50 |     num_layers = config.num_layers
51 |     hidden_size = config.hidden_size
52 |     vocab_size = config.vocab_size
53 |     params_size = (
54 |         vocab_size * hidden_size
55 |         + num_layers * (4 * hidden_size**2 + 4 * hidden_size)
56 |         + num_layers * (8 * hidden_size**2 + 5 * hidden_size)
57 |         + 4 * num_layers * hidden_size
58 |     )
59 |     return {"params_size": int(params_size), "model_type": config.model_type}
60 | 


--------------------------------------------------------------------------------
/enova/serving/backend/vllm.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import dataclasses
 3 | from enova.common.logger import LOGGER
 4 | from enova.common.config import CONFIG
 5 | from enova.common.constant import VllmMode
 6 | from enova.serving.backend.base import BaseBackend
 7 | from enova.serving.middlewares.cors import WSCORSMiddleware
 8 | 
 9 | 
10 | @dataclasses.dataclass
11 | class VllmBackend(BaseBackend):
12 |     def __post_init__(self):
13 |         """"""
14 | 
15 |     def _create_app(self):
16 |         vllm_mode = CONFIG.vllm.pop("vllm_mode", VllmMode.NORMAL.value)
17 |         from vllm.engine.arg_utils import AsyncEngineArgs
18 |         from vllm.engine.async_llm_engine import AsyncLLMEngine
19 |         from vllm.transformers_utils.tokenizer import get_tokenizer
20 |         import torch
21 | 
22 |         if not torch.cuda.is_available():
23 |             raise RuntimeError("vLLM Photon requires CUDA runtime")
24 | 
25 |         if vllm_mode == VllmMode.NORMAL.value:
26 |             from vllm.entrypoints import api_server
27 | 
28 |             engine_args = AsyncEngineArgs(model=self.model, **CONFIG.vllm)
29 |             engine = AsyncLLMEngine.from_engine_args(engine_args)
30 |             engine_model_config = asyncio.run(engine.get_model_config())
31 |             max_model_len = engine_model_config.max_model_len
32 | 
33 |             api_server.served_model = self.model
34 |             api_server.engine = engine
35 |             api_server.max_model_len = max_model_len
36 |             api_server.tokenizer = get_tokenizer(
37 |                 engine_args.tokenizer,
38 |                 tokenizer_mode=engine_args.tokenizer_mode,
39 |                 trust_remote_code=engine_args.trust_remote_code,
40 |             )
41 |         elif vllm_mode == VllmMode.OPENAI.value:
42 |             from vllm.entrypoints.openai import api_server
43 |             from addict import Dict as AddDict
44 | 
45 |             engine_args = AsyncEngineArgs(model=self.model, **CONFIG.vllm)
46 |             engine = AsyncLLMEngine.from_engine_args(engine_args, usage_context=api_server.UsageContext.OPENAI_API_SERVER)
47 | 
48 |             request_logger = api_server.RequestLogger(max_log_len=CONFIG.vllm.get("max_log_len"))
49 |             engine_model_config = asyncio.run(engine.get_model_config())
50 | 
51 |             served_model_names = [self.model]
52 |             openai_serving_chat = api_server.OpenAIServingChat(
53 |                 engine,
54 |                 model_config=engine_model_config,
55 |                 served_model_names=served_model_names,
56 |                 response_role=CONFIG.vllm.get("response_role") or "assistant",
57 |                 lora_modules=CONFIG.vllm.get("lora_modules"),
58 |                 prompt_adapters=CONFIG.vllm.get("prompt_adapters"),
59 |                 request_logger=request_logger,
60 |                 chat_template=CONFIG.vllm.get("chat_template"),
61 |             )
62 |             openai_serving_completion = api_server.OpenAIServingCompletion(
63 |                 engine,
64 |                 model_config=engine_model_config,
65 |                 served_model_names=served_model_names,
66 |                 lora_modules=CONFIG.vllm.get("lora_modules"),
67 |                 prompt_adapters=CONFIG.vllm.get("prompt_adapters"),
68 |                 request_logger=request_logger,
69 |                 return_tokens_as_token_ids=CONFIG.vllm.get("return_tokens_as_token_ids") or False,
70 |             )
71 |             api_server.engine = engine
72 |             api_server.async_engine_client = engine
73 |             api_server.engine_args = engine_args
74 |             api_server.openai_serving_chat = openai_serving_chat
75 |             api_server.openai_serving_completion = openai_serving_completion
76 |             args = AddDict(CONFIG.vllm)
77 |             api_server.app = api_server.build_app(args)
78 |         else:
79 |             raise ValueError(f"vllm_mode: {vllm_mode} is not support")
80 |         LOGGER.info(f"CONFIG.vllm: {CONFIG.vllm}")
81 | 
82 |         self.app = api_server.app
83 |         cur_app = api_server.app
84 | 
85 |         @cur_app.get("/v1/model/info/args")
86 |         async def get_engine_args():
87 |             return {"code": 0, "result": engine_args}
88 | 


--------------------------------------------------------------------------------
/enova/serving/middlewares/auth.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/serving/middlewares/auth.py


--------------------------------------------------------------------------------
/enova/serving/middlewares/base.py:
--------------------------------------------------------------------------------
 1 | import abc
 2 | from typing import List
 3 | from fastapi import Request
 4 | 
 5 | 
 6 | def get_dependencies() -> List:
 7 |     return
 8 | 
 9 | 
10 | class BaseMiddleware(metaclass=abc.ABCMeta):
11 |     """"""
12 | 
13 | 
14 | class EnovaAIMultiMiddlewares:
15 |     def __init__(self) -> None:
16 |         self.middewares: List[BaseMiddleware] = []
17 |         self.request_middlewares: List[BaseMiddleware] = []
18 |         self.response_middlewares: List[BaseMiddleware] = []
19 | 
20 |     def register(self, middleware: BaseMiddleware):
21 |         self.middewares.append(middleware)
22 |         if hasattr(middleware, "_process_request"):
23 |             self.request_middlewares.append(middleware)
24 |         if hasattr(middleware, "_process_response"):
25 |             self.response_middlewares.append(middleware)
26 | 
27 |     async def process(self, request: Request, call_next):
28 |         # request
29 |         for middleware in self.request_middlewares:
30 |             if hasattr(middleware, "_process_request"):
31 |                 await middleware._process_request(request)
32 |         response = await call_next(request)
33 |         # response
34 |         for middleware in self.response_middlewares:
35 |             response = await middleware._process_response(request, response)
36 |         return response
37 | 


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/.gitignore:
--------------------------------------------------------------------------------
1 | tempo-data
2 | single-demo
3 | enova_compose*.yaml
4 | bin/docker-compose*


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/escaler/conf/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "resource_backend": {
 3 |         "type": "docker"
 4 |     },
 5 |     "docker": {
 6 | 
 7 |     },
 8 |     "detector": {
 9 |         "prom": {
10 |             "host": "enova-prometheus",
11 |             "port": 9090
12 |         },
13 |         "api": {
14 |             "host": "0.0.0.0",
15 |             "port": 8183,
16 |             "version": "v1",
17 |             "url_prefix": "/escaler"
18 |         },
19 |         "detect_interval": 30
20 |     },
21 |     "scaler": {},
22 |     "zmq": {
23 |         "host": "127.0.0.1",
24 |         "port": 4321
25 |     },
26 |     "redis": {
27 |         "addr": "127.0.0.1:6379",
28 |         "password": "",
29 |         "db": 0
30 |     },
31 |     "enova_algo": {
32 |         "host": "enova-algo:8181"
33 |     },
34 |     "serving": {
35 |         "image": "emergingai/enova:v0.0.8",
36 |         "start_cmd": [
37 |         ],
38 |         "network": "enova-mon_enova-net",
39 |         "network_alias": "enova-serving",
40 |         "name": "enova"
41 |     },
42 |     "logger": {
43 |         "name": "server",
44 |         "path": "./var/log/emergingai",
45 |         "level": "debug"
46 |     }
47 | }


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/grafana/grafana_provisioning/dashboards/enova-dashboards.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: 1
 2 | 
 3 | providers:
 4 |   # <string> an unique provider name. Required
 5 |   - name: 'ENOVA-LLMO-dashboards'
 6 |     # <int> Org id. Default to 1
 7 |     orgId: 1
 8 |     # <string> name of the dashboard folder.
 9 |     folder: ''
10 |     # <string> folder UID. will be automatically generated if not specified
11 |     folderUid: ''
12 |     # <string> provider type. Default to 'file'
13 |     type: file
14 |     # <bool> disable dashboard deletion
15 |     disableDeletion: false
16 |     # <int> how often Grafana will scan for changed dashboards
17 |     updateIntervalSeconds: 10
18 |     # <bool> allow updating provisioned dashboards from the UI
19 |     allowUiUpdates: false
20 |     options:
21 |       # <string, required> path to dashboard files on disk. Required when using the 'file' type
22 |       path: /etc/dashboards
23 |       # <bool> use folder names from filesystem to create folders in Grafana
24 |       foldersFromFilesStructure: true~


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/grafana/grafana_provisioning/datasources/enova-datasource.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: 1
 2 | 
 3 | datasources:
 4 | - name: Enova-Prometheus
 5 |   type: prometheus
 6 |   uid: prometheus
 7 |   url: http://prometheus:9090
 8 |   isDefault: true
 9 |   access: proxy
10 |   editable: true
11 |   orgId: 1
12 | 
13 | - name: Enova-Tempo
14 |   type: tempo
15 |   uid: tempo
16 |   url: http://tempo:3200
17 |   isDefault: false
18 |   access: proxy
19 |   orgId: 1
20 |   editable: true
21 |   jsonData:
22 |     httpMethod: GET
23 |     serviceMap:
24 |       datasourceUid: prometheus
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/haproxy/haproxy.cfg:
--------------------------------------------------------------------------------
 1 | defaults
 2 |     mode            tcp
 3 |     log             global
 4 |     option          tcplog
 5 |     option          dontlognull
 6 |     option http-server-close
 7 |     option          redispatch
 8 |     retries         3
 9 |     timeout http-request 10s
10 |     timeout queue   1m
11 |     timeout connect 10s
12 |     timeout client  1m
13 |     timeout server  1m
14 |     timeout http-keep-alive 10s
15 |     timeout check   10s
16 |     maxconn         3000
17 | 
18 | resolvers mydns
19 |   nameserver dns1 127.0.0.1:53
20 |   resolve_retries 3
21 |   timeout resolve 1s
22 |   timeout retry 1s
23 |   hold valid 10s
24 | 
25 | frontend http_front
26 |   bind *:9199
27 |   default_backend http_back
28 | 
29 | backend http_back
30 |   balance roundrobin
31 |   server-template srv 1-3 enova.serving.com:9199 check inter 5s fall 3 rise 2 resolvers mydns init-addr last,libc,none
32 | 


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/nginx/nginx.conf:
--------------------------------------------------------------------------------
 1 | worker_processes 8;
 2 | worker_rlimit_nofile 65535;
 3 | 
 4 | events {
 5 |     worker_connections 20480;
 6 | }
 7 | 
 8 | 
 9 | http {
10 | 
11 |     client_max_body_size 4096M;
12 |     client_header_buffer_size 512k;
13 |     large_client_header_buffers 4 512k;
14 | 
15 |     access_log /var/log/nginx/access.log;
16 |     error_log /var/log/nginx/error.log;
17 | 
18 |     resolver 127.0.0.11 valid=1s;
19 |     upstream backend {
20 |         server enova-serving:9199 max_fails=1 fail_timeout=1s;
21 |     }
22 | 
23 |     server {
24 |         underscores_in_headers on;
25 |         ignore_invalid_headers off;
26 | 
27 |         listen 9199;
28 |         server_name artrefine_proxy;
29 |         keepalive_timeout 3600;
30 | 
31 |         access_log /var/log/nginx/enova_access.log;
32 |         error_log /var/log/nginx/enova_error.log;
33 | 
34 |         location / {
35 |             proxy_read_timeout 3600;
36 |             proxy_pass http://backend;
37 |             proxy_set_header Host $proxy_host;
38 |             proxy_set_header X-Real-IP $remote_addr;
39 |             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
40 |         }
41 | 
42 |     }
43 | }
44 | 


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/otel-collector/collector-config.yaml:
--------------------------------------------------------------------------------
 1 | receivers:
 2 |   otlp:
 3 |     protocols:
 4 |       grpc:
 5 |       http:
 6 |   otlp/spanmetrics:
 7 |     protocols:
 8 |       grpc:
 9 |         endpoint: localhost:12345
10 | 
11 | 
12 | exporters:
13 |   debug:
14 |     verbosity: detailed
15 |   otlp:
16 |     endpoint: tempo:4317
17 |     tls:
18 |       insecure: true
19 |   otlp/spanmetrics:
20 |     endpoint: "localhost:4317"
21 |     tls:
22 |       insecure: true
23 |   prometheus:
24 |     endpoint: 0.0.0.0:8889
25 |     # prometheusremotewrite:
26 |     #   endpoint: "http://prometheus:9090/api/v1/write"
27 | 
28 | processors:
29 |   batch:
30 |   memory_limiter:
31 |     check_interval: 5s
32 |     limit_percentage: 80
33 |     spike_limit_percentage: 25
34 |   spanmetrics:
35 |     metrics_exporter: otlp/spanmetrics
36 |     dimensions:
37 |     - name: batch_size
38 |       # - name: parameters
39 |   attributes/http:
40 |     actions:
41 |       - action: delete
42 |         key: "http.server_name"
43 |       - action: delete
44 |         key: "http.host"
45 | 
46 | extensions:
47 |   health_check:
48 | 
49 | service:
50 |   extensions: [health_check]
51 |   pipelines:
52 |     traces:
53 |       receivers: [otlp]
54 |       processors: [spanmetrics, batch]
55 |       exporters: [otlp]
56 |     metrics/spanmetrics:
57 |       receivers: [otlp/spanmetrics]
58 |       exporters: [otlp/spanmetrics]
59 |     metrics:
60 |       receivers: [otlp]
61 |       processors: [attributes/http, batch]
62 |       exporters: [debug, prometheus]
63 |     # logs:
64 |     #   receivers: [otlp]
65 |     #   processors: [batch]
66 |     #   exporters: [debug]


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/prometheus/prometheus.yml:
--------------------------------------------------------------------------------
 1 | global:
 2 |   scrape_interval: 15s
 3 |   scrape_timeout: 10s
 4 |   evaluation_interval: 15s
 5 | alerting:
 6 |   alertmanagers:
 7 |   - static_configs:
 8 |     - targets: []
 9 |     scheme: http
10 |     timeout: 10s
11 |     api_version: v1
12 | scrape_configs:
13 | - job_name: prometheus
14 |   honor_timestamps: true
15 |   scrape_interval: 15s
16 |   scrape_timeout: 10s
17 |   metrics_path: /metrics
18 |   scheme: http
19 |   static_configs:
20 |   - targets:
21 |     - prometheus:9090
22 | - job_name: 'otel-collector'
23 |   scrape_interval: 10s
24 |   static_configs:
25 |   - targets: ['otel-collector:8888']
26 |   - targets: ['otel-collector:8889']
27 | 
28 | - job_name: 'dcgm'
29 |   static_configs:
30 |   - targets: ['dcgm-exporter:9400']
31 | 
32 | - job_name: 'enovaserving'
33 |   static_configs:
34 |   - targets: ['enova-serving:9199']
35 | 


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/tempo/tempo.yaml:
--------------------------------------------------------------------------------
 1 | stream_over_http_enabled: true
 2 | server:
 3 |   http_listen_port: 3200
 4 |   log_level: info
 5 | 
 6 | query_frontend:
 7 |   search:
 8 |     duration_slo: 5s
 9 |     throughput_bytes_slo: 1.073741824e+09
10 |   trace_by_id:
11 |     duration_slo: 5s
12 | 
13 | distributor:
14 |   receivers:
15 |     otlp:
16 |       protocols:
17 |         http:
18 |         grpc:
19 | 
20 | ingester:
21 |   max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally
22 | 
23 | compactor:
24 |   compaction:
25 |     block_retention: 1h # overall Tempo trace retention. set for demo purposes
26 | 
27 | metrics_generator:
28 |   processor:
29 |     local_blocks:
30 |       filter_server_spans: false
31 |     span_metrics:
32 |         dimensions:
33 |           - http.method
34 |           - http.target
35 |           - http.status_code
36 |           - service.version
37 |     service_graphs:
38 |         dimensions:
39 |           - http.method
40 |           - http.target
41 |           - http.status_code
42 |           - service.version
43 |   registry:
44 |     external_labels:
45 |       source: tempo
46 |       cluster: docker-compose
47 |   storage:
48 |     path: /tmp/tempo/generator/wal
49 |     remote_write:
50 |     - url: http://prometheus:9090/api/v1/write
51 |       send_exemplars: true
52 |   traces_storage:
53 |     path: /tmp/tempo/generator/traces
54 | 
55 | storage:
56 |   trace:
57 |     backend: local # backend configuration to use
58 |     wal:
59 |       path: /tmp/tempo/wal # where to store the the wal locally
60 |     local:
61 |       path: /tmp/tempo/blocks
62 | 
63 | overrides:
64 |   defaults:
65 |     metrics_generator:
66 |       processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator
67 | 


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/compose.yaml:
--------------------------------------------------------------------------------
 1 | version: "3.8"
 2 | 
 3 | services:
 4 |   traffic_injector:
 5 |     image: 60.204.135.2/emergingai/enova-jmeter:v0.0.2
 6 |     command:
 7 |       - sh
 8 |       - -c
 9 |       - |
10 |         rm -rf /data/report
11 |         mkdir /data/report
12 |         jmeter -n -t /data/jmeter-config.xml -l /data/report/report.log -e -o /data/report
13 |     volumes:
14 |       - ${DATA_FILE}:/opt/data.csv
15 |       - ${OUTPUT}:/data
16 |     networks:
17 |       - enova-net
18 | 
19 | volumes:
20 |   output:
21 | 
22 | networks:
23 |   enova-net:
24 |     enable_ipv6: false
25 | 


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/data.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/template/deployment/docker-compose/traffic-injector/data.csv


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/jmeter-config-template.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <jmeterTestPlan version="1.2" properties="5.0" jmeter="5.6">
 3 |   <hashTree>
 4 |     <TestPlan guiclass="TestPlanGui" testclass="TestPlan" testname="Test Plan">
 5 |       <elementProp name="TestPlan.user_defined_variables" elementType="Arguments" guiclass="ArgumentsPanel" testclass="Arguments" testname="User Defined Variables">
 6 |         <collectionProp name="Arguments.arguments"/>
 7 |       </elementProp>
 8 |     </TestPlan>
 9 |     <hashTree>
10 |       <kg.apc.jmeter.timers.VariableThroughputTimer guiclass="kg.apc.jmeter.timers.VariableThroughputTimerGui" testclass="kg.apc.jmeter.timers.VariableThroughputTimer" testname="tst">
11 |         <collectionProp name="load_profile">@LOAD_PROFILE@
12 |         </collectionProp>
13 |       </kg.apc.jmeter.timers.VariableThroughputTimer>
14 |       <hashTree/>
15 |       <com.blazemeter.jmeter.threads.concurrency.ConcurrencyThreadGroup guiclass="com.blazemeter.jmeter.threads.concurrency.ConcurrencyThreadGroupGui" testclass="com.blazemeter.jmeter.threads.concurrency.ConcurrencyThreadGroup" testname="bzm - Concurrency Thread Group" enabled="true">
16 |         <elementProp name="ThreadGroup.main_controller" elementType="com.blazemeter.jmeter.control.VirtualUserController"/>
17 |         <stringProp name="ThreadGroup.on_sample_error">continue</stringProp>
18 |         <stringProp name="TargetLevel">${__tstFeedback(tst,100, 1000,10)}</stringProp>
19 |         <stringProp name="RampUp"></stringProp>
20 |         <stringProp name="Steps"></stringProp>
21 |         <stringProp name="Hold">@DURATION@</stringProp>
22 |         <stringProp name="LogFilename"></stringProp>
23 |         <stringProp name="Iterations"></stringProp>
24 |         <stringProp name="Unit">S</stringProp>
25 |       </com.blazemeter.jmeter.threads.concurrency.ConcurrencyThreadGroup>
26 |       <hashTree>
27 |         <HeaderManager guiclass="HeaderPanel" testclass="HeaderManager" testname="HTTP Header Manager">
28 |           <collectionProp name="HeaderManager.headers">@ELEMENT_PROP@
29 |           </collectionProp>
30 |         </HeaderManager>
31 |         <hashTree/>
32 |         <HTTPSamplerProxy guiclass="HttpTestSampleGui" testclass="HTTPSamplerProxy" testname="HTTP Request">
33 |           <boolProp name="HTTPSampler.postBodyRaw">true</boolProp>
34 |           <elementProp name="HTTPsampler.Arguments" elementType="Arguments">
35 |             <collectionProp name="Arguments.arguments">
36 |               <elementProp name="" elementType="HTTPArgument">
37 |                 <stringProp name="Argument.value">@BODY@</stringProp>
38 |                 <stringProp name="Argument.metadata">=</stringProp>
39 |               </elementProp>
40 |             </collectionProp>
41 |           </elementProp>
42 |           <stringProp name="HTTPSampler.domain">@HOST@</stringProp>
43 |           <stringProp name="HTTPSampler.port">@PORT@</stringProp>
44 |           <stringProp name="HTTPSampler.path">@PATH@</stringProp>
45 |           <stringProp name="HTTPSampler.method">@METHOD@</stringProp>
46 |           <boolProp name="HTTPSampler.follow_redirects">true</boolProp>
47 |           <boolProp name="HTTPSampler.use_keepalive">true</boolProp>
48 |         </HTTPSamplerProxy>
49 |         <hashTree/>@DATA@
50 |       </hashTree>
51 |     </hashTree>
52 |   </hashTree>
53 | </jmeterTestPlan>


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/jmeter.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:centos7
2 | WORKDIR /opt
3 | ADD jdk-8u361-linux-x64.tar.gz /usr/local/
4 | ADD apache-jmeter-5.6.3.tgz /opt/
5 | ENV JAVA_HOME=/usr/local/jdk1.8.0_361 \
6 |     PATH=/usr/local/jdk1.8.0_361/bin:/opt/apache-jmeter-5.6.3/bin:$PATH
7 | 


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/webui-nginx/nginx.conf:
--------------------------------------------------------------------------------
 1 | worker_processes 8;
 2 | worker_rlimit_nofile 65535;
 3 | 
 4 | events {
 5 |     worker_connections 20480;
 6 | }
 7 | 
 8 | 
 9 | http {
10 |     server {
11 |         listen 8501;
12 |         listen [::]:8501;
13 | 
14 | 
15 |         location /stream {
16 |             proxy_pass http://enova-serving:8501;
17 |             proxy_http_version 1.1;
18 |             proxy_set_header Upgrade $http_upgrade;
19 |             proxy_set_header Connection "Upgrade";
20 |             proxy_set_header Host $host;
21 |         }
22 | 
23 | 
24 |         location ^~ /static {
25 |             proxy_pass http://enova-serving:8501/static/;
26 |         }
27 | 
28 |         location ^~ /healthz {
29 |             proxy_pass http://enova-serving:8501/healthz;
30 |         }
31 | 
32 |         location ^~ /vendor {
33 |             proxy_pass http://enova-serving:8501/vendor;
34 |         }
35 | 
36 |         location = /_stcore/health {
37 |             proxy_pass http://enova-serving:8501/_stcore/health;
38 |         }
39 | 
40 |         location = /_stcore/allowed-message-origins {
41 |             proxy_pass http://enova-serving:8501/_stcore/allowed-message-origins;
42 |         }
43 | 
44 |         location = /_stcore/stream {
45 |             proxy_pass http://enova-serving:8501/_stcore/stream;
46 |             proxy_http_version 1.1;
47 |             proxy_redirect off;
48 |             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
49 |             proxy_set_header Host $http_host;
50 |             proxy_set_header Upgrade $http_upgrade;
51 |             proxy_set_header Connection "upgrade";
52 |             proxy_read_timeout 86400;
53 |         }
54 | 
55 | 
56 |         location / {
57 |             proxy_pass http://enova-serving:8501;
58 |             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
59 |             proxy_set_header Host $http_host;
60 |             proxy_redirect off;
61 |         }
62 |     }
63 | }
64 | 


--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/webui/webui.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/template/deployment/docker-compose/webui/webui.yaml


--------------------------------------------------------------------------------
/enova/webui/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/webui/__init__.py


--------------------------------------------------------------------------------
/enova/webui/chat.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import requests
  4 | import streamlit as st
  5 | from openai import OpenAI, InternalServerError
  6 | 
  7 | st.title('🤖ENOVA AI WebUI')
  8 | 
  9 | MAX_TURNS = 20
 10 | MAX_BOXES = MAX_TURNS * 2
 11 | 
 12 | vllm_mode = os.getenv("VLLM_MODE", "openai")
 13 | serving_url = os.getenv("SERVING_URL", "http://127.0.0.1:9199")
 14 | openai_api_base = serving_url + "/v1"
 15 | openai_api_key = "xxx"
 16 | 
 17 | client = None
 18 | model = None
 19 | if vllm_mode == "openai":
 20 |     try:
 21 |         client = OpenAI(
 22 |             api_key=openai_api_key,
 23 |             base_url=openai_api_base,
 24 |         )
 25 |         models = client.models.list()
 26 |         model = models.data[0].id
 27 | 
 28 |     except InternalServerError as e:
 29 |         print("Server not ready. Please wait a moment and refresh the page.")
 30 | 
 31 |     except Exception as e:
 32 |         print(f"An unexpected error occurred: {e}")
 33 |         print("Please check the server status and try again.")
 34 | 
 35 | system_prompt = st.sidebar.text_area(
 36 |     label="System Prompt",
 37 |     value="You are a helpful AI assistant who answers questions in short sentences."
 38 | )
 39 | 
 40 | max_tokens = st.sidebar.slider('max_tokens', 0, 4096, 2048, step=1)
 41 | temperature = st.sidebar.slider('temperature', 0.0, 1.0, 0.1, step=0.01)
 42 | top_p = st.sidebar.slider('top_p', 0.0, 1.0, 0.5, step=0.01) if vllm_mode == "normal" else None
 43 | 
 44 | 
 45 | if 'messages' not in st.session_state:
 46 |     st.session_state.messages = []
 47 | 
 48 | messages = st.session_state.messages
 49 | 
 50 | for message in st.session_state.messages:
 51 |     with st.chat_message(message['role']):
 52 |         st.markdown(message['content'])
 53 | 
 54 | if user_input := st.chat_input(''):
 55 | 
 56 |     with st.chat_message('user'):
 57 |         st.markdown(user_input)
 58 |         messages.append({'role': 'user', 'content': user_input})
 59 | 
 60 |     with st.chat_message('assistant') as assistant_message:
 61 | 
 62 |         if vllm_mode == "normal":
 63 |             placeholder = st.empty()
 64 | 
 65 |             response = requests.post(
 66 |                 url=f"{serving_url}/generate",
 67 |                 headers={'Content-type': 'application/json; charset=utf-8'},
 68 |                 data=json.dumps({
 69 |                     "prompt": user_input,
 70 |                     "max_tokens": max_tokens,
 71 |                     "top_p": top_p,
 72 |                     "temperature": temperature,
 73 |                     "stream": True
 74 |                 }),
 75 |                 stream=True
 76 |             )
 77 | 
 78 |             full_content = ''
 79 |             for line in response.iter_lines(delimiter=b'\00'):
 80 |                 line = line.decode(encoding='utf-8')
 81 |                 if line.strip() == '':
 82 |                     continue
 83 |                 response_json = json.loads(line)
 84 |                 full_content = response_json['text'][0]
 85 |                 placeholder.markdown(full_content)
 86 | 
 87 |             st.session_state.messages.append({'role': 'assistant', 'content': full_content})
 88 | 
 89 |         elif vllm_mode == "openai" and model:
 90 |             placeholder = st.empty()
 91 |             openai_messages = [
 92 |                 {"role": message["role"], "content": message["content"]}
 93 |                 for message in st.session_state.messages[-5:]
 94 |             ]
 95 | 
 96 |             chat_completion = client.chat.completions.create(
 97 |                 messages=openai_messages,
 98 |                 model=model,
 99 |                 temperature=temperature,
100 |                 max_tokens=max_tokens,
101 |                 stream=True
102 |             )
103 | 
104 |             full_content = ''
105 |             for chunk in chat_completion:
106 |                 if chunk.choices[0].delta.content is not None:
107 |                     full_content += str(chunk.choices[0].delta.content)
108 |                     placeholder.markdown(full_content)
109 | 
110 |             st.session_state.messages.append({'role': 'assistant', 'content': full_content})
111 | 


--------------------------------------------------------------------------------
/escaler/build.sh:
--------------------------------------------------------------------------------
1 | 
2 | go mod download
3 | # go install github.com/swaggo/swag/cmd/swag@latest
4 | 
5 | # swag init -g cmd/escaler/main.go -o cmd/escaler/docs --parseDependency --parseInternal
6 | mkdir -p dist/bin
7 | go env && go build -o dist/bin/escaler cmd/escaler/main.go
8 | 


--------------------------------------------------------------------------------
/escaler/cmd/escaler/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"flag"
 5 | 	"fmt"
 6 | 	"sync"
 7 | 
 8 | 	"github.com/Emerging-AI/ENOVA/escaler/cmd/escaler/docs"
 9 | 
10 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/detector"
11 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
12 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/scaler"
13 | 
14 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/config"
15 | 
16 | 	swaggerfiles "github.com/swaggo/files"
17 | 	ginSwagger "github.com/swaggo/gin-swagger"
18 | )
19 | 
20 | func main() {
21 | 	confPath := flag.String("conf", "conf/settings.json", "Path to the configuration file")
22 | 	flag.Parse()
23 | 
24 | 	fmt.Printf("Using configuration file: %s\n", *confPath)
25 | 	econfig := config.GetEConfig()
26 | 	econfig.Init(*confPath)
27 | 	econfig.PrintConfig()
28 | 
29 | 	docs.SwaggerInfo.Title = "Monitor Service API"
30 | 	docs.SwaggerInfo.Description = "This is a monitor service."
31 | 	docs.SwaggerInfo.Version = "1.0"
32 | 	//docs.SwaggerInfo.Host = "121.36.212.78:30080"
33 | 	docs.SwaggerInfo.Host = "0.0.0.0:8183"
34 | 	docs.SwaggerInfo.BasePath = "/"
35 | 	docs.SwaggerInfo.Schemes = []string{"http", "https"}
36 | 
37 | 	var wg sync.WaitGroup
38 | 
39 | 	ch := make(chan meta.TaskSpecInterface)
40 | 	d := detector.NewDetectorServer(ch, nil)
41 | 	d.GetEngine().GET("/api/escaler/docs/*any", ginSwagger.WrapHandler(swaggerfiles.Handler))
42 | 
43 | 	s := scaler.NewServingScaler(ch)
44 | 
45 | 	wg.Add(2)
46 | 	go d.RunInWaitGroup(&wg)
47 | 	go s.RunInWaitGroup(&wg)
48 | 
49 | 	wg.Wait()
50 | 	close(ch)
51 | 	fmt.Println("All tasks finished.")
52 | }
53 | 


--------------------------------------------------------------------------------
/escaler/cmd/escaler/mock_enovaalgo.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"net/http"
 5 | 
 6 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/api"
 7 | 	"github.com/gin-gonic/gin"
 8 | )
 9 | 
10 | func StartMockEnovaAlgoServer() {
11 | 	r := gin.Default()
12 | 	r.POST("/api/enovaalgo/v1/config_recommend", func(c *gin.Context) {
13 | 		c.JSON(http.StatusOK, api.EnvoaResponse{
14 | 			Message: "",
15 | 			Code:    0,
16 | 			Result: api.ConfigRecommendResult{
17 | 				MaxNumSeqs:           32,
18 | 				TensorParallelSize:   1,
19 | 				GpuMemoryUtilization: 0.8,
20 | 				Replicas:             1,
21 | 			},
22 | 			TraceId: "TraceId",
23 | 			Version: "v1",
24 | 		})
25 | 	})
26 | 
27 | 	r.POST("/api/enovaalgo/v1/anomaly_detect", func(c *gin.Context) {
28 | 		c.JSON(http.StatusOK, api.EnvoaResponse{
29 | 			Message: "",
30 | 			Code:    0,
31 | 			Result: api.AnomalyDetectResponse{
32 | 				IsAnomaly: 0,
33 | 			},
34 | 			TraceId: "TraceId",
35 | 			Version: "v1",
36 | 		})
37 | 	})
38 | 
39 | 	r.POST("/api/enovaalgo/v1/anomaly_recover", func(c *gin.Context) {
40 | 		c.JSON(http.StatusOK, api.EnvoaResponse{
41 | 			Message: "",
42 | 			Code:    0,
43 | 			Result: api.ConfigRecommendResult{
44 | 				MaxNumSeqs:           32,
45 | 				TensorParallelSize:   1,
46 | 				GpuMemoryUtilization: 0.8,
47 | 				Replicas:             1,
48 | 			},
49 | 			TraceId: "TraceId",
50 | 			Version: "v1",
51 | 		})
52 | 	})
53 | 	r.Run(":8181")
54 | }
55 | 


--------------------------------------------------------------------------------
/escaler/conf/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "resource_backend": {
 3 |         "type": "docker"
 4 |     },
 5 |     "docker": {
 6 | 
 7 |     },
 8 |     "detector": {
 9 |         "prom": {
10 |             "host": "enova-prometheus",
11 |             "port": 9090
12 |         },
13 |         "api": {
14 |             "host": "0.0.0.0",
15 |             "port": 8183,
16 |             "version": "v1",
17 |             "url_prefix": "/escaler"
18 |         },
19 |         "detect_interval": 30
20 |     },
21 |     "scaler": {},
22 |     "zmq": {
23 |         "host": "127.0.0.1",
24 |         "port": 4321
25 |     },
26 |     "redis": {
27 |         "addr": "127.0.0.1:6379",
28 |         "password": "",
29 |         "db": 0
30 |     },
31 |     "enova_algo": {
32 |         "host": "127.0.0.1:8181"
33 |     },
34 |     "serving": {
35 |         "image": "emergingai/enova:v0.0.8",
36 |         "start_cmd": [
37 |             "sleep",
38 |             "inf"
39 |         ],
40 |         "network": "enova-mon_enova-net",
41 |         "network_alias": "enova-serving",
42 |         "name": "enova"
43 |     },
44 |     "logger": {
45 |         "name": "server",
46 |         "path": "./var/log/emergingai",
47 |         "level": "debug"
48 |     }
49 | }


--------------------------------------------------------------------------------
/escaler/pkg/api/api.go:
--------------------------------------------------------------------------------
  1 | package api
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"errors"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"net/http"
  9 | 	"net/url"
 10 | 	"strings"
 11 | 
 12 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/logger"
 13 | )
 14 | 
 15 | type HttpResponse interface{}
 16 | 
 17 | type HeaderBuilderInterface interface {
 18 | 	Build() (map[string]string, error)
 19 | }
 20 | 
 21 | type EmptyHeaderBuilder struct {
 22 | }
 23 | 
 24 | func (hb *EmptyHeaderBuilder) Build() (map[string]string, error) {
 25 | 	return make(map[string]string), nil
 26 | }
 27 | 
 28 | type HttpApi[T HttpResponse] struct {
 29 | 	Method        string
 30 | 	Url           string
 31 | 	HeaderBuilder HeaderBuilderInterface
 32 | }
 33 | 
 34 | func (api *HttpApi[T]) GetRequest(Params interface{}, Headers map[string]string) (*http.Request, error) {
 35 | 	newHeader, err := api.HeaderBuilder.Build()
 36 | 	if err != nil {
 37 | 		logger.Errorf("HeaderBuilder get error: %v", err)
 38 | 		return nil, err
 39 | 	}
 40 | 
 41 | 	for key, value := range Headers {
 42 | 		newHeader[key] = value
 43 | 	}
 44 | 
 45 | 	logger.Infof("make http request")
 46 | 
 47 | 	actualMethod := strings.ToUpper(api.Method)
 48 | 	var requestData io.Reader
 49 | 	actualUrl := api.Url
 50 | 	switch actualMethod {
 51 | 	case "POST", "PUT":
 52 | 		bytesData, _ := json.Marshal(Params)
 53 | 		reqBody := string(bytesData)
 54 | 		logger.Infof("api %s, request body: %s", api.Url, reqBody)
 55 | 		newHeader["Content-Type"] = "application/json"
 56 | 		requestData = strings.NewReader(reqBody)
 57 | 	case "GET", "DELETE":
 58 | 		Url, _ := url.Parse(api.Url) // todo 处理err
 59 | 		urlValues := url.Values{}
 60 | 		if pm, ok := Params.(map[string]string); ok {
 61 | 			for key, value := range pm {
 62 | 				urlValues.Set(key, value)
 63 | 			}
 64 | 			Url.RawQuery = urlValues.Encode()
 65 | 			actualUrl = Url.String()
 66 | 		}
 67 | 
 68 | 	}
 69 | 
 70 | 	req, err := http.NewRequest(actualMethod, actualUrl, requestData)
 71 | 	if err != nil {
 72 | 		return nil, err
 73 | 	}
 74 | 	for key, value := range newHeader {
 75 | 		req.Header.Add(key, value)
 76 | 	}
 77 | 	return req, nil
 78 | }
 79 | 
 80 | func (api *HttpApi[T]) Call(Params interface{}, Headers map[string]string) (T, error) {
 81 | 	client := &http.Client{}
 82 | 	req, err := api.GetRequest(Params, Headers)
 83 | 	var resp T
 84 | 	if err != nil {
 85 | 		return resp, err
 86 | 	}
 87 | 	res, err := client.Do(req) // todo 处理err
 88 | 	if err != nil {
 89 | 		return resp, err
 90 | 	}
 91 | 	return api.processResponse(res)
 92 | }
 93 | 
 94 | func (api *HttpApi[T]) processResponse(res *http.Response) (T, error) {
 95 | 	defer res.Body.Close()
 96 | 	var httpResp T
 97 | 	if res.StatusCode != http.StatusOK {
 98 | 		resBody, _ := io.ReadAll(res.Body)
 99 | 		msg := fmt.Sprintf("HttpApi get StatusOK not ok: status code: %d, resBody: %s", res.StatusCode, resBody)
100 | 		logger.Info(msg)
101 | 		return httpResp, errors.New(msg)
102 | 	}
103 | 	resBody, _ := io.ReadAll(res.Body)
104 | 	if err := json.Unmarshal(resBody, &httpResp); err != nil {
105 | 		logger.Error("Error parsing JSON response: %v", err)
106 | 		return httpResp, err
107 | 	}
108 | 	return httpResp, nil
109 | }
110 | 


--------------------------------------------------------------------------------
/escaler/pkg/api/enovaalgo.go:
--------------------------------------------------------------------------------
  1 | package api
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"sync"
  6 | 
  7 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/config"
  8 | )
  9 | 
 10 | var enovaAlgoInitOnce sync.Once
 11 | 
 12 | type enovaAlgoClient struct {
 13 | 	ConfigRecommend HttpApi[EnvoaResponse]
 14 | 	AnomalyDetect   HttpApi[EnvoaResponse]
 15 | 	AnomalyRecover  HttpApi[EnvoaResponse]
 16 | }
 17 | 
 18 | type ConfigRecommendRequest struct {
 19 | 	Llm struct {
 20 | 		Framework string  `json:"framework"`
 21 | 		Param     float32 `json:"param"`
 22 | 	} `json:"llm"`
 23 | 	Gpu struct {
 24 | 		Name string `json:"name"`
 25 | 		Spec int    `json:"spec"`
 26 | 		Num  int    `json:"num"`
 27 | 	} `json:"gpu"`
 28 | }
 29 | 
 30 | type ConfigRecommendResult struct {
 31 | 	MaxNumSeqs           int     `json:"max_num_seqs"`
 32 | 	TensorParallelSize   int     `json:"tensor_parallel_size"`
 33 | 	GpuMemoryUtilization float32 `json:"gpu_memory_utilization"`
 34 | 	Replicas             int     `json:"replicas"`
 35 | }
 36 | 
 37 | type Llm struct {
 38 | 	Framework string  `json:"framework"`
 39 | 	Param     float32 `json:"param"`
 40 | }
 41 | 
 42 | type Gpu struct {
 43 | 	Name string `json:"name"`
 44 | 	Spec int    `json:"spec"`
 45 | 	Num  int    `json:"num"`
 46 | }
 47 | 
 48 | type MetricValue [2]float64
 49 | 
 50 | type Metrics struct {
 51 | 	ActiveRequests        []MetricValue `json:"active_requests"`
 52 | 	RunningRequests       []MetricValue `json:"running_requests"`
 53 | 	PendingRequests       []MetricValue `json:"pending_requests"`
 54 | 	GPUKVCacheUsage       []MetricValue `json:"gpu_kv_cache_usage"`
 55 | 	ServerNewRequests     []MetricValue `json:"server_new_requests"`
 56 | 	ServerSuccessRequests []MetricValue `json:"server_success_requests"`
 57 | }
 58 | 
 59 | type Configurations struct {
 60 | 	MaxNumSeqs           int     `json:"max_num_seqs"`
 61 | 	TensorParallelSize   int     `json:"tensor_parallel_size"`
 62 | 	GPUMemoryUtilization float32 `json:"gpu_memory_utilization"`
 63 | 	Replicas             int     `json:"replicas"`
 64 | }
 65 | 
 66 | type AnomalyRecoverRequest struct {
 67 | 	Metrics        []Metrics      `json:"metrics"`
 68 | 	Configurations Configurations `json:"configurations"`
 69 | 	Llm            Llm            `json:"llm"`
 70 | 	Gpu            Gpu            `json:"gpu"`
 71 | }
 72 | 
 73 | type AnomalyDetectRequest struct {
 74 | 	Metrics        []Metrics      `json:"metrics"`
 75 | 	Configurations Configurations `json:"configurations"`
 76 | }
 77 | 
 78 | type AnomalyDetectResponse struct {
 79 | 	IsAnomaly int `json:"is_anomaly"`
 80 | }
 81 | 
 82 | var EnovaAlgoClient *enovaAlgoClient
 83 | 
 84 | func GetEnovaAlgoClient() *enovaAlgoClient {
 85 | 	enovaAlgoInitOnce.Do(func() {
 86 | 		EnovaAlgoClient = &enovaAlgoClient{
 87 | 			ConfigRecommend: HttpApi[EnvoaResponse]{
 88 | 				Method:        "POST",
 89 | 				Url:           fmt.Sprintf("http://%s/api/enovaalgo/v1/config_recommend", config.GetEConfig().EnovaAlgo.Host),
 90 | 				HeaderBuilder: &EmptyHeaderBuilder{},
 91 | 			},
 92 | 			AnomalyDetect: HttpApi[EnvoaResponse]{
 93 | 				Method:        "POST",
 94 | 				Url:           fmt.Sprintf("http://%s/api/enovaalgo/v1/anomaly_detect", config.GetEConfig().EnovaAlgo.Host),
 95 | 				HeaderBuilder: &EmptyHeaderBuilder{},
 96 | 			},
 97 | 			AnomalyRecover: HttpApi[EnvoaResponse]{
 98 | 				Method:        "POST",
 99 | 				Url:           fmt.Sprintf("http://%s/api/enovaalgo/v1/anomaly_recover", config.GetEConfig().EnovaAlgo.Host),
100 | 				HeaderBuilder: &EmptyHeaderBuilder{},
101 | 			},
102 | 		}
103 | 	})
104 | 	return EnovaAlgoClient
105 | }
106 | 


--------------------------------------------------------------------------------
/escaler/pkg/api/prom.go:
--------------------------------------------------------------------------------
 1 | package api
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"sync"
 6 | 
 7 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/config"
 8 | )
 9 | 
10 | var promClientInitOnce sync.Once
11 | 
12 | type Metric map[string]string
13 | 
14 | type ValueSet []interface{}
15 | 
16 | type Series struct {
17 | 	Metric Metric     `json:"metric"`
18 | 	Values []ValueSet `json:"values"`
19 | }
20 | 
21 | type PromData struct {
22 | 	ResultType string   `json:"resultType"`
23 | 	Result     []Series `json:"result"`
24 | }
25 | 
26 | type PromResponse struct {
27 | 	Status string
28 | 	Data   PromData
29 | }
30 | 
31 | type promClient struct {
32 | 	Query      HttpApi[PromResponse]
33 | 	QueryRange HttpApi[PromResponse]
34 | }
35 | 
36 | var PromClient *promClient
37 | 
38 | func GetPromClient() *promClient {
39 | 	promClientInitOnce.Do(func() {
40 | 		PromClient = &promClient{
41 | 			Query: HttpApi[PromResponse]{
42 | 				Method:        "GET",
43 | 				Url:           fmt.Sprintf("http://%s:%d/api/v1/query", config.GetEConfig().Detector.Prom.Host, config.GetEConfig().Detector.Prom.Port),
44 | 				HeaderBuilder: &EmptyHeaderBuilder{},
45 | 			},
46 | 			QueryRange: HttpApi[PromResponse]{
47 | 				Method:        "GET",
48 | 				Url:           fmt.Sprintf("http://%s:%d/api/v1/query_range", config.GetEConfig().Detector.Prom.Host, config.GetEConfig().Detector.Prom.Port),
49 | 				HeaderBuilder: &EmptyHeaderBuilder{},
50 | 			},
51 | 		}
52 | 	})
53 | 	return PromClient
54 | }
55 | 


--------------------------------------------------------------------------------
/escaler/pkg/api/types.go:
--------------------------------------------------------------------------------
 1 | package api
 2 | 
 3 | type EnvoaResponse struct {
 4 | 	Code    int
 5 | 	Message string
 6 | 	Result  interface{}
 7 | 	TraceId string
 8 | 	Version string
 9 | }
10 | 


--------------------------------------------------------------------------------
/escaler/pkg/httpserver/middleware/logger.go:
--------------------------------------------------------------------------------
 1 | package middleware
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"io/ioutil"
 6 | 	"strings"
 7 | 	"time"
 8 | 
 9 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/logger"
10 | 
11 | 	"github.com/gin-gonic/gin"
12 | )
13 | 
14 | func RequestResponseLogger() gin.HandlerFunc {
15 | 	return func(c *gin.Context) {
16 | 		// just for /api/monitor/v1/
17 | 		if !strings.Contains(c.Request.URL.Path, "/api/enova/v1") {
18 | 			c.Next()
19 | 			return
20 | 		}
21 | 
22 | 		// 获取请求体
23 | 		reqBody, _ := ioutil.ReadAll(c.Request.Body)
24 | 		c.Request.Body = ioutil.NopCloser(bytes.NewBuffer(reqBody))
25 | 
26 | 		// 获取响应体
27 | 		respWriter := &responseWriter{body: bytes.NewBufferString(""), ResponseWriter: c.Writer}
28 | 		c.Writer = respWriter
29 | 
30 | 		// 处理请求
31 | 		c.Next()
32 | 
33 | 		// 记录请求和响应
34 | 		respStr := respWriter.body.String()
35 | 		if respStrLen := len(respStr); respStrLen > 1024 {
36 | 			respStr = respStr[:1024]
37 | 		}
38 | 
39 | 		logger.Info("---------------------------------------------------------")
40 | 		logger.Infof("[INFO] [%s] %s %s %s\n%d %s\n",
41 | 			time.Now().Format("2006-01-02 15:04:05"),
42 | 			c.Request.Method, c.Request.URL.Path, string(reqBody),
43 | 			respWriter.status, respStr,
44 | 		)
45 | 		logger.Info("---------------------------------------------------------")
46 | 	}
47 | }
48 | 
49 | type responseWriter struct {
50 | 	body *bytes.Buffer
51 | 	gin.ResponseWriter
52 | 	status int
53 | }
54 | 
55 | func (w *responseWriter) Write(b []byte) (int, error) {
56 | 	w.body.Write(b)
57 | 	return w.ResponseWriter.Write(b)
58 | }
59 | 
60 | func (w *responseWriter) WriteHeader(statusCode int) {
61 | 	w.status = statusCode
62 | 	w.ResponseWriter.WriteHeader(statusCode)
63 | }
64 | 


--------------------------------------------------------------------------------
/escaler/pkg/httpserver/middleware/response.go:
--------------------------------------------------------------------------------
 1 | package middleware
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"encoding/json"
 6 | 	"net/http"
 7 | 
 8 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/config"
 9 | 
10 | 	"github.com/gin-gonic/gin"
11 | )
12 | 
13 | type EApiResponse struct {
14 | 	Message string          `json:"message"`
15 | 	Code    int             `json:"code"`
16 | 	Result  json.RawMessage `json:"result"`
17 | 	TraceId string          `json:"trace_id"`
18 | 	Version string          `json:"version"`
19 | }
20 | 
21 | type responseBodyWriter struct {
22 | 	gin.ResponseWriter
23 | 	body *bytes.Buffer
24 | }
25 | 
26 | func (w responseBodyWriter) Write(b []byte) (int, error) {
27 | 	w.body.Write(b)
28 | 	return w.ResponseWriter.Write(b)
29 | }
30 | 
31 | func (w responseBodyWriter) WriteString(s string) (int, error) {
32 | 	w.body.WriteString(s)
33 | 	return w.ResponseWriter.WriteString(s)
34 | }
35 | 
36 | func ResponseMiddleware() gin.HandlerFunc {
37 | 	return func(c *gin.Context) {
38 | 
39 | 		// 调用下一个中间件或路由处理函数
40 | 		c.Next()
41 | 
42 | 		// 错误的结果直接返回
43 | 		if errResult, ok := c.Get("ErrorResult"); ok {
44 | 			c.JSON(http.StatusOK, errResult)
45 | 			return
46 | 		}
47 | 
48 | 		// 解析成功的返回
49 | 		var jsonResult json.RawMessage
50 | 		result, ok := c.Get("Data")
51 | 		if !ok {
52 | 			return
53 | 		}
54 | 
55 | 		jsonResult, err := json.Marshal(result)
56 | 		if err != nil {
57 | 			c.AbortWithStatusJSON(http.StatusInternalServerError, EApiResponse{
58 | 				Message: "Internal error",
59 | 				Code:    500,
60 | 				Result:  jsonResult,
61 | 				TraceId: GenerateTraceId(),
62 | 				Version: config.GetEConfig().Detector.Api.Version,
63 | 			})
64 | 			return
65 | 		}
66 | 
67 | 		c.JSON(http.StatusOK, EApiResponse{
68 | 			Message: "ok",
69 | 			Code:    0,
70 | 			Result:  jsonResult,
71 | 			TraceId: GenerateTraceId(),
72 | 			Version: config.GetEConfig().Detector.Api.Version,
73 | 		})
74 | 	}
75 | }
76 | 


--------------------------------------------------------------------------------
/escaler/pkg/httpserver/middleware/trace.go:
--------------------------------------------------------------------------------
 1 | package middleware
 2 | 
 3 | import (
 4 | 	"github.com/gin-gonic/gin"
 5 | 	uuid "github.com/google/uuid"
 6 | )
 7 | 
 8 | const TraceIdKey = "trace_id"
 9 | 
10 | func GenerateTraceId() string {
11 | 	v4, err := uuid.NewUUID()
12 | 	if err != nil {
13 | 		panic(err)
14 | 	}
15 | 	return v4.String()
16 | }
17 | 
18 | func GetTraceId() gin.HandlerFunc {
19 | 	return func(c *gin.Context) {
20 | 		traceId := c.GetHeader(TraceIdKey)
21 | 
22 | 		if traceId == "" {
23 | 			traceId = GenerateTraceId()
24 | 			c.Request.Header.Set(TraceIdKey, traceId)
25 | 			c.Set(TraceIdKey, traceId)
26 | 		}
27 | 
28 | 		// Set TraceIdKey header
29 | 		c.Writer.Header().Set(TraceIdKey, traceId)
30 | 	}
31 | }
32 | 


--------------------------------------------------------------------------------
/escaler/pkg/httpserver/server/router.go:
--------------------------------------------------------------------------------
 1 | package server
 2 | 
 3 | import "github.com/gin-gonic/gin"
 4 | 
 5 | type BaseResource struct {
 6 | }
 7 | 
 8 | func (r BaseResource) SetResult(c *gin.Context, result interface{}) {
 9 | 	c.Set("Data", result)
10 | }
11 | 
12 | func (r BaseResource) SetErrorResult(c *gin.Context, result interface{}) {
13 | 	c.Set("ErrorResult", result)
14 | }
15 | 
16 | type PathResourceInterface interface {
17 | 	Path() string
18 | }
19 | 
20 | type GetResourceInterface interface {
21 | 	Get(c *gin.Context)
22 | }
23 | 
24 | type ListResourceInterface interface {
25 | 	List(c *gin.Context)
26 | }
27 | 
28 | type PostResourceInterface interface {
29 | 	Post(c *gin.Context)
30 | }
31 | 
32 | type PutResourceInterface interface {
33 | 	Put(c *gin.Context)
34 | }
35 | 
36 | type DeleteResourceInterface interface {
37 | 	Delete(c *gin.Context)
38 | }
39 | 


--------------------------------------------------------------------------------
/escaler/pkg/httpserver/utils/utils.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import (
 4 | 	"reflect"
 5 | 	"strconv"
 6 | )
 7 | 
 8 | func HasMethod(s interface{}, methodName string) bool {
 9 | 	typ := reflect.TypeOf(s)
10 | 	_, ok := typ.MethodByName(methodName)
11 | 	return ok
12 | }
13 | 
14 | // ParseUnixTimestamp
15 | func ParseUnixTimestamp(ts int64) string {
16 | 	if ts >= (1 << 32) {
17 | 		// The timestamp is in milliseconds. Convert it to seconds.
18 | 		ts /= 1000
19 | 	}
20 | 	return strconv.FormatFloat(float64(ts), 'g', -1, 64)
21 | }
22 | 


--------------------------------------------------------------------------------
/escaler/pkg/logger/logger.go:
--------------------------------------------------------------------------------
 1 | package logger
 2 | 
 3 | import (
 4 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/config"
 5 | 	"github.com/sirupsen/logrus"
 6 | )
 7 | 
 8 | var logger *logrus.Logger
 9 | 
10 | func init() {
11 | 	logger = GetLogger()
12 | }
13 | 
14 | func GetLogger() *logrus.Logger {
15 | 	config := config.GetEConfig()
16 | 	logger := logrus.New()
17 | 
18 | 	// 设置日志级别
19 | 	switch config.Logger.Level {
20 | 	case "panic":
21 | 		logrus.SetLevel(logrus.PanicLevel)
22 | 	case "fatal":
23 | 		logrus.SetLevel(logrus.FatalLevel)
24 | 	case "error":
25 | 		logrus.SetLevel(logrus.ErrorLevel)
26 | 	case "warn", "warning":
27 | 		logrus.SetLevel(logrus.WarnLevel)
28 | 	case "info":
29 | 		logrus.SetLevel(logrus.InfoLevel)
30 | 	case "debug":
31 | 		logrus.SetLevel(logrus.DebugLevel)
32 | 	case "trace":
33 | 		logrus.SetLevel(logrus.TraceLevel)
34 | 	default:
35 | 		logrus.Warn("Unknown log level: ", config.Logger.Level)
36 | 		logrus.SetLevel(logrus.InfoLevel) // 设置默认日志等级
37 | 	}
38 | 
39 | 	// 设置日志格式
40 | 	logger.SetFormatter(&logrus.TextFormatter{
41 | 		TimestampFormat: "2006-01-02 15:04:05",
42 | 	})
43 | 	return logger
44 | }
45 | 
46 | func Info(args ...interface{}) {
47 | 	logger.Infoln(args)
48 | }
49 | 
50 | func Infof(format string, args ...interface{}) {
51 | 	logger.Infof(format, args...)
52 | }
53 | 
54 | func Debug(args ...interface{}) {
55 | 	logger.Debugln(args)
56 | }
57 | 
58 | func Debugf(format string, args ...interface{}) {
59 | 	logger.Debugf(format, args...)
60 | }
61 | 
62 | func Fatal(args ...interface{}) {
63 | 	logger.Fatalln(args)
64 | }
65 | 
66 | func Warn(args ...interface{}) {
67 | 	logger.Warnln(args)
68 | }
69 | 
70 | func Error(args ...interface{}) {
71 | 	logger.Errorln(args)
72 | }
73 | 
74 | func Errorf(format string, args ...interface{}) {
75 | 	logger.Errorf(format, args...)
76 | }
77 | 
78 | func Panic(args ...interface{}) {
79 | 	logger.Panicln(args)
80 | }
81 | 


--------------------------------------------------------------------------------
/escaler/pkg/meta/task.go:
--------------------------------------------------------------------------------
 1 | package meta
 2 | 
 3 | import "github.com/Emerging-AI/ENOVA/escaler/pkg/api"
 4 | 
 5 | type TaskStatus string
 6 | 
 7 | const (
 8 | 	TaskStatusCreated    TaskStatus = "created"
 9 | 	TaskStatusScheduling TaskStatus = "scheduling"
10 | 	TaskStatusRunning    TaskStatus = "running"
11 | 	TaskStatusError      TaskStatus = "error"
12 | 	TaskStatusFinished   TaskStatus = "finished"
13 | )
14 | 
15 | type DetectTask struct {
16 | 	TaskSpec TaskSpecInterface
17 | 	Status   TaskStatus
18 | }
19 | 
20 | type AnomalyRecommendResult struct {
21 | 	Timestamp             int64                     `json:"timestamp"`
22 | 	IsAnomaly             bool                      `json:"isAnomaly"`
23 | 	ConfigRecommendResult api.ConfigRecommendResult `json:"configRecommendResult"`
24 | 	CurrentConfig         api.ConfigRecommendResult `json:"currentConfig"`
25 | }
26 | 
27 | type TaskInfo struct {
28 | 	Name   string `json:"name"`
29 | 	Status string `json:"status"`
30 | }
31 | 


--------------------------------------------------------------------------------
/escaler/pkg/queue/queue.go:
--------------------------------------------------------------------------------
 1 | package queue
 2 | 
 3 | import "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
 4 | 
 5 | type TaskQueue interface {
 6 | 	Append(meta.TaskSpec)
 7 | 	Pop() meta.TaskSpec
 8 | }
 9 | 
10 | type InnerChanTaskQueue struct {
11 | 	Ch chan meta.TaskSpecInterface
12 | }
13 | 
14 | func (q *InnerChanTaskQueue) Append(task meta.TaskSpecInterface) {
15 | 	q.Ch <- task
16 | }
17 | 
18 | func (q *InnerChanTaskQueue) Pop() (meta.TaskSpecInterface, bool) {
19 | 	task, ok := <-q.Ch
20 | 	return task, ok
21 | }
22 | 


--------------------------------------------------------------------------------
/escaler/pkg/redis/redis.go:
--------------------------------------------------------------------------------
 1 | package redis
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"time"
 6 | 
 7 | 	"github.com/go-redis/redis/v8"
 8 | )
 9 | 
10 | type RedisClient struct {
11 | 	Ctx   context.Context
12 | 	Redis *redis.Client
13 | }
14 | 
15 | func (r *RedisClient) SetList(key string, values []string) error {
16 | 	_, err := r.DelList(key)
17 | 	if err != nil {
18 | 		return err
19 | 	}
20 | 	for _, value := range values {
21 | 		if err := r.Redis.RPush(r.Ctx, key, value).Err(); err != nil {
22 | 			return err
23 | 		}
24 | 	}
25 | 	return nil
26 | }
27 | 
28 | func (r *RedisClient) GetList(key string) ([]string, error) {
29 | 	storedStringArray, err := r.Redis.LRange(r.Ctx, key, 0, -1).Result()
30 | 	if err != nil {
31 | 		return storedStringArray, err
32 | 	}
33 | 	return storedStringArray, nil
34 | }
35 | 
36 | func (r *RedisClient) DelList(key string) (int64, error) {
37 | 	return r.Redis.Del(r.Ctx, key).Result()
38 | }
39 | 
40 | func (r *RedisClient) AppendList(key string, value string) error {
41 | 	if err := r.Redis.LPush(r.Ctx, key, value).Err(); err != nil {
42 | 		return err
43 | 	}
44 | 	return nil
45 | }
46 | 
47 | func (r *RedisClient) AppendListWithLimitSize(key string, value string, limit int64) error {
48 | 	if err := r.AppendList(key, value); err != nil {
49 | 		return err
50 | 	}
51 | 	if err := r.Redis.LTrim(r.Ctx, key, 0, limit).Err(); err != nil {
52 | 		return err
53 | 	}
54 | 	return nil
55 | }
56 | 
57 | func (r *RedisClient) Set(key string, value string, timeout int64) {
58 | 	r.Redis.Set(r.Ctx, key, value, time.Duration(time.Duration(timeout)*time.Microsecond))
59 | }
60 | 
61 | func (r *RedisClient) Get(key string) string {
62 | 	result := r.Redis.Get(r.Ctx, key)
63 | 	if result.Err() != nil {
64 | 		return ""
65 | 	}
66 | 	return result.Val()
67 | }
68 | 
69 | func NewRedisClient(addr string, passwd string, db int) *RedisClient {
70 | 	ctx := context.Background()
71 | 
72 | 	rdb := redis.NewClient(&redis.Options{
73 | 		Addr:     addr,
74 | 		Password: passwd,
75 | 		DB:       db,
76 | 	})
77 | 
78 | 	return &RedisClient{
79 | 		Ctx:   ctx,
80 | 		Redis: rdb,
81 | 	}
82 | }
83 | 


--------------------------------------------------------------------------------
/escaler/pkg/resource/clients.go:
--------------------------------------------------------------------------------
 1 | package resource
 2 | 
 3 | import "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
 4 | 
 5 | type ClientInterface interface {
 6 | 	DeployTask(spec meta.TaskSpec)
 7 | 	DeleteTask(spec meta.TaskSpec)
 8 | 	IsTaskExist(spec meta.TaskSpec) bool
 9 | 	IsTaskRunning(spec meta.TaskSpec) bool
10 | 	GetRuntimeInfos(spec meta.TaskSpec) *meta.RuntimeInfo
11 | 	InPlaceRestart(spec meta.TaskSpec) bool
12 | }
13 | 


--------------------------------------------------------------------------------
/escaler/pkg/resource/utils/cmd.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import (
 4 | 	"encoding/json"
 5 | 	"fmt"
 6 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/config"
 7 | 	"reflect"
 8 | 	"strconv"
 9 | 
10 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
11 | )
12 | 
13 | func shouldAppend(v interface{}) bool {
14 | 	switch v := v.(type) {
15 | 	case int, int32, int64:
16 | 		return v != 0
17 | 	case float32, float64:
18 | 		return v != 0.0
19 | 	case string:
20 | 		return v != ""
21 | 	case bool:
22 | 		return v // no need to check, because false is the zero value and means "not set"
23 | 	default:
24 | 		// This case is for types not explicitly checked above; assumes non-zero by default
25 | 		return !reflect.DeepEqual(v, reflect.Zero(reflect.TypeOf(v)).Interface())
26 | 	}
27 | }
28 | 
29 | func BuildCmdFromTaskSpec(spec meta.TaskSpec) []string {
30 | 
31 | 	cmd := []string{
32 | 		"enova", "serving", "run", "--model", spec.Model, "--port", strconv.Itoa(spec.Port), "--host", spec.Host,
33 | 		"--backend", spec.Backend,
34 | 		"--exporter_service_name", spec.ExporterServiceName,
35 | 	}
36 | 	if config.GetEConfig().ResourceBackend.Type == config.ResourceBackendTypeK8s {
37 | 		cmd = append(cmd, "--exporter_endpoint", spec.Name+"-collector."+spec.Namespace+".svc.cluster.local:4317")
38 | 	} else {
39 | 		cmd = append(cmd, "--exporter_endpoint", spec.ExporterEndpoint)
40 | 	}
41 | 
42 | 	vllmBackendConfig, ok := spec.BackendConfig.(*meta.VllmBackendConfig)
43 | 	if ok {
44 | 		jsonBytes, err := json.Marshal(vllmBackendConfig)
45 | 		if err != nil {
46 | 
47 | 		} else {
48 | 			var vllmBackendConfigMap map[string]interface{}
49 | 			err = json.Unmarshal(jsonBytes, &vllmBackendConfigMap)
50 | 			if err != nil {
51 | 
52 | 			} else {
53 | 				// if there is not valid value, dont append to cmd params
54 | 				for k, v := range vllmBackendConfigMap {
55 | 					if shouldAppend(v) {
56 | 						cmd = append(cmd, []string{fmt.Sprintf("--%s", k), fmt.Sprintf("%v", v)}...)
57 | 					}
58 | 				}
59 | 			}
60 | 
61 | 		}
62 | 	}
63 | 	// Add extra serving params
64 | 	for k, v := range spec.BackendExtraConfig {
65 | 		cmd = append(cmd, []string{fmt.Sprintf("--%s", k), fmt.Sprintf("%v", v)}...)
66 | 	}
67 | 	return cmd
68 | }
69 | 


--------------------------------------------------------------------------------
/escaler/pkg/scaler/scaler.go:
--------------------------------------------------------------------------------
 1 | package scaler
 2 | 
 3 | import (
 4 | 	"sync"
 5 | 
 6 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
 7 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/queue"
 8 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/resource"
 9 | 
10 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/config"
11 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/logger"
12 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/zmq"
13 | )
14 | 
15 | type EnovaServingScaler struct {
16 | 	// Subscriber *zmq.ZmqSubscriber
17 | 	Queue   *queue.InnerChanTaskQueue
18 | 	Client  resource.ClientInterface
19 | 	stopped bool
20 | }
21 | 
22 | func NewServingScaler(ch chan meta.TaskSpecInterface) *EnovaServingScaler {
23 | 	if config.GetEConfig().ResourceBackend.Type == config.ResourceBackendTypeK8s {
24 | 		return NewK8sServingScaler(ch)
25 | 	}
26 | 	return NewLocalDockerServingScaler(ch)
27 | }
28 | 
29 | func NewLocalDockerServingScaler(ch chan meta.TaskSpecInterface) *EnovaServingScaler {
30 | 	return &EnovaServingScaler{
31 | 		Queue: &queue.InnerChanTaskQueue{
32 | 			Ch: ch,
33 | 		},
34 | 		Client:  resource.NewDockerResourceClient(),
35 | 		stopped: false,
36 | 	}
37 | }
38 | 
39 | func NewK8sServingScaler(ch chan meta.TaskSpecInterface) *EnovaServingScaler {
40 | 	return &EnovaServingScaler{
41 | 		Queue: &queue.InnerChanTaskQueue{
42 | 			Ch: ch,
43 | 		},
44 | 		Client: resource.NewK8sResourceClient(),
45 | 	}
46 | }
47 | 
48 | func NewZmqSubscriber() *zmq.ZmqSubscriber {
49 | 	sub := zmq.ZmqSubscriber{
50 | 		Host: config.GetEConfig().Zmq.Host,
51 | 		Port: config.GetEConfig().Zmq.Port,
52 | 	}
53 | 	sub.Init()
54 | 	return &sub
55 | }
56 | 
57 | func (s *EnovaServingScaler) Run() {
58 | 	// if s.Subscriber == nil {
59 | 	// 	panic(errors.New("enovaServingScaler Subscriber is nil"))
60 | 	// }
61 | 	// defer s.Subscriber.Close()
62 | 
63 | 	for {
64 | 		// 接收消息
65 | 		logger.Infof("enovaServingScaler start Recv message")
66 | 		task, ok := s.Queue.Pop()
67 | 		if !ok {
68 | 			continue
69 | 		}
70 | 		// logger.Infof("enovaServingScaler Recv message: %s", msg)
71 | 		// if err != nil {
72 | 		// 	logger.Infof("enovaServingScaler Error receiving message: %s", err)
73 | 		// 	continue
74 | 		// }
75 | 		acutalTask := task.(*meta.TaskSpec)
76 | 
77 | 		// if err := json.Unmarshal([]byte(msg), &task); err != nil {
78 | 		// 	logger.Errorf("enovaServingScaler Error parsing JSON response: %v, msg: %s", err, msg)
79 | 		// 	continue
80 | 		// }
81 | 
82 | 		if acutalTask.Replica == 0 {
83 | 			s.Client.DeleteTask(*acutalTask)
84 | 		} else {
85 | 			// 执行 localDeploy 函数
86 | 			s.Client.DeployTask(*acutalTask)
87 | 		}
88 | 	}
89 | }
90 | 
91 | func (s *EnovaServingScaler) Stop() {
92 | 
93 | }
94 | 
95 | func (s *EnovaServingScaler) RunInWaitGroup(wg *sync.WaitGroup) {
96 | 	defer wg.Done()
97 | 	s.Run()
98 | }
99 | 


--------------------------------------------------------------------------------
/escaler/pkg/utils/cache.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import (
 4 | 	"github.com/Emerging-AI/ENOVA/escaler/pkg/redis"
 5 | )
 6 | 
 7 | type TTLCache interface {
 8 | 	Set(key string, value string, timeout int64)
 9 | 	Get(key string) string
10 | }
11 | 
12 | type RedisTTLCache struct {
13 | 	Redis *redis.RedisClient
14 | }
15 | 
16 | func NewRedisTTLCache(addr string, passwd string, db int) *RedisTTLCache {
17 | 	return &RedisTTLCache{
18 | 		redis.NewRedisClient(addr, passwd, db),
19 | 	}
20 | }
21 | 
22 | func (r *RedisTTLCache) Set(key string, value string, timeout int64) {
23 | 	r.Redis.Set(key, value, timeout)
24 | }
25 | 
26 | func (r *RedisTTLCache) Get(key string) string {
27 | 	return r.Redis.Get(key)
28 | }
29 | 


--------------------------------------------------------------------------------
/escaler/pkg/utils/utils.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import "reflect"
 4 | 
 5 | func GetAllField(s interface{}) []reflect.StructField {
 6 | 	ret := []reflect.StructField{}
 7 | 	t := reflect.TypeOf(s)
 8 | 
 9 | 	for i := 0; i < t.NumField(); i++ {
10 | 		field := t.Field(i)
11 | 		ret = append(ret, field)
12 | 	}
13 | 	return ret
14 | }
15 |  


--------------------------------------------------------------------------------
/escaler/pkg/zmq/zmq.go:
--------------------------------------------------------------------------------
 1 | package zmq
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"log"
 6 | 
 7 | 	zmq "github.com/pebbe/zmq4"
 8 | )
 9 | 
10 | type ZmqPublisher struct {
11 | 	Host      string
12 | 	Port      int
13 | 	publisher *zmq.Socket
14 | }
15 | 
16 | type ZmqSubscriber struct {
17 | 	Host       string
18 | 	Port       int
19 | 	subscriber *zmq.Socket
20 | }
21 | 
22 | func (p *ZmqPublisher) Init() {
23 | 	publisher, err := zmq.NewSocket(zmq.PUB)
24 | 	if err != nil {
25 | 		fmt.Printf("Failed to dial publisher: %v\n", err)
26 | 		return
27 | 	}
28 | 	// 订阅所有消息
29 | 	p.publisher = publisher
30 | 	address := fmt.Sprintf("tcp://%s:%d", p.Host, p.Port)
31 | 	err = p.publisher.Bind(address)
32 | 	if err != nil {
33 | 		log.Fatal("ZmqPublisher init error: ", err)
34 | 		p.Close()
35 | 	}
36 | }
37 | 
38 | func (p *ZmqPublisher) Send(msg string) (bool, error) {
39 | 	_, err := p.publisher.Send(msg, 0)
40 | 
41 | 	if err != nil {
42 | 		return false, err
43 | 	}
44 | 	return true, nil
45 | }
46 | 
47 | func (p *ZmqPublisher) Close() {
48 | 	if p.publisher != nil {
49 | 		p.publisher.Close()
50 | 	}
51 | }
52 | 
53 | func (s *ZmqSubscriber) Init() {
54 | 	subscriber, err := zmq.NewSocket(zmq.SUB)
55 | 	topic := ""
56 | 	subscriber.SetSubscribe(topic)
57 | 	s.subscriber = subscriber
58 | 	if err != nil {
59 | 		log.Fatal(err)
60 | 	}
61 | 	address := fmt.Sprintf("tcp://%s:%d", s.Host, s.Port)
62 | 	err = subscriber.Connect(address)
63 | 	if err != nil {
64 | 		log.Fatal(err)
65 | 		s.Close()
66 | 	}
67 | }
68 | 
69 | func (s *ZmqSubscriber) Close() {
70 | 	if s.subscriber != nil {
71 | 		s.subscriber.Close()
72 | 	}
73 | }
74 | 
75 | func (s *ZmqSubscriber) Recv() (string, error) {
76 | 	msg, err := s.subscriber.Recv(0)
77 | 	return msg, err
78 | }
79 | 


--------------------------------------------------------------------------------
/escaler/scripts/build_swagger.sh:
--------------------------------------------------------------------------------
1 | export GOPATH=$(go env GOPATH | awk -F ':' '{print $1}')
2 | export PATH=$PATH:$GOPATH/bin
3 | swag init -g cmd/escaler/main.go -o cmd/escaler/docs --parseDependency --parseInternal


--------------------------------------------------------------------------------
/escaler/scripts/generate_mock_files.sh:
--------------------------------------------------------------------------------
1 | mockgen -source=vendor/github.com/docker/docker/client/interface.go -destination=cmd/escaler/mock_docker_client.go -package=main
2 | 


--------------------------------------------------------------------------------
/escaler/scripts/generate_ot_clientset.sh:
--------------------------------------------------------------------------------
 1 | go install k8s.io/code-generator/cmd/client-gen
 2 | export GOPATH=$(go env GOPATH | awk -F ':' '{print $1}')
 3 | export PATH=$PATH:$GOPATH/bin
 4 | client-gen \
 5 |     --input-base="/root/go/pkg/mod/github.com/open-telemetry/opentelemetry-operator@v1.51.0/apis/v1alpha1" \
 6 |     --input="" \
 7 |     --output-pkg="github.com/Emerging-AI/ENOVA/escaler/pkg/generated/ot/clientset" \
 8 |     --output-dir=./pkg/generated/ot/clientset \
 9 |     --clientset-name="versioned" \
10 |     --go-header-file="./hack/boilerplate.go.txt"
11 | 


--------------------------------------------------------------------------------
/escaler/scripts/local_docker_run.sh:
--------------------------------------------------------------------------------
1 | redis-server &
2 | escaler $@
3 | 


--------------------------------------------------------------------------------
/front/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules


--------------------------------------------------------------------------------
/front/.env.development:
--------------------------------------------------------------------------------
1 | VITE_APP_BASE_URL="/"


--------------------------------------------------------------------------------
/front/.env.production:
--------------------------------------------------------------------------------
1 | VITE_APP_BASE_URL="/"


--------------------------------------------------------------------------------
/front/.eslintrc.cjs:
--------------------------------------------------------------------------------
 1 | /* eslint-env node */
 2 | require('@rushstack/eslint-patch/modern-module-resolution')
 3 | 
 4 | module.exports = {
 5 |   root: true,
 6 |   'extends': [
 7 |     'plugin:vue/vue3-essential',
 8 |     'eslint:recommended',
 9 |     '@vue/eslint-config-typescript',
10 |     '@vue/eslint-config-prettier/skip-formatting'
11 |   ],
12 |   parserOptions: {
13 |     ecmaVersion: 'latest'
14 |   },
15 |   global: {
16 |     ElMessage: 'readonly',
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/front/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | yarn-debug.log*
 6 | yarn-error.log*
 7 | pnpm-debug.log*
 8 | lerna-debug.log*
 9 | 
10 | node_modules
11 | .DS_Store
12 | dist
13 | dist-ssr
14 | coverage
15 | *.local
16 | 
17 | /cypress/videos/
18 | /cypress/screenshots/
19 | 
20 | # Editor directories and files
21 | .vscode/*
22 | !.vscode/extensions.json
23 | .idea
24 | *.suo
25 | *.ntvs*
26 | *.njsproj
27 | *.sln
28 | *.sw?
29 | 
30 | *.tsbuildinfo
31 | 


--------------------------------------------------------------------------------
/front/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 |   "$schema": "https://json.schemastore.org/prettierrc",
3 |   "semi": false,
4 |   "tabWidth": 2,
5 |   "singleQuote": true,
6 |   "printWidth": 100,
7 |   "trailingComma": "none"
8 | }


--------------------------------------------------------------------------------
/front/README.md:
--------------------------------------------------------------------------------
 1 | # enova-web
 2 | 
 3 | This template should help get you started developing with Vue 3 in Vite.
 4 | 
 5 | ## Recommended IDE Setup
 6 | 
 7 | [VSCode](https://code.visualstudio.com/) + [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) (and disable Vetur).
 8 | 
 9 | ## Type Support for `.vue` Imports in TS
10 | 
11 | TypeScript cannot handle type information for `.vue` imports by default, so we replace the `tsc` CLI with `vue-tsc` for type checking. In editors, we need [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) to make the TypeScript language service aware of `.vue` types.
12 | 
13 | ## Customize configuration
14 | 
15 | See [Vite Configuration Reference](https://vitejs.dev/config/).
16 | 
17 | ## Project Setup
18 | 
19 | ```sh
20 | npm install
21 | ```
22 | 
23 | ### Compile and Hot-Reload for Development
24 | 
25 | ```sh
26 | npm run dev
27 | ```
28 | 
29 | ### Type-Check, Compile and Minify for Production
30 | 
31 | ```sh
32 | npm run build
33 | ```
34 | 
35 | ### Lint with [ESLint](https://eslint.org/)
36 | 
37 | ```sh
38 | npm run lint
39 | ```
40 | 


--------------------------------------------------------------------------------
/front/auto-imports.d.ts:
--------------------------------------------------------------------------------
 1 | /* eslint-disable */
 2 | /* prettier-ignore */
 3 | // @ts-nocheck
 4 | // noinspection JSUnusedGlobalSymbols
 5 | // Generated by unplugin-auto-import
 6 | export {}
 7 | declare global {
 8 |   const ElMessage: typeof import('element-plus/es')['ElMessage']
 9 | }
10 | 


--------------------------------------------------------------------------------
/front/env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 | 


--------------------------------------------------------------------------------
/front/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | 
 4 | <head>
 5 |   <meta charset="UTF-8">
 6 |   <link rel="icon" href="/favicon.ico">
 7 |   <meta name="viewport" content="width=device-width, initial-scale=1.0">
 8 |   <title>Enova</title>
 9 | </head>
10 | 
11 | <body>
12 |   <div id="app"></div>
13 |   <script type="module" src="/src/main.ts"></script>
14 | </body>
15 | 
16 | </html>


--------------------------------------------------------------------------------
/front/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "enova-web",
 3 |   "version": "1.0.0",
 4 |   "private": true,
 5 |   "type": "module",
 6 |   "scripts": {
 7 |     "dev": "vite",
 8 |     "build": "run-p type-check \"build-only {@}\" --",
 9 |     "preview": "vite preview",
10 |     "build-only": "vite build",
11 |     "type-check": "vue-tsc --build --force",
12 |     "lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs,.ts,.tsx,.cts,.mts --fix --ignore-path .gitignore",
13 |     "format": "prettier --write src/"
14 |   },
15 |   "dependencies": {
16 |     "@types/axios": "^0.14.0",
17 |     "@vueuse/core": "^10.9.0",
18 |     "axios": "^1.6.8",
19 |     "dayjs": "^1.11.11",
20 |     "echarts": "^5.5.0",
21 |     "element-plus": "^2.6.3",
22 |     "pinia": "^2.1.7",
23 |     "vue": "^3.4.21",
24 |     "vue-i18n": "^9.13.1",
25 |     "vue-router": "^4.3.0"
26 |   },
27 |   "devDependencies": {
28 |     "@rushstack/eslint-patch": "^1.8.0",
29 |     "@tsconfig/node20": "^20.1.4",
30 |     "@types/node": "^20.12.5",
31 |     "@vitejs/plugin-vue": "^5.0.4",
32 |     "@vue/eslint-config-prettier": "^9.0.0",
33 |     "@vue/eslint-config-typescript": "^13.0.0",
34 |     "@vue/tsconfig": "^0.5.1",
35 |     "autoprefixer": "^10.4.19",
36 |     "eslint": "^8.57.0",
37 |     "eslint-plugin-vue": "^9.23.0",
38 |     "npm-run-all2": "^6.1.2",
39 |     "postcss": "^8.4.38",
40 |     "prettier": "^3.2.5",
41 |     "rollup-plugin-visualizer": "^5.12.0",
42 |     "sass": "^1.75.0",
43 |     "tailwindcss": "^3.4.3",
44 |     "typescript": "~5.4.0",
45 |     "unplugin-auto-import": "^0.17.5",
46 |     "unplugin-vue-components": "^0.26.0",
47 |     "vite": "^5.2.8",
48 |     "vite-plugin-svg-icons": "^2.0.1",
49 |     "vite-plugin-vue-devtools": "^7.0.25",
50 |     "vue-tsc": "^2.0.11"
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/front/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 |   plugins: {
3 |     tailwindcss: {},
4 |     autoprefixer: {},
5 |   },
6 | }
7 | 


--------------------------------------------------------------------------------
/front/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/public/favicon.ico


--------------------------------------------------------------------------------
/front/src/App.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <el-config-provider :locale="locale === 'en' ? en : zhCn">
 3 |     <RouterView />
 4 |   </el-config-provider>
 5 | </template>
 6 | <script setup lang="ts">
 7 | import { ElConfigProvider } from 'element-plus'
 8 | import { useI18n } from 'vue-i18n'
 9 | const { locale } = useI18n()
10 | import zhCn from 'element-plus/es/locale/lang/zh-cn'
11 | import en from 'element-plus/es/locale/lang/en'
12 | </script>
13 | 


--------------------------------------------------------------------------------
/front/src/api/instance.ts:
--------------------------------------------------------------------------------
 1 | import service from '@/utils/request'
 2 | enum API {
 3 |   ENODE = '/v1/serving',
 4 |   MONITOR = '/api/v1/query_range',
 5 |   PILOT = '/api/escaler/v1'
 6 | }
 7 | 
 8 | export const getServing = () => service({
 9 |   url: API.ENODE,
10 |   method: 'get',
11 | });
12 | 
13 | export const addServing = () => service({
14 |   url: API.ENODE,
15 |   method: 'post',
16 |   data: {
17 |     "instance_name": "enova_test",
18 |     "model": "THUDM/chatglm3-6b"
19 |   },
20 | })
21 | 
22 | export const deleteServing = (id: string) => service({
23 |   url: `${API.ENODE}/${id}`,
24 |   method: 'delete',
25 | });
26 | 
27 | export const getExperiment = (params: string) => service({
28 |   url: `${API.ENODE}/instance/test?${params}`,
29 |   method: 'get',
30 | })
31 | 
32 | export const createTest = (data: any) => service({
33 |   url: `${API.ENODE}/instance/test`,
34 |   method: 'post',
35 |   data
36 | })
37 | 
38 | const getPromUrl = (port: number) => {
39 |   const { protocol, hostname } = window.location
40 |   if (import.meta.env.MODE === 'development') return '/'
41 |   return `${protocol}//${hostname}:${port}/`
42 | }
43 | 
44 | export const getMonitorData = (params?: string) => service({
45 |   url: `${API.MONITOR}?${params}`,
46 |   baseURL: getPromUrl(32826),
47 |   method: 'get',
48 | })
49 | 
50 | export const getDetectHistory = (params?: string) => service({
51 |   url: `${API.PILOT}/task/detect/history?${params}`,
52 |   baseURL: getPromUrl(8183),
53 |   method: 'get',
54 | })


--------------------------------------------------------------------------------
/front/src/assets/empty.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/src/assets/empty.png


--------------------------------------------------------------------------------
/front/src/assets/filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/src/assets/filter.png


--------------------------------------------------------------------------------
/front/src/assets/logo/emergingai_b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/src/assets/logo/emergingai_b.png


--------------------------------------------------------------------------------
/front/src/assets/logo/emergingai_w.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/src/assets/logo/emergingai_w.png


--------------------------------------------------------------------------------
/front/src/assets/svg/auto.svg:
--------------------------------------------------------------------------------
1 | <svg class="icon" style="width: 1em;height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="9477"><path d="M725.333333 142.506667a424.874667 424.874667 0 0 1 173.482667 189.312L618.666667 327.253333 682.666667 216.405333a341.333333 341.333333 0 1 0 160.810666 214.016l87.594667 1.450667A425.002667 425.002667 0 0 1 881.493333 725.333333C763.733333 929.408 502.741333 999.338667 298.666667 881.493333 94.592 763.733333 24.661333 502.741333 142.506667 298.666667 260.266667 94.592 521.258667 24.661333 725.333333 142.506667z m-168.789333 163.925333l147.925333 386.901333h-86.954666l-33.28-91.648h-138.197334l-33.237333 91.648H325.845333l147.925334-386.901333h82.773333z m-41.173333 103.850667l-43.648 120.917333h86.869333l-43.221333-120.917333z" fill="#475468" p-id="9478"></path></svg>


--------------------------------------------------------------------------------
/front/src/assets/svg/autoRefresh.svg:
--------------------------------------------------------------------------------
1 | <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <g id="&#232;&#135;&#170;&#229;&#138;&#168;&#229;&#136;&#183;&#230;&#150;&#176;">
3 | <path id="&#232;&#183;&#175;&#229;&#190;&#132;" fill-rule="evenodd" clip-rule="evenodd" d="M13.7734 11.3332C14.5756 9.94808 14.8506 8.3198 14.5477 6.74805L13.179 6.72533C13.5606 8.27569 13.2217 9.86037 12.346 11.0923L13.2994 12.0457C13.4702 11.8214 13.6287 11.5838 13.7734 11.3332ZM10.2856 12.819C10.5005 12.717 10.7059 12.6022 10.9011 12.4759L7.82648 9.40118H6.96919L6.44998 10.8332H5.09103L6.24373 7.81844L3.52441 5.09911C2.32212 6.95168 2.37628 9.39343 3.73617 11.2035C5.25828 13.2296 7.9959 13.9048 10.2856 12.819ZM2.56435 4.13906C2.44473 4.30789 2.33187 4.48376 2.22635 4.66652C0.38535 7.85522 1.47781 11.9324 4.66652 13.7734C6.99688 15.1188 9.80174 14.8975 11.8615 13.4362L12.8533 14.428L13.7961 13.4852L2.48244 2.17153L1.53963 3.11434L2.56435 4.13906ZM8.6959 4.78785L10.3391 9.08533L8.26852 7.0148L8.05252 6.41052L7.94954 6.69582L7.02606 5.77234L7.40248 4.78785H8.6959ZM10.6665 3.38105C8.82855 2.3201 6.5779 2.46493 4.90781 3.65409L3.95327 2.69955C6.02777 1.11706 8.9323 0.8402 11.3332 2.22635C12.5664 2.93835 13.4862 3.98517 14.0438 5.18469L9.66652 5.1131L10.6665 3.38105Z" fill="#475468"/>
4 | </g>
5 | </svg>
6 | 


--------------------------------------------------------------------------------
/front/src/assets/svg/cross.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
2 |   "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
3 | <svg t="1709973199108" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="2322"
4 |      xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200">
5 |   <path
6 |     d="M512.896 466.730667L816.298667 174.933333a38.698667 38.698667 0 0 1 8.192-5.717333 32.256 32.256 0 0 1 29.056 0.554667 35.84 35.84 0 0 1 10.197333 8.576 32 32 0 0 1-3.072 42.709333L557.226667 512.853333l291.797333 303.445334c1.962667 2.218667 2.56 2.730667 4.138667 5.248a32.341333 32.341333 0 0 1 2.474666 28.970666 32 32 0 0 1-50.304 12.416c-0.853333-0.725333-1.621333-1.536-2.432-2.304l-291.797333-303.402666-303.402667 291.797333c-2.261333 1.962667-2.773333 2.517333-5.290666 4.096a32.085333 32.085333 0 0 1-28.970667 2.474667 32.298667 32.298667 0 0 1-18.688-38.528c1.237333-4.266667 3.413333-8.32 6.272-11.733334 0.725333-0.853333 1.536-1.621333 2.304-2.432l303.402667-291.797333L174.933333 207.658667c-1.92-2.218667-2.517333-2.730667-4.096-5.248a32.341333 32.341333 0 0 1-2.474666-28.970667 32.298667 32.298667 0 0 1 38.528-18.688 31.573333 31.573333 0 0 1 11.733333 6.272c0.853333 0.725333 1.621333 1.536 2.432 2.304l291.84 303.402667z"
7 |     p-id="2323" fill="#7588A3"></path>
8 | </svg>
9 | 


--------------------------------------------------------------------------------
/front/src/assets/svg/docker.svg:
--------------------------------------------------------------------------------
1 | <svg width="18" height="18" viewBox="0 0 18 18" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <g id="&#232;&#181;&#132;&#230;&#186;&#144;">
3 | <path id="&#232;&#183;&#175;&#229;&#190;&#132;" fill-rule="evenodd" clip-rule="evenodd" d="M15.0621 11.4001L15.9636 11.9409C16.0768 12.0086 16.146 12.1308 16.146 12.2626C16.146 12.3945 16.0768 12.5166 15.9636 12.5844L9.38614 16.5309C9.14844 16.6737 8.85134 16.6737 8.61364 16.5309L2.03614 12.5844C1.92301 12.5166 1.85376 12.3945 1.85376 12.2626C1.85376 12.1308 1.92301 12.0086 2.03614 11.9409L2.93764 11.4001L8.99989 15.0376L15.0621 11.4001V11.4001ZM15.0621 7.87511L15.9636 8.41586C16.0768 8.48359 16.146 8.60576 16.146 8.73761C16.146 8.86947 16.0768 8.99164 15.9636 9.05936L8.99989 13.2376L2.03614 9.05936C1.92301 8.99164 1.85376 8.86947 1.85376 8.73761C1.85376 8.60576 1.92301 8.48359 2.03614 8.41586L2.93764 7.87511L8.99989 11.5126L15.0621 7.87511ZM9.38539 0.981863L15.9636 4.92836C16.0768 4.99609 16.146 5.11826 16.146 5.25011C16.146 5.38197 16.0768 5.50414 15.9636 5.57186L8.99989 9.75011L2.03614 5.57186C1.92301 5.50414 1.85376 5.38197 1.85376 5.25011C1.85376 5.11826 1.92301 4.99609 2.03614 4.92836L8.61364 0.981863C8.85134 0.839053 9.14844 0.839053 9.38614 0.981863H9.38539ZM8.99989 2.49911L4.41514 5.25011L8.99989 8.00111L13.5846 5.25011L8.99989 2.49911Z" fill="currentColor" fill-opacity="0.6"/>
4 | </g>
5 | </svg>
6 | 


--------------------------------------------------------------------------------
/front/src/assets/svg/earth.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
2 |   "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
3 | <svg t="1709369643633" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5450"
4 |      xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200">
5 |   <path
6 |     d="M854.4 800.9c0.2-0.3 0.5-0.6 0.7-0.9C920.6 722.1 960 621.7 960 512s-39.4-210.1-104.8-288c-0.2-0.3-0.5-0.5-0.7-0.8-1.1-1.3-2.1-2.5-3.2-3.7-0.4-0.5-0.8-0.9-1.2-1.4-1.4-1.6-2.7-3.1-4.1-4.7l-0.1-0.1c-1.5-1.7-3.1-3.4-4.6-5.1l-0.1-0.1c-3.2-3.4-6.4-6.8-9.7-10.1l-0.1-0.1-4.8-4.8-0.3-0.3c-1.5-1.5-3-2.9-4.5-4.3-0.5-0.5-1-1-1.6-1.5-1-1-2-1.9-3-2.8-0.3-0.3-0.7-0.6-1-1C736.4 109.2 629.5 64 512 64s-224.4 45.2-304.3 119.2c-0.3 0.3-0.7 0.6-1 1-1 0.9-2 1.9-3 2.9-0.5 0.5-1 1-1.6 1.5-1.5 1.4-3 2.9-4.5 4.3l-0.3 0.3-4.8 4.8-0.1 0.1c-3.3 3.3-6.5 6.7-9.7 10.1l-0.1 0.1c-1.6 1.7-3.1 3.4-4.6 5.1l-0.1 0.1c-1.4 1.5-2.8 3.1-4.1 4.7-0.4 0.5-0.8 0.9-1.2 1.4-1.1 1.2-2.1 2.5-3.2 3.7-0.2 0.3-0.5 0.5-0.7 0.8C103.4 301.9 64 402.3 64 512s39.4 210.1 104.8 288c0.2 0.3 0.5 0.6 0.7 0.9 1 1.2 2.1 2.5 3.1 3.7 0.4 0.5 0.8 0.9 1.2 1.4 1.4 1.6 2.7 3.1 4.1 4.7 0 0.1 0.1 0.1 0.1 0.2 1.5 1.7 3 3.4 4.6 5l0.1 0.1c3.2 3.4 6.4 6.8 9.6 10.1l0.1 0.1c1.6 1.6 3.1 3.2 4.7 4.7l0.3 0.3c3.3 3.3 6.7 6.5 10.1 9.6 80.1 74 187 119.2 304.5 119.2s224.4-45.2 304.3-119.2c3.4-3.1 6.7-6.3 10-9.6l0.3-0.3c1.6-1.6 3.2-3.1 4.7-4.7l0.1-0.1c3.3-3.3 6.5-6.7 9.6-10.1l0.1-0.1c1.5-1.7 3.1-3.3 4.6-5 0-0.1 0.1-0.1 0.1-0.2 1.4-1.5 2.8-3.1 4.1-4.7 0.4-0.5 0.8-0.9 1.2-1.4 1.2-1.3 2.3-2.5 3.3-3.7z m4.1-142.6c-13.8 32.6-32 62.8-54.2 90.2-24.9-21.5-52.2-40.3-81.5-55.9 11.6-46.9 18.8-98.4 20.7-152.6H887c-3 40.9-12.6 80.6-28.5 118.3zM887 484H743.5c-1.9-54.2-9.1-105.7-20.7-152.6 29.3-15.6 56.6-34.4 81.5-55.9 22.2 27.4 40.4 57.6 54.2 90.2C874.4 403.4 884 443.1 887 484zM658.3 165.5c39.7 16.8 75.8 40 107.6 69.2-18.5 15.8-38.4 29.7-59.4 41.8-15.7-45-35.8-84.1-59.2-115.4 3.7 1.4 7.4 2.9 11 4.4z m-90.6 700.6c-9.2 7.2-18.4 12.7-27.7 16.4V697c39.9 2.8 78.6 11.6 115.7 26.2-8.3 24.6-17.9 47.3-29 67.8-17.4 32.4-37.8 58.3-59 75.1z m59-633.1c11 20.6 20.7 43.3 29 67.8-37.1 14.6-75.8 23.4-115.7 26.2V141.6c9.2 3.7 18.5 9.1 27.7 16.4 21.2 16.7 41.6 42.6 59 75zM540 640.9V540h147.5c-1.6 44.2-7.1 87.1-16.3 127.8l-0.3 1.2c-41.1-15.6-85.1-25.3-130.9-28.1z m0-156.9V383.1c45.8-2.8 89.8-12.5 130.9-28.1l0.3 1.2c9.2 40.7 14.7 83.5 16.3 127.8H540z m-56 56v100.9c-45.8 2.8-89.8 12.5-130.9 28.1l-0.3-1.2c-9.2-40.7-14.7-83.5-16.3-127.8H484z m-147.5-56c1.6-44.2 7.1-87.1 16.3-127.8l0.3-1.2c41.1 15.6 85 25.3 130.9 28.1V484H336.5zM484 697v185.4c-9.2-3.7-18.5-9.1-27.7-16.4-21.2-16.7-41.7-42.7-59.1-75.1-11-20.6-20.7-43.3-29-67.8 37.2-14.6 75.9-23.3 115.8-26.1z m0-370c-39.9-2.8-78.6-11.6-115.7-26.2 8.3-24.6 17.9-47.3 29-67.8 17.4-32.4 37.8-58.4 59.1-75.1 9.2-7.2 18.4-12.7 27.7-16.4V327zM365.7 165.5c3.7-1.5 7.3-3 11-4.4-23.4 31.3-43.5 70.4-59.2 115.4-21-12-40.9-26-59.4-41.8 31.8-29.2 67.9-52.4 107.6-69.2zM165.5 365.7c13.8-32.6 32-62.8 54.2-90.2 24.9 21.5 52.2 40.3 81.5 55.9-11.6 46.9-18.8 98.4-20.7 152.6H137c3-40.9 12.6-80.6 28.5-118.3zM137 540h143.5c1.9 54.2 9.1 105.7 20.7 152.6-29.3 15.6-56.6 34.4-81.5 55.9-22.2-27.4-40.4-57.6-54.2-90.2C149.6 620.6 140 580.9 137 540z m228.7 318.5c-39.7-16.8-75.8-40-107.6-69.2 18.5-15.8 38.4-29.7 59.4-41.8 15.7 45 35.8 84.1 59.2 115.4-3.7-1.4-7.4-2.9-11-4.4z m292.6 0c-3.7 1.5-7.3 3-11 4.4 23.4-31.3 43.5-70.4 59.2-115.4 21 12 40.9 26 59.4 41.8-31.8 29.2-67.9 52.4-107.6 69.2z"
7 |     p-id="5451"></path>
8 | </svg>
9 | 


--------------------------------------------------------------------------------
/front/src/assets/svg/home.svg:
--------------------------------------------------------------------------------
1 | <svg class="icon" style="width: 1em;height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="8155"><path d="M950.555 411.291L570.297 110.696c-32.737-25.963-84.018-25.963-116.755 0L73.445 411.291c-13.063 10.321-15.16 29.189-5 42.09s28.705 15.159 41.768 4.999l11.772-9.676V812.03c0 51.443 54.346 89.985 101.918 89.985h599.9c47.088 0 78.05-36.123 78.05-89.985V449.994l10.967 8.386c5.483 4.354 12.256 6.45 18.868 6.45 8.869 0 17.739-3.87 23.705-11.45 10.321-12.9 8.225-31.768-4.838-42.089z m-468.63 430.734V675.602c0-8.225 14.997-12.418 29.994-12.418s29.995 4.193 29.995 12.418v166.262l-59.99 0.161z m360.1-29.995c0 19.836-5.483 29.995-18.061 29.995H602.549c0-0.484-0.645-0.806-0.645-1.29V675.602c0-34.833-28.06-72.408-89.985-72.408s-89.985 37.575-89.985 72.408v165.133c0 0.484 0.807 0.806 0.807 1.29H224.065c-18.868 0-41.929-16.287-41.929-29.995V401.132L490.31 157.785c11.127-8.708 31.124-8.708 42.09 0l309.464 244.798V812.03z" fill="currentColor" p-id="8156"></path></svg>


--------------------------------------------------------------------------------
/front/src/assets/svg/info.svg:
--------------------------------------------------------------------------------
1 | <svg class="icon" style="width: 1em;height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="19738"><path d="M512 512m-448 0a448 448 0 1 0 896 0 448 448 0 1 0-896 0Z" fill="#2196F3" p-id="19739" data-spm-anchor-id="a313x.search_index.i1.i2.71d83a81d2KKG4"></path><path d="M469.333333 469.333333h85.333334v234.666667h-85.333334z" fill="#FFFFFF" p-id="19740"></path><path d="M512 352m-53.333333 0a53.333333 53.333333 0 1 0 106.666666 0 53.333333 53.333333 0 1 0-106.666666 0Z" fill="#FFFFFF" p-id="19741"></path></svg>


--------------------------------------------------------------------------------
/front/src/assets/svg/log.svg:
--------------------------------------------------------------------------------
1 | <svg width="18" height="18" viewBox="0 0 18 18" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <g id="&#230;&#151;&#165;&#229;&#191;&#151;&#232;&#174;&#176;&#229;&#189;&#149;">
3 | <path id="&#232;&#183;&#175;&#229;&#190;&#132;" fill-rule="evenodd" clip-rule="evenodd" d="M13.125 9C15.4032 9 17.25 10.8468 17.25 13.125C17.25 15.4032 15.4032 17.25 13.125 17.25C10.8468 17.25 9 15.4032 9 13.125C9 10.8468 10.8468 9 13.125 9ZM15 1.5C15.3846 1.5 15.7016 1.78953 15.745 2.16253L15.75 2.25V8.25H14.2493L14.2497 2.9997H3.7494V14.9994L8.25 14.9993V16.5H3C2.61537 16.5 2.29837 16.2105 2.25505 15.8375L2.25 15.75V2.25C2.25 1.86537 2.53953 1.54837 2.91253 1.50505L3 1.5H15ZM13.125 10.5C11.6753 10.5 10.5 11.6753 10.5 13.125C10.5 14.5747 11.6753 15.75 13.125 15.75C14.5747 15.75 15.75 14.5747 15.75 13.125C15.75 11.6753 14.5747 10.5 13.125 10.5ZM13.3127 11.85L13.3125 12.975L14.5104 13.6016L13.9887 14.5984L12.1877 13.6557V11.85H13.3127ZM10.5 7.5V9H5.25V7.5H10.5ZM12.75 4.5V6H5.25V4.5H12.75Z" fill="white" fill-opacity="0.6"/>
4 | </g>
5 | </svg>
6 | 


--------------------------------------------------------------------------------
/front/src/assets/svg/setup.svg:
--------------------------------------------------------------------------------
1 | <svg class="icon" style="width: 1em;height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="10221"><path d="M388.992 768q11.008-28.992 35.488-46.496T479.968 704t55.488 17.504T570.944 768h324.992v64H570.944q-11.008 28.992-35.488 46.496T479.968 896t-55.488-17.504T388.992 832H128v-64h260.992z m192-288q11.008-28.992 35.488-46.496T671.968 416t55.488 17.504T762.944 480h132.992v64h-132.992q-11.008 28.992-35.488 46.496T671.968 608t-55.488-17.504T580.992 544H128v-64h452.992z m-320-288q11.008-28.992 35.488-46.496T351.968 128t55.488 17.504T442.944 192h452.992v64H442.944q-11.008 28.992-35.488 46.496T351.968 320t-55.488-17.504T260.992 256H128V192h132.992z" p-id="10222"></path></svg>


--------------------------------------------------------------------------------
/front/src/assets/svg/toggle.svg:
--------------------------------------------------------------------------------
1 | <svg class="icon" style="width: 1em;height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="3723"><path d="M896 810.666667a42.666667 42.666667 0 0 1-42.666667 42.666666H170.666667a42.666667 42.666667 0 0 1 0-85.333333h682.666666a42.666667 42.666667 0 0 1 42.666667 42.666667zM296.533333 202.965333v319.402667a21.333333 21.333333 0 0 1-36.437333 15.061333L115.498667 392.832a42.666667 42.666667 0 0 1 0-60.330667l144.64-144.64a21.333333 21.333333 0 0 1 36.394666 15.104zM896 512a42.666667 42.666667 0 0 1-42.666667 42.666667h-298.666666a42.666667 42.666667 0 0 1 0-85.333334h298.666666a42.666667 42.666667 0 0 1 42.666667 42.666667z m0-298.666667a42.666667 42.666667 0 0 1-42.666667 42.666667h-298.666666a42.666667 42.666667 0 0 1 0-85.333333h298.666666a42.666667 42.666667 0 0 1 42.666667 42.666666z" fill="currentColor" p-id="3724"></path></svg>


--------------------------------------------------------------------------------
/front/src/assets/svg/user.svg:
--------------------------------------------------------------------------------
1 | <svg class="icon" style="width: 1em;height: 1em;vertical-align: middle;fill: currentColor;overflow: hidden;" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="3687">
2 |   <path d="M512 1024C229.205 1024 0 794.795 0 512S229.205 0 512 0s512 229.205 512 512-229.205 512-512 512z m0-496.47a170.667 170.667 0 1 0 0-341.333 170.667 170.667 0 0 0 0 341.334z m263.765 263.723a263.765 263.765 0 1 0-527.53 0h527.53z" fill="currentColor" p-id="3688"></path>
3 | </svg>


--------------------------------------------------------------------------------
/front/src/components/Drawer.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <el-drawer
 3 |     :model-value="showDrawer"
 4 |     :close-on-click-modal="false"
 5 |     :close-on-press-escape="false"
 6 |     :size="size"
 7 |     class="enova-drawer"
 8 |     custom-class="enova-drawer"
 9 |     @close="handleClose"
10 |   >
11 |     <template #header>
12 |       <p class="inline-flex items-center gap-1">
13 |         <span class="text-base font-bold text-[#1D2129]">{{ title }}</span>
14 |         <span class="text-sm text-[#86909C]">- {{ titleDesc }}</span>
15 |       </p>
16 |     </template>
17 |     <slot></slot>
18 |   </el-drawer>
19 | </template>
20 | <script setup lang="ts">
21 | const props = defineProps({
22 |   showDrawer: {
23 |     type: Boolean,
24 |     default: false
25 |   },
26 |   title: {
27 |     type: String,
28 |     default: ''
29 |   },
30 |   titleDesc: {
31 |     type: String,
32 |     default: ''
33 |   },
34 |   size: {
35 |     type: String,
36 |     default: '50%'
37 |   }
38 | })
39 | const emit = defineEmits(['closeDrawer'])
40 | const handleClose = () => {
41 |   emit('closeDrawer', false)
42 | }
43 | </script>
44 | <style lang="scss">
45 | .enova-drawer {
46 |   .el-drawer__header {
47 |     @apply m-0 py-3 px-4 border-b border-[#E5E6EB]
48 |   }
49 |   .el-drawer__body {
50 |     @apply p-0;
51 |   }
52 | }
53 | </style>
54 | 


--------------------------------------------------------------------------------
/front/src/components/Language.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <el-dropdown style="color: #898989" trigger="click" @command="changeLanguage">
 3 |     <div class="flex items-center gap-1">
 4 |       <svg-icon name="earth" />
 5 |       <span class="flex items-center gap-1">
 6 |         {{ selectedLang }}<el-icon class="el-icon--right"><arrow-down /></el-icon>
 7 |       </span>
 8 |     </div>
 9 |     <template #dropdown>
10 |       <el-dropdown-menu>
11 |         <el-dropdown-item
12 |           v-for="item in langList"
13 |           :key="item.value"
14 |           :command="item"
15 |           class="languageDropdownItem"
16 |           :class="{ 'is-active': item.value === activeLang }"
17 |           >{{ item.name }}</el-dropdown-item
18 |         >
19 |       </el-dropdown-menu>
20 |     </template>
21 |   </el-dropdown>
22 | </template>
23 | <script lang="ts" setup>
24 | import { onMounted, ref } from 'vue'
25 | import { useI18n } from 'vue-i18n'
26 | import { ArrowDown } from '@element-plus/icons-vue'
27 | const { t, locale } = useI18n()
28 | const selectedLang = ref('zh')
29 | const langList = [
30 |   {
31 |     name: t('common.lang.zh'),
32 |     value: 'zh_CN'
33 |   },
34 |   {
35 |     name: t('common.lang.en'),
36 |     value: 'en'
37 |   }
38 | ]
39 | const activeLang = ref('zh_CN')
40 | onMounted(() => {
41 |   const lang = localStorage.getItem('lang')
42 |   if (lang) {
43 |     locale.value = lang
44 |   } else {
45 |     locale.value = 'en'
46 |   }
47 |   activeLang.value = locale.value
48 |   langList.forEach((item) => {
49 |     if (item.value === activeLang.value) {
50 |       selectedLang.value = item.name
51 |       return
52 |     }
53 |   })
54 | })
55 | 
56 | const changeLanguage = (command: { name: string; value: string }) => {
57 |   const lang = command.value
58 |   selectedLang.value = command.name
59 |   locale.value = command.value
60 |   activeLang.value = command.value
61 |   localStorage.setItem('lang', lang)
62 |   window.location.reload()
63 | }
64 | </script>
65 | <style lang="scss">
66 | .languageDropdown {
67 |   display: inline-flex;
68 |   align-items: center;
69 |   word-break: keep-all;
70 |   white-space: nowrap;
71 | }
72 | </style>
73 | 


--------------------------------------------------------------------------------
/front/src/components/Pagination.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <div class="page-container pt-3">
 3 |     <el-pagination
 4 |       :current-page="pagination.current_page"
 5 |       :page-sizes="[10, 20, 30, 50, 80]"
 6 |       :page-size="pagination.pageSize ?? 20"
 7 |       :layout="layout"
 8 |       :page-count="pagination.page_count"
 9 |       :total="pagination.total"
10 |       :hide-on-single-page="false"
11 |       @size-change="handleSizeChange"
12 |       @current-change="handleCurrentChange"
13 |     />
14 |   </div>
15 | </template>
16 | 
17 | <script setup lang="ts">
18 | import type { PropType } from 'vue'
19 | 
20 | interface Pagination {
21 |   current_page: number
22 |   page_count: number
23 |   total: number
24 |   pageSize?: number
25 | }
26 | const props = defineProps({
27 |   pagination: {
28 |     type: Object as PropType<Pagination>,
29 |     required: true,
30 |     default: () => ({
31 |       current_page: 1,
32 |       page_count: 1,
33 |       total: 0
34 |     })
35 |   },
36 |   pageSize: {
37 |     type: Number,
38 |     required: false,
39 |     default: 20
40 |   },
41 |   layout: {
42 |     type: String,
43 |     required: false,
44 |     default: 'total, sizes, prev, pager, next, jumper'
45 |   }
46 | })
47 | 
48 | const emit = defineEmits(['update:pageChanged'])
49 | 
50 | const handleSizeChange = (val: number): void => {
51 |   if (val === props.pagination.pageSize) return
52 |   props.pagination.pageSize = val
53 |   emit('update:pageChanged', val)
54 |   
55 | }
56 | const handleCurrentChange = (val: number): void => {
57 |   if (val === props.pagination.current_page) return
58 |   props.pagination.current_page = val
59 |   emit('update:pageChanged', val)
60 | }
61 | </script>
62 | 


--------------------------------------------------------------------------------
/front/src/components/SearchInput.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <el-input
 3 |     v-model="searchInput"
 4 |     :prefix-icon="Search"
 5 |     style="width: 560px"
 6 |     clearable
 7 |     :placeholder="searchTips"
 8 |     class="input-with-select"
 9 |     @change="handleSearch"
10 |   />
11 | </template>
12 | <script setup lang="ts">
13 | import { Search } from '@element-plus/icons-vue'
14 | 
15 | import { ref } from 'vue'
16 | 
17 | defineProps({
18 |   searchTips: {
19 |     type: String,
20 |     default: ''
21 |   }
22 | })
23 | 
24 | const searchInput = ref('')
25 | 
26 | const emit = defineEmits(['changeSearch'])
27 | 
28 | const handleSearch = () => {
29 |   emit('changeSearch', searchInput.value.trim())
30 | }
31 | </script>


--------------------------------------------------------------------------------
/front/src/components/SummaryTip.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <div
 3 |     class="text-base text-primary font-semibold leading-6 px-5 py-3 -mx-5 border-b border-gray2"
 4 |   >
 5 |     <span>{{ title }}</span>
 6 |     <span class="ml-1 text-gray5">({{ count }})</span>
 7 |   </div>
 8 | </template>
 9 | 
10 | <script setup lang="ts">
11 | import { defineProps } from 'vue'
12 | 
13 | defineProps<{
14 |   title: string
15 |   count: number
16 | }>()
17 | </script>
18 | 


--------------------------------------------------------------------------------
/front/src/components/SvgIcon.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <svg aria-hidden="true" class="svg-icon" :fill="props.color" :width="width" :height="height">
 3 |     <use :xlink:href="symbolId" :fill="props.color" />
 4 |   </svg>
 5 | </template>
 6 | 
 7 | <script setup lang="ts">
 8 | import { computed } from 'vue'
 9 | const props = defineProps({
10 |   name: {
11 |     type: String,
12 |     required: true
13 |   },
14 |   size: {
15 |     type: String,
16 |     default: '16'
17 |   },
18 |   width: {
19 |     type: String
20 |   },
21 |   height: {
22 |     type: String
23 |   },
24 |   color: {
25 |     type: String,
26 |     default: 'currentColor'
27 |   }
28 | })
29 | const symbolId = computed(() => `#icon-${props.name}`)
30 | const width = computed(() => (props.width ? props.width : `${props.size}px`))
31 | const height = computed(() => (props.height ? props.height : `${props.size}px`))
32 | </script>


--------------------------------------------------------------------------------
/front/src/components/TimeRangePicker.vue:
--------------------------------------------------------------------------------
  1 | <template>
  2 |   <el-date-picker
  3 |     v-model="searchTimePair"
  4 |     class="!w-full"
  5 |     type="datetimerange"
  6 |     :prefix-icon="Calendar"
  7 |     :range-separator="$t('common.title.to')"
  8 |     :start-placeholder="$t('common.title.startTime')"
  9 |     :end-placeholder="$t('common.title.endTime')"
 10 |     :shortcuts="shortcuts"
 11 |     :disabled-date="disabledDate"
 12 |     value-format="YYYY-MM-DD HH:mm:ss"
 13 |     @change="changeTime"
 14 |   />
 15 | </template>
 16 | 
 17 | <script setup lang="ts">
 18 | import { ref } from 'vue'
 19 | import { useI18n } from 'vue-i18n'
 20 | import { Calendar } from '@element-plus/icons-vue'
 21 | import { watch } from 'vue'
 22 | 
 23 | const props = defineProps({
 24 |   defaultTime: {
 25 |     type: Array<string>,
 26 |     default: () => [],
 27 |     required: false
 28 |   }
 29 | })
 30 | 
 31 | const { t } = useI18n()
 32 | const searchTimePair = ref<any>('')
 33 | 
 34 | const shortcuts = [
 35 |   {
 36 |     text: t('datepicker.lastOneHour'),
 37 |     value: () => {
 38 |       const end = new Date()
 39 |       const start = new Date()
 40 |       start.setTime(start.getTime() - 3600 * 1000 * 1)
 41 |       return [start, end]
 42 |     }
 43 |   },
 44 |   {
 45 |     text: t('datepicker.lastThreeHours'),
 46 |     value: () => {
 47 |       const end = new Date()
 48 |       const start = new Date()
 49 |       start.setTime(start.getTime() - 3600 * 1000 * 3)
 50 |       return [start, end]
 51 |     }
 52 |   },
 53 |   {
 54 |     text: t('datepicker.lastTwelveHours'),
 55 |     value: () => {
 56 |       const end = new Date()
 57 |       const start = new Date()
 58 |       start.setTime(start.getTime() - 3600 * 1000 * 12)
 59 |       return [start, end]
 60 |     }
 61 |   },
 62 |   {
 63 |     text: t('datepicker.lastTwentyFourHours'),
 64 |     value: () => {
 65 |       const end = new Date()
 66 |       const start = new Date()
 67 |       start.setTime(start.getTime() - 3600 * 1000 * 24)
 68 |       return [start, end]
 69 |     }
 70 |   },
 71 |   {
 72 |     text: t('datepicker.lastTwoDays'),
 73 |     value: () => {
 74 |       const end = new Date()
 75 |       const start = new Date()
 76 |       start.setTime(start.getTime() - 3600 * 1000 * 24 * 2)
 77 |       return [start, end]
 78 |     }
 79 |   },
 80 |   {
 81 |     text: t('datepicker.lastSevenDays'),
 82 |     value: () => {
 83 |       const end = new Date()
 84 |       const start = new Date()
 85 |       start.setTime(start.getTime() - 3600 * 1000 * 24 * 7)
 86 |       return [start, end]
 87 |     }
 88 |   },
 89 |   {
 90 |     text: t('datepicker.lastMonth'),
 91 |     value: () => {
 92 |       const end = new Date()
 93 |       const start = new Date()
 94 |       start.setTime(start.getTime() - 3600 * 1000 * 24 * 30)
 95 |       return [start, end]
 96 |     }
 97 |   }
 98 | ]
 99 | 
100 | const disabledDate = (val: Date) => {
101 |   return val && new Date(val).getTime() > Date.now()
102 | }
103 | 
104 | const emit = defineEmits(['changTimeRange'])
105 | 
106 | const changeTime = () => {
107 |   emit('changTimeRange', searchTimePair.value)
108 | }
109 | 
110 | watch(
111 |   () => props.defaultTime,
112 |   (val) => {
113 |     if (val != null && val.length > 0) {
114 |       searchTimePair.value = val
115 |       changeTime()
116 |     }
117 |   },
118 |   {
119 |     immediate: true
120 |   }
121 | )
122 | </script>
123 | 


--------------------------------------------------------------------------------
/front/src/components/experiment/TestDetail.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <el-tabs v-model="activeName" class="enova-tabs h-full">
 3 |     <el-tab-pane :label="$t('common.title.baseInfo')" name="baseInfo" :lazy="true">
 4 |       <TestInfo />
 5 |     </el-tab-pane>
 6 |     <el-tab-pane :label="$t('common.title.metrics')" name="gpuInfo" class="h-full" :lazy="true">
 7 |       <GpuInfo />
 8 |     </el-tab-pane>
 9 |   </el-tabs>
10 | </template>
11 | <script setup lang="ts">
12 | import { ref } from 'vue'
13 | import TestInfo from './TestInfo.vue'
14 | import GpuInfo from '../instance/GpuInfo.vue'
15 | defineProps({
16 |   showDrawer: {
17 |     type: Boolean,
18 |     default: false
19 |   }
20 | })
21 | const activeName = ref('baseInfo')
22 | </script>
23 | 


--------------------------------------------------------------------------------
/front/src/components/instance/InstanceDetail.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <el-tabs v-model="activeName" class="enova-tabs h-full">
 3 |     <el-tab-pane :label="$t('common.title.baseInfo')" name="baseInfo" :lazy="true">
 4 |       <BaseInfo />
 5 |     </el-tab-pane>
 6 |     <el-tab-pane :label="$t('common.title.metrics')" name="gpuInfo" class="h-full" :lazy="true">
 7 |       <GpuInfo />
 8 |     </el-tab-pane>
 9 |   </el-tabs>
10 | </template>
11 | <script setup lang="ts">
12 | import { ref } from 'vue'
13 | import BaseInfo from './BaseInfo.vue'
14 | import GpuInfo from './GpuInfo.vue';
15 | defineProps({
16 |   showDrawer: {
17 |     type: Boolean,
18 |     default: false
19 |   }
20 | })
21 | const activeName = ref('baseInfo')
22 | </script>
23 | 


--------------------------------------------------------------------------------
/front/src/hooks/useInitQueryRange.ts:
--------------------------------------------------------------------------------
 1 | import { useExperimentStore } from '@/stores/experiment'
 2 | import { useInstanceStore } from '@/stores/instance'
 3 | import dayjs from 'dayjs'
 4 | import utc from 'dayjs/plugin/utc'
 5 | import {storeToRefs} from 'pinia'
 6 | 
 7 | const getTestDuration = (duration: number, unit: string): number => {
 8 |   switch (unit) {
 9 |     case 'hour':
10 |       return duration * 60 * 60
11 |     case 'min':
12 |       return duration * 60
13 |     case 'sec':
14 |       return (Math.min(duration, 10))
15 |     default:
16 |       return 0
17 |   }
18 | }
19 | 
20 | const useInitQueryRange = () => {
21 |   const { activeExperiment } = storeToRefs(useExperimentStore())
22 |   const { chartTimeRange, searchTimePair } = storeToRefs(useInstanceStore())
23 |   dayjs.extend(utc)
24 |   let startTime = new Date()
25 |   let endTime = new Date()
26 |   
27 |   if (activeExperiment.value != null) {
28 |     startTime = new Date(dayjs.utc(activeExperiment.value.create_time).toDate())
29 |     const { duration, duration_unit } = activeExperiment.value.test_spec
30 |     const testDuration = getTestDuration(duration, duration_unit)
31 |     startTime.setTime(startTime.getTime())
32 |     endTime.setTime(Math.min(startTime.getTime() + (testDuration + 180) * 1000, Date.now()))
33 |   } else {
34 |     startTime.setTime(startTime.getTime() - 3600 * 1000)
35 |   }
36 |   const _start = dayjs(startTime).format('YYYY-MM-DD HH:mm:ss')
37 |   const _end = dayjs(endTime).format('YYYY-MM-DD HH:mm:ss')
38 |   
39 |   chartTimeRange.value = [_start, _end]
40 |   searchTimePair.value = [_start, _end]
41 |   return { start: _start, end: _end }
42 | }
43 | 
44 | export { useInitQueryRange }


--------------------------------------------------------------------------------
/front/src/layout/header/index.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <el-row class="h-full bg-[#242424]">
 3 |     <el-col :span="8" :sm="8" :md="5">
 4 |       <img
 5 |         v-if="navLogo.src"
 6 |         class="ml-6"
 7 |         :style="{ height: navLogo.height, width: navLogo.width }"
 8 |         :src="navLogo.src"
 9 |         :alt="navLogo.alt"
10 |       />
11 |     </el-col>
12 |     <el-col :span="12" :sm="12" :md="16"> </el-col>
13 |     <el-col :span="4" :sm="4" :md="3">
14 |       <div class="flex items-center w-full h-full justify-end pr-6">
15 |         <Language />
16 |       </div>
17 |     </el-col>
18 |   </el-row>
19 | </template>
20 | 
21 | <script setup lang="ts">
22 | import { useAppStore } from '@/stores/app'
23 | import { storeToRefs } from 'pinia'
24 | import Language from '@/components/Language.vue'
25 | const appStore = useAppStore()
26 | const { navLogo } = storeToRefs(appStore)
27 | </script>
28 | 
29 | 


--------------------------------------------------------------------------------
/front/src/layout/index.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <div class="w-screen h-screen overflow-hidden">
 3 |     <el-container class="h-full">
 4 |       <el-header class="!p-0">
 5 |         <Header />
 6 |       </el-header>
 7 |       <el-container class="h-full">
 8 |         <el-aside
 9 |           class="transition-width duration-300"
10 |           :class="{ '!w-16': isCollapse, '!w-[210px]': !isCollapse }"
11 |           width="210px"
12 |         >
13 |           <Sidebar @toggle-collapse="toggleCollapse" />
14 |         </el-aside>
15 |         <el-main class="!p-0 !m-0 !overflow-hidden">
16 |           <TransitionGroup name="fade-transform">
17 |             <router-view :key="key" />
18 |           </TransitionGroup>
19 |         </el-main>
20 |       </el-container>
21 |     </el-container>
22 |   </div>
23 | </template>
24 | 
25 | <script setup lang="ts">
26 | import { computed, ref } from 'vue'
27 | import { useRoute } from 'vue-router'
28 | import Header from './header/index.vue'
29 | import Sidebar from './sidebar/index.vue'
30 | 
31 | const isCollapse = ref(false)
32 | const route = useRoute()
33 | const key = computed(() => route.path)
34 | 
35 | const toggleCollapse = (val: boolean): void => {
36 |   isCollapse.value = val
37 | }
38 | </script>
39 | <style scoped>
40 | .fade-enter-active,
41 | .fade-leave-active {
42 |   transition: opacity 0.5s;
43 | }
44 | 
45 | .fade-enter-from,
46 | .fade-leave-to {
47 |   opacity: 0;
48 | }
49 | /* fade-transform */
50 | .fade-transform-leave-active,
51 | .fade-transform-enter-active {
52 |   transition: all .5s;
53 | }
54 | 
55 | .fade-transform-enter {
56 |   opacity: 0;
57 |   transform: translateX(-30px);
58 | }
59 | 
60 | .fade-transform-leave-to {
61 |   opacity: 0;
62 |   transform: translateX(30px);
63 | }
64 | </style>


--------------------------------------------------------------------------------
/front/src/layout/sidebar/index.vue:
--------------------------------------------------------------------------------
 1 | <template>
 2 |   <el-scrollbar
 3 |     height="100%"
 4 |     wrap-class="h-full sidebarScrollbar"
 5 |     view-class="h-[calc(100vh-60px)]"
 6 |   >
 7 |     <el-menu class="sidebarMenu relative" router :default-active="route.path">
 8 |       <el-menu-item
 9 |         v-for="item in routeOptions"
10 |         :key="item.path"
11 |         :index="item.path"
12 |         @mouseenter="handleMouseEnter"
13 |         @mouseleave="handleMouseLeave"
14 |       >
15 |         <svg-icon :name="item.meta?.icon as string" class="shrink-0" />
16 |         <template #title>{{ $t(`menu.${item.meta?.title}`) }}</template>
17 |       </el-menu-item>
18 |       <svg-icon
19 |         name="toggle"
20 |         class="text-white absolute left-4 bottom-4 cursor-pointer"
21 |         :class="{ 'rotate-180': !isOpen }"
22 |         @click="toggleCollapse"
23 |       />
24 |     </el-menu>
25 |   </el-scrollbar>
26 | </template>
27 | 
28 | <script setup lang="ts">
29 | import { useRoute, useRouter } from 'vue-router'
30 | import { ref, computed, onMounted } from 'vue'
31 | const route = useRoute()
32 | const router = useRouter()
33 | 
34 | const routeOptions = router.options.routes[0].children
35 | 
36 | const activeMenu = ref('')
37 | const isOpen = ref(true)
38 | const sidebarHoverSta = ref(false)
39 | 
40 | const isCollapse = computed(() => {
41 |   if (isOpen.value) return false
42 |   return !sidebarHoverSta.value
43 | })
44 | 
45 | const handleMouseEnter = (): void => {
46 |   sidebarHoverSta.value = true
47 |   emit('toggleCollapse', isCollapse.value)
48 | }
49 | 
50 | const handleMouseLeave = (): void => {
51 |   sidebarHoverSta.value = false
52 |   emit('toggleCollapse', isCollapse.value)
53 | }
54 | 
55 | onMounted(() => {
56 |   activeMenu.value = route.path
57 | })
58 | 
59 | const emit = defineEmits(['toggleCollapse'])
60 | 
61 | const toggleCollapse = () => {
62 |   isOpen.value = !isOpen.value
63 |   emit('toggleCollapse', isCollapse.value)
64 | }
65 | </script>
66 | 
67 | <style lang="scss" scoped>
68 | .sidebarScrollbar {
69 |   .el-menu {
70 |     @apply h-full bg-[#2F374A] px-1.5 py-2 overflow-x-hidden;
71 |     &-item {
72 |       @apply text-white/80 text-sm rounded px-4 py-2.5 flex gap-4 items-center hover:bg-transparent hover:text-white overflow-hidden;
73 |       &.is-active {
74 |         @apply text-white bg-[#4E688E];
75 |       }
76 |     }
77 |   }
78 | }
79 | </style>
80 | 


--------------------------------------------------------------------------------
/front/src/locales/index.ts:
--------------------------------------------------------------------------------
 1 | import { createI18n } from 'vue-i18n'
 2 | import zhLoacles from './lang/zh'
 3 | import enLocales from './lang/en'
 4 | const getLocale = (): string => {
 5 |   let locale = localStorage.getItem('lang')
 6 |   if (!locale) {
 7 |     locale = navigator.language.split('-')[0]
 8 |   }
 9 |   if (!locale || locale === 'zh') {
10 |     locale = 'zh_CN'
11 |   }
12 |   return locale
13 | }
14 | 
15 | const i18n = createI18n({
16 |   locale: getLocale(),
17 |   legacy: false,
18 |   globalInjection: true,
19 |   fallbackLocale: 'zh_CN',
20 |   messages: {
21 |     zh_CN: { ...zhLoacles },
22 |     en: { ...enLocales }
23 |   }
24 | })
25 | 
26 | export default i18n
27 | 


--------------------------------------------------------------------------------
/front/src/main.ts:
--------------------------------------------------------------------------------
 1 | import './styles/index.css'
 2 | 
 3 | import { createApp } from 'vue'
 4 | import { createPinia } from 'pinia'
 5 | import 'virtual:svg-icons-register'
 6 | import i18n from './locales'
 7 | import App from './App.vue'
 8 | import router from './router'
 9 | import './styles/index.scss'
10 | 
11 | const app = createApp(App)
12 | 
13 | app.use(createPinia())
14 | app.use(router)
15 | app.use(i18n)
16 | app.mount('#app')
17 | 


--------------------------------------------------------------------------------
/front/src/router/index.ts:
--------------------------------------------------------------------------------
 1 | import { createRouter, createWebHistory } from 'vue-router'
 2 | import Layout from '@/layout/index.vue'
 3 | 
 4 | const router = createRouter({
 5 |   history: createWebHistory(import.meta.env.BASE_URL),
 6 |   routes: [
 7 |     {
 8 |       path: '/',
 9 |       name: 'home',
10 |       component: Layout,
11 |       redirect: '/instance',
12 |       children: [
13 |         {
14 |           path: '/instance',
15 |           name: 'instance',
16 |           component: () => import('../views/Instance.vue'),
17 |           meta: {
18 |             title: 'service',
19 |             icon: 'docker'
20 |           }
21 |         },
22 |         {
23 |           path: '/record',
24 |           name: 'testRecord',
25 |           component: () => import('../views/TestRecord.vue'),
26 |           meta: {
27 |             title: 'record',
28 |             icon: 'log'
29 |           }
30 |         }
31 |       ]
32 |     }
33 |   ]
34 | })
35 | 
36 | export default router
37 | 


--------------------------------------------------------------------------------
/front/src/stores/app.ts:
--------------------------------------------------------------------------------
 1 | import { defineStore } from 'pinia'
 2 | import navImg from '@/assets/logo/emergingai_w.png'
 3 | import loginImg from '@/assets/logo/emergingai_b.png'
 4 | 
 5 | export const useAppStore = defineStore('app', {
 6 |   state: () => ({
 7 |     navLogo: {
 8 |       src: navImg,
 9 |       width: 'auto',
10 |       height: '56px',
11 |       alt: 'Emergingai'
12 |     },
13 |     loginLogo: {
14 |       src: loginImg,
15 |       width: '220px',
16 |       height: 'auto',
17 |       alt: 'Emergingai'
18 |     },
19 |     sidebarStatus: true
20 |   }),
21 |   actions: {
22 |     toggleSideBar(): void {
23 |       this.sidebarStatus = !this.sidebarStatus
24 |     }
25 |   }
26 | })
27 | 


--------------------------------------------------------------------------------
/front/src/stores/config.ts:
--------------------------------------------------------------------------------
 1 | interface InstanceType {
 2 |   instance_id: 'string'
 3 |   instance_name: 'string'
 4 |   instance_spec: {
 5 |     cpu: {
 6 |       brand_name: string
 7 |       core_amount: number
 8 |     }
 9 |     gpu: {
10 |       product: string
11 |       video_memory: string
12 |       card_amount: number
13 |     }
14 |     memory: string
15 |   }
16 |   startup_args: {
17 |     exported_job: string
18 |     dtype: string
19 |     load_format: string
20 |     max_num_batched_tokens: number
21 |     max_num_seqs: number
22 |     max_paddings: number
23 |     max_seq_len: number
24 |     model: string
25 |     tokenizer: string
26 |     pipeline_parallel_size: number
27 |     tensor_parallel_size: number
28 |     quantization: null
29 |   }
30 |   serving_id: string
31 |   deploy_status: string
32 |   create_time: string
33 | }
34 | 
35 | interface ExperimentType {
36 |   test_id: string
37 |   instance_id: string
38 |   test_spec: {
39 |     data_set: string
40 |     duration: 0
41 |     duration_unit: string
42 |     distribution: string
43 |     tps_mean: 0
44 |     tps_std?: string
45 |   }
46 |   param_spec: {
47 |     max_tokens: number
48 |     temperature: number
49 |     top_p: number
50 |     others: string
51 |   }
52 |   test_status: string
53 |   prompt_tps: number
54 |   generation_tps: number
55 |   result: {
56 |     total: number
57 |     success: number
58 |     elasped_avg: number
59 |   }
60 |   create_time: string
61 | }
62 | 
63 | export type { InstanceType, ExperimentType }
64 | 


--------------------------------------------------------------------------------
/front/src/stores/experiment.ts:
--------------------------------------------------------------------------------
 1 | import { defineStore } from 'pinia'
 2 | import { getExperiment } from '@/api/instance'
 3 | import type { ExperimentType } from './config'
 4 | import dayjs from 'dayjs'
 5 | import utc from 'dayjs/plugin/utc'
 6 | interface ExperimentStoreState {
 7 |   testList: ExperimentType[]
 8 |   currentId: string
 9 |   drawerVisible: boolean
10 | }
11 | 
12 | interface ExperimentRes {
13 |   data: ExperimentType[]
14 |   page: number
15 |   size: number
16 |   total_num: number
17 |   total_page: number
18 |   page_size: number
19 | }
20 | 
21 | export const useExperimentStore = defineStore('experiment', {
22 |   state: (): ExperimentStoreState => ({
23 |     testList: [],
24 |     currentId: '',
25 |     drawerVisible: false
26 |   }),
27 |   getters: {
28 |     activeExperiment: (state): ExperimentType | undefined => {
29 |       return state.testList.find((item) => item.test_id === state.currentId) || undefined
30 |     }
31 |   },
32 |   actions: {
33 |     getTestList(params: string) {
34 |       dayjs.extend(utc)
35 |       return new Promise<ExperimentRes>((resolve, reject) => {
36 |         getExperiment(params)
37 |           .then((res) => {
38 |             this.testList =
39 |               res.data.length > 0
40 |                 ? res.data.map((i: ExperimentType) => {
41 |                     return {
42 |                       ...i,
43 |                       create_time: dayjs.utc(i.create_time).toDate()
44 |                     }
45 |                   })
46 |                 : []
47 |             resolve(res as unknown as ExperimentRes)
48 |           })
49 |           .catch(() => {
50 |             reject(null)
51 |           })
52 |       })
53 |     }
54 |   }
55 | })
56 | 


--------------------------------------------------------------------------------
/front/src/stores/instance.ts:
--------------------------------------------------------------------------------
 1 | import { defineStore } from 'pinia'
 2 | import type { InstanceType } from './config'
 3 | import { getServing } from '@/api/instance'
 4 | interface instanceStoreState {
 5 |   instanceList: InstanceType[]
 6 |   currentId: string
 7 |   chartTimeRange: string[]
 8 |   tableLoading: boolean
 9 |   searchTimePair: string[]
10 | }
11 | interface chartQueryParams {
12 |   start: string | number
13 |   end: string | number
14 |   step: string | number
15 | }
16 | 
17 | export const useInstanceStore = defineStore('instance', {
18 |   state: (): instanceStoreState => ({
19 |     instanceList: [],
20 |     currentId: '',
21 |     chartTimeRange: [],
22 |     tableLoading: false,
23 |     searchTimePair: []
24 |   }),
25 |   getters: {
26 |     activeInstance(): InstanceType | undefined {
27 |       return this.instanceList.find((item: InstanceType) => item.instance_id === this.currentId)
28 |     },
29 |     instanceNameMap(): Map<string, string> {
30 |       const res = new Map<string, string>()
31 |       this.instanceList.forEach((item: InstanceType) => {
32 |         res.set(item.instance_id, item.instance_name)
33 |       })
34 |       return res
35 |     },
36 |     chartQuery(): chartQueryParams {
37 |       const [start, end] = this.chartTimeRange
38 |       const _start = start ? Math.floor(new Date(start).getTime() / 1000).toFixed(3) : ''
39 |       const _end = end ? Math.floor(new Date(end).getTime() / 1000).toFixed(3) : ''
40 |       return {
41 |         start: _start,
42 |         end: _end,
43 |         step: '15s'
44 |       }
45 |     },
46 |     activeServingId(): string {
47 |       return this.activeInstance != null ? this.activeInstance.serving_id : this.instanceList[0]?.serving_id ?? ''
48 |     },
49 |     activeServingJob(): string {
50 |       return this.activeInstance != null ? this.activeInstance.startup_args.exported_job : this.instanceList[0]?.startup_args.exported_job ?? ''
51 |     },
52 |   },
53 |   actions: {
54 |     getInstanceList(): void {
55 |       this.tableLoading = true
56 |       getServing().then((res) => {
57 |         this.instanceList = res.data
58 |       }).catch((err) => {
59 |         console.error(err)
60 |       }).finally(() => {
61 |         this.tableLoading = false
62 |       })
63 |     }
64 |   }
65 | })
66 | 


--------------------------------------------------------------------------------
/front/src/styles/element/index.scss:
--------------------------------------------------------------------------------
 1 | @forward 'element-plus/theme-chalk/src/common/var.scss' with (
 2 |   $colors: (
 3 |     'primary': (
 4 |       'base': #303133,
 5 |     ),
 6 |   ),
 7 |   $table: (
 8 |     'header-bg-color': #EBEDF0,
 9 |     'header-text-color': #606266
10 |   ),
11 |   $collapse: (
12 |     'header-height': 36px,
13 |     'header-bg-color': #F0F2F5
14 |   )
15 | 
16 | );


--------------------------------------------------------------------------------
/front/src/styles/index.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 | 


--------------------------------------------------------------------------------
/front/src/styles/index.scss:
--------------------------------------------------------------------------------
1 | @import './element-ui.scss';


--------------------------------------------------------------------------------
/front/src/utils/request.ts:
--------------------------------------------------------------------------------
 1 | import axios, { type AxiosResponse } from 'axios'
 2 | 
 3 | const service = axios.create({
 4 |   baseURL: '/',
 5 |   timeout: 10000
 6 | })
 7 | 
 8 | service.interceptors.request.use(
 9 |   (config) => {
10 |     return config
11 |   },
12 |   (error) => {
13 |     return Promise.reject(error)
14 |   }
15 | )
16 | 
17 | service.interceptors.response.use(
18 |   (response: AxiosResponse) => {
19 |     const res = response.data
20 |     if (Number(res.code) === 0 || res.status === 'success') {
21 |       return res.code === 0 ? res.result : res
22 |     } else {
23 |       ElMessage({
24 |         message: res.response?.data?.message || res.message || 'Error',
25 |         type: 'error',
26 |         duration: 5 * 1000
27 |       })
28 |       return Promise.reject(res)
29 |     }
30 |   },
31 |   (error) => {
32 |     ElMessage({
33 |       message: error.response?.data?.message || error.message || 'Error',
34 |       type: 'error',
35 |       duration: 5 * 1000
36 |     })
37 |     return Promise.reject(error)
38 |   }
39 | )
40 | 
41 | export default service
42 | 


--------------------------------------------------------------------------------
/front/tailwind.config.js:
--------------------------------------------------------------------------------
 1 | /** @type {import('tailwindcss').Config} */
 2 | export default {
 3 |   content: [
 4 |     "./index.html",
 5 |     "./src/**/*.{vue,js,ts,jsx,tsx}",
 6 |   ],
 7 |   theme: {
 8 |     extend: {
 9 |       colors: {
10 |         primary: '#303133',
11 |         secondary: '#1272FF',
12 |         disabled: '#A8ABB2',
13 |         regular: '#606266',
14 |         gray1: '#EEF3FF',
15 |         gray2: '#EBEEF5',
16 |         gray3: '#F0F2F5',
17 |         gray4: '#7588A3',
18 |         gray5: '#909399',
19 |         gray7: '#DCDFE6',
20 |         gray8: '#F5F7FA',
21 |         black1: '#1E252E'
22 | 
23 |       },
24 |       boxShadow: {
25 |         tableShadow: 'inset 0px -1px 0px 0px #EBEEF5'
26 |       },
27 |       backgroundImage: {
28 |         'filter-icon': 'url("../assets/filter.png")'
29 |       }
30 |     },
31 |   },
32 |   plugins: [],
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/front/tsconfig.app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": "@vue/tsconfig/tsconfig.dom.json",
 3 |   "include": ["env.d.ts", "src/**/*", "src/**/*.vue", "**/*.d.ts", "src/**/*.ts"],
 4 |   "exclude": ["src/**/__tests__/*"],
 5 |   "compilerOptions": {
 6 |     "composite": true,
 7 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
 8 | 
 9 |     "baseUrl": ".",
10 |     "paths": {
11 |       "@/*": ["./src/*"]
12 |     }
13 |   }
14 | }
15 | 


--------------------------------------------------------------------------------
/front/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "files": [],
 3 |   "references": [
 4 |     {
 5 |       "path": "./tsconfig.node.json"
 6 |     },
 7 |     {
 8 |       "path": "./tsconfig.app.json"
 9 |     }
10 |   ]
11 | }
12 | 


--------------------------------------------------------------------------------
/front/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": "@tsconfig/node20/tsconfig.json",
 3 |   "include": [
 4 |     "vite.config.*",
 5 |     "vitest.config.*",
 6 |     "cypress.config.*",
 7 |     "nightwatch.conf.*",
 8 |     "playwright.config.*"
 9 |   ],
10 |   "compilerOptions": {
11 |     "composite": true,
12 |     "noEmit": true,
13 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
14 | 
15 |     "module": "ESNext",
16 |     "moduleResolution": "Bundler",
17 |     "types": ["node"]
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/front/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { fileURLToPath, URL } from 'node:url'
 2 | import { defineConfig } from 'vite'
 3 | import vue from '@vitejs/plugin-vue'
 4 | import VueDevTools from 'vite-plugin-vue-devtools'
 5 | import AutoImport from 'unplugin-auto-import/vite'
 6 | import Components from 'unplugin-vue-components/vite'
 7 | import { ElementPlusResolver } from 'unplugin-vue-components/resolvers'
 8 | import { createSvgIconsPlugin } from 'vite-plugin-svg-icons'
 9 | import path from 'path'
10 | // import { visualizer } from 'rollup-plugin-visualizer'
11 | 
12 | export default defineConfig({
13 |   build: {
14 |     outDir: './dist',
15 |     rollupOptions: {
16 |       plugins: [
17 |         // visualizer({
18 |         //   open: true,
19 |         //   gzipSize: true
20 |         // })
21 |       ],
22 |       output: {
23 |         chunkFileNames: 'static/js/[name]-[hash].js',
24 |         entryFileNames: 'static/js/[name]-[hash].js',
25 |         assetFileNames: 'static/[ext]/[name]-[hash].[ext]',
26 |         manualChunks(id) {
27 |           if (id.includes('node_modules')) {
28 |             return id.toString().split('node_modules/')[1].split('/')[0].toString()
29 |           }
30 |         }
31 |       }
32 |     }
33 |   },
34 |   css: {
35 |     preprocessorOptions: {
36 |       scss: {
37 |         additionalData: `@use "~/styles/element/index.scss" as *;`
38 |       }
39 |     }
40 |   },
41 |   plugins: [
42 |     vue(),
43 |     VueDevTools(),
44 |     AutoImport({
45 |       resolvers: [ElementPlusResolver({ importStyle: 'sass' })]
46 |     }),
47 |     Components({
48 |       resolvers: [ElementPlusResolver({ importStyle: 'sass' })]
49 |     }),
50 |     createSvgIconsPlugin({
51 |       iconDirs: [path.resolve(process.cwd(), 'src/assets/svg')],
52 |       symbolId: 'icon-[name]'
53 |     })
54 |   ],
55 |   resolve: {
56 |     alias: {
57 |       '@': fileURLToPath(new URL('./src', import.meta.url)),
58 |       '~/': `${path.resolve(__dirname, 'src')}/`
59 |     }
60 |   }
61 | })
62 | 


--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/README.md:
--------------------------------------------------------------------------------
 1 | ## 使用方式
 2 | 安装whl包
 3 | ```bash
 4 | pip install enova_instrumentation_llmo-0.0.8-py3-none-any.whl 
 5 | ```
 6 | 在vllm程序代码中进行ot配置和开启注入
 7 | ```python
 8 | 
 9 | # 开启instrument
10 | from enova.llmo import start
11 | # 指定ot collector地址和service name
12 | start(otlp_exporter_endpoint="localhost:4317", service_name="service_name")
13 | 
14 | #######接原代码内容#######
15 | ```
16 | 
17 | ## Metrics 指标说明
18 | - `avg_prompt_throughput` prompt 输入速率，单位 tokens/s
19 | - `avg_generation_throughput` 生成速率，单位 tokens/s
20 | - `running_requests` 当前 running 的 requests 数
21 | - `swapped_requests` 当前 swapped 的 requests 数
22 | - `pending_requests` 当前 pending 的 requests 数
23 | - `gpu_kv_cache_usage` gpu kv cache 使用率
24 | - `cpu_kv_cache_usage` cpu kv cache 使用率
25 | - `generated_tokens` 生成的 tokens 数
26 | - `llm_engine_init_config` engine启动参数，attributes如下
27 |   - `model`
28 |   - `tokenizer`
29 |   - `tokenizer_mode`
30 |   - `revision`
31 |   - `tokenizer_revision`
32 |   - `trust_remote_code`
33 |   - `dtype`
34 |   - `max_seq_len`
35 |   - `download_dir`
36 |   - `load_format`
37 |   - `tensor_parallel_size`
38 |   - `disable_custom_all_reduce`
39 |   - `quantization`
40 |   - `enforce_eager`
41 |   - `kv_cache_dtype`
42 |   - `seed`
43 |   - `max_num_batched_tokens`
44 |   - `max_num_seqs`
45 |   - `max_paddings`
46 |   - `pipeline_parallel_size`
47 |   - `worker_use_ray`
48 |   - `max_parallel_loading_workers`
49 | - `http.server.active_requests` FastAPI 正在处理中的 HTTP 请求的数量
50 | - `http.server.duration` FastAPI 服务端请求处理时间。
51 | - `http.server.response.size` FastAPI HTTP 响应消息的大小
52 | - `http.server.request.size` FastAPI HTTP 请求的大小
53 | 
54 | 
55 | ## trace span 说明
56 | - `POST /generate` /generate请求
57 | - `POST /generate prompt` 带有 `prompt` attribute
58 | - `ModelRunner.execute_model` 模型execute，对应一次 token 生成
59 | - `CUDAGraphRunner.forward` CUDA Graph的 forward 计算，在 `ModelRunner.execute_model` 中被调用
60 | - `ChatGLMForCausalLM.forward` chatglm 模型 forward
61 | - `LlamaForCausalLM.forward` llama 模型 forward
62 | 
63 | 


--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from opentelemetry import metrics
 4 | from opentelemetry import trace
 5 | from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
 6 | from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
 7 | from opentelemetry.sdk.metrics import MeterProvider
 8 | from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
 9 | from opentelemetry.sdk.resources import Resource
10 | from opentelemetry.sdk.trace import TracerProvider
11 | from opentelemetry.sdk.trace.export import BatchSpanProcessor
12 | from opentelemetry.semconv.resource import ResourceAttributes
13 | 
14 | 
15 | def start(otlp_exporter_endpoint: str = "localhost:4317", service_name: str = __name__):
16 |     otlp_exporter = OTLPSpanExporter(
17 |         otlp_exporter_endpoint,
18 |         insecure=True,
19 |     )
20 |     resource = Resource(
21 |         attributes={
22 |             ResourceAttributes.SERVICE_NAME: service_name,
23 |         }
24 |     )
25 |     provider = TracerProvider(resource=resource)
26 |     provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
27 |     trace.set_tracer_provider(provider)
28 | 
29 |     exporter = OTLPMetricExporter(endpoint=otlp_exporter_endpoint, insecure=True)
30 |     metric_reader = PeriodicExportingMetricReader(exporter, export_interval_millis=5000)
31 | 
32 |     provider = MeterProvider(metric_readers=[metric_reader], resource=resource)
33 | 
34 |     metrics.set_meter_provider(provider)
35 | 
36 |     from .instrumentation import EnovaFastAPIInstrumentor, EnovaVllmInstrumentor
37 | 
38 |     EnovaFastAPIInstrumentor().instrument()
39 |     EnovaVllmInstrumentor().instrument(service_name)
40 | 
41 |     from .metrics_adapter import VLLMLogMetricsAdapter
42 | 
43 |     metrics_log_handler = VLLMLogMetricsAdapter()
44 |     vllm_logger = logging.getLogger("vllm.engine.metrics")
45 |     vllm_logger.addHandler(metrics_log_handler)
46 | 


--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/instrumentation/__init__.py:
--------------------------------------------------------------------------------
1 | from .vllm import EnovaVllmInstrumentor
2 | from .fastapi import EnovaFastAPIInstrumentor
3 | 


--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/instrumentation/fastapi/__init__.py:
--------------------------------------------------------------------------------
 1 | from typing import Collection
 2 | from opentelemetry import trace, metrics
 3 | from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 4 | from opentelemetry.instrumentation.utils import unwrap
 5 | from opentelemetry.instrumentation.asgi import collect_request_attributes
 6 | from opentelemetry.util.http import _parse_active_request_count_attrs
 7 | from wrapt import wrap_function_wrapper
 8 | from starlette.types import ASGIApp, Scope, Receive, Send
 9 | from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
10 | 
11 | import json
12 | 
13 | _instruments = ("fastapi >= 0.1",)
14 | 
15 | 
16 | class EnovaMiddleware:
17 |     def __init__(self, app: ASGIApp) -> None:
18 |         self.app = app
19 |         self.meter = metrics.get_meter(__name__)
20 |         self.tracer = trace.get_tracer(__name__)
21 |         self.requests_counter = self.meter.create_counter(
22 |             name="http.server.requests",
23 |             unit="requests",
24 |             description="measures the number of HTTP requests received",
25 |         )
26 | 
27 |     async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
28 |         if scope["type"] != "http":
29 |             await self.app(scope, receive, send)
30 |             return
31 | 
32 |         attrs = collect_request_attributes(scope)
33 |         _request_count_attrs = _parse_active_request_count_attrs(attrs)
34 |         self.requests_counter.add(1, _request_count_attrs)
35 |         messages = []
36 | 
37 |         if scope["method"] == "POST" and scope["path"] in ["/generate", "/v1/completions", "/v1/chat/completions"]:
38 |             span_name = f"POST {scope['path']} params"
39 |             more_body = True
40 | 
41 |             try:
42 |                 while more_body:
43 |                     message = await receive()
44 |                     messages.append(message)
45 |                     more_body = message.get("more_body", False)
46 |                 body = b"".join([message.get("body", b"") for message in messages if message.get("body")])
47 |                 if body:
48 |                     with self.tracer.start_as_current_span(span_name) as generate_span:
49 |                         body_json = json.loads(body)
50 |                         for key in ["prompt", "messages", "model"]:
51 |                             if key in body_json:
52 |                                 generate_span.set_attribute(key, str(body_json[key]))
53 |             except Exception as e:
54 |                 pass
55 | 
56 |         async def wrapped_receive():
57 |             if messages:
58 |                 return messages.pop(0)
59 |             return await receive()
60 | 
61 |         await self.app(scope, wrapped_receive, send)
62 | 
63 | 
64 | class EnovaFastAPIInstrumentor(BaseInstrumentor):
65 |     def instrumentation_dependencies(self) -> Collection[str]:
66 |         return _instruments
67 | 
68 |     def _instrument(self, **kwargs):
69 |         def fastapi_init_wrapper(wrapped, instance, args, kwargs):
70 |             result = wrapped(*args, **kwargs)
71 |             instance.add_middleware(EnovaMiddleware)
72 |             FastAPIInstrumentor.instrument_app(instance)
73 |             return result
74 | 
75 |         wrap_function_wrapper("fastapi", "FastAPI.__init__", fastapi_init_wrapper)
76 | 
77 |     def _uninstrument(self, **kwargs):
78 |         unwrap("fastapi", "FastAPI.__init__")
79 | 


--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/metrics_adapter/__init__.py:
--------------------------------------------------------------------------------
1 | from .vllm_logging_metrics import VLLMLogMetricsAdapter
2 | 


--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/metrics_adapter/vllm_logging_metrics.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import re
 3 | import threading
 4 | import time
 5 | from typing import Iterable
 6 | 
 7 | from opentelemetry import metrics
 8 | from opentelemetry.metrics import CallbackOptions, Observation
 9 | 
10 | meter = metrics.get_meter(__name__)
11 | 
12 | metric_info = {
13 |     "avg_prompt_throughput": {"value": 0.0, "unit": "tokens/s", "last_update": time.time()},
14 |     "avg_generation_throughput": {"value": 0.0, "unit": "tokens/s", "last_update": time.time()},
15 |     "running_requests": {"value": 0.0, "unit": "requests", "last_update": time.time()},
16 |     "swapped_requests": {"value": 0.0, "unit": "requests", "last_update": time.time()},
17 |     "pending_requests": {"value": 0.0, "unit": "requests", "last_update": time.time()},
18 |     "gpu_kv_cache_usage": {"value": 0.0, "unit": "%", "last_update": time.time()},
19 |     "cpu_kv_cache_usage": {"value": 0.0, "unit": "%", "last_update": time.time()},
20 | }
21 | 
22 | timeout_seconds = 15
23 | 
24 | for metric_name, info in metric_info.items():
25 |     def create_scrape_metric_callback(metric_name):
26 |         def scrape_metric_callback(options: CallbackOptions) -> Iterable[Observation]:
27 |             value = metric_info[metric_name]["value"]
28 |             yield Observation(value, attributes={})
29 | 
30 |         return scrape_metric_callback
31 | 
32 |     callback = create_scrape_metric_callback(metric_name)
33 |     unit = info["unit"]
34 | 
35 |     meter.create_observable_gauge(
36 |         name=metric_name,
37 |         callbacks=[callback],
38 |         description=f"The value of {metric_name}",
39 |         unit=unit
40 |     )
41 | 
42 | 
43 | def update_metric(name, value, current_time):
44 |     metric_info[name]["value"] = value
45 |     metric_info[name]["last_update"] = current_time
46 | 
47 | 
48 | class VLLMLogMetricsAdapter(logging.Handler):
49 |     def __init__(self):
50 |         super().__init__()
51 |         self.pattern = re.compile(
52 |             r".*?"
53 |             r"Avg prompt throughput: (?P<avg_prompt>\d+\.\d+) tokens/s, "
54 |             r"Avg generation throughput: (?P<avg_gen>\d+\.\d+) tokens/s, "
55 |             r"Running: (?P<running>\d+) reqs, "
56 |             r"Swapped: (?P<swapped>\d+) reqs, "
57 |             r"Pending: (?P<pending>\d+) reqs, "
58 |             r"GPU KV cache usage: (?P<gpu_cache>\d+\.\d+)%, "
59 |             r"CPU KV cache usage: (?P<cpu_cache>\d+\.\d+)%"
60 |         )
61 | 
62 |     def emit(self, record):
63 |         log_message = record.getMessage()
64 |         match = self.pattern.search(log_message)
65 |         if match:
66 |             current_time = time.time()
67 |             update_metric("avg_prompt_throughput", float(match.group("avg_prompt")), current_time)
68 |             update_metric("avg_generation_throughput", float(match.group("avg_gen")), current_time)
69 |             update_metric("running_requests", float(match.group("running")), current_time)
70 |             update_metric("swapped_requests", float(match.group("swapped")), current_time)
71 |             update_metric("pending_requests", float(match.group("pending")), current_time)
72 |             update_metric("gpu_kv_cache_usage", float(match.group("gpu_cache")), current_time)
73 |             update_metric("cpu_kv_cache_usage", float(match.group("cpu_cache")), current_time)
74 | 
75 | 
76 | def update_metrics_periodically():
77 |     while True:
78 |         for metric_name, info in metric_info.items():
79 |             current_time = time.time()
80 |             if current_time - info["last_update"] > timeout_seconds:
81 |                 metric_info[metric_name]["value"] = 0.0  # Reset the value if the data is stale
82 |         time.sleep(5)  # Update every 5 seconds
83 | 
84 | 
85 | # Start the background thread to update metrics periodically
86 | threading.Thread(target=update_metrics_periodically, daemon=True).start()
87 | 


--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "enova-instrumentation-llmo"
 3 | version = "0.0.8"
 4 | description = "enova-instrumentation-llmo"
 5 | requires-python = ">=3.10"
 6 | dynamic = [
 7 |     "dependencies"
 8 | ]
 9 | authors = [
10 |     { name="wenxinxie", email="wenxin@emergingai-tech.com" },
11 | ]
12 | readme = "README.md"
13 | 
14 | 
15 | [tool.coverage.run]
16 | branch = true
17 | source = [ "enova/llmo" ]
18 | 
19 | [build-system]
20 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2,<=7.1.0", "toml"]
21 | 
22 | [tool.poetry]
23 | name = "enova-instrumentation-llmo"
24 | version = "0.0.8"
25 | description = "llmo instrumentation for OpenTelemetry"
26 | authors = ["wenxinxie <wenxin@emergingai-tech.com>"]
27 | 
28 | [[tool.poetry.packages]]
29 | include = "enova/llmo"
30 | 
31 | [tool.poetry.dependencies]
32 | python = "^3.10"
33 | opentelemetry-api = "*"
34 | opentelemetry-sdk = "*"
35 | vllm = "0.6.3.post1"
36 | fastapi = "*"
37 | opentelemetry-exporter-otlp = "*"
38 | opentelemetry-distro = "*"
39 | opentelemetry-instrumentation-fastapi = "*"
40 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "enova"
 3 | description = "enova"
 4 | requires-python = ">=3.8"
 5 | dynamic = ["dependencies", "version"]
 6 | authors = [
 7 |     { name = "kyokagong", email = "kyokagong@emergingai-tech.com" },
 8 |     { name = "wenxinxie", email = "wenxin@emergingai-tech.com" },
 9 |     { name = "jockyhawk", email = "jockyhawk@emergingai-tech.com" },
10 |     { name = "kimzhao", email = "kimzhao@emergingai-tech.com" },
11 | ]
12 | readme = "README.md"
13 | 
14 | [project.scripts]
15 | enova = "enova.entry.cli:main"
16 | 
17 | [project.optional-dependencies]
18 | lint = ["black==23.12.0"]
19 | test = ["pytest", "pytest-cov", "responses", "respx"]
20 | 
21 | 
22 | [build-system]
23 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2,<=7.1.0", "toml"]
24 | build-backend = "setuptools.build_meta"
25 | 
26 | 
27 | [tool.setuptools.packages.find]
28 | where = ["."]
29 | include = ["enova.*"]
30 | namespaces = true
31 | 
32 | [tool.setuptools.package-data]
33 | "*" = ["*.csv", "docker-compose-*"]
34 | "enova.web_statics" = ["*", "*/*"]
35 | 
36 | [tool.setuptools.dynamic]
37 | dependencies = { file = ["requirements.txt"] }
38 | version = {file = ["VERSION"]}
39 | 
40 | [tool.coverage.run]
41 | omit = ["*/tests/test_*.py"]
42 | 


--------------------------------------------------------------------------------
/requirements-docker-no-deps.txt:
--------------------------------------------------------------------------------
1 | vllm==0.6.3.post1
2 | 


--------------------------------------------------------------------------------
/requirements-docker.txt:
--------------------------------------------------------------------------------
 1 | httpx==0.24.1
 2 | fastapi==0.108.0
 3 | vllm==0.6.3.post1
 4 | sglang==0.3.6
 5 | huggingface_hub
 6 | hf-transfer
 7 | transformers
 8 | locate
 9 | python-rapidjson
10 | opentelemetry-api
11 | opentelemetry-sdk
12 | opentelemetry-exporter-otlp
13 | opentelemetry-distro
14 | opentelemetry-instrumentation-fastapi
15 | streamlit
16 | pymysql==1.1.0
17 | aiomysql==0.2.0
18 | sqlalchemy==2.0.29
19 | sqlalchemy-utils
20 | aiosqlite
21 | greenlet
22 | uvicorn
23 | ulid-py
24 | pyopencl
25 | py-cpuinfo
26 | pytz
27 | tzlocal
28 | openai
29 | packaging
30 | ray
31 | python-multipart
32 | addict
33 | orjson
34 | siphash24
35 | # msgspec
36 | # compressed_tensors
37 | # gguf
38 | # sentencepiece
39 | # mistral_common
40 | 
41 | # filelock
42 | # lm-format-enforcer==0.10.3
43 | # ninja
44 | # nvidia-ml-py
45 | # outlines
46 | # pillow
47 | # prometheus-client
48 | # prometheus-fastapi-instrumentator
49 | # psutil
50 | # sentencepiece
51 | # tiktoken
52 | # tokenizers
53 | # typing-extensions
54 | # vllm-flash-attn==2.5.9.post1
55 | # xformers==0.0.27


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | httpx==0.24.1
 2 | fastapi==0.108.0
 3 | huggingface_hub
 4 | hf-transfer
 5 | transformers
 6 | locate
 7 | python-rapidjson
 8 | opentelemetry-api
 9 | opentelemetry-sdk
10 | opentelemetry-exporter-otlp
11 | opentelemetry-distro
12 | opentelemetry-instrumentation-fastapi
13 | streamlit
14 | pymysql==1.1.0
15 | aiomysql==0.2.0
16 | sqlalchemy==2.0.29
17 | sqlalchemy-utils
18 | aiosqlite
19 | greenlet
20 | uvicorn
21 | ulid-py
22 | pyopencl
23 | py-cpuinfo
24 | pytz
25 | tzlocal
26 | openai
27 | packaging
28 | ray
29 | enova-instrumentation-llmo==0.0.8
30 | addict
31 | sglang==0.3.6
32 | python-multipart
33 | orjson
34 | siphash24


--------------------------------------------------------------------------------
/scripts/pack_whl.enova.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | echo "Runing packing wheel using ${PWD}"
 5 | 
 6 | SCRIPT=$(realpath "$0")
 7 | BASEDIR=$(dirname "$SCRIPT")
 8 | BASEDIR=$(dirname "$BASEDIR")
 9 | 
10 | DOCKER_COMPOSE_BIN=enova/template/deployment/docker-compose/bin/docker-compose-linux-x86_64
11 | DOWNLOAD_URL=https://github.com/docker/compose/releases/download/v2.24.5/docker-compose-linux-x86_64
12 | 
13 | 
14 | if [ ! -f "$DOCKER_COMPOSE_BIN" ]; then
15 |     echo "PWD: " $PWD
16 |     mkdir -p enova/template/deployment/docker-compose/bin/
17 | 
18 |     echo "docker-compose binary $DOCKER_COMPOSE_BIN is not existed, start to download..."
19 |     cd enova/template/deployment/docker-compose/bin/
20 |     wget "$DOWNLOAD_URL"
21 | 
22 |     chmod +x docker-compose-linux-x86_64
23 |     cd $BASEDIR
24 |     if [ $? -eq 0 ]; then
25 |         echo "download sucessfully"
26 |     else
27 |         echo "failed to download"
28 |     fi
29 | fi
30 | 
31 | # pack
32 | cd $BASEDIR
33 | python -m build --no-isolation
34 | 
35 | 


--------------------------------------------------------------------------------
/scripts/pack_whl.llmo.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | echo "Runing packing wheel of llmo using ${PWD}"
 5 | 
 6 | SCRIPT=$(realpath "$0")
 7 | BASEDIR=$(dirname "$SCRIPT")
 8 | BASEDIR=$(dirname "$BASEDIR")
 9 | 
10 | # pack
11 | cd $BASEDIR/llmo/enova-instrumentation-llmo
12 | poetry build
13 | 


--------------------------------------------------------------------------------
/tests/enova/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/tests/enova/conftest.py


--------------------------------------------------------------------------------
/tests/enova/test_eapp.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from httpx import AsyncClient
 3 | from enova.app.server import get_app_api_server
 4 | from enova.common.config import CONFIG
 5 | 
 6 | 
 7 | @pytest.fixture
 8 | def eapp():
 9 |     api_server = get_app_api_server()
10 |     return api_server.app
11 | 
12 | 
13 | @pytest.mark.asyncio
14 | class TestEApp:
15 |     async def test_healthz(self, eapp):
16 |         async with AsyncClient(app=eapp, base_url="http://test") as ac:
17 |             response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/healthz")
18 |             assert response.status_code == 200
19 | 
20 | 
21 | @pytest.mark.asyncio
22 | class TestEServe:
23 |     async def test_list_serving(self, eapp):
24 |         async with AsyncClient(app=eapp, base_url="http://test") as ac:
25 |             response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/v1/serving")
26 |             assert response.status_code == 200
27 |             # TODO: some test of biz flow
28 | 
29 |     # async def test_create_serving_with_escalar(self, eapp):
30 |     #     post_params = {}
31 |     #     async with AsyncClient(app=eapp, base_url="http://test") as ac:
32 |     #         response = await ac.post(
33 |     #             f"/{CONFIG.enova_app['url_prefix']}/v1/serving",
34 |     #             json=post_params,
35 |     #         )
36 |     #         assert response.status_code == 200
37 |     #         # TODO: some test of biz flow
38 | 
39 |     # async def test_create_serving_missing_escalar(self, eapp):
40 |     #     post_params = {}
41 |     #     async with AsyncClient(app=eapp, base_url="http://test") as ac:
42 |     #         response = await ac.post(
43 |     #             f"/{CONFIG.enova_app['url_prefix']}/v1/serving",
44 |     #             json=post_params,
45 |     #         )
46 |     #         assert response.status_code == 200
47 |     #         # TODO: some test of biz flow
48 | 
49 |     # async def test_get_serving(self, eapp):
50 |     #     eserve_id = ""
51 |     #     async with AsyncClient(app=eapp, base_url="http://test") as ac:
52 |     #         response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/v1/serving/{eserve_id}")
53 |     #         assert response.status_code == 200
54 |     #         # TODO: some test of biz flow
55 | 
56 |     # async def test_delete_serving(self, eapp):
57 |     #     eserve_id = ""
58 |     #     async with AsyncClient(app=eapp, base_url="http://test") as ac:
59 |     #         response = await ac.delete(f"/{CONFIG.enova_app['url_prefix']}/v1/serving/{eserve_id}")
60 |     #         assert response.status_code == 200
61 |     #         # TODO: some test of biz flow
62 | 
63 | 
64 | @pytest.mark.asyncio
65 | class TestTInject:
66 |     async def test_list_injector(self, eapp):
67 |         async with AsyncClient(app=eapp, base_url="http://test") as ac:
68 |             response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/v1/instance/test")
69 |             assert response.status_code == 200
70 |             # TODO: some test of biz flow
71 | 


--------------------------------------------------------------------------------
/tests/enova/test_requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-asyncio


--------------------------------------------------------------------------------