├── .dockerignore ├── .gitattributes ├── .github └── assets │ ├── ENOVA.png │ ├── gpu_metrics.png │ ├── llm_instance.png │ ├── monitoring_metrics.png │ ├── request_inject.png │ ├── test_results.png │ ├── trace.png │ └── webui.png ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── README_ZH.md ├── VERSION ├── docker ├── Dockerfile ├── Dockerfile.enova ├── Dockerfile.enova.base ├── Dockerfile.escaler ├── Dockerfile.jmeter ├── Dockerfile.requirements ├── build_image.enova.base.sh ├── build_image.enova.sh ├── build_image.escaler.sh └── build_image.jmeter.sh ├── enova ├── .gitignore ├── algo │ ├── __init__.py │ ├── resource.py │ ├── serializer.py │ ├── server.py │ └── service.py ├── api │ ├── __init__.py │ ├── app_api.py │ ├── base.py │ ├── escaler_api.py │ ├── prom_api.py │ └── serving_api.py ├── app │ ├── __init__.py │ ├── db_modles.py │ ├── resource.py │ ├── serializer.py │ ├── server.py │ ├── service.py │ └── utils.py ├── common │ ├── __init__.py │ ├── cli_helper.py │ ├── config.py │ ├── constant.py │ ├── encoder.py │ ├── error.py │ ├── g_vars.py │ ├── local.py │ ├── logger.py │ └── utils.py ├── database │ ├── __init__.py │ └── relation │ │ ├── __init__.py │ │ ├── orm │ │ ├── __init__.py │ │ └── base.py │ │ └── transaction │ │ ├── __init__.py │ │ └── session.py ├── entry │ ├── cli.py │ └── command │ │ ├── __init__.py │ │ ├── algo.py │ │ ├── app.py │ │ ├── injector.py │ │ ├── mon.py │ │ ├── pilot.py │ │ ├── serving.py │ │ └── webui.py ├── job │ ├── __init__.py │ └── job_manager.py ├── server │ ├── __init__.py │ ├── exception │ │ ├── __init__.py │ │ └── handler.py │ ├── middleware │ │ ├── __init__.py │ │ ├── base.py │ │ ├── response.py │ │ └── trace.py │ ├── restful │ │ ├── __init__.py │ │ ├── router.py │ │ ├── serializer.py │ │ └── service.py │ └── server.py ├── serving │ ├── __init__.py │ ├── apiserver.py │ ├── backend │ │ ├── base.py │ │ ├── hf │ │ │ ├── __init__.py │ │ │ ├── handler.py │ │ │ └── hf.py │ │ ├── injector.py │ │ ├── sglang.py │ │ ├── transformers.py │ │ ├── utils.py │ │ └── vllm.py │ └── middlewares │ │ ├── auth.py │ │ ├── base.py │ │ └── cors.py ├── template │ └── deployment │ │ └── docker-compose │ │ ├── .gitignore │ │ ├── compose-build.yaml │ │ ├── compose-dev.yaml │ │ ├── compose.yaml │ │ ├── dcgm-exporter │ │ ├── dcp-metrics-included.csv │ │ └── default-counters.csv │ │ ├── escaler │ │ └── conf │ │ │ └── settings.json │ │ ├── grafana │ │ ├── grafana_dashboards │ │ │ ├── enova-dashboard.json │ │ │ └── enova-dcgm-metrics.json │ │ └── grafana_provisioning │ │ │ ├── dashboards │ │ │ └── enova-dashboards.yaml │ │ │ └── datasources │ │ │ └── enova-datasource.yaml │ │ ├── haproxy │ │ └── haproxy.cfg │ │ ├── nginx │ │ └── nginx.conf │ │ ├── otel-collector │ │ └── collector-config.yaml │ │ ├── prometheus │ │ └── prometheus.yml │ │ ├── tempo │ │ └── tempo.yaml │ │ ├── traffic-injector │ │ ├── compose.yaml │ │ ├── data.csv │ │ ├── data │ │ │ ├── arc.csv │ │ │ ├── gsm8k.csv │ │ │ ├── mbpp.csv │ │ │ └── mc_test.csv │ │ ├── jmeter-config-template.xml │ │ └── jmeter.Dockerfile │ │ ├── webui-nginx │ │ └── nginx.conf │ │ └── webui │ │ └── webui.yaml └── webui │ ├── __init__.py │ └── chat.py ├── escaler ├── build.sh ├── cmd │ └── escaler │ │ ├── docs │ │ ├── docs.go │ │ ├── swagger.json │ │ └── swagger.yaml │ │ ├── main.go │ │ ├── main_test.go │ │ └── mock_enovaalgo.go ├── conf │ └── settings.json ├── go.mod ├── go.sum ├── pkg │ ├── api │ │ ├── api.go │ │ ├── enovaalgo.go │ │ ├── prom.go │ │ └── types.go │ ├── config │ │ └── config.go │ ├── detector │ 
│ ├── detector.go │ │ ├── performance.go │ │ └── server.go │ ├── httpserver │ │ ├── middleware │ │ │ ├── logger.go │ │ │ ├── response.go │ │ │ └── trace.go │ │ ├── server │ │ │ ├── router.go │ │ │ └── server.go │ │ └── utils │ │ │ └── utils.go │ ├── logger │ │ └── logger.go │ ├── meta │ │ ├── meta.go │ │ └── task.go │ ├── queue │ │ └── queue.go │ ├── redis │ │ └── redis.go │ ├── resource │ │ ├── clients.go │ │ ├── docker.go │ │ ├── docker │ │ │ └── docker.go │ │ ├── k8s.go │ │ ├── k8s │ │ │ └── k8s.go │ │ └── utils │ │ │ └── cmd.go │ ├── scaler │ │ └── scaler.go │ ├── utils │ │ ├── cache.go │ │ └── utils.go │ └── zmq │ │ └── zmq.go └── scripts │ ├── build_swagger.sh │ ├── generate_mock_files.sh │ ├── generate_ot_clientset.sh │ └── local_docker_run.sh ├── front ├── .dockerignore ├── .env.development ├── .env.production ├── .eslintrc.cjs ├── .gitignore ├── .prettierrc.json ├── README.md ├── auto-imports.d.ts ├── components.d.ts ├── env.d.ts ├── index.html ├── package-lock.json ├── package.json ├── postcss.config.js ├── public │ └── favicon.ico ├── src │ ├── App.vue │ ├── api │ │ └── instance.ts │ ├── assets │ │ ├── empty.png │ │ ├── filter.png │ │ ├── logo │ │ │ ├── emergingai_b.png │ │ │ └── emergingai_w.png │ │ └── svg │ │ │ ├── auto.svg │ │ │ ├── autoRefresh.svg │ │ │ ├── cross.svg │ │ │ ├── docker.svg │ │ │ ├── earth.svg │ │ │ ├── home.svg │ │ │ ├── info.svg │ │ │ ├── log.svg │ │ │ ├── setup.svg │ │ │ ├── toggle.svg │ │ │ └── user.svg │ ├── components │ │ ├── Drawer.vue │ │ ├── Language.vue │ │ ├── Pagination.vue │ │ ├── SearchInput.vue │ │ ├── SummaryTip.vue │ │ ├── SvgIcon.vue │ │ ├── TimeRangePicker.vue │ │ ├── chart │ │ │ ├── LineChart.vue │ │ │ └── ToolBar.vue │ │ ├── experiment │ │ │ ├── TestDetail.vue │ │ │ └── TestInfo.vue │ │ └── instance │ │ │ ├── BaseInfo.vue │ │ │ ├── GpuInfo.vue │ │ │ ├── InstanceDetail.vue │ │ │ └── TestConfig.vue │ ├── hooks │ │ └── useInitQueryRange.ts │ ├── layout │ │ ├── header │ │ │ └── index.vue │ │ ├── index.vue │ │ └── sidebar │ │ │ └── index.vue │ ├── locales │ │ ├── index.ts │ │ └── lang │ │ │ ├── en.ts │ │ │ └── zh.ts │ ├── main.ts │ ├── router │ │ └── index.ts │ ├── stores │ │ ├── app.ts │ │ ├── config.ts │ │ ├── experiment.ts │ │ └── instance.ts │ ├── styles │ │ ├── element-ui.scss │ │ ├── element │ │ │ └── index.scss │ │ ├── index.css │ │ └── index.scss │ ├── utils │ │ └── request.ts │ └── views │ │ ├── Instance.vue │ │ └── TestRecord.vue ├── tailwind.config.js ├── tsconfig.app.json ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts ├── llmo └── enova-instrumentation-llmo │ ├── README.md │ ├── enova │ └── llmo │ │ ├── __init__.py │ │ ├── instrumentation │ │ ├── __init__.py │ │ ├── fastapi │ │ │ └── __init__.py │ │ └── vllm │ │ │ ├── __init__.py │ │ │ └── wrappers.py │ │ └── metrics_adapter │ │ ├── __init__.py │ │ └── vllm_logging_metrics.py │ └── pyproject.toml ├── pyproject.toml ├── requirements-docker-no-deps.txt ├── requirements-docker.txt ├── requirements.txt ├── scripts ├── pack_whl.enova.sh └── pack_whl.llmo.sh └── tests └── enova ├── conftest.py ├── test_eapp.py └── test_requirements.txt /.dockerignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | build 3 | dist 4 | enova.egg-info 5 | *.log 6 | .gitignore 7 | var 8 | .pre-commit-config.yaml 9 | tests 10 | front/node_modules 11 | front/package-lock.json 12 | front/yarn.lock -------------------------------------------------------------------------------- /.gitattributes: 
-------------------------------------------------------------------------------- 1 | *whl filter=lfs diff=lfs merge=lfs -text 2 | docker-compose-* filter=lfs diff=lfs merge=lfs -text 3 | *tgz filter=lfs diff=lfs merge=lfs -text 4 | *tar.gz filter=lfs diff=lfs merge=lfs -text 5 | -------------------------------------------------------------------------------- /.github/assets/ENOVA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/ENOVA.png -------------------------------------------------------------------------------- /.github/assets/gpu_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/gpu_metrics.png -------------------------------------------------------------------------------- /.github/assets/llm_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/llm_instance.png -------------------------------------------------------------------------------- /.github/assets/monitoring_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/monitoring_metrics.png -------------------------------------------------------------------------------- /.github/assets/request_inject.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/request_inject.png -------------------------------------------------------------------------------- /.github/assets/test_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/test_results.png -------------------------------------------------------------------------------- /.github/assets/trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/trace.png -------------------------------------------------------------------------------- /.github/assets/webui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/.github/assets/webui.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into 
it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | !front/src/components/instance 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | .vscode 107 | .idea 108 | .history 109 | 110 | # macos 111 | .DS_Store 112 | src/golang/bin 113 | src/golang/pkg/mod 114 | src/golang/pkg/sumdb 115 | src/golang/dist 116 | 117 | nohup* 118 | 119 | # dependencies 120 | dependencies/ 121 | enova/template/deployment/docker-compose/bin/ 122 |
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pycqa/flake8 3 | rev: 3.9.2 4 | hooks: 5 | - id: flake8 6 | args: 7 | - --max-line-length=150 8 | - --max-complexity=60 9 | 10 | - repo: https://github.com/psf/black 11 | rev: stable # Use the specific revision or tag you want to pin to 12 | hooks: 13 | - id: black 14 | args: 15 | - --line-length=150 16 | 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: v4.0.1 19 | hooks: 20 | - id: check-merge-conflict 21 |
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include enova/web_statics/static * 2 |
-------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.0.8
-------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # FROM nvcr.io/nvidia/pytorch:24.03-py3 2 | FROM emergingai/enova:base 3 | 4 | RUN apt update && apt install net-tools -y 5 | 6 | COPY ./dist/enova-0.0.8-py3-none-any.whl /tmp/ 7 | COPY ./llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-0.0.8-py3-none-any.whl /tmp/ 8 | 9 | RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ 10 | pip uninstall enova enova-instrumentation-llmo -y && \ 11 | pip install --no-cache-dir /tmp/enova_instrumentation_llmo-0.0.8-py3-none-any.whl && \ 12 | pip install --no-cache-dir /tmp/enova-0.0.8-py3-none-any.whl 13 |
-------------------------------------------------------------------------------- /docker/Dockerfile.enova: -------------------------------------------------------------------------------- 1 | FROM emergingai/enova:base 2 |
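# build-flow note: the llmo wheel is packed and installed before the full source
# COPY below, so those layers can be cached across builds; the CACHEBUST build arg
# (bumped per build by build_image.enova.sh) then forces the `COPY . /opt/enova`
# layer and everything after it to rerun.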
3 | COPY ./llmo /opt/enova/llmo 4 | 5 | COPY ./scripts /opt/enova/scripts 6 | 7 | RUN bash /opt/enova/scripts/pack_whl.llmo.sh 8 | 9 | ARG LLMO_VERSION=0.0.8 10 | RUN pip install /opt/enova/llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-${LLMO_VERSION}-py3-none-any.whl --no-deps --no-cache-dir 11 | 12 | ARG CACHEBUST=1 13 | 14 | COPY . /opt/enova 15 | 16 | RUN cd /opt/enova && bash ./scripts/pack_whl.enova.sh 17 | ARG ENOVA_VERSION=0.0.8 18 | 19 | RUN pip install -r /opt/enova/requirements.txt --no-deps --no-cache-dir && \ 20 | pip install /opt/enova/dist/enova-${ENOVA_VERSION}-py3-none-any.whl --no-deps --no-cache-dir && \ 21 | pip uninstall -y transformer-engine && mkdir -p /workspace/model 22 | -------------------------------------------------------------------------------- /docker/Dockerfile.enova.base: -------------------------------------------------------------------------------- 1 | # image enova:base 2 | FROM nvcr.io/nvidia/pytorch:24.07-py3 3 | 4 | RUN apt update && apt install -y \ 5 | net-tools \ 6 | ocl-icd-libopencl1 \ 7 | opencl-headers \ 8 | clinfo 9 | 10 | RUN mkdir -p /etc/OpenCL/vendors && \ 11 | echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd && \ 12 | mkdir -p /opt/enova 13 | 14 | COPY ./requirements-docker.txt /opt/enova/requirements.txt 15 | COPY ./requirements-docker-no-deps.txt /opt/enova/requirements-docker-no-deps.txt 16 | 17 | # RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ 18 | RUN pip install build --no-cache-dir && \ 19 | pip install pip setuptools setuptools_scm[toml]==7.1.0 toml poetry && \ 20 | pip install -r /opt/enova/requirements.txt --no-cache-dir && \ 21 | pip install flashinfer -i https://flashinfer.ai/whl/cu124/torch2.4 --no-deps --no-cache-dir && \ 22 | pip install -r /opt/enova/requirements-docker-no-deps.txt --no-deps --no-cache-dir 23 | -------------------------------------------------------------------------------- /docker/Dockerfile.escaler: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN apt update && apt install build-essential redis libzmq3-dev ca-certificates pkg-config net-tools iputils-ping -y 4 | COPY dependencies/go1.22.2.linux-amd64.tar.gz /tmp/go1.22.2.linux-amd64.tar.gz 5 | RUN cd /tmp && tar -xf go1.22.2.linux-amd64.tar.gz && cp -r go /usr/local/go 6 | ENV PATH=/usr/local/go/bin:$PATH 7 | ENV GO111MODULE="on" 8 | ENV APK_REP="mirrors.ustc.edu.cn" 9 | 10 | #ENV GOPROXY="https://goproxy.io,direct" 11 | #ENV GOPROXY=https://proxy.golang.org,direct 12 | ENV GOPROXY=https://goproxy.cn,direct 13 | #ENV GOPROXY=https://mirrors.aliyun.com/goproxy/,direct 14 | #ENV GOCACHE=/go-cache 15 | 16 | # create and set cache directory permissions 17 | RUN mkdir /go-cache && chmod -R 777 /go-cache 18 | 19 | WORKDIR /app 20 | 21 | COPY escaler . 
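# note: the whole escaler tree is copied above, so the go.mod/go.sum COPY below is
# effectively redundant; a separate module-file copy only helps Docker layer caching
# when it happens before the full source copy.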
22 | 23 | # copy go module file to workdir 24 | COPY escaler/go.mod escaler/go.sum ./ 25 | 26 | # download dependencies on go module 27 | RUN go mod download 28 | 29 | # download swagger toolset 30 | RUN go install github.com/swaggo/swag/cmd/swag@latest 31 | 32 | # compile and install 33 | RUN go env \ 34 | && CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o escaler cmd/escaler/main.go && \ 35 | cp escaler /usr/local/bin/escaler 36 |
-------------------------------------------------------------------------------- /docker/Dockerfile.jmeter: -------------------------------------------------------------------------------- 1 | FROM centos:centos7 2 | 3 | WORKDIR /data 4 | 5 | # TODO: add JDK and JMeter from URL 6 | ADD ./docker/jdk-8u401-linux-x64.tar.gz /usr/local/ 7 | ADD ./docker/apache-jmeter-5.6.3.tgz /opt/ 8 | 9 | RUN mv /usr/local/jdk1.8.0_401 /usr/local/jdk && \ 10 | mv /opt/apache-jmeter-5.6.3 /opt/apache-jmeter 11 | 12 | ENV JAVA_HOME=/usr/local/jdk \ 13 | PATH=/usr/local/jdk/bin:/opt/apache-jmeter/bin:$PATH 14 |
-------------------------------------------------------------------------------- /docker/Dockerfile.requirements: -------------------------------------------------------------------------------- 1 | ARG HARBOR_PATH=emergingai 2 | 3 | FROM ${HARBOR_PATH}/python:base 4 | 5 | RUN apt-get install -y \ 6 | ocl-icd-libopencl1 \ 7 | opencl-headers \ 8 | clinfo 9 | 10 | RUN mkdir -p /etc/OpenCL/vendors && \ 11 | echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd 12 | 13 | COPY ./dist/enova-0.0.8-py3-none-any.whl . 14 | COPY ./llmo/enova-instrumentation-llmo/dist/enova_instrumentation_llmo-0.0.8-py3-none-any.whl . 15 | 16 | RUN pip install enova_instrumentation_llmo-0.0.8-py3-none-any.whl enova-0.0.8-py3-none-any.whl 17 | 18 | RUN pip install vllm
-------------------------------------------------------------------------------- /docker/build_image.enova.base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | echo "Running build image enova:base using ${PWD}" 5 | 6 | SCRIPT=$(realpath "$0") 7 | BASEDIR=$(dirname "$SCRIPT") 8 | BASEDIR=$(dirname "$BASEDIR") 9 | 10 | 11 | export HARBOR_PATH=emergingai 12 | 13 | # build enova 14 | cd $BASEDIR 15 | docker build -f $BASEDIR/docker/Dockerfile.enova.base -t $HARBOR_PATH/enova:base --build-arg HARBOR_PATH="$HARBOR_PATH" $BASEDIR 16 |
-------------------------------------------------------------------------------- /docker/build_image.enova.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | IMAGE_VERSION=v`cat VERSION` 5 | ENOVA_VERSION=`cat VERSION` 6 | LLMO_VERSION="0.0.8" 7 | 8 | echo "Running build image enova:${IMAGE_VERSION} using ${PWD}" 9 | 10 | SCRIPT=$(realpath "$0") 11 | BASEDIR=$(dirname "$SCRIPT") 12 | BASEDIR=$(dirname "$BASEDIR") 13 | echo "BASEDIR: " ${BASEDIR} 14 | 15 | # build front 16 | cd $BASEDIR/front 17 | rm $BASEDIR/enova/web_statics -rf 18 | npm install 19 | npm run build 20 | # yarn 21 | # yarn build 22 | 23 | echo $BASEDIR/front/dist $BASEDIR/enova/web_statics 24 | mv $BASEDIR/front/dist $BASEDIR/enova/web_statics 25 | 26 | export HARBOR_PATH=emergingai 27 | 28 | # build enova 29 | cd $BASEDIR 30 | docker build -f $BASEDIR/docker/Dockerfile.enova -t $HARBOR_PATH/enova:$IMAGE_VERSION \ 31 | --build-arg ENOVA_VERSION="${ENOVA_VERSION}" \ 32 | --build-arg LLMO_VERSION="${LLMO_VERSION}" \ 33 | --build-arg HARBOR_PATH="$HARBOR_PATH" \ 34 | --build-arg CACHEBUST=$(date +%s) \ 35
| $BASEDIR 36 |
-------------------------------------------------------------------------------- /docker/build_image.escaler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | IMAGE_VERSION=v`cat VERSION` 5 | 6 | echo "Running build image enova:${IMAGE_VERSION} using ${PWD}" 7 | 8 | SCRIPT=$(realpath "$0") 9 | BASEDIR=$(dirname "$SCRIPT") 10 | BASEDIR=$(dirname "$BASEDIR") 11 | echo "BASEDIR: " ${BASEDIR} 12 | 13 | 14 | export MIRROR_PATH=emergingai 15 | 16 | # check golang tar.gz 17 | GOLANG_TAR=dependencies/go1.22.2.linux-amd64.tar.gz 18 | DOWNLOAD_URL=https://go.dev/dl/go1.22.2.linux-amd64.tar.gz 19 | 20 | if [ ! -f "$GOLANG_TAR" ]; then 21 | mkdir -p dependencies 22 | 23 | echo "golang tar $GOLANG_TAR does not exist, starting download..." 24 | cd dependencies 25 | wget "$DOWNLOAD_URL" 26 | cd ../ 27 | if [ $? -eq 0 ]; then 28 | echo "downloaded successfully" 29 | else 30 | echo "failed to download" 31 | fi 32 | fi 33 | 34 | # build enova 35 | cd $BASEDIR 36 | docker build -f $BASEDIR/docker/Dockerfile.escaler -t $MIRROR_PATH/enova-escaler:$IMAGE_VERSION $BASEDIR 37 |
-------------------------------------------------------------------------------- /docker/build_image.jmeter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | IMAGE_VERSION=v`cat VERSION` 5 | 6 | echo "Running build image enova-jmeter:${IMAGE_VERSION} using ${PWD}" 7 | 8 | SCRIPT=$(realpath "$0") 9 | BASEDIR=$(dirname "$SCRIPT") 10 | BASEDIR=$(dirname "$BASEDIR") 11 | echo "BASEDIR: " ${BASEDIR} 12 | 13 | 14 | export HARBOR_PATH=emergingai 15 | 16 | # build enova 17 | cd $BASEDIR 18 | docker build -f $BASEDIR/docker/Dockerfile.jmeter -t $HARBOR_PATH/enova-jmeter:$IMAGE_VERSION $BASEDIR 19 |
-------------------------------------------------------------------------------- /enova/.gitignore: -------------------------------------------------------------------------------- 1 | web_statics/* 2 | !web_statics/.gitkeep
-------------------------------------------------------------------------------- /enova/algo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/algo/__init__.py
-------------------------------------------------------------------------------- /enova/algo/resource.py: -------------------------------------------------------------------------------- 1 | from enova.server.restful.router import BaseResource 2 | from enova.algo.serializer import ( 3 | ConfigRecommendRequestSLZ, 4 | ConfigRecommendResponseSLZ, 5 | AnomalyDetectRequestSLZ, 6 | AnomalyDetectResponseSLZ, 7 | AnomalyRecoverRequestSLZ, 8 | AnomalyRecoverResponseSLZ, 9 | ) 10 | from enova.algo.service import AlgoService 11 | 12 | 13 | class BaseResource(BaseResource): 14 | def __init__(self) -> None: 15 | self.service = AlgoService() 16 | 17 | 18 | class ConfigRecommendResource(BaseResource): 19 | PATH = "/config_recommend" 20 | TAGS = ["Algo"] 21 | GET_INCLUDE_IN_SCHEMA = False 22 | POST_INCLUDE_IN_SCHEMA = False 23 | 24 | async def post(self, params: ConfigRecommendRequestSLZ) -> ConfigRecommendResponseSLZ: 25 | return await self.service.config_recommend(params.dict()) 26 | 27 | 28 | class AnomalyDetectResource(BaseResource): 29 | PATH = "/anomaly_detect" 30 | TAGS = ["Algo"] 31 | GET_INCLUDE_IN_SCHEMA = False 32 | POST_INCLUDE_IN_SCHEMA = False 33 | 34 | async def post(self, params:
AnomalyDetectRequestSLZ) -> AnomalyDetectResponseSLZ: 35 | return await self.service.anomaly_detect(params.dict()) 36 | 37 | 38 | class AnomalyRecoverResource(BaseResource): 39 | PATH = "/anomaly_recover" 40 | TAGS = ["Algo"] 41 | GET_INCLUDE_IN_SCHEMA = False 42 | POST_INCLUDE_IN_SCHEMA = False 43 | 44 | async def post(self, params: AnomalyRecoverRequestSLZ) -> AnomalyRecoverResponseSLZ: 45 | return await self.service.anomaly_recover(params.dict()) 46 | -------------------------------------------------------------------------------- /enova/algo/serializer.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | from enova.server.restful.serializer import EmergingAIBaseModel 3 | 4 | 5 | class LlmModel(EmergingAIBaseModel): 6 | framework: str 7 | param: float 8 | 9 | 10 | class GpuModel(EmergingAIBaseModel): 11 | name: str 12 | spec: int 13 | num: int 14 | 15 | 16 | TimeSeriesData = List[Tuple[float, float]] 17 | 18 | 19 | # metric of serving llm model 20 | class Metrics(EmergingAIBaseModel): 21 | active_requests: TimeSeriesData 22 | running_requests: TimeSeriesData 23 | pending_requests: TimeSeriesData 24 | server_new_requests: TimeSeriesData 25 | server_success_requests: TimeSeriesData 26 | gpu_kv_cache_usage: TimeSeriesData 27 | 28 | 29 | # Configurations of llm model 30 | class Configurations(EmergingAIBaseModel): 31 | max_num_seqs: int 32 | tensor_parallel_size: int 33 | gpu_memory_utilization: float 34 | replicas: int 35 | 36 | 37 | class ConfigRecommendRequestSLZ(EmergingAIBaseModel): 38 | llm: LlmModel 39 | gpu: GpuModel 40 | 41 | class Config: 42 | schema_extra = { 43 | "llm": { 44 | "framework": "llama", 45 | "param": 13.0, 46 | }, 47 | "gpu": { 48 | "name": "4090", 49 | "spec": 24, 50 | "num": 2, 51 | }, 52 | } 53 | 54 | 55 | class ConfigRecommendResponseSLZ(EmergingAIBaseModel): 56 | max_num_seqs: int 57 | tensor_parallel_size: int 58 | gpu_memory_utilization: float 59 | replicas: int 60 | 61 | 62 | class AnomalyDetectRequestSLZ(EmergingAIBaseModel): 63 | metrics: List[Metrics] 64 | configurations: Configurations 65 | 66 | class Config: 67 | schema_extra = { 68 | "metrics": [ 69 | { 70 | "active_requests": [[1000000000, 10.0], [1000000000, 20.0]], 71 | "running_requests": [[1000000000, 5.0], [1000000000, 15.0]], 72 | "pending_requests": [[1000000000, 2.0], [1000000000, 4.0]], 73 | "server_new_requests": [[1000000000, 30.0], [1000000000, 40.0]], 74 | "server_success_requests": [[1000000000, 30.0], [1000000000, 40.0]], 75 | "gpu_kv_cache_usage": [[1000000000, 30.0], [1000000000, 40.0]], 76 | } 77 | ], 78 | "configurations": { 79 | "max_num_seqs": 100, 80 | "tensor_parallel_size": 8, 81 | "gpu_memory_utilization": 0.75, 82 | "replicas": 1, 83 | }, 84 | } 85 | 86 | 87 | class AnomalyDetectResponseSLZ(EmergingAIBaseModel): 88 | is_anomaly: int 89 | 90 | 91 | class AnomalyRecoverRequestSLZ(EmergingAIBaseModel): 92 | metrics: List[Metrics] 93 | configurations: Configurations 94 | llm: LlmModel 95 | gpu: GpuModel 96 | 97 | class Config: 98 | schema_extra = { 99 | "metrics": [ 100 | { 101 | "active_requests": [[1000000000, 10.0], [1000000000, 20.0]], 102 | "running_requests": [[1000000000, 5.0], [1000000000, 15.0]], 103 | "pending_requests": [[1000000000, 2.0], [1000000000, 4.0]], 104 | "server_new_requests": [[1000000000, 30.0], [1000000000, 40.0]], 105 | "server_success_requests": [[1000000000, 30.0], [1000000000, 40.0]], 106 | "gpu_kv_cache_usage": [[1000000000, 30.0], [1000000000, 40.0]], 107 | } 108 | ], 109 | 
"configurations": { 110 | "max_num_seqs": 100, 111 | "tensor_parallel_size": 8, 112 | "gpu_memory_utilization": 0.75, 113 | "replicas": 1, 114 | }, 115 | "llm": { 116 | "framework": "llama", 117 | "param": 13.0, 118 | }, 119 | "gpu": { 120 | "name": "4090", 121 | "spec": 24, 122 | "num": 2, 123 | }, 124 | } 125 | 126 | 127 | class AnomalyRecoverResponseSLZ(ConfigRecommendResponseSLZ): 128 | pass 129 | -------------------------------------------------------------------------------- /enova/algo/server.py: -------------------------------------------------------------------------------- 1 | from enova.common.config import CONFIG 2 | from enova.server.server import ApiServer 3 | from enova.common.constant import ApiServerType 4 | 5 | 6 | def get_algo_api_server(api_server_type=ApiServerType.ENOVA_ALGO.value): 7 | api_config = getattr(CONFIG, api_server_type) 8 | CONFIG.api.update(api_config) 9 | 10 | api_server = ApiServer(api_config) 11 | 12 | return api_server 13 | -------------------------------------------------------------------------------- /enova/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/api/__init__.py -------------------------------------------------------------------------------- /enova/api/app_api.py: -------------------------------------------------------------------------------- 1 | from enova.common.config import CONFIG 2 | from enova.api.base import ASyncRestfulEmergingaiAPI, ASyncEmergingaiAPI 3 | from enova.common.constant import HttpMethod 4 | 5 | 6 | APP_API_HOST = CONFIG.enova_app["app_api_host"] 7 | 8 | 9 | class _EnovaAppApi: 10 | def __init__(self) -> None: 11 | self.healthz = ASyncEmergingaiAPI(method=HttpMethod.GET.value, url=APP_API_HOST + "/v1/healthz") 12 | 13 | self.serving = ASyncRestfulEmergingaiAPI( 14 | url=APP_API_HOST + "/v1/serving", 15 | resource_key="instance_id", 16 | ) 17 | 18 | self.delete_serving_by_name = ASyncEmergingaiAPI(method=HttpMethod.DELETE.value, url=APP_API_HOST + "/v1/serving/name") 19 | 20 | 21 | EnovaAppApi = _EnovaAppApi() 22 | -------------------------------------------------------------------------------- /enova/api/prom_api.py: -------------------------------------------------------------------------------- 1 | from enova.common.config import CONFIG 2 | from enova.api.base import ASyncAPI 3 | from enova.common.constant import HttpMethod 4 | 5 | 6 | PROM_API_HOST = CONFIG.enova_app["prom_api_host"] 7 | 8 | 9 | class _PromApi: 10 | def __init__(self) -> None: 11 | self.query_range = ASyncAPI(method=HttpMethod.GET.value, url=PROM_API_HOST + "/api/v1/query_range") 12 | 13 | 14 | PromApi = _PromApi() 15 | -------------------------------------------------------------------------------- /enova/api/serving_api.py: -------------------------------------------------------------------------------- 1 | from enova.common.config import CONFIG 2 | from enova.api.base import ASyncEmergingaiAPI 3 | from enova.common.constant import HttpMethod 4 | 5 | 6 | SERVING_API_HOST = CONFIG.enova_app["serving_api_host"] 7 | 8 | 9 | class _ServingApi: 10 | def __init__(self) -> None: 11 | self.engine_args = ASyncEmergingaiAPI(method=HttpMethod.GET.value, url=SERVING_API_HOST + "/v1/model/info/args") 12 | 13 | 14 | ServingApi = _ServingApi() 15 | -------------------------------------------------------------------------------- /enova/app/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/app/__init__.py
-------------------------------------------------------------------------------- /enova/app/db_modles.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from sqlalchemy import ( 4 | Column, 5 | Float, 6 | Integer, 7 | String, 8 | ) 9 | from sqlalchemy.orm import declared_attr 10 | 11 | from enova.common.constant import DeployStatus, TestStatus 12 | from enova.common.utils import gen_ulid 13 | from enova.database.relation.orm.base import DBModelBase, table_args, JSON, DateTime 14 | 15 | 16 | class DeploymentInstanceInfoTable(DBModelBase): 17 | __tablename__ = "deployment_instance_info" 18 | 19 | @declared_attr 20 | def __table_args__(cls): 21 | return table_args(cls, {"comment": "table of serving's deployment instance"}) 22 | 23 | instance_id = Column(String(256), primary_key=True, nullable=False, comment="instance id", default=gen_ulid) 24 | instance_name = Column(String(64), nullable=False, comment="instance name") 25 | instance_spec = Column(JSON, comment="instance specification") 26 | startup_args = Column(JSON, comment="startup arguments of the model served by serving") 27 | mdl_cfg = Column(JSON, comment="the config of llm model") 28 | serving_id = Column(String(256), nullable=False, comment="serving's unique id; use it to query deployment status via the pilot api") 29 | deploy_status = Column(String(32), nullable=False, default=DeployStatus.UNKNOWN.value, comment="status of deployment") 30 | extra = Column(JSON) 31 | create_time = Column(DateTime, default=datetime.datetime.now) 32 | update_time = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) 33 | is_deleted = Column(Integer, default=0) 34 | creator = Column(String(64)) 35 | updater = Column(String(64)) 36 | 37 | 38 | class TestInfoTable(DBModelBase): 39 | __tablename__ = "test_info" 40 | 41 | @declared_attr 42 | def __table_args__(cls): 43 | return table_args(cls, {"comment": "injection test records"}) 44 | 45 | test_id = Column(String(256), primary_key=True, nullable=False, comment="test ID", default=gen_ulid) 46 | instance_id = Column(String(256), nullable=False, comment="instance_id in serving's deployment") 47 | data_set = Column(String(64), nullable=False, comment="name of dataset") 48 | param_spec = Column(JSON, comment="serving's startup parameters") 49 | test_spec = Column(JSON, comment="test specification") 50 | test_status = Column(String(32), nullable=False, default=TestStatus.UNKNOWN.value) 51 | prompt_tps = Column(Float, default=0, comment="throughput of prompt tokens") 52 | generation_tps = Column(Float, default=0, comment="throughput of generation tokens") 53 | result = Column(JSON, comment="result of inject test") 54 | create_time = Column(DateTime, default=datetime.datetime.now) 55 | update_time = Column(DateTime, default=datetime.datetime.now, onupdate=datetime.datetime.now) 56 | is_deleted = Column(Integer, default=0) 57 | creator = Column(String(64)) 58 | updater = Column(String(64)) 59 |
-------------------------------------------------------------------------------- /enova/app/resource.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, Dict 2 | from fastapi import Body, Depends 3 | 4 | from enova.app.serializer import ( 5 | ServingCreateSLZ, 6
| QueryServingParameterSLZ, 7 | QueryServingResponseSLZ, 8 | SingleQueryServingResponseSLZ, 9 | SingleQueryTestResponseSLZ, 10 | ListTestResponseSLZ, 11 | TestCreateSLZ, 12 | QueryTestParameterSLZ, 13 | ) 14 | from enova.server.restful.router import BaseResource 15 | from enova.app.service import AppService 16 | 17 | 18 | class BaseResource(BaseResource): 19 | def __init__(self) -> None: 20 | self.service = AppService() 21 | 22 | 23 | class HealthzResource(BaseResource): 24 | PATH = "/healthz" 25 | TAGS = ["monitor"] 26 | 27 | async def get(self) -> Dict: 28 | """""" 29 | return {"status": "running"} 30 | 31 | 32 | class ServingResource(BaseResource): 33 | PATH = "/serving" 34 | GET_INCLUDE_IN_SCHEMA = True 35 | GET_RESPONSE_MODEL = QueryServingResponseSLZ 36 | POST_RESPONSE_MODEL = SingleQueryServingResponseSLZ 37 | TAGS = ["serving serve"] 38 | 39 | async def post(self, params: Annotated[ServingCreateSLZ, Body(openapi_examples=ServingCreateSLZ.Extra.openapi_examples)]) -> Dict: 40 | """""" 41 | return await self.service.create_instance(params.dict()) 42 | 43 | async def get(self, params: Annotated[QueryServingParameterSLZ, Depends(QueryServingParameterSLZ)]): 44 | """""" 45 | return await self.service.list_instance(params.dict()) 46 | 47 | 48 | class SingleServingResource(BaseResource): 49 | PATH = "/serving/{instance_id}" 50 | TAGS = ["serving serve"] 51 | 52 | async def delete(self, instance_id: str): 53 | """""" 54 | return await self.service.delete_instance(instance_id) 55 | 56 | async def get(self, instance_id: str): 57 | """""" 58 | return await self.service.get_instance(instance_id) 59 | 60 | 61 | class TestResource(BaseResource): 62 | PATH = "/serving/instance/test" 63 | GET_RESPONSE_MODEL = ListTestResponseSLZ 64 | POST_RESPONSE_MODEL = SingleQueryTestResponseSLZ 65 | TAGS = ["test inject"] 66 | 67 | async def post(self, params: Annotated[TestCreateSLZ, Body(openapi_examples=TestCreateSLZ.Extra.openapi_examples)]): 68 | return await self.service.create_test(params.dict()) 69 | 70 | async def get(self, params: Annotated[QueryTestParameterSLZ, Depends(QueryTestParameterSLZ)]): 71 | return await self.service.list_test(params.dict()) 72 | 73 | 74 | class SingleTestResource(BaseResource): 75 | PATH = "/serving/instance/test/{test_id}" 76 | GET_RESPONSE_MODEL = SingleQueryTestResponseSLZ 77 | TAGS = ["test inject"] 78 | 79 | async def get(self, test_id: str): 80 | return await self.service.get_test(test_id) 81 | 82 | async def delete(self, test_id: str): 83 | return await self.service.delete_test(test_id) 84 |
-------------------------------------------------------------------------------- /enova/app/server.py: -------------------------------------------------------------------------------- 1 | from fastapi import HTTPException 2 | from pathlib import Path 3 | 4 | from fastapi import Request 5 | from fastapi.responses import HTMLResponse 6 | from fastapi.staticfiles import StaticFiles 7 | import sqlalchemy as sa 8 | 9 | from enova.common.config import CONFIG 10 | from enova.common.logger import LOGGER 11 | from enova.common.constant import ApiServerType 12 | from enova.common.utils import get_web_static_path 13 | from enova.database.relation.orm.base import BaseSqlite 14 | from enova.database.relation.transaction.session import get_session 15 | from enova.server.server import ApiServer 16 | 17 | 18 | WEB_STATIC_PATH = get_web_static_path() 19 | 20 | 21 | async def redirect_all_requests_to_frontend(request: Request, exc: HTTPException): 22 | # TODO: need to modify 23 | if
WEB_STATIC_PATH: 24 | return HTMLResponse(open(Path(WEB_STATIC_PATH) / "index.html").read()) 25 | return "Welcome to enova" 26 | 27 | 28 | def init_db(): 29 | with get_session() as session: 30 | # TODO: allow migrate new tables 31 | insp = sa.inspect(session.db_engine.engine) 32 | if not insp.get_table_names(): 33 | BaseSqlite.metadata.create_all(bind=session.db_engine.engine) 34 | session.commit() 35 | 36 | insp = sa.inspect(session.db_engine.engine) 37 | LOGGER.info(insp.get_table_names()) 38 | 39 | 40 | def get_app_api_server(api_server_type=ApiServerType.ENOVA_APP.value): 41 | api_config = getattr(CONFIG, api_server_type) 42 | 43 | CONFIG.api.update(api_config) 44 | 45 | api_server = ApiServer(api_config) 46 | 47 | # mount vuejs dist 48 | api_server.app.mount( 49 | f"{CONFIG.api['url_prefix']}/", 50 | StaticFiles(directory=WEB_STATIC_PATH, html=True), 51 | name="static", 52 | ) 53 | api_server.app.add_exception_handler(404, redirect_all_requests_to_frontend) 54 | 55 | # database init 56 | init_db() 57 | 58 | return api_server 59 |
-------------------------------------------------------------------------------- /enova/app/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def compute_actual_duration(value, unit): 5 | # e.g. compute_actual_duration(5, "min") -> 300; `unit` matches DurationUnitType values ("sec"/"min"/"hour"), which pandas Timedelta accepts 6 | return int(pd.Timedelta(f"{value}{unit}").total_seconds()) 7 |
-------------------------------------------------------------------------------- /enova/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/common/__init__.py
-------------------------------------------------------------------------------- /enova/common/constant.py: -------------------------------------------------------------------------------- 1 | from enum import Enum as BaseEnum 2 | 3 | 4 | class Enum(BaseEnum): 5 | @classmethod 6 | def values(cls): 7 | return list(e.value for e in cls.__members__.values()) 8 | 9 | 10 | class ServingBackend(Enum): 11 | HF = "hf" 12 | VLLM = "vllm" 13 | SGLANG = "sglang" 14 | 15 | 16 | class HttpMethod(Enum): 17 | GET = "get" 18 | POST = "post" 19 | PUT = "put" 20 | DELETE = "delete" 21 | 22 | @classmethod 23 | def methods_with_body(cls): 24 | return [cls.POST.value, cls.PUT.value] 25 | 26 | 27 | class OrderBy(Enum): 28 | ASC = "asc" 29 | DESC = "desc" 30 | 31 | 32 | JSON_RESPONSE_HEADER = "application/json" 33 | 34 | 35 | # --- server scope --- 36 | class ApiServerType(Enum): 37 | ENOVA_ALGO = "enova_algo" 38 | ENOVA_APP = "enova_app" 39 | 40 | 41 | class DeployMode(Enum): 42 | COMPOSE = "compose" 43 | LOCAL = "local" 44 | 45 | 46 | class TrafficDistributionType(Enum): 47 | GAUSSIAN = "gaussian" 48 | POISSON = "poisson" 49 | 50 | 51 | class DurationUnitType(Enum): 52 | SECOND = "sec" 53 | MINUTE = "min" 54 | HOUR = "hour" 55 | 56 | 57 | # --- db_model scope --- 58 | class DeployStatus(Enum): 59 | UNKNOWN = "unknown" 60 | PENDING = "pending" 61 | RUNNING = "running" 62 | FAILED = "failed" 63 | FINISHED = "finished" 64 | 65 | 66 | class TestStatus(Enum): 67 | UNKNOWN = "unknown" 68 | INIT = "init" 69 | SUCCESS = "success" 70 | FAILED = "failed" 71 | RUNNING = "running" 72 | FINISHED = "finished" 73 | 74 | 75 | class ServeStatus(Enum): 76 | UNKNOWN = "unknown" 77 | OFF_LINE = "off_line" 78 | NORMAL = "normal" 79 | ABNORMAL = "abnormal" 80 | 81 | 82 | class Distribution(Enum): 83 | NORMAL = "normal" 84 | POISSON = "poisson" 85 | 86 | 87 | class
VllmMode(Enum): 88 | NORMAL = "normal" 89 | OPENAI = "openai" 90 |
-------------------------------------------------------------------------------- /enova/common/encoder.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import numpy as np 3 | import json 4 | 5 | 6 | class NumpyEncoder(json.JSONEncoder): 7 | """Custom encoder for numpy data types""" 8 | 9 | def default(self, obj): 10 | if isinstance( 11 | obj, 12 | ( 13 | np.int_, 14 | np.intc, 15 | np.intp, 16 | np.int8, 17 | np.int16, 18 | np.int32, 19 | np.int64, 20 | np.uint8, 21 | np.uint16, 22 | np.uint32, 23 | np.uint64, 24 | ), 25 | ): 26 | return int(obj) 27 | 28 | elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)): 29 | return float(obj) 30 | 31 | elif isinstance(obj, (np.complex_, np.complex64, np.complex128)): 32 | return {"real": obj.real, "imag": obj.imag} 33 | 34 | elif isinstance(obj, (np.ndarray,)): 35 | return obj.tolist() 36 | 37 | elif isinstance(obj, (np.bool_)): 38 | return bool(obj) 39 | 40 | elif isinstance(obj, (np.void)): 41 | return None 42 | 43 | return json.JSONEncoder.default(self, obj) 44 | 45 | 46 | def numpy_dumps(v, *, default): 47 | try: 48 | return json.dumps(v, cls=NumpyEncoder) 49 | except Exception: 50 | pass 51 | 52 | # fall back to the caller-supplied default encoder when NumpyEncoder cannot serialize the value 53 | return json.dumps(v, default=default) 54 | 55 | 56 | def json_numpy_obj_hook(dct): 57 | """ 58 | Decodes a previously encoded numpy ndarray 59 | with proper shape and dtype 60 | :param dct: (dict) json encoded ndarray 61 | :return: (ndarray) if input was an encoded ndarray 62 | """ 63 | if isinstance(dct, dict) and "__ndarray__" in dct: 64 | data = base64.b64decode(dct["__ndarray__"]) 65 | return np.frombuffer(data, dct["dtype"]).reshape(dct["shape"]) 66 | return dct 67 |
-------------------------------------------------------------------------------- /enova/common/error.py: -------------------------------------------------------------------------------- 1 | from enova.common.config import CONFIG 2 | 3 | 4 | class EmergingAIBaseError(Exception): 5 | BASE_ERROR_CODE: str = CONFIG.BASIC_ERROR_CODE or "100" 6 | MODULE_CODE: str = CONFIG.MODULE_CODE or "001" 7 | ERROR_CODE: str = "000" 8 | ERROR_MESSAGE: str = "" 9 | 10 | def __init__(self, error_message=None, error_code=None, *args, **kwargs): 11 | self.error_code = error_code if error_code is not None else self.ERROR_CODE 12 | self.error_code = f"{self.BASE_ERROR_CODE}{self.MODULE_CODE}{self.error_code}" 13 | 14 | self.error_message = error_message if error_message is not None else self.ERROR_MESSAGE 15 | self.message = self.error_message 16 | self.code = int(self.error_code) 17 | errors = [] 18 | if kwargs.get("errors", None): 19 | errors = kwargs["errors"] if isinstance(kwargs["errors"], list) else [kwargs["errors"]] 20 | del kwargs["errors"] 21 | self.errors = errors 22 | kwargs["args"] = args 23 | 24 | super(EmergingAIBaseError, self).__init__(self.error_message, self.error_code, kwargs, errors) 25 | 26 | 27 | class ArgsError(EmergingAIBaseError): 28 | ERROR_CODE: str = "001" 29 | ERROR_MESSAGE: str = "args error" 30 | 31 | 32 | class TranslationError(EmergingAIBaseError): 33 | ERROR_CODE: str = "091" 34 | ERROR_MESSAGE: str = "translation error" 35 | 36 | 37 | # -- 38 | class EmergingaiAPIResponseError(EmergingAIBaseError): 39 | ERROR_CODE: str = "010" 40 | ERROR_MESSAGE: str = "response error" 41 | 42 | 43 | class APIParamsError(EmergingAIBaseError): 44 | ERROR_CODE: str = "011"
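# illustrative: EmergingAIBaseError composes the final code as BASE_ERROR_CODE + MODULE_CODE + ERROR_CODE, e.g. "100" + "001" + "011" -> 100001011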
45 | ERROR_MESSAGE: str = "api params error" 46 | 47 | 48 | # --- serving backend api --- 49 | class EScalerApiResponseError(EmergingAIBaseError): 50 | ERROR_CODE: str = "101" 51 | ERROR_MESSAGE: str = "node api response error" 52 | 53 | 54 | class DeploymentInstanceExistError(EmergingAIBaseError): 55 | ERROR_CODE: str = "401" 56 | ERROR_MESSAGE: str = "deployment workload already exists" 57 | 58 | 59 | class DeploymentInstanceNotExistError(EmergingAIBaseError): 60 | ERROR_CODE: str = "402" 61 | ERROR_MESSAGE: str = "deployment workload does not exist" 62 | 63 | 64 | class DeploymentInstanceCreateFailedError(EmergingAIBaseError): 65 | ERROR_CODE: str = "403" 66 | ERROR_MESSAGE: str = "deployment workload create failed" 67 | 68 | 69 | class TestNotExistError(EmergingAIBaseError): 70 | ERROR_CODE: str = "405" # "403" is already used by DeploymentInstanceCreateFailedError 71 | ERROR_MESSAGE: str = "test record does not exist" 72 | 73 | 74 | class JmeterContainerLaunchError(EmergingAIBaseError): 75 | ERROR_CODE: str = "404" 76 | ERROR_MESSAGE: str = "fail to launch jmeter container" 77 | 78 | 79 | class TestStartError(EmergingAIBaseError): 80 | ERROR_CODE: str = "406" 81 | ERROR_MESSAGE: str = "test start failed" 82 | 83 | 84 | class DataFileNotExistError(EmergingAIBaseError): 85 | ERROR_CODE: str = "407" 86 | ERROR_MESSAGE: str = "data file does not exist" 87 | 88 | 89 | # ---- 90 | 91 | 92 | class NotReadyError(EmergingAIBaseError): 93 | ERROR_CODE: str = "101" 94 | ERROR_MESSAGE: str = "support service not ready" 95 | 96 | 97 | class BackendConfigMissingError(EmergingAIBaseError): 98 | ERROR_CODE: str = "102" 99 | ERROR_MESSAGE: str = "backend default config missing" 100 |
-------------------------------------------------------------------------------- /enova/common/g_vars.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from typing import Union 3 | from enova.common.local import get_contextvars, set_contextvars 4 | 5 | 6 | def get_traceid() -> Union[str, None]: 7 | trace_id = get_contextvars("trace_id") 8 | if trace_id is None: 9 | trace_id = uuid.uuid4().hex 10 | set_contextvars("trace_id", trace_id) 11 | return trace_id 12 | 13 | 14 | def get_realip() -> Union[str, None]: 15 | real_ip = get_contextvars("real_ip") 16 | # TODO: LOGGER will cause a cyclic reference 17 | # if real_ip is None: 18 | # LOGGER.warn("RealIPMiddleware maybe not Setup.") 19 | return real_ip 20 |
-------------------------------------------------------------------------------- /enova/common/local.py: -------------------------------------------------------------------------------- 1 | import contextvars 2 | import functools 3 | import threading 4 | 5 | 6 | context_vars_dict = {} 7 | 8 | 9 | def set_contextvars(key, value): 10 | """""" 11 | if key not in context_vars_dict: 12 | context_vars_dict[key] = contextvars.ContextVar(key) 13 | context_vars_dict[key].set(value) 14 | 15 | 16 | def del_contextvars(key): 17 | """ 18 | delete a context variable 19 | """ 20 | # ContextVar has no clear(); drop the stored var so later lookups fall back to the default 21 | context_vars_dict.pop(key, None) 22 | 23 | 24 | def get_contextvars(key, default=None): 25 | """ 26 | get a context variable, falling back to default 27 | """ 28 | if key not in context_vars_dict: 29 | return default 30 | try: 31 | return context_vars_dict[key].get() 32 | except LookupError: 33 | return default 34 | 35 | 36 | def has_contextvars(key): 37 | """check whether a context variable is set in the current context""" 38 | if key not in context_vars_dict: 39 | return False 40 | try: 41 | context_vars_dict[key].get() 42 | return True 43 | except LookupError: 44 | return False 45 | 46 | 47 | _local = threading.local() 48 | 49 | 50 | def set_local_param(key, value): 51 | """ 52 | mainly setup the custom vars of threads 53 | """ 54 | setattr(_local, key, value)
55 | 56 | 57 | def del_local_param(key): 58 | """ 59 | mainly delete the custom vars of threads 60 | """ 61 | if hasattr(_local, key): 62 | delattr(_local, key) 63 | 64 | 65 | def get_local_param(key, default=None): 66 | return getattr(_local, key, default) 67 | 68 | 69 | def contextlocal_cache(func): 70 | @functools.wraps(func) 71 | def wrapper(*args, **kwargs): 72 | key = functools._make_key(args, kwargs, False) 73 | key = f"{func.__name__}_{key}" 74 | if has_contextvars(key): 75 | return get_contextvars(key) 76 | ret = func(*args, **kwargs) 77 | set_contextvars(key, ret) 78 | return ret 79 | 80 | return wrapper 81 |
-------------------------------------------------------------------------------- /enova/common/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | import uuid 5 | from logging import Formatter 6 | from logging import StreamHandler 7 | from logging import getLogger 8 | from logging.handlers import TimedRotatingFileHandler 9 | from enova.common.config import CONFIG 10 | from enova.common.g_vars import get_traceid 11 | 12 | 13 | LOGGER_MAP = {} 14 | 15 | 16 | class AddRequestIdFormatter(Formatter): 17 | def formatMessage(self, record): 18 | trace_id = get_traceid() 19 | if CONFIG.app_name: 20 | record.message = f"[{CONFIG.app_name}][trace_id: {trace_id}]|{record.message}" 21 | else: 22 | record.message = f"[trace_id: {trace_id}]|{record.message}" 23 | return super().formatMessage(record) 24 | 25 | 26 | def setup_logger(name=None, path=None, level=None, file_handler_backupCount=None): 27 | # sys.stdout = Unbuffered(sys.stdout) 28 | # sys.stderr = Unbuffered(sys.stderr) 29 | logger_conf = CONFIG.logger 30 | name = name or logger_conf["name"] 31 | path = path or logger_conf["path"] 32 | level = level or logger_conf["level"] 33 | file_handler_backupCount = file_handler_backupCount or logger_conf["file_handler_backupCount"] 34 | 35 | logger = getLogger(name) 36 | logger.setLevel(level.upper()) 37 | 38 | formatter = AddRequestIdFormatter(datefmt=logger_conf["datefmt"], fmt=logger_conf["fmt"]) 39 | stream_handler = StreamHandler(sys.stdout) 40 | stream_handler.setFormatter(formatter) 41 | logger.addHandler(stream_handler) 42 | os.makedirs(path, exist_ok=True) 43 | file_handler = TimedRotatingFileHandler( 44 | filename=logger_conf["file_handler_filename_format"].format(path=path, name=name), 45 | when=logger_conf["file_handler_when"], 46 | interval=logger_conf["file_handler_interval"], 47 | backupCount=file_handler_backupCount, 48 | ) 49 | file_handler.suffix = logger_conf["file_handler_suffix"] 50 | file_handler.extMatch = re.compile(logger_conf["file_handler_extMatch_pattern"]) 51 | file_handler.setFormatter(formatter) 52 | logger.addHandler(file_handler) 53 | return logger 54 | 55 | 56 | def get_logger_by_name(name="default"): 57 | if name not in LOGGER_MAP: 58 | logger_conf = {} 59 | logger = setup_logger(**logger_conf) 60 | LOGGER_MAP[name] = logger 61 | return LOGGER_MAP[name] 62 | 63 | 64 | LOGGER = get_logger_by_name() 65 |
-------------------------------------------------------------------------------- /enova/database/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/database/__init__.py
-------------------------------------------------------------------------------- /enova/database/relation/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/database/relation/__init__.py -------------------------------------------------------------------------------- /enova/database/relation/orm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/database/relation/orm/__init__.py -------------------------------------------------------------------------------- /enova/database/relation/transaction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/database/relation/transaction/__init__.py -------------------------------------------------------------------------------- /enova/entry/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from enova.common.config import _get_pkg_version, CONFIG 4 | from enova.entry.command.algo import algo_cli 5 | from enova.entry.command.app import app_cli 6 | from enova.entry.command.serving import serving_cli 7 | from enova.entry.command.injector import injector_cli 8 | from enova.entry.command.mon import mon_cli 9 | from enova.entry.command.pilot import pilot_cli 10 | from enova.entry.command.webui import webui_cli 11 | 12 | 13 | @click.version_option(_get_pkg_version(), "--version", "-v") 14 | @click.group(context_settings=CONFIG.cli["context_settings"]) 15 | def cli(): 16 | """ 17 | \b 18 | ███████╗███╗ ██╗ ██████╗ ██╗ ██╗ █████╗ 19 | ██╔════╝████╗ ██║██╔═══██╗██║ ██║██╔══██╗ 20 | █████╗ ██╔██╗ ██║██║ ██║██║ ██║███████║ 21 | ██╔══╝ ██║╚██╗██║██║ ██║╚██╗ ██╔╝██╔══██║ 22 | ███████╗██║ ╚████║╚██████╔╝ ╚████╔╝ ██║ ██║ 23 | ╚══════╝╚═╝ ╚═══╝ ╚═════╝ ╚═══╝ ╚═╝ ╚═╝ 24 | 25 | \b 26 | ENOVA is an open-source llm deployment, monitoring, injection and auto-scaling service. 27 | It provides a set of commands to deploy stable serverless serving of LLM on GPU clusters with auto-scaling. 
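\b
Typical usage (illustrative; see each subcommand's --help for the full flag set):
    enova mon run
    enova serving run --model <model-name-or-path>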
28 | """ 29 | pass 30 | 31 | 32 | def main(): 33 | cli.add_command(serving_cli) 34 | cli.add_command(app_cli) 35 | cli.add_command(webui_cli) 36 | cli.add_command(mon_cli) 37 | cli.add_command(algo_cli) 38 | cli.add_command(injector_cli) 39 | 40 | cli.add_command(pilot_cli) # all in one 41 | 42 | cli() 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | -------------------------------------------------------------------------------- /enova/entry/command/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/entry/command/__init__.py -------------------------------------------------------------------------------- /enova/entry/command/algo.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import click 3 | 4 | from enova.common.cli_helper import ArgumentHelper 5 | from enova.common.config import CONFIG 6 | 7 | 8 | class EnovaAlgo: 9 | # TODO: support run compose 10 | def run(self): 11 | args_helper = ArgumentHelper(self, sys._getframe()) 12 | CONFIG.update_config(args_helper.args_map) 13 | 14 | import uvicorn 15 | 16 | from enova.algo.server import get_algo_api_server 17 | 18 | api_server = get_algo_api_server() 19 | uvicorn.run(api_server.app, host=CONFIG.enova_algo["host"], port=CONFIG.enova_algo["port"]) 20 | 21 | 22 | pass_enova_algo = click.make_pass_decorator(EnovaAlgo) 23 | 24 | 25 | @click.group(name="algo") 26 | @click.pass_context 27 | def algo_cli(ctx): 28 | """ 29 | Run the autoscaling service. 30 | """ 31 | ctx.obj = EnovaAlgo() 32 | 33 | 34 | @algo_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"]) 35 | @pass_enova_algo 36 | @click.pass_context 37 | def mon_run(ctx, enova_algo: EnovaAlgo): 38 | enova_algo.run() 39 | 40 | 41 | @algo_cli.command(name="stop") 42 | @pass_enova_algo 43 | @click.pass_context 44 | def mon_stop(ctx, enova_algo: EnovaAlgo): 45 | enova_algo.stop() 46 | -------------------------------------------------------------------------------- /enova/entry/command/mon.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import click 4 | 5 | from enova.common.cli_helper import ArgumentHelper, DockerComposeHeler 6 | from enova.common.config import CONFIG 7 | from enova.common.logger import LOGGER 8 | 9 | 10 | class EnovaMonitor: 11 | def __init__(self) -> None: 12 | self.docker_services = [ 13 | "dcgm-exporter", 14 | "grafana", 15 | "otel-collector", 16 | "prometheus", 17 | "tempo", 18 | "enova-escaler", 19 | "enova-algo", 20 | ] # start up by order 21 | self._docker_compose = DockerComposeHeler() 22 | 23 | def _run_by_compose(self): 24 | for service in self.docker_services: 25 | options = {} 26 | self._docker_compose.update_service_options(service, options) 27 | self._docker_compose.startup_service(service, is_daemon=True) 28 | 29 | def run(self, **kwargs): 30 | args_helper = ArgumentHelper(self, sys._getframe()) 31 | CONFIG.update_config(args_helper.args_map) 32 | 33 | self._run_by_compose() 34 | 35 | def _stop_by_compose(self): 36 | pass 37 | 38 | def stop(self): 39 | cmd_params = self._docker_compose.base_cmd 40 | cmd_params += ["down"] 41 | 42 | result = subprocess.run( 43 | [self._docker_compose.excu, "-f", self._docker_compose.compose_file, "down"], 44 | capture_output=True, 45 | text=True, 46 | ) 47 | if result.returncode == 0: 48 | LOGGER.info("llmo monitors stop 
successfully") 49 | else: 50 | LOGGER.error(f"llmo monitors stop failed, {result.stderr}") 51 | 52 | 53 | pass_enova_monitor = click.make_pass_decorator(EnovaMonitor) 54 | 55 | 56 | @click.group(name="mon") 57 | @click.pass_context 58 | def mon_cli(ctx): 59 | """ 60 | Run the monitors of LLM server 61 | """ 62 | ctx.obj = EnovaMonitor() 63 | 64 | 65 | @mon_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"]) 66 | @pass_enova_monitor 67 | @click.pass_context 68 | def mon_run(ctx, enova_monitor: EnovaMonitor): 69 | enova_monitor.run() 70 | 71 | 72 | @mon_cli.command(name="stop") 73 | @pass_enova_monitor 74 | @click.pass_context 75 | def mon_stop(ctx, enova_monitor: EnovaMonitor): 76 | enova_monitor.stop() 77 | -------------------------------------------------------------------------------- /enova/entry/command/serving.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import click 3 | 4 | from enova.common.cli_helper import ArgumentHelper, parse_extra_args 5 | from enova.common.config import CONFIG 6 | from enova.common.constant import ServingBackend 7 | from enova.entry.command.webui import Webui 8 | from enova.serving.apiserver import EApiServer 9 | 10 | 11 | class ServingHandler: 12 | """ 13 | serving handler 14 | """ 15 | 16 | def __init__(self, host, port, model, backend): 17 | self.host = host 18 | self.port = port 19 | self.model = model 20 | self.apiserver = EApiServer(host, port, self.model, backend) 21 | 22 | def start(self, **kwargs): 23 | self.apiserver.local_run(**kwargs) 24 | 25 | def stop(self, *args): 26 | """""" 27 | 28 | 29 | class EnovaServing: 30 | def run( 31 | self, 32 | model, 33 | host=CONFIG.serving["host"], 34 | port=CONFIG.serving["port"], 35 | backend=CONFIG.serving["backend"], 36 | exporter_endpoint=CONFIG.llmo["eai_exporter_endpoint"], 37 | exporter_service_name=CONFIG.llmo["eai_exporter_service_name"], 38 | include_webui=True, 39 | hf_proxy=None, 40 | **kwargs, 41 | ): 42 | args_helper = ArgumentHelper(self, sys._getframe()) 43 | CONFIG.update_config(args_helper.args_map) 44 | 45 | from enova.llmo import start as llmo_start 46 | 47 | CONFIG.update_config({backend: kwargs}) 48 | CONFIG.print_config() 49 | llmo_start(otlp_exporter_endpoint=exporter_endpoint, service_name=exporter_service_name) 50 | if include_webui: 51 | Webui().run(daemon=False) 52 | ServingHandler(host, port, model, backend).start() 53 | 54 | 55 | pass_enova_serving = click.make_pass_decorator(EnovaServing) 56 | 57 | 58 | @click.group(name="serving") 59 | @click.pass_context 60 | def serving_cli(ctx): 61 | """ 62 | Deploy the target LLM and launch the LLM API service. 
63 | """ 64 | ctx.obj = EnovaServing() 65 | 66 | 67 | @serving_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"]) 68 | @click.option("--model", type=str) 69 | @click.option("--host", type=str, default=CONFIG.serving["host"]) 70 | @click.option("--port", type=int, default=CONFIG.serving["port"]) 71 | @click.option("--backend", type=str, default=CONFIG.serving["backend"]) 72 | @click.option( 73 | "--exporter-endpoint", 74 | "--exporter_endpoint", 75 | "exporter_endpoint", 76 | type=str, 77 | default=CONFIG.llmo["eai_exporter_endpoint"], 78 | ) 79 | @click.option( 80 | "--exporter-service-name", 81 | "--exporter_service_name", 82 | "exporter_service_name", 83 | type=str, 84 | default=CONFIG.llmo["eai_exporter_service_name"], 85 | ) 86 | @click.option("--include-webui", "--include_webui", "include_webui", type=bool, default=True) 87 | @click.option("--hf-proxy", "--hf_proxy", "hf_proxy", type=str, default=None) 88 | @pass_enova_serving 89 | @click.pass_context 90 | def serving_run( 91 | ctx, 92 | enova_serving, 93 | model, 94 | host, 95 | port, 96 | backend, 97 | exporter_endpoint, 98 | exporter_service_name, 99 | include_webui, 100 | hf_proxy, 101 | ): 102 | enova_serving.run( 103 | model=model, 104 | host=host, 105 | port=port, 106 | backend=backend, 107 | exporter_endpoint=exporter_endpoint, 108 | exporter_service_name=exporter_service_name, 109 | include_webui=include_webui, 110 | hf_proxy=hf_proxy, 111 | **parse_extra_args(ctx), 112 | ) 113 | -------------------------------------------------------------------------------- /enova/entry/command/webui.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | import click 5 | 6 | from enova.common.cli_helper import ArgumentHelper, parse_extra_args 7 | from enova.common.config import CONFIG 8 | from enova.common.utils import get_enova_path 9 | 10 | 11 | class Webui: 12 | def __init__(self): 13 | self.streamlit_process = None 14 | 15 | def start(self, serving_host, serving_port, host, port): 16 | args_helper = ArgumentHelper(self, sys._getframe()) 17 | CONFIG.update_config(args_helper.args_map) 18 | 19 | os.environ['SERVING_URL'] = f"http://{serving_host}:{serving_port}" 20 | 21 | base_enova_path = get_enova_path() 22 | streamlit_script = os.path.join(base_enova_path, CONFIG.webui["script"]) 23 | self.streamlit_process = subprocess.Popen( 24 | ["streamlit", "run", streamlit_script, "--server.port", str(port), "--server.address", host] 25 | ) 26 | 27 | def run( 28 | self, 29 | serving_host=CONFIG.serving["host"], 30 | serving_port=CONFIG.serving["port"], 31 | host=CONFIG.webui["host"], 32 | port=CONFIG.webui["port"], 33 | daemon=CONFIG.webui["daemon"], 34 | **kwargs, 35 | ): 36 | """""" 37 | self.start(serving_host, serving_port, host, port) 38 | if daemon: 39 | self.streamlit_process.wait() 40 | 41 | def stop(self): 42 | self.streamlit_process.terminate() 43 | self.streamlit_process.wait() 44 | 45 | 46 | pass_enova_webui = click.make_pass_decorator(Webui) 47 | 48 | 49 | @click.group(name="webui") 50 | @click.pass_context 51 | def webui_cli(ctx): 52 | """ 53 | Build agent at this page based on the launched LLM API service. 
54 | """ 55 | pass 56 | 57 | 58 | @webui_cli.command(name="run", context_settings=CONFIG.cli["subcmd_context_settings"]) 59 | @click.option("--serving-host", type=str, default=CONFIG.serving["host"]) 60 | @click.option("--serving-port", type=int, default=CONFIG.serving["port"]) 61 | @click.option("--host", type=str, default=CONFIG.webui["host"]) 62 | @click.option("--port", type=int, default=CONFIG.webui["port"]) 63 | @click.option("--daemon", type=bool, default=CONFIG.webui["daemon"]) 64 | @pass_enova_webui 65 | @click.pass_context 66 | def webui_run( 67 | ctx, 68 | enova_webui: Webui, 69 | serving_host, 70 | serving_port, 71 | host, 72 | port, 73 | daemon, 74 | ): 75 | enova_webui.run( 76 | serving_host=serving_host, 77 | serving_port=serving_port, 78 | host=host, 79 | port=port, 80 | daemon=daemon, 81 | **parse_extra_args(ctx), 82 | ) 83 | pass 84 | 85 | 86 | @webui_cli.command( 87 | name="stop", 88 | context_settings=dict(help_option_names=["-h", "--help"], ignore_unknown_options=True, allow_extra_args=True), 89 | ) 90 | @pass_enova_webui 91 | @click.pass_context 92 | def webui_stop(ctx, enova_webui: Webui): 93 | enova_webui.stop() 94 | -------------------------------------------------------------------------------- /enova/job/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/job/__init__.py -------------------------------------------------------------------------------- /enova/job/job_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/job/job_manager.py -------------------------------------------------------------------------------- /enova/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/server/__init__.py -------------------------------------------------------------------------------- /enova/server/exception/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/server/exception/__init__.py -------------------------------------------------------------------------------- /enova/server/exception/handler.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from fastapi import Request 3 | 4 | 5 | class BaseExceptionHandler(metaclass=abc.ABCMeta): 6 | 7 | @abc.abstractmethod 8 | def get_exception_class(self): 9 | """""" 10 | 11 | @abc.abstractmethod 12 | def exception_handler(self, request: Request, exc): 13 | """""" 14 | -------------------------------------------------------------------------------- /enova/server/middleware/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/server/middleware/__init__.py -------------------------------------------------------------------------------- /enova/server/middleware/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import dataclasses 3 | from typing import List 4 | from fastapi import Request 5 | 6 | 7 | def get_dependencies() -> List: 
/enova/server/middleware/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/server/middleware/__init__.py
--------------------------------------------------------------------------------
/enova/server/middleware/base.py:
--------------------------------------------------------------------------------
1 | import abc
2 | import dataclasses
3 | from typing import List
4 | from fastapi import Request
5 | 
6 | 
7 | def get_dependencies() -> List:
8 |     return []
9 | 
10 | 
11 | @dataclasses.dataclass
12 | class BaseMiddleware(metaclass=abc.ABCMeta):
13 |     """"""
14 | 
15 |     api_config: dict
16 | 
17 | 
18 | class EmergingAIMultiMiddlewares:
19 | 
20 |     def __init__(self) -> None:
21 |         self.middlewares: List[BaseMiddleware] = []
22 |         self.request_middlewares: List[BaseMiddleware] = []
23 |         self.response_middlewares: List[BaseMiddleware] = []
24 | 
25 |     def register(self, middleware: BaseMiddleware):
26 |         self.middlewares.append(middleware)
27 |         if hasattr(middleware, "_process_request"):
28 |             self.request_middlewares.append(middleware)
29 |         if hasattr(middleware, "_process_response"):
30 |             self.response_middlewares.append(middleware)
31 | 
32 |     async def process(self, request: Request, call_next):
33 |         # request
34 |         for middleware in self.request_middlewares:
35 |             if hasattr(middleware, "_process_request"):
36 |                 await middleware._process_request(request)
37 |         response = await call_next(request)
38 |         # response
39 |         for middleware in self.response_middlewares:
40 |             response = await middleware._process_response(request, response)
41 |         return response
--------------------------------------------------------------------------------
/enova/server/middleware/response.py:
--------------------------------------------------------------------------------
1 | import rapidjson
2 | from fastapi import Request, status
3 | 
4 | from fastapi.responses import JSONResponse, StreamingResponse
5 | from enova.common.constant import JSON_RESPONSE_HEADER
6 | from enova.common.g_vars import get_traceid
7 | from enova.server.middleware.base import BaseMiddleware
8 | 
9 | 
10 | class ResponseMiddleware(BaseMiddleware):
11 | 
12 |     async def _process_response(self, request: Request, response):
13 |         """"""
14 |         if request.url.path in [
15 |             self.api_config["url_prefix"] + "/docs",
16 |             self.api_config["url_prefix"] + "/redoc",
17 |             self.api_config["url_prefix"] + "/openapi.json",
18 |         ] or request.url.path.startswith(f"{self.api_config['url_prefix']}/admin"):
19 |             return response
20 |         trace_id = get_traceid()
21 |         if isinstance(response, StreamingResponse) and response.headers.get("content-type") == JSON_RESPONSE_HEADER:
22 |             response_body = b""
23 |             async for chunk in response.body_iterator:
24 |                 response_body += chunk
25 |             resp = rapidjson.loads(response_body)
26 |             if "code" in resp and "message" in resp:
27 |                 if "trace_id" not in resp:
28 |                     resp["trace_id"] = trace_id
29 |                 resp = JSONResponse(
30 |                     status_code=response.status_code,
31 |                     content=resp,
32 |                 )
33 |             else:
34 |                 if response.status_code == status.HTTP_200_OK:
35 |                     code = 0
36 |                 else:
37 |                     code = response.status_code
38 |                 resp = JSONResponse(
39 |                     status_code=response.status_code,
40 |                     content={"message": "", "code": code, "result": resp, "trace_id": trace_id, "version": self.api_config["api_version"]},
41 |                 )
42 |             for k, v in response.headers.items():
43 |                 if k not in resp.headers:
44 |                     resp.headers[k] = v
45 |             return resp
46 |         if isinstance(response, dict):
47 |             return JSONResponse(
48 |                 status_code=200,
49 |                 content={"message": "", "code": 0, "result": response, "trace_id": trace_id, "version": self.api_config["api_version"]},
50 |             )
51 |         return response
--------------------------------------------------------------------------------
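The net effect of ResponseMiddleware is that every plain JSON payload is wrapped in one envelope before it leaves the server. A resource returning {"status": "ok"} would reach the client roughly as (the trace_id value is illustrative):

    {
        "code": 0,
        "message": "",
        "result": {"status": "ok"},
        "trace_id": "9f3a1c2b...",  # set by TraceMiddleware below
        "version": "v1",            # api_config["api_version"]
    }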
/enova/server/middleware/trace.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | from fastapi import Request
3 | from enova.server.middleware.base import BaseMiddleware
4 | from enova.common.local import set_contextvars
5 | 
6 | 
7 | class TraceMiddleware(BaseMiddleware):
8 | 
9 |     async def _process_request(self, request: Request):
10 |         """get the trace_id from the request headers, or generate one"""
11 |         trace_id = request.headers.get('trace_id') or uuid.uuid4().hex
12 |         set_contextvars('trace_id', trace_id)
--------------------------------------------------------------------------------
/enova/server/restful/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/server/restful/__init__.py
--------------------------------------------------------------------------------
/enova/server/restful/router.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | from fastapi import APIRouter
3 | from fastapi.responses import JSONResponse
4 | from enova.common.config import CONFIG
5 | from enova.common.constant import HttpMethod
6 | 
7 | 
8 | class BaseResource:
9 |     PATH = NotImplemented
10 |     DEPENDENCIS = NotImplemented
11 |     GET_RESPONSE_MODEL = None
12 |     PUT_RESPONSE_MODEL = None
13 |     DELETE_RESPONSE_MODEL = None
14 |     POST_RESPONSE_MODEL = None
15 |     GET_RESPONSE_CLASS = JSONResponse
16 |     PUT_RESPONSE_CLASS = JSONResponse
17 |     DELETE_RESPONSE_CLASS = JSONResponse
18 |     POST_RESPONSE_CLASS = JSONResponse
19 |     GET_INCLUDE_IN_SCHEMA = True
20 |     PUT_INCLUDE_IN_SCHEMA = True
21 |     DELETE_INCLUDE_IN_SCHEMA = True
22 |     POST_INCLUDE_IN_SCHEMA = True
23 |     TAGS = None
24 | 
25 | 
26 | class WebSocketResource:
27 |     PATH = NotImplemented
28 | 
29 | 
30 | @dataclasses.dataclass
31 | class ApiRouter:
32 |     prefix: str = None
33 | 
34 |     def __post_init__(self) -> None:
35 |         """
36 |         Dynamically register a resource's get/post/delete/put methods as FastAPI routes.
37 |         """
38 |         self.router = APIRouter(
39 |             prefix=self.prefix,
40 |             dependencies=[],
41 |         )
42 | 
43 |     def register(self, resource_cls):
44 |         """"""
45 |         if issubclass(resource_cls, BaseResource) and resource_cls != BaseResource:
46 |             self._register_http(resource_cls)
47 | 
48 |         if issubclass(resource_cls, WebSocketResource) and resource_cls != WebSocketResource:
49 |             self._register_ws(resource_cls)
50 | 
51 |     def _register_http(self, resource_cls):
52 |         resource_ins = resource_cls()
53 |         for method in HttpMethod.values():
54 |             if hasattr(resource_ins, method):
55 |                 response_model = getattr(resource_ins, f"{method.upper()}_RESPONSE_MODEL")
56 |                 response_class = getattr(resource_ins, f"{method.upper()}_RESPONSE_CLASS")
57 |                 include_in_schema = getattr(resource_ins, f"{method.upper()}_INCLUDE_IN_SCHEMA")
58 |                 actual_path = f"/{CONFIG.api['api_version']}{resource_ins.PATH}"
59 |                 tags = getattr(resource_ins, "TAGS") or []
60 |                 getattr(self.router, method)(
61 |                     actual_path,
62 |                     response_model=response_model,
63 |                     response_class=response_class,
64 |                     include_in_schema=include_in_schema,
65 |                     tags=tags,
66 |                 )(getattr(resource_ins, method))
67 | 
68 |     def _register_ws(self, resource_cls):
69 |         resource_ins = resource_cls()
70 |         if resource_ins.PATH is not NotImplemented:
71 |             actual_path = f"/{CONFIG.api['api_version']}{resource_ins.PATH}"
72 |             self.router.add_api_websocket_route(actual_path, getattr(resource_ins, "get"))
--------------------------------------------------------------------------------
/enova/server/restful/serializer.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Dict, List
3 | 
4 | from pydantic import BaseModel, Field
5 | from pydantic.version
import VERSION as PYDANTIC_VERSION 6 | 7 | 8 | PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.") 9 | if PYDANTIC_V2: 10 | from pydantic._internal._model_construction import ModelMetaclass 11 | else: 12 | from pydantic.main import ModelMetaclass 13 | 14 | from enova.common.config import CONFIG # noqa 15 | from enova.common.constant import OrderBy # noqa 16 | 17 | 18 | class AllFields(ModelMetaclass): 19 | def __new__(self, name, bases, namespaces, **kwargs): 20 | for field in namespaces: 21 | if not field.startswith("__"): 22 | namespaces[field] = Field(namespaces[field]) 23 | return super().__new__(self, name, bases, namespaces, **kwargs) 24 | 25 | 26 | class EmergingAIBaseModel(BaseModel): 27 | def dict(self, *args, **kwargs): 28 | return json.loads(self.model_dump_json()) 29 | 30 | 31 | class EmergingAIQueryRequestBaseModel(EmergingAIBaseModel): 32 | page: int = Field(default=1, ge=CONFIG.api["default_min_page"], le=CONFIG.api["default_max_page"]) 33 | size: int = Field(default=10, ge=CONFIG.api["default_min_size"], le=CONFIG.api["default_max_size"]) 34 | order_by: str | None = None 35 | order_type: OrderBy | None = None 36 | fuzzy: str | None = None 37 | start_time: str | None = None 38 | end_time: str | None = None 39 | 40 | 41 | class EmergingAIQueryResponseBaseModel(EmergingAIBaseModel): 42 | page: int 43 | size: int 44 | total_num: int 45 | total_page: int 46 | num: int 47 | data: List[Dict] 48 | -------------------------------------------------------------------------------- /enova/serving/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/serving/__init__.py -------------------------------------------------------------------------------- /enova/serving/apiserver.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | from enova.common.constant import ServingBackend 3 | from enova.common.config import CONFIG 4 | from enova.serving.backend.transformers import TransformersBackend 5 | from enova.serving.backend.vllm import VllmBackend 6 | from enova.serving.backend.sglang import SglangBackend 7 | 8 | 9 | @dataclasses.dataclass 10 | class EApiServer: 11 | """ 12 | Need to adapt to multiple task, text2text, text2image, image2image 13 | support multiple api according to different task 14 | """ 15 | 16 | host: str 17 | port: int 18 | model: str 19 | backend: str 20 | 21 | def __post_init__(self): 22 | self.backend_ins = None 23 | 24 | def get_backend_ins(self): 25 | engine_map = { 26 | ServingBackend.HF.value: TransformersBackend, 27 | ServingBackend.VLLM.value: VllmBackend, 28 | ServingBackend.SGLANG.value: SglangBackend} 29 | if self.backend not in engine_map: 30 | raise ValueError(f"serving.backend: {CONFIG.serving['backend']} is not in {ServingBackend.values()}") 31 | return engine_map[self.backend](self.backend, self.model) 32 | 33 | def local_run(self): 34 | """""" 35 | self.backend_ins = self.get_backend_ins() 36 | self.backend_ins.local_run(host=self.host, port=self.port) 37 | -------------------------------------------------------------------------------- /enova/serving/backend/hf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/serving/backend/hf/__init__.py -------------------------------------------------------------------------------- 
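apiserver.py above is the single dispatch point between serving engines, so supporting a new engine only means extending engine_map. A hypothetical end-to-end call, assuming "vllm" is ServingBackend.VLLM.value and using placeholder model/host/port values:

    from enova.serving.apiserver import EApiServer

    server = EApiServer(host="0.0.0.0", port=9199, model="Qwen/Qwen2-7B-Instruct", backend="vllm")
    server.local_run()  # builds the backend instance, then serves on host:port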
/enova/serving/backend/hf/handler.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | import functools
3 | from typing import Callable, Dict
4 | 
5 | 
6 | @dataclasses.dataclass
7 | class RemoteFunc:
8 |     method: str
9 |     path: str
10 |     func: Callable
11 |     kwarg: Dict
12 | 
13 | 
14 | REMOTE_FUNC_TAG = "__remote_func__"
15 | 
16 | 
17 | @dataclasses.dataclass
18 | class HuggingFaceHandler:
19 |     """"""
20 | 
21 |     model: str
22 |     name: str = "serving"
23 | 
24 |     @classmethod
25 |     def remote_func(cls, method, path=None, **kwarg):
26 |         def decorator(func):
27 |             actual_path = f"/{func.__name__}" if path is None else path
28 | 
29 |             @functools.wraps(func)
30 |             def wrapped_func(self, *args, **kwargs):
31 |                 return func(self, *args, **kwargs)
32 | 
33 |             setattr(wrapped_func, REMOTE_FUNC_TAG, RemoteFunc(method, actual_path, func, kwarg))
34 |             return wrapped_func
35 | 
36 |         return decorator
--------------------------------------------------------------------------------
/enova/serving/backend/sglang.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | import os
3 | from enova.common.logger import LOGGER
4 | from enova.common.config import CONFIG
5 | from enova.serving.backend.base import BaseBackend
6 | 
7 | 
8 | @dataclasses.dataclass
9 | class SglangBackend(BaseBackend):
10 |     def __post_init__(self):
11 |         """Initialize the SglangBackend specific components."""
12 | 
13 |     def _create_app(self):
14 |         from sglang.srt.server import app as sglang_app, launch_engine
15 |         from sglang.srt.server_args import ServerArgs
16 |         from sglang.srt.utils import add_prometheus_middleware, set_prometheus_multiproc_dir
17 |         from sglang.srt.metrics.func_timer import enable_func_timer
18 | 
19 |         if not hasattr(self, "model"):
20 |             raise RuntimeError("Model path must be specified")
21 | 
22 |         if "tensor_parallel_size" in CONFIG.sglang:
23 |             CONFIG.sglang["tp_size"] = CONFIG.sglang.pop("tensor_parallel_size")
24 |         server_args = ServerArgs(host=CONFIG.serving["host"], port=CONFIG.serving["port"], model_path=self.model, **CONFIG.sglang)
25 |         launch_engine(server_args)
26 |         set_prometheus_multiproc_dir()
27 |         os.makedirs(os.environ["PROMETHEUS_MULTIPROC_DIR"], exist_ok=True)
28 |         add_prometheus_middleware(sglang_app)
29 |         enable_func_timer()
30 | 
31 |         self.app = sglang_app
32 | 
33 |         @self.app.get("/v1/model/info/args")
34 |         async def get_engine_args():
35 |             return {"code": 0, "result": server_args}
36 | 
37 |         LOGGER.info("SGLangBackend FastAPI app created and routes defined.")
--------------------------------------------------------------------------------
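HuggingFaceHandler.remote_func above tags a method with the HTTP verb and path it should be exposed under, so a router can later discover it through REMOTE_FUNC_TAG. A sketch of declaring and inspecting such a method (TextHandler and its body are illustrative):

    from enova.serving.backend.hf.handler import HuggingFaceHandler, REMOTE_FUNC_TAG

    class TextHandler(HuggingFaceHandler):
        @HuggingFaceHandler.remote_func("post", path="/v1/completions")
        def generate(self, prompt: str):
            return {"text": prompt[::-1]}  # stand-in for real inference

    meta = getattr(TextHandler.generate, REMOTE_FUNC_TAG)
    print(meta.method, meta.path)  # post /v1/completions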
CONFIG.api.get("middleware_names", []): 26 | middleware_ins = locate(middleware_cls_name)() 27 | middlewares.register(middleware_ins) 28 | self.app.middleware("http")(middlewares.process) 29 | self.app.add_middleware( 30 | CORSMiddleware, 31 | allow_origins=["*"], 32 | allow_credentials=True, 33 | allow_methods=["*"], 34 | allow_headers=["*"], 35 | ) 36 | 37 | def _init_exception_handler(self): 38 | """""" 39 | 40 | def _init_routers(self): 41 | """ 42 | according task to add route, such as openai 43 | """ 44 | self.api_router = APIRouter( 45 | prefix="", 46 | dependencies=[], 47 | ) 48 | 49 | @self.app.get("/healthz", include_in_schema=False) 50 | async def healthz(): 51 | return {"status": "ok"} 52 | 53 | self.register_serving_api() 54 | 55 | def _create_app(self): 56 | """""" 57 | self.app = FastAPI( 58 | title=self.name, 59 | description=(self.__doc__ if self.__doc__ else f"Enova {self.name}"), 60 | ) 61 | 62 | def register_serving_api(self): 63 | """ 64 | register_api from serving 65 | """ 66 | self.hf.register_api_router(self.api_router) 67 | self.app.include_router(self.api_router) 68 | -------------------------------------------------------------------------------- /enova/serving/backend/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 3 | from enova.common.logger import LOGGER 4 | 5 | 6 | def hf_model_params_size(model_name, hf_proxies=None): 7 | """ 8 | TODO: implement special model 9 | """ 10 | LOGGER.debug(f"starg parse model's config: {model_name}") 11 | try: 12 | return specific_eval_hf_model_params_size(model_name, hf_proxies) 13 | except Exception as e: 14 | LOGGER.warning(f"specific_eval_hf_model_params_size error: {str(e)}") 15 | return estimate_hf_model_params_size(model_name, hf_proxies) 16 | 17 | 18 | def specific_eval_hf_model_params_size(model_name, hf_proxies=None): 19 | """ """ 20 | config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, proxies=hf_proxies) 21 | if config.__class__.__name__ in ["BaichuanConfig", "QWenConfig"]: 22 | model = AutoModelForCausalLM.from_config(config, trust_remote_code=True) 23 | else: 24 | model = AutoModel.from_config(config, trust_remote_code=True) 25 | params_size = 0 26 | for w_name, p in list(model.named_parameters()): 27 | LOGGER.debug(f"w_name: {w_name}, shape: {p.shape}") 28 | params_size += np.prod(p.shape) 29 | return {"params_size": int(params_size), "model_type": config.model_type} 30 | 31 | 32 | def estimate_hf_model_params_size(model_name, hf_proxies=None): 33 | """fast estimate hf model params_szie""" 34 | config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, proxies=hf_proxies) 35 | if config.model_type == "chatglm": 36 | return chatglm_estimate_hf_model_params_size(config) 37 | num_layers = config.num_hidden_layers 38 | hidden_size = config.hidden_size 39 | vocab_size = config.vocab_size 40 | params_size = ( 41 | vocab_size * hidden_size 42 | + num_layers * (4 * hidden_size**2 + 4 * hidden_size) 43 | + num_layers * (8 * hidden_size**2 + 5 * hidden_size) 44 | + 4 * num_layers * hidden_size 45 | ) 46 | return {"params_size": int(params_size), "model_type": config.model_type} 47 | 48 | 49 | def chatglm_estimate_hf_model_params_size(config): 50 | num_layers = config.num_layers 51 | hidden_size = config.hidden_size 52 | vocab_size = config.vocab_size 53 | params_size = ( 54 | vocab_size * hidden_size 55 | + num_layers * (4 * hidden_size**2 + 4 * 
/enova/serving/backend/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
3 | from enova.common.logger import LOGGER
4 | 
5 | 
6 | def hf_model_params_size(model_name, hf_proxies=None):
7 |     """
8 |     TODO: implement special model
9 |     """
10 |     LOGGER.debug(f"start parsing the model's config: {model_name}")
11 |     try:
12 |         return specific_eval_hf_model_params_size(model_name, hf_proxies)
13 |     except Exception as e:
14 |         LOGGER.warning(f"specific_eval_hf_model_params_size error: {str(e)}")
15 |         return estimate_hf_model_params_size(model_name, hf_proxies)
16 | 
17 | 
18 | def specific_eval_hf_model_params_size(model_name, hf_proxies=None):
19 |     """ """
20 |     config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, proxies=hf_proxies)
21 |     if config.__class__.__name__ in ["BaichuanConfig", "QWenConfig"]:
22 |         model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
23 |     else:
24 |         model = AutoModel.from_config(config, trust_remote_code=True)
25 |     params_size = 0
26 |     for w_name, p in list(model.named_parameters()):
27 |         LOGGER.debug(f"w_name: {w_name}, shape: {p.shape}")
28 |         params_size += np.prod(p.shape)
29 |     return {"params_size": int(params_size), "model_type": config.model_type}
30 | 
31 | 
32 | def estimate_hf_model_params_size(model_name, hf_proxies=None):
33 |     """fast estimate of the HF model's params_size"""
34 |     config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, proxies=hf_proxies)
35 |     if config.model_type == "chatglm":
36 |         return chatglm_estimate_hf_model_params_size(config)
37 |     num_layers = config.num_hidden_layers
38 |     hidden_size = config.hidden_size
39 |     vocab_size = config.vocab_size
40 |     params_size = (
41 |         vocab_size * hidden_size
42 |         + num_layers * (4 * hidden_size**2 + 4 * hidden_size)
43 |         + num_layers * (8 * hidden_size**2 + 5 * hidden_size)
44 |         + 4 * num_layers * hidden_size
45 |     )
46 |     return {"params_size": int(params_size), "model_type": config.model_type}
47 | 
48 | 
49 | def chatglm_estimate_hf_model_params_size(config):
50 |     num_layers = config.num_layers
51 |     hidden_size = config.hidden_size
52 |     vocab_size = config.vocab_size
53 |     params_size = (
54 |         vocab_size * hidden_size
55 |         + num_layers * (4 * hidden_size**2 + 4 * hidden_size)
56 |         + num_layers * (8 * hidden_size**2 + 5 * hidden_size)
57 |         + 4 * num_layers * hidden_size
58 |     )
59 |     return {"params_size": int(params_size), "model_type": config.model_type}
--------------------------------------------------------------------------------
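As a sanity check on estimate_hf_model_params_size, plugging a LLaMA-7B-like shape into the same formula lands in the right range:

    num_layers, hidden_size, vocab_size = 32, 4096, 32000  # LLaMA-7B-like shape
    params_size = (
        vocab_size * hidden_size
        + num_layers * (4 * hidden_size**2 + 4 * hidden_size)
        + num_layers * (8 * hidden_size**2 + 5 * hidden_size)
        + 4 * num_layers * hidden_size
    )
    print(params_size)  # 6575226880, i.e. ~6.6e9 -- consistent with a 7B-class model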
/enova/serving/backend/vllm.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import dataclasses
3 | from enova.common.logger import LOGGER
4 | from enova.common.config import CONFIG
5 | from enova.common.constant import VllmMode
6 | from enova.serving.backend.base import BaseBackend
7 | 
8 | 
9 | @dataclasses.dataclass
10 | class VllmBackend(BaseBackend):
11 |     def __post_init__(self):
12 |         """"""
13 | 
14 |     def _create_app(self):
15 |         vllm_mode = CONFIG.vllm.pop("vllm_mode", VllmMode.NORMAL.value)
16 |         from vllm.engine.arg_utils import AsyncEngineArgs
17 |         from vllm.engine.async_llm_engine import AsyncLLMEngine
18 |         from vllm.transformers_utils.tokenizer import get_tokenizer
19 |         import torch
20 | 
21 |         if not torch.cuda.is_available():
22 |             raise RuntimeError("The vLLM backend requires a CUDA runtime")
23 | 
24 |         if vllm_mode == VllmMode.NORMAL.value:
25 |             from vllm.entrypoints import api_server
26 | 
27 |             engine_args = AsyncEngineArgs(model=self.model, **CONFIG.vllm)
28 |             engine = AsyncLLMEngine.from_engine_args(engine_args)
29 |             engine_model_config = asyncio.run(engine.get_model_config())
30 |             max_model_len = engine_model_config.max_model_len
31 | 
32 |             api_server.served_model = self.model
33 |             api_server.engine = engine
34 |             api_server.max_model_len = max_model_len
35 |             api_server.tokenizer = get_tokenizer(
36 |                 engine_args.tokenizer,
37 |                 tokenizer_mode=engine_args.tokenizer_mode,
38 |                 trust_remote_code=engine_args.trust_remote_code,
39 |             )
40 |         elif vllm_mode == VllmMode.OPENAI.value:
41 |             from vllm.entrypoints.openai import api_server
42 |             from addict import Dict as AddDict
43 | 
44 |             engine_args = AsyncEngineArgs(model=self.model, **CONFIG.vllm)
45 |             engine = AsyncLLMEngine.from_engine_args(engine_args, usage_context=api_server.UsageContext.OPENAI_API_SERVER)
46 | 
47 |             request_logger = api_server.RequestLogger(max_log_len=CONFIG.vllm.get("max_log_len"))
48 |             engine_model_config = asyncio.run(engine.get_model_config())
49 | 
50 |             served_model_names = [self.model]
51 |             openai_serving_chat = api_server.OpenAIServingChat(
52 |                 engine,
53 |                 model_config=engine_model_config,
54 |                 served_model_names=served_model_names,
55 |                 response_role=CONFIG.vllm.get("response_role") or "assistant",
56 |                 lora_modules=CONFIG.vllm.get("lora_modules"),
57 |                 prompt_adapters=CONFIG.vllm.get("prompt_adapters"),
58 |                 request_logger=request_logger,
59 |                 chat_template=CONFIG.vllm.get("chat_template"),
60 |             )
61 |             openai_serving_completion = api_server.OpenAIServingCompletion(
62 |                 engine,
63 |                 model_config=engine_model_config,
64 |                 served_model_names=served_model_names,
65 |                 lora_modules=CONFIG.vllm.get("lora_modules"),
66 |                 prompt_adapters=CONFIG.vllm.get("prompt_adapters"),
67 |                 request_logger=request_logger,
68 |                 return_tokens_as_token_ids=CONFIG.vllm.get("return_tokens_as_token_ids") or False,
69 |             )
70 |             api_server.engine = engine
71 |             api_server.async_engine_client = engine
72 |             api_server.engine_args = engine_args
73 |             api_server.openai_serving_chat = openai_serving_chat
74 |             api_server.openai_serving_completion = openai_serving_completion
75 |             args = AddDict(CONFIG.vllm)
76 |             api_server.app = api_server.build_app(args)
77 |         else:
78 |             raise ValueError(f"vllm_mode: {vllm_mode} is not supported")
79 |         LOGGER.info(f"CONFIG.vllm: {CONFIG.vllm}")
80 | 
81 |         self.app = api_server.app
82 |         cur_app = api_server.app
83 | 
84 |         @cur_app.get("/v1/model/info/args")
85 |         async def get_engine_args():
86 |             return {"code": 0, "result": engine_args}
--------------------------------------------------------------------------------
/enova/serving/middlewares/auth.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/serving/middlewares/auth.py
--------------------------------------------------------------------------------
/enova/serving/middlewares/base.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import List
3 | from fastapi import Request
4 | 
5 | 
6 | def get_dependencies() -> List:
7 |     return []
8 | 
9 | 
10 | class BaseMiddleware(metaclass=abc.ABCMeta):
11 |     """"""
12 | 
13 | 
14 | class EnovaAIMultiMiddlewares:
15 |     def __init__(self) -> None:
16 |         self.middlewares: List[BaseMiddleware] = []
17 |         self.request_middlewares: List[BaseMiddleware] = []
18 |         self.response_middlewares: List[BaseMiddleware] = []
19 | 
20 |     def register(self, middleware: BaseMiddleware):
21 |         self.middlewares.append(middleware)
22 |         if hasattr(middleware, "_process_request"):
23 |             self.request_middlewares.append(middleware)
24 |         if hasattr(middleware, "_process_response"):
25 |             self.response_middlewares.append(middleware)
26 | 
27 |     async def process(self, request: Request, call_next):
28 |         # request
29 |         for middleware in self.request_middlewares:
30 |             if hasattr(middleware, "_process_request"):
31 |                 await middleware._process_request(request)
32 |         response = await call_next(request)
33 |         # response
34 |         for middleware in self.response_middlewares:
35 |             response = await middleware._process_response(request, response)
36 |         return response
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/.gitignore:
--------------------------------------------------------------------------------
1 | tempo-data
2 | single-demo
3 | enova_compose*.yaml
4 | bin/docker-compose*
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/escaler/conf/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "resource_backend": {
3 |         "type": "docker"
4 |     },
5 |     "docker": {
6 | 
7 |     },
8 |     "detector": {
9 |         "prom": {
10 |             "host": "enova-prometheus",
11 |             "port": 9090
12 |         },
13 |         "api": {
14 |             "host": "0.0.0.0",
15 |             "port": 8183,
16 |             "version": "v1",
17 |             "url_prefix": "/escaler"
18 |         },
19 |         "detect_interval": 30
20 |     },
21 |     "scaler": {},
22 |     "zmq": {
23 |         "host": "127.0.0.1",
24 |         "port": 4321
25 |     },
26 |     "redis": {
27 |         "addr": "127.0.0.1:6379",
28 |         "password": "",
29 |         "db": 0
30 |     },
31 |     "enova_algo": {
32 |         "host": "enova-algo:8181"
33 |     },
34 |     "serving": {
35 |         "image": "emergingai/enova:v0.0.8",
36 |         "start_cmd": [
37 |         ],
38 |         "network": "enova-mon_enova-net",
39 |         "network_alias": "enova-serving",
40 |         "name": "enova"
41 |     },
42 |     "logger": {
43 |         "name": "server",
44 |         "path": "./var/log/emergingai",
45 |         "level": "debug"
46 |     }
47 | }
--------------------------------------------------------------------------------
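The detector block in this settings template points the escaler at Prometheus (enova-prometheus:9090) and polls every detect_interval seconds. The equivalent instant query, sketched in Python against the same endpoint (the metric name is illustrative):

    import requests

    PROM = "http://enova-prometheus:9090"  # detector.prom host/port from settings.json
    resp = requests.get(f"{PROM}/api/v1/query",
                        params={"query": "vllm:num_requests_running"},  # illustrative metric
                        timeout=5)
    resp.raise_for_status()
    for series in resp.json()["data"]["result"]:
        print(series["metric"], series["value"])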
/enova/template/deployment/docker-compose/grafana/grafana_provisioning/dashboards/enova-dashboards.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: 1
2 | 
3 | providers:
4 |   # a unique provider name. Required
5 |   - name: 'ENOVA-LLMO-dashboards'
6 |     # Org id. Default to 1
7 |     orgId: 1
8 |     # name of the dashboard folder.
9 |     folder: ''
10 |     # folder UID. will be automatically generated if not specified
11 |     folderUid: ''
12 |     # provider type. Default to 'file'
13 |     type: file
14 |     # disable dashboard deletion
15 |     disableDeletion: false
16 |     # how often Grafana will scan for changed dashboards
17 |     updateIntervalSeconds: 10
18 |     # allow updating provisioned dashboards from the UI
19 |     allowUiUpdates: false
20 |     options:
21 |       # path to dashboard files on disk. Required when using the 'file' type
22 |       path: /etc/dashboards
23 |       # use folder names from filesystem to create folders in Grafana
24 |       foldersFromFilesStructure: true
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/grafana/grafana_provisioning/datasources/enova-datasource.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: 1
2 | 
3 | datasources:
4 |   - name: Enova-Prometheus
5 |     type: prometheus
6 |     uid: prometheus
7 |     url: http://prometheus:9090
8 |     isDefault: true
9 |     access: proxy
10 |     editable: true
11 |     orgId: 1
12 | 
13 |   - name: Enova-Tempo
14 |     type: tempo
15 |     uid: tempo
16 |     url: http://tempo:3200
17 |     isDefault: false
18 |     access: proxy
19 |     orgId: 1
20 |     editable: true
21 |     jsonData:
22 |       httpMethod: GET
23 |       serviceMap:
24 |         datasourceUid: prometheus
25 | 
26 | 
27 | 
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/haproxy/haproxy.cfg:
--------------------------------------------------------------------------------
1 | defaults
2 |     mode tcp
3 |     log global
4 |     option tcplog
5 |     option dontlognull
6 |     option http-server-close
7 |     option redispatch
8 |     retries 3
9 |     timeout http-request 10s
10 |     timeout queue 1m
11 |     timeout connect 10s
12 |     timeout client 1m
13 |     timeout server 1m
14 |     timeout http-keep-alive 10s
15 |     timeout check 10s
16 |     maxconn 3000
17 | 
18 | resolvers mydns
19 |     nameserver dns1 127.0.0.1:53
20 |     resolve_retries 3
21 |     timeout resolve 1s
22 |     timeout retry 1s
23 |     hold valid 10s
24 | 
25 | frontend http_front
26 |     bind *:9199
27 |     default_backend http_back
28 | 
29 | backend http_back
30 |     balance roundrobin
31 |     server-template srv 1-3 enova.serving.com:9199 check inter 5s fall 3 rise 2 resolvers mydns init-addr last,libc,none
32 | 
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/nginx/nginx.conf:
--------------------------------------------------------------------------------
1 | worker_processes 8;
2 | worker_rlimit_nofile 65535;
3 | 
4 | events {
5 |     worker_connections 20480;
6 | }
7 | 
8 | 
9 | http {
10 | 
11 |     client_max_body_size 4096M;
12 |     client_header_buffer_size 512k;
13 |     large_client_header_buffers 4 512k;
14 | 
15 |     access_log /var/log/nginx/access.log;
16 |     error_log /var/log/nginx/error.log;
17 | 
18 |     resolver 127.0.0.11 valid=1s;
19 |     upstream backend {
20 |         server enova-serving:9199 max_fails=1 fail_timeout=1s;
21 |     }
22 | 
23 |     server {
24 |         underscores_in_headers on;
25 |         ignore_invalid_headers off;
26 | 
27 |         listen 9199;
28 |         server_name artrefine_proxy;
29 | 
keepalive_timeout 3600; 30 | 31 | access_log /var/log/nginx/enova_access.log; 32 | error_log /var/log/nginx/enova_error.log; 33 | 34 | location / { 35 | proxy_read_timeout 3600; 36 | proxy_pass http://backend; 37 | proxy_set_header Host $proxy_host; 38 | proxy_set_header X-Real-IP $remote_addr; 39 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 40 | } 41 | 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /enova/template/deployment/docker-compose/otel-collector/collector-config.yaml: -------------------------------------------------------------------------------- 1 | receivers: 2 | otlp: 3 | protocols: 4 | grpc: 5 | http: 6 | otlp/spanmetrics: 7 | protocols: 8 | grpc: 9 | endpoint: localhost:12345 10 | 11 | 12 | exporters: 13 | debug: 14 | verbosity: detailed 15 | otlp: 16 | endpoint: tempo:4317 17 | tls: 18 | insecure: true 19 | otlp/spanmetrics: 20 | endpoint: "localhost:4317" 21 | tls: 22 | insecure: true 23 | prometheus: 24 | endpoint: 0.0.0.0:8889 25 | # prometheusremotewrite: 26 | # endpoint: "http://prometheus:9090/api/v1/write" 27 | 28 | processors: 29 | batch: 30 | memory_limiter: 31 | check_interval: 5s 32 | limit_percentage: 80 33 | spike_limit_percentage: 25 34 | spanmetrics: 35 | metrics_exporter: otlp/spanmetrics 36 | dimensions: 37 | - name: batch_size 38 | # - name: parameters 39 | attributes/http: 40 | actions: 41 | - action: delete 42 | key: "http.server_name" 43 | - action: delete 44 | key: "http.host" 45 | 46 | extensions: 47 | health_check: 48 | 49 | service: 50 | extensions: [health_check] 51 | pipelines: 52 | traces: 53 | receivers: [otlp] 54 | processors: [spanmetrics, batch] 55 | exporters: [otlp] 56 | metrics/spanmetrics: 57 | receivers: [otlp/spanmetrics] 58 | exporters: [otlp/spanmetrics] 59 | metrics: 60 | receivers: [otlp] 61 | processors: [attributes/http, batch] 62 | exporters: [debug, prometheus] 63 | # logs: 64 | # receivers: [otlp] 65 | # processors: [batch] 66 | # exporters: [debug] -------------------------------------------------------------------------------- /enova/template/deployment/docker-compose/prometheus/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s 3 | scrape_timeout: 10s 4 | evaluation_interval: 15s 5 | alerting: 6 | alertmanagers: 7 | - static_configs: 8 | - targets: [] 9 | scheme: http 10 | timeout: 10s 11 | api_version: v1 12 | scrape_configs: 13 | - job_name: prometheus 14 | honor_timestamps: true 15 | scrape_interval: 15s 16 | scrape_timeout: 10s 17 | metrics_path: /metrics 18 | scheme: http 19 | static_configs: 20 | - targets: 21 | - prometheus:9090 22 | - job_name: 'otel-collector' 23 | scrape_interval: 10s 24 | static_configs: 25 | - targets: ['otel-collector:8888'] 26 | - targets: ['otel-collector:8889'] 27 | 28 | - job_name: 'dcgm' 29 | static_configs: 30 | - targets: ['dcgm-exporter:9400'] 31 | 32 | - job_name: 'enovaserving' 33 | static_configs: 34 | - targets: ['enova-serving:9199'] 35 | -------------------------------------------------------------------------------- /enova/template/deployment/docker-compose/tempo/tempo.yaml: -------------------------------------------------------------------------------- 1 | stream_over_http_enabled: true 2 | server: 3 | http_listen_port: 3200 4 | log_level: info 5 | 6 | query_frontend: 7 | search: 8 | duration_slo: 5s 9 | throughput_bytes_slo: 1.073741824e+09 10 | trace_by_id: 11 | duration_slo: 5s 12 | 13 | distributor: 14 | receivers: 15 | 
otlp:
16 |         protocols:
17 |           http:
18 |           grpc:
19 | 
20 | ingester:
21 |   max_block_duration: 5m  # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally
22 | 
23 | compactor:
24 |   compaction:
25 |     block_retention: 1h  # overall Tempo trace retention. set for demo purposes
26 | 
27 | metrics_generator:
28 |   processor:
29 |     local_blocks:
30 |       filter_server_spans: false
31 |     span_metrics:
32 |       dimensions:
33 |         - http.method
34 |         - http.target
35 |         - http.status_code
36 |         - service.version
37 |     service_graphs:
38 |       dimensions:
39 |         - http.method
40 |         - http.target
41 |         - http.status_code
42 |         - service.version
43 |   registry:
44 |     external_labels:
45 |       source: tempo
46 |       cluster: docker-compose
47 |   storage:
48 |     path: /tmp/tempo/generator/wal
49 |     remote_write:
50 |       - url: http://prometheus:9090/api/v1/write
51 |         send_exemplars: true
52 |   traces_storage:
53 |     path: /tmp/tempo/generator/traces
54 | 
55 | storage:
56 |   trace:
57 |     backend: local  # backend configuration to use
58 |     wal:
59 |       path: /tmp/tempo/wal  # where to store the wal locally
60 |     local:
61 |       path: /tmp/tempo/blocks
62 | 
63 | overrides:
64 |   defaults:
65 |     metrics_generator:
66 |       processors: [service-graphs, span-metrics, local-blocks]  # enables metrics generator
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/compose.yaml:
--------------------------------------------------------------------------------
1 | version: "3.8"
2 | 
3 | services:
4 |   traffic_injector:
5 |     image: 60.204.135.2/emergingai/enova-jmeter:v0.0.2
6 |     command:
7 |       - sh
8 |       - -c
9 |       - |
10 |         rm -rf /data/report
11 |         mkdir /data/report
12 |         jmeter -n -t /data/jmeter-config.xml -l /data/report/report.log -e -o /data/report
13 |     volumes:
14 |       - ${DATA_FILE}:/opt/data.csv
15 |       - ${OUTPUT}:/data
16 |     networks:
17 |       - enova-net
18 | 
19 | volumes:
20 |   output:
21 | 
22 | networks:
23 |   enova-net:
24 |     enable_ipv6: false
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/data.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/template/deployment/docker-compose/traffic-injector/data.csv
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/jmeter-config-template.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | 
6 | 
7 | 
8 | 
9 | 
10 | 
11 | @LOAD_PROFILE@
12 | 
13 | 
14 | 
15 | 
16 | 
17 | continue
18 | ${__tstFeedback(tst,100, 1000,10)}
19 | 
20 | 
21 | @DURATION@
22 | 
23 | 
24 | S
25 | 
26 | 
27 | 
28 | @ELEMENT_PROP@
29 | 
30 | 
31 | 
32 | 
33 | true
34 | 
35 | 
36 | 
37 | @BODY@
38 | =
39 | 
40 | 
41 | 
42 | @HOST@
43 | @PORT@
44 | @PATH@
45 | @METHOD@
46 | true
47 | true
48 | 
49 | @DATA@
50 | 
51 | 
52 | 
--------------------------------------------------------------------------------
/enova/template/deployment/docker-compose/traffic-injector/jmeter.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:centos7
2 | WORKDIR /opt
3 | ADD jdk-8u361-linux-x64.tar.gz /usr/local/
4 | ADD apache-jmeter-5.6.3.tgz /opt/
5 | ENV JAVA_HOME=/usr/local/jdk1.8.0_361 \
6 | 
PATH=/usr/local/jdk1.8.0_361/bin:/opt/apache-jmeter-5.6.3/bin:$PATH 7 | -------------------------------------------------------------------------------- /enova/template/deployment/docker-compose/webui-nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 8; 2 | worker_rlimit_nofile 65535; 3 | 4 | events { 5 | worker_connections 20480; 6 | } 7 | 8 | 9 | http { 10 | server { 11 | listen 8501; 12 | listen [::]:8501; 13 | 14 | 15 | location /stream { 16 | proxy_pass http://enova-serving:8501; 17 | proxy_http_version 1.1; 18 | proxy_set_header Upgrade $http_upgrade; 19 | proxy_set_header Connection "Upgrade"; 20 | proxy_set_header Host $host; 21 | } 22 | 23 | 24 | location ^~ /static { 25 | proxy_pass http://enova-serving:8501/static/; 26 | } 27 | 28 | location ^~ /healthz { 29 | proxy_pass http://enova-serving:8501/healthz; 30 | } 31 | 32 | location ^~ /vendor { 33 | proxy_pass http://enova-serving:8501/vendor; 34 | } 35 | 36 | location = /_stcore/health { 37 | proxy_pass http://enova-serving:8501/_stcore/health; 38 | } 39 | 40 | location = /_stcore/allowed-message-origins { 41 | proxy_pass http://enova-serving:8501/_stcore/allowed-message-origins; 42 | } 43 | 44 | location = /_stcore/stream { 45 | proxy_pass http://enova-serving:8501/_stcore/stream; 46 | proxy_http_version 1.1; 47 | proxy_redirect off; 48 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 49 | proxy_set_header Host $http_host; 50 | proxy_set_header Upgrade $http_upgrade; 51 | proxy_set_header Connection "upgrade"; 52 | proxy_read_timeout 86400; 53 | } 54 | 55 | 56 | location / { 57 | proxy_pass http://enova-serving:8501; 58 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 59 | proxy_set_header Host $http_host; 60 | proxy_redirect off; 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /enova/template/deployment/docker-compose/webui/webui.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/template/deployment/docker-compose/webui/webui.yaml -------------------------------------------------------------------------------- /enova/webui/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/enova/webui/__init__.py -------------------------------------------------------------------------------- /enova/webui/chat.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import requests 4 | import streamlit as st 5 | from openai import OpenAI, InternalServerError 6 | 7 | st.title('🤖ENOVA AI WebUI') 8 | 9 | MAX_TURNS = 20 10 | MAX_BOXES = MAX_TURNS * 2 11 | 12 | vllm_mode = os.getenv("VLLM_MODE", "openai") 13 | serving_url = os.getenv("SERVING_URL", "http://127.0.0.1:9199") 14 | openai_api_base = serving_url + "/v1" 15 | openai_api_key = "xxx" 16 | 17 | client = None 18 | model = None 19 | if vllm_mode == "openai": 20 | try: 21 | client = OpenAI( 22 | api_key=openai_api_key, 23 | base_url=openai_api_base, 24 | ) 25 | models = client.models.list() 26 | model = models.data[0].id 27 | 28 | except InternalServerError as e: 29 | print("Server not ready. 
Please wait a moment and refresh the page.") 30 | 31 | except Exception as e: 32 | print(f"An unexpected error occurred: {e}") 33 | print("Please check the server status and try again.") 34 | 35 | system_prompt = st.sidebar.text_area( 36 | label="System Prompt", 37 | value="You are a helpful AI assistant who answers questions in short sentences." 38 | ) 39 | 40 | max_tokens = st.sidebar.slider('max_tokens', 0, 4096, 2048, step=1) 41 | temperature = st.sidebar.slider('temperature', 0.0, 1.0, 0.1, step=0.01) 42 | top_p = st.sidebar.slider('top_p', 0.0, 1.0, 0.5, step=0.01) if vllm_mode == "normal" else None 43 | 44 | 45 | if 'messages' not in st.session_state: 46 | st.session_state.messages = [] 47 | 48 | messages = st.session_state.messages 49 | 50 | for message in st.session_state.messages: 51 | with st.chat_message(message['role']): 52 | st.markdown(message['content']) 53 | 54 | if user_input := st.chat_input(''): 55 | 56 | with st.chat_message('user'): 57 | st.markdown(user_input) 58 | messages.append({'role': 'user', 'content': user_input}) 59 | 60 | with st.chat_message('assistant') as assistant_message: 61 | 62 | if vllm_mode == "normal": 63 | placeholder = st.empty() 64 | 65 | response = requests.post( 66 | url=f"{serving_url}/generate", 67 | headers={'Content-type': 'application/json; charset=utf-8'}, 68 | data=json.dumps({ 69 | "prompt": user_input, 70 | "max_tokens": max_tokens, 71 | "top_p": top_p, 72 | "temperature": temperature, 73 | "stream": True 74 | }), 75 | stream=True 76 | ) 77 | 78 | full_content = '' 79 | for line in response.iter_lines(delimiter=b'\00'): 80 | line = line.decode(encoding='utf-8') 81 | if line.strip() == '': 82 | continue 83 | response_json = json.loads(line) 84 | full_content = response_json['text'][0] 85 | placeholder.markdown(full_content) 86 | 87 | st.session_state.messages.append({'role': 'assistant', 'content': full_content}) 88 | 89 | elif vllm_mode == "openai" and model: 90 | placeholder = st.empty() 91 | openai_messages = [ 92 | {"role": message["role"], "content": message["content"]} 93 | for message in st.session_state.messages[-5:] 94 | ] 95 | 96 | chat_completion = client.chat.completions.create( 97 | messages=openai_messages, 98 | model=model, 99 | temperature=temperature, 100 | max_tokens=max_tokens, 101 | stream=True 102 | ) 103 | 104 | full_content = '' 105 | for chunk in chat_completion: 106 | if chunk.choices[0].delta.content is not None: 107 | full_content += str(chunk.choices[0].delta.content) 108 | placeholder.markdown(full_content) 109 | 110 | st.session_state.messages.append({'role': 'assistant', 'content': full_content}) 111 | -------------------------------------------------------------------------------- /escaler/build.sh: -------------------------------------------------------------------------------- 1 | 2 | go mod download 3 | # go install github.com/swaggo/swag/cmd/swag@latest 4 | 5 | # swag init -g cmd/escaler/main.go -o cmd/escaler/docs --parseDependency --parseInternal 6 | mkdir -p dist/bin 7 | go env && go build -o dist/bin/escaler cmd/escaler/main.go 8 | -------------------------------------------------------------------------------- /escaler/cmd/escaler/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "sync" 7 | 8 | "github.com/Emerging-AI/ENOVA/escaler/cmd/escaler/docs" 9 | 10 | "github.com/Emerging-AI/ENOVA/escaler/pkg/detector" 11 | "github.com/Emerging-AI/ENOVA/escaler/pkg/meta" 12 | 
"github.com/Emerging-AI/ENOVA/escaler/pkg/scaler" 13 | 14 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config" 15 | 16 | swaggerfiles "github.com/swaggo/files" 17 | ginSwagger "github.com/swaggo/gin-swagger" 18 | ) 19 | 20 | func main() { 21 | confPath := flag.String("conf", "conf/settings.json", "Path to the configuration file") 22 | flag.Parse() 23 | 24 | fmt.Printf("Using configuration file: %s\n", *confPath) 25 | econfig := config.GetEConfig() 26 | econfig.Init(*confPath) 27 | econfig.PrintConfig() 28 | 29 | docs.SwaggerInfo.Title = "Monitor Service API" 30 | docs.SwaggerInfo.Description = "This is a monitor service." 31 | docs.SwaggerInfo.Version = "1.0" 32 | //docs.SwaggerInfo.Host = "121.36.212.78:30080" 33 | docs.SwaggerInfo.Host = "0.0.0.0:8183" 34 | docs.SwaggerInfo.BasePath = "/" 35 | docs.SwaggerInfo.Schemes = []string{"http", "https"} 36 | 37 | var wg sync.WaitGroup 38 | 39 | ch := make(chan meta.TaskSpecInterface) 40 | d := detector.NewDetectorServer(ch, nil) 41 | d.GetEngine().GET("/api/escaler/docs/*any", ginSwagger.WrapHandler(swaggerfiles.Handler)) 42 | 43 | s := scaler.NewServingScaler(ch) 44 | 45 | wg.Add(2) 46 | go d.RunInWaitGroup(&wg) 47 | go s.RunInWaitGroup(&wg) 48 | 49 | wg.Wait() 50 | close(ch) 51 | fmt.Println("All tasks finished.") 52 | } 53 | -------------------------------------------------------------------------------- /escaler/cmd/escaler/mock_enovaalgo.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/Emerging-AI/ENOVA/escaler/pkg/api" 7 | "github.com/gin-gonic/gin" 8 | ) 9 | 10 | func StartMockEnovaAlgoServer() { 11 | r := gin.Default() 12 | r.POST("/api/enovaalgo/v1/config_recommend", func(c *gin.Context) { 13 | c.JSON(http.StatusOK, api.EnvoaResponse{ 14 | Message: "", 15 | Code: 0, 16 | Result: api.ConfigRecommendResult{ 17 | MaxNumSeqs: 32, 18 | TensorParallelSize: 1, 19 | GpuMemoryUtilization: 0.8, 20 | Replicas: 1, 21 | }, 22 | TraceId: "TraceId", 23 | Version: "v1", 24 | }) 25 | }) 26 | 27 | r.POST("/api/enovaalgo/v1/anomaly_detect", func(c *gin.Context) { 28 | c.JSON(http.StatusOK, api.EnvoaResponse{ 29 | Message: "", 30 | Code: 0, 31 | Result: api.AnomalyDetectResponse{ 32 | IsAnomaly: 0, 33 | }, 34 | TraceId: "TraceId", 35 | Version: "v1", 36 | }) 37 | }) 38 | 39 | r.POST("/api/enovaalgo/v1/anomaly_recover", func(c *gin.Context) { 40 | c.JSON(http.StatusOK, api.EnvoaResponse{ 41 | Message: "", 42 | Code: 0, 43 | Result: api.ConfigRecommendResult{ 44 | MaxNumSeqs: 32, 45 | TensorParallelSize: 1, 46 | GpuMemoryUtilization: 0.8, 47 | Replicas: 1, 48 | }, 49 | TraceId: "TraceId", 50 | Version: "v1", 51 | }) 52 | }) 53 | r.Run(":8181") 54 | } 55 | -------------------------------------------------------------------------------- /escaler/conf/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "resource_backend": { 3 | "type": "docker" 4 | }, 5 | "docker": { 6 | 7 | }, 8 | "detector": { 9 | "prom": { 10 | "host": "enova-prometheus", 11 | "port": 9090 12 | }, 13 | "api": { 14 | "host": "0.0.0.0", 15 | "port": 8183, 16 | "version": "v1", 17 | "url_prefix": "/escaler" 18 | }, 19 | "detect_interval": 30 20 | }, 21 | "scaler": {}, 22 | "zmq": { 23 | "host": "127.0.0.1", 24 | "port": 4321 25 | }, 26 | "redis": { 27 | "addr": "127.0.0.1:6379", 28 | "password": "", 29 | "db": 0 30 | }, 31 | "enova_algo": { 32 | "host": "127.0.0.1:8181" 33 | }, 34 | "serving": { 35 | "image": "emergingai/enova:v0.0.8", 36 
| "start_cmd": [ 37 | "sleep", 38 | "inf" 39 | ], 40 | "network": "enova-mon_enova-net", 41 | "network_alias": "enova-serving", 42 | "name": "enova" 43 | }, 44 | "logger": { 45 | "name": "server", 46 | "path": "./var/log/emergingai", 47 | "level": "debug" 48 | } 49 | } -------------------------------------------------------------------------------- /escaler/pkg/api/api.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | "net/url" 10 | "strings" 11 | 12 | "github.com/Emerging-AI/ENOVA/escaler/pkg/logger" 13 | ) 14 | 15 | type HttpResponse interface{} 16 | 17 | type HeaderBuilderInterface interface { 18 | Build() (map[string]string, error) 19 | } 20 | 21 | type EmptyHeaderBuilder struct { 22 | } 23 | 24 | func (hb *EmptyHeaderBuilder) Build() (map[string]string, error) { 25 | return make(map[string]string), nil 26 | } 27 | 28 | type HttpApi[T HttpResponse] struct { 29 | Method string 30 | Url string 31 | HeaderBuilder HeaderBuilderInterface 32 | } 33 | 34 | func (api *HttpApi[T]) GetRequest(Params interface{}, Headers map[string]string) (*http.Request, error) { 35 | newHeader, err := api.HeaderBuilder.Build() 36 | if err != nil { 37 | logger.Errorf("HeaderBuilder get error: %v", err) 38 | return nil, err 39 | } 40 | 41 | for key, value := range Headers { 42 | newHeader[key] = value 43 | } 44 | 45 | logger.Infof("make http request") 46 | 47 | actualMethod := strings.ToUpper(api.Method) 48 | var requestData io.Reader 49 | actualUrl := api.Url 50 | switch actualMethod { 51 | case "POST", "PUT": 52 | bytesData, _ := json.Marshal(Params) 53 | reqBody := string(bytesData) 54 | logger.Infof("api %s, request body: %s", api.Url, reqBody) 55 | newHeader["Content-Type"] = "application/json" 56 | requestData = strings.NewReader(reqBody) 57 | case "GET", "DELETE": 58 | Url, _ := url.Parse(api.Url) // todo 处理err 59 | urlValues := url.Values{} 60 | if pm, ok := Params.(map[string]string); ok { 61 | for key, value := range pm { 62 | urlValues.Set(key, value) 63 | } 64 | Url.RawQuery = urlValues.Encode() 65 | actualUrl = Url.String() 66 | } 67 | 68 | } 69 | 70 | req, err := http.NewRequest(actualMethod, actualUrl, requestData) 71 | if err != nil { 72 | return nil, err 73 | } 74 | for key, value := range newHeader { 75 | req.Header.Add(key, value) 76 | } 77 | return req, nil 78 | } 79 | 80 | func (api *HttpApi[T]) Call(Params interface{}, Headers map[string]string) (T, error) { 81 | client := &http.Client{} 82 | req, err := api.GetRequest(Params, Headers) 83 | var resp T 84 | if err != nil { 85 | return resp, err 86 | } 87 | res, err := client.Do(req) // todo 处理err 88 | if err != nil { 89 | return resp, err 90 | } 91 | return api.processResponse(res) 92 | } 93 | 94 | func (api *HttpApi[T]) processResponse(res *http.Response) (T, error) { 95 | defer res.Body.Close() 96 | var httpResp T 97 | if res.StatusCode != http.StatusOK { 98 | resBody, _ := io.ReadAll(res.Body) 99 | msg := fmt.Sprintf("HttpApi get StatusOK not ok: status code: %d, resBody: %s", res.StatusCode, resBody) 100 | logger.Info(msg) 101 | return httpResp, errors.New(msg) 102 | } 103 | resBody, _ := io.ReadAll(res.Body) 104 | if err := json.Unmarshal(resBody, &httpResp); err != nil { 105 | logger.Error("Error parsing JSON response: %v", err) 106 | return httpResp, err 107 | } 108 | return httpResp, nil 109 | } 110 | -------------------------------------------------------------------------------- 
/escaler/pkg/api/enovaalgo.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | 7 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config" 8 | ) 9 | 10 | var enovaAlgoInitOnce sync.Once 11 | 12 | type enovaAlgoClient struct { 13 | ConfigRecommend HttpApi[EnvoaResponse] 14 | AnomalyDetect HttpApi[EnvoaResponse] 15 | AnomalyRecover HttpApi[EnvoaResponse] 16 | } 17 | 18 | type ConfigRecommendRequest struct { 19 | Llm struct { 20 | Framework string `json:"framework"` 21 | Param float32 `json:"param"` 22 | } `json:"llm"` 23 | Gpu struct { 24 | Name string `json:"name"` 25 | Spec int `json:"spec"` 26 | Num int `json:"num"` 27 | } `json:"gpu"` 28 | } 29 | 30 | type ConfigRecommendResult struct { 31 | MaxNumSeqs int `json:"max_num_seqs"` 32 | TensorParallelSize int `json:"tensor_parallel_size"` 33 | GpuMemoryUtilization float32 `json:"gpu_memory_utilization"` 34 | Replicas int `json:"replicas"` 35 | } 36 | 37 | type Llm struct { 38 | Framework string `json:"framework"` 39 | Param float32 `json:"param"` 40 | } 41 | 42 | type Gpu struct { 43 | Name string `json:"name"` 44 | Spec int `json:"spec"` 45 | Num int `json:"num"` 46 | } 47 | 48 | type MetricValue [2]float64 49 | 50 | type Metrics struct { 51 | ActiveRequests []MetricValue `json:"active_requests"` 52 | RunningRequests []MetricValue `json:"running_requests"` 53 | PendingRequests []MetricValue `json:"pending_requests"` 54 | GPUKVCacheUsage []MetricValue `json:"gpu_kv_cache_usage"` 55 | ServerNewRequests []MetricValue `json:"server_new_requests"` 56 | ServerSuccessRequests []MetricValue `json:"server_success_requests"` 57 | } 58 | 59 | type Configurations struct { 60 | MaxNumSeqs int `json:"max_num_seqs"` 61 | TensorParallelSize int `json:"tensor_parallel_size"` 62 | GPUMemoryUtilization float32 `json:"gpu_memory_utilization"` 63 | Replicas int `json:"replicas"` 64 | } 65 | 66 | type AnomalyRecoverRequest struct { 67 | Metrics []Metrics `json:"metrics"` 68 | Configurations Configurations `json:"configurations"` 69 | Llm Llm `json:"llm"` 70 | Gpu Gpu `json:"gpu"` 71 | } 72 | 73 | type AnomalyDetectRequest struct { 74 | Metrics []Metrics `json:"metrics"` 75 | Configurations Configurations `json:"configurations"` 76 | } 77 | 78 | type AnomalyDetectResponse struct { 79 | IsAnomaly int `json:"is_anomaly"` 80 | } 81 | 82 | var EnovaAlgoClient *enovaAlgoClient 83 | 84 | func GetEnovaAlgoClient() *enovaAlgoClient { 85 | enovaAlgoInitOnce.Do(func() { 86 | EnovaAlgoClient = &enovaAlgoClient{ 87 | ConfigRecommend: HttpApi[EnvoaResponse]{ 88 | Method: "POST", 89 | Url: fmt.Sprintf("http://%s/api/enovaalgo/v1/config_recommend", config.GetEConfig().EnovaAlgo.Host), 90 | HeaderBuilder: &EmptyHeaderBuilder{}, 91 | }, 92 | AnomalyDetect: HttpApi[EnvoaResponse]{ 93 | Method: "POST", 94 | Url: fmt.Sprintf("http://%s/api/enovaalgo/v1/anomaly_detect", config.GetEConfig().EnovaAlgo.Host), 95 | HeaderBuilder: &EmptyHeaderBuilder{}, 96 | }, 97 | AnomalyRecover: HttpApi[EnvoaResponse]{ 98 | Method: "POST", 99 | Url: fmt.Sprintf("http://%s/api/enovaalgo/v1/anomaly_recover", config.GetEConfig().EnovaAlgo.Host), 100 | HeaderBuilder: &EmptyHeaderBuilder{}, 101 | }, 102 | } 103 | }) 104 | return EnovaAlgoClient 105 | } 106 | -------------------------------------------------------------------------------- /escaler/pkg/api/prom.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | 7 | 
"github.com/Emerging-AI/ENOVA/escaler/pkg/config" 8 | ) 9 | 10 | var promClientInitOnce sync.Once 11 | 12 | type Metric map[string]string 13 | 14 | type ValueSet []interface{} 15 | 16 | type Series struct { 17 | Metric Metric `json:"metric"` 18 | Values []ValueSet `json:"values"` 19 | } 20 | 21 | type PromData struct { 22 | ResultType string `json:"resultType"` 23 | Result []Series `json:"result"` 24 | } 25 | 26 | type PromResponse struct { 27 | Status string 28 | Data PromData 29 | } 30 | 31 | type promClient struct { 32 | Query HttpApi[PromResponse] 33 | QueryRange HttpApi[PromResponse] 34 | } 35 | 36 | var PromClient *promClient 37 | 38 | func GetPromClient() *promClient { 39 | promClientInitOnce.Do(func() { 40 | PromClient = &promClient{ 41 | Query: HttpApi[PromResponse]{ 42 | Method: "GET", 43 | Url: fmt.Sprintf("http://%s:%d/api/v1/query", config.GetEConfig().Detector.Prom.Host, config.GetEConfig().Detector.Prom.Port), 44 | HeaderBuilder: &EmptyHeaderBuilder{}, 45 | }, 46 | QueryRange: HttpApi[PromResponse]{ 47 | Method: "GET", 48 | Url: fmt.Sprintf("http://%s:%d/api/v1/query_range", config.GetEConfig().Detector.Prom.Host, config.GetEConfig().Detector.Prom.Port), 49 | HeaderBuilder: &EmptyHeaderBuilder{}, 50 | }, 51 | } 52 | }) 53 | return PromClient 54 | } 55 | -------------------------------------------------------------------------------- /escaler/pkg/api/types.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | type EnvoaResponse struct { 4 | Code int 5 | Message string 6 | Result interface{} 7 | TraceId string 8 | Version string 9 | } 10 | -------------------------------------------------------------------------------- /escaler/pkg/httpserver/middleware/logger.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "strings" 7 | "time" 8 | 9 | "github.com/Emerging-AI/ENOVA/escaler/pkg/logger" 10 | 11 | "github.com/gin-gonic/gin" 12 | ) 13 | 14 | func RequestResponseLogger() gin.HandlerFunc { 15 | return func(c *gin.Context) { 16 | // just for /api/monitor/v1/ 17 | if !strings.Contains(c.Request.URL.Path, "/api/enova/v1") { 18 | c.Next() 19 | return 20 | } 21 | 22 | // 获取请求体 23 | reqBody, _ := ioutil.ReadAll(c.Request.Body) 24 | c.Request.Body = ioutil.NopCloser(bytes.NewBuffer(reqBody)) 25 | 26 | // 获取响应体 27 | respWriter := &responseWriter{body: bytes.NewBufferString(""), ResponseWriter: c.Writer} 28 | c.Writer = respWriter 29 | 30 | // 处理请求 31 | c.Next() 32 | 33 | // 记录请求和响应 34 | respStr := respWriter.body.String() 35 | if respStrLen := len(respStr); respStrLen > 1024 { 36 | respStr = respStr[:1024] 37 | } 38 | 39 | logger.Info("---------------------------------------------------------") 40 | logger.Infof("[INFO] [%s] %s %s %s\n%d %s\n", 41 | time.Now().Format("2006-01-02 15:04:05"), 42 | c.Request.Method, c.Request.URL.Path, string(reqBody), 43 | respWriter.status, respStr, 44 | ) 45 | logger.Info("---------------------------------------------------------") 46 | } 47 | } 48 | 49 | type responseWriter struct { 50 | body *bytes.Buffer 51 | gin.ResponseWriter 52 | status int 53 | } 54 | 55 | func (w *responseWriter) Write(b []byte) (int, error) { 56 | w.body.Write(b) 57 | return w.ResponseWriter.Write(b) 58 | } 59 | 60 | func (w *responseWriter) WriteHeader(statusCode int) { 61 | w.status = statusCode 62 | w.ResponseWriter.WriteHeader(statusCode) 63 | } 64 | 
--------------------------------------------------------------------------------
/escaler/pkg/httpserver/middleware/response.go:
--------------------------------------------------------------------------------
1 | package middleware
2 | 
3 | import (
4 | "bytes"
5 | "encoding/json"
6 | "net/http"
7 | 
8 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
9 | 
10 | "github.com/gin-gonic/gin"
11 | )
12 | 
13 | type EApiResponse struct {
14 | Message string `json:"message"`
15 | Code int `json:"code"`
16 | Result json.RawMessage `json:"result"`
17 | TraceId string `json:"trace_id"`
18 | Version string `json:"version"`
19 | }
20 | 
21 | type responseBodyWriter struct {
22 | gin.ResponseWriter
23 | body *bytes.Buffer
24 | }
25 | 
26 | func (w responseBodyWriter) Write(b []byte) (int, error) {
27 | w.body.Write(b)
28 | return w.ResponseWriter.Write(b)
29 | }
30 | 
31 | func (w responseBodyWriter) WriteString(s string) (int, error) {
32 | w.body.WriteString(s)
33 | return w.ResponseWriter.WriteString(s)
34 | }
35 | 
36 | func ResponseMiddleware() gin.HandlerFunc {
37 | return func(c *gin.Context) {
38 | 
39 | // invoke the next middleware or route handler first
40 | c.Next()
41 | 
42 | // error results are returned as-is
43 | if errResult, ok := c.Get("ErrorResult"); ok {
44 | c.JSON(http.StatusOK, errResult)
45 | return
46 | }
47 | 
48 | // marshal the successful result
49 | var jsonResult json.RawMessage
50 | result, ok := c.Get("Data")
51 | if !ok {
52 | return
53 | }
54 | 
55 | jsonResult, err := json.Marshal(result)
56 | if err != nil {
57 | c.AbortWithStatusJSON(http.StatusInternalServerError, EApiResponse{
58 | Message: "Internal error",
59 | Code: 500,
60 | Result: jsonResult,
61 | TraceId: GenerateTraceId(),
62 | Version: config.GetEConfig().Detector.Api.Version,
63 | })
64 | return
65 | }
66 | 
67 | c.JSON(http.StatusOK, EApiResponse{
68 | Message: "ok",
69 | Code: 0,
70 | Result: jsonResult,
71 | TraceId: GenerateTraceId(),
72 | Version: config.GetEConfig().Detector.Api.Version,
73 | })
74 | }
75 | }
76 | 
--------------------------------------------------------------------------------
/escaler/pkg/httpserver/middleware/trace.go:
--------------------------------------------------------------------------------
1 | package middleware
2 | 
3 | import (
4 | "github.com/gin-gonic/gin"
5 | uuid "github.com/google/uuid"
6 | )
7 | 
8 | const TraceIdKey = "trace_id"
9 | 
10 | func GenerateTraceId() string {
11 | // uuid.New returns a random (version 4) UUID
12 | return uuid.New().String()
13 | }
14 | 
15 | func GetTraceId() gin.HandlerFunc {
16 | return func(c *gin.Context) {
17 | traceId := c.GetHeader(TraceIdKey)
18 | 
19 | if traceId == "" {
20 | traceId = GenerateTraceId()
21 | c.Request.Header.Set(TraceIdKey, traceId)
22 | c.Set(TraceIdKey, traceId)
23 | }
24 | 
25 | // Set TraceIdKey header
26 | c.Writer.Header().Set(TraceIdKey, traceId)
27 | }
28 | }
29 | 
--------------------------------------------------------------------------------
/escaler/pkg/httpserver/server/router.go:
--------------------------------------------------------------------------------
1 | package server
2 | 
3 | import "github.com/gin-gonic/gin"
4 | 
5 | type BaseResource struct {
6 | }
7 | 
8 | func (r BaseResource) SetResult(c *gin.Context, result interface{}) {
9 | c.Set("Data", result)
10 | }
11 | 
12 | func (r BaseResource) SetErrorResult(c *gin.Context, result interface{}) {
13 | c.Set("ErrorResult", result)
14 | }
15 | 
16 | type PathResourceInterface interface {
17 | Path() string
18 | }
19 | 
20 | type GetResourceInterface interface {
21 | Get(c *gin.Context)
22 | }
23 | 
24 | type ListResourceInterface interface {
25 | List(c *gin.Context)
26 | }
27 | 
28 | type PostResourceInterface interface {
29 | Post(c *gin.Context)
30 | }
31 | 
32 | type PutResourceInterface interface {
33 | Put(c *gin.Context)
34 | }
35 | 
36 | type DeleteResourceInterface interface {
37 | Delete(c *gin.Context)
38 | }
39 | 
--------------------------------------------------------------------------------
/escaler/pkg/httpserver/utils/utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import (
4 | "reflect"
5 | "strconv"
6 | )
7 | 
8 | func HasMethod(s interface{}, methodName string) bool {
9 | typ := reflect.TypeOf(s)
10 | _, ok := typ.MethodByName(methodName)
11 | return ok
12 | }
13 | 
14 | // ParseUnixTimestamp normalizes a unix timestamp given in seconds or milliseconds to a seconds string.
15 | func ParseUnixTimestamp(ts int64) string {
16 | if ts >= (1 << 32) {
17 | // The timestamp is in milliseconds. Convert it to seconds.
18 | ts /= 1000
19 | }
20 | return strconv.FormatFloat(float64(ts), 'g', -1, 64)
21 | }
22 | 
--------------------------------------------------------------------------------
/escaler/pkg/logger/logger.go:
--------------------------------------------------------------------------------
1 | package logger
2 | 
3 | import (
4 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
5 | "github.com/sirupsen/logrus"
6 | )
7 | 
8 | var logger *logrus.Logger
9 | 
10 | func init() {
11 | logger = GetLogger()
12 | }
13 | 
14 | func GetLogger() *logrus.Logger {
15 | config := config.GetEConfig()
16 | logger := logrus.New()
17 | 
18 | // set the log level on the returned logger (not on the logrus std logger)
19 | switch config.Logger.Level {
20 | case "panic":
21 | logger.SetLevel(logrus.PanicLevel)
22 | case "fatal":
23 | logger.SetLevel(logrus.FatalLevel)
24 | case "error":
25 | logger.SetLevel(logrus.ErrorLevel)
26 | case "warn", "warning":
27 | logger.SetLevel(logrus.WarnLevel)
28 | case "info":
29 | logger.SetLevel(logrus.InfoLevel)
30 | case "debug":
31 | logger.SetLevel(logrus.DebugLevel)
32 | case "trace":
33 | logger.SetLevel(logrus.TraceLevel)
34 | default:
35 | logger.Warn("Unknown log level: ", config.Logger.Level)
36 | logger.SetLevel(logrus.InfoLevel) // fall back to the default level
37 | }
38 | 
39 | // set the log format
40 | logger.SetFormatter(&logrus.TextFormatter{
41 | TimestampFormat: "2006-01-02 15:04:05",
42 | })
43 | return logger
44 | }
45 | 
46 | func Info(args ...interface{}) {
47 | logger.Infoln(args...)
48 | }
49 | 
50 | func Infof(format string, args ...interface{}) {
51 | logger.Infof(format, args...)
52 | }
53 | 
54 | func Debug(args ...interface{}) {
55 | logger.Debugln(args...)
56 | }
57 | 
58 | func Debugf(format string, args ...interface{}) {
59 | logger.Debugf(format, args...)
60 | }
61 | 
62 | func Fatal(args ...interface{}) {
63 | logger.Fatalln(args...)
64 | }
65 | 
66 | func Warn(args ...interface{}) {
67 | logger.Warnln(args...)
68 | }
69 | 
70 | func Error(args ...interface{}) {
71 | logger.Errorln(args...)
72 | }
73 | 
74 | func Errorf(format string, args ...interface{}) {
75 | logger.Errorf(format, args...)
76 | }
77 | 
78 | func Panic(args ...interface{}) {
79 | logger.Panicln(args...)
80 | }
81 | 
--------------------------------------------------------------------------------
/escaler/pkg/meta/task.go:
--------------------------------------------------------------------------------
1 | package meta
2 | 
3 | import "github.com/Emerging-AI/ENOVA/escaler/pkg/api"
4 | 
5 | type TaskStatus string
6 | 
7 | const (
8 | TaskStatusCreated TaskStatus = "created"
9 | TaskStatusScheduling TaskStatus = "scheduling"
10 | TaskStatusRunning TaskStatus = "running"
11 | TaskStatusError TaskStatus = "error"
12 | TaskStatusFinished TaskStatus = "finished"
13 | )
14 | 
15 | type DetectTask struct {
16 | TaskSpec TaskSpecInterface
17 | Status TaskStatus
18 | }
19 | 
20 | type AnomalyRecommendResult struct {
21 | Timestamp int64 `json:"timestamp"`
22 | IsAnomaly bool `json:"isAnomaly"`
23 | ConfigRecommendResult api.ConfigRecommendResult `json:"configRecommendResult"`
24 | CurrentConfig api.ConfigRecommendResult `json:"currentConfig"`
25 | }
26 | 
27 | type TaskInfo struct {
28 | Name string `json:"name"`
29 | Status string `json:"status"`
30 | }
31 | 
--------------------------------------------------------------------------------
/escaler/pkg/queue/queue.go:
--------------------------------------------------------------------------------
1 | package queue
2 | 
3 | import "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
4 | 
5 | // TaskQueue matches the semantics of InnerChanTaskQueue: Pop also reports whether the queue is still open.
6 | type TaskQueue interface {
7 | Append(meta.TaskSpecInterface)
8 | Pop() (meta.TaskSpecInterface, bool)
9 | }
10 | 
11 | type InnerChanTaskQueue struct {
12 | Ch chan meta.TaskSpecInterface
13 | }
14 | 
15 | func (q *InnerChanTaskQueue) Append(task meta.TaskSpecInterface) {
16 | q.Ch <- task
17 | }
18 | 
19 | func (q *InnerChanTaskQueue) Pop() (meta.TaskSpecInterface, bool) {
20 | task, ok := <-q.Ch
21 | return task, ok
22 | }
23 | 
--------------------------------------------------------------------------------
/escaler/pkg/redis/redis.go:
--------------------------------------------------------------------------------
1 | package redis
2 | 
3 | import (
4 | "context"
5 | "time"
6 | 
7 | "github.com/go-redis/redis/v8"
8 | )
9 | 
10 | type RedisClient struct {
11 | Ctx context.Context
12 | Redis *redis.Client
13 | }
14 | 
15 | func (r *RedisClient) SetList(key string, values []string) error {
16 | _, err := r.DelList(key)
17 | if err != nil {
18 | return err
19 | }
20 | for _, value := range values {
21 | if err := r.Redis.RPush(r.Ctx, key, value).Err(); err != nil {
22 | return err
23 | }
24 | }
25 | return nil
26 | }
27 | 
28 | func (r *RedisClient) GetList(key string) ([]string, error) {
29 | storedStringArray, err := r.Redis.LRange(r.Ctx, key, 0, -1).Result()
30 | if err != nil {
31 | return storedStringArray, err
32 | }
33 | return storedStringArray, nil
34 | }
35 | 
36 | func (r *RedisClient) DelList(key string) (int64, error) {
37 | return r.Redis.Del(r.Ctx, key).Result()
38 | }
39 | 
40 | func (r *RedisClient) AppendList(key string, value string) error {
41 | if err := r.Redis.LPush(r.Ctx, key, value).Err(); err != nil {
42 | return err
43 | }
44 | return nil
45 | }
46 | 
47 | func (r *RedisClient) AppendListWithLimitSize(key string, value string, limit int64) error {
48 | if err := r.AppendList(key, value); err != nil {
49 | return err
50 | }
51 | if err := r.Redis.LTrim(r.Ctx, key, 0, limit).Err(); err != nil {
52 | return err
53 | }
54 | return nil
55 | }
56 | 
57 | func (r *RedisClient) Set(key string, value string, timeout int64) {
58 | r.Redis.Set(r.Ctx, key, value, time.Duration(timeout)*time.Microsecond) // NOTE: timeout is interpreted in microseconds
59 | }
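// Editor's note — usage sketch (the key and value below are hypothetical): the bounded
// list above can keep, for example, a rolling window of detection results:
//
//	client := NewRedisClient("127.0.0.1:6379", "", 0)
//	_ = client.AppendListWithLimitSize("detect:history", `{"isAnomaly":false}`, 100)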
60 | 
61 | func (r *RedisClient) Get(key string) string {
62 | result := r.Redis.Get(r.Ctx, key)
63 | if result.Err() != nil {
64 | return ""
65 | }
66 | return result.Val()
67 | }
68 | 
69 | func NewRedisClient(addr string, passwd string, db int) *RedisClient {
70 | ctx := context.Background()
71 | 
72 | rdb := redis.NewClient(&redis.Options{
73 | Addr: addr,
74 | Password: passwd,
75 | DB: db,
76 | })
77 | 
78 | return &RedisClient{
79 | Ctx: ctx,
80 | Redis: rdb,
81 | }
82 | }
83 | 
--------------------------------------------------------------------------------
/escaler/pkg/resource/clients.go:
--------------------------------------------------------------------------------
1 | package resource
2 | 
3 | import "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
4 | 
5 | type ClientInterface interface {
6 | DeployTask(spec meta.TaskSpec)
7 | DeleteTask(spec meta.TaskSpec)
8 | IsTaskExist(spec meta.TaskSpec) bool
9 | IsTaskRunning(spec meta.TaskSpec) bool
10 | GetRuntimeInfos(spec meta.TaskSpec) *meta.RuntimeInfo
11 | InPlaceRestart(spec meta.TaskSpec) bool
12 | }
13 | 
--------------------------------------------------------------------------------
/escaler/pkg/resource/utils/cmd.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "reflect"
7 | "strconv"
8 | 
9 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
10 | "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
11 | )
12 | 
13 | func shouldAppend(v interface{}) bool {
14 | switch v := v.(type) {
15 | case nil:
16 | return false
17 | case int:
18 | return v != 0
19 | case int32:
20 | return v != 0
21 | case int64:
22 | return v != 0
23 | case float32:
24 | return v != 0.0
25 | case float64:
26 | return v != 0.0
27 | case string:
28 | return v != ""
29 | case bool:
30 | return v // false is the zero value and means "not set"
31 | default:
32 | // types not explicitly checked above: treat the zero value as "not set"
33 | return !reflect.DeepEqual(v, reflect.Zero(reflect.TypeOf(v)).Interface())
34 | }
35 | }
36 | 
37 | func BuildCmdFromTaskSpec(spec meta.TaskSpec) []string {
38 | cmd := []string{
39 | "enova", "serving", "run", "--model", spec.Model, "--port", strconv.Itoa(spec.Port), "--host", spec.Host,
40 | "--backend", spec.Backend,
41 | "--exporter_service_name", spec.ExporterServiceName,
42 | }
43 | if config.GetEConfig().ResourceBackend.Type == config.ResourceBackendTypeK8s {
44 | cmd = append(cmd, "--exporter_endpoint", spec.Name+"-collector."+spec.Namespace+".svc.cluster.local:4317")
45 | } else {
46 | cmd = append(cmd, "--exporter_endpoint", spec.ExporterEndpoint)
47 | }
48 | 
49 | // best-effort: if the vllm backend config cannot be (un)marshalled, its params are skipped
50 | if vllmBackendConfig, ok := spec.BackendConfig.(*meta.VllmBackendConfig); ok {
51 | if jsonBytes, err := json.Marshal(vllmBackendConfig); err == nil {
52 | var vllmBackendConfigMap map[string]interface{}
53 | if err = json.Unmarshal(jsonBytes, &vllmBackendConfigMap); err == nil {
54 | // zero values mean "not set" and are not appended as cmd params
55 | for k, v := range vllmBackendConfigMap {
56 | if shouldAppend(v) {
57 | cmd = append(cmd, fmt.Sprintf("--%s", k), fmt.Sprintf("%v", v))
58 | }
59 | }
60 | }
61 | }
62 | }
63 | // Add extra serving params
64 | for k, v := range spec.BackendExtraConfig {
65 | cmd = append(cmd, fmt.Sprintf("--%s", k), fmt.Sprintf("%v", v))
66 | }
67 | return cmd
68 | }
69 | 
--------------------------------------------------------------------------------
/escaler/pkg/scaler/scaler.go:
--------------------------------------------------------------------------------
1 | package scaler
2 | 
3 | import (
4 | "sync"
5 | 
6 | "github.com/Emerging-AI/ENOVA/escaler/pkg/meta"
7 | "github.com/Emerging-AI/ENOVA/escaler/pkg/queue"
8 | "github.com/Emerging-AI/ENOVA/escaler/pkg/resource"
9 | 
10 | "github.com/Emerging-AI/ENOVA/escaler/pkg/config"
11 | "github.com/Emerging-AI/ENOVA/escaler/pkg/logger"
12 | "github.com/Emerging-AI/ENOVA/escaler/pkg/zmq"
13 | )
14 | 
15 | type EnovaServingScaler struct {
16 | Queue *queue.InnerChanTaskQueue
17 | Client resource.ClientInterface
18 | stopped bool
19 | }
20 | 
21 | func NewServingScaler(ch chan meta.TaskSpecInterface) *EnovaServingScaler {
22 | if config.GetEConfig().ResourceBackend.Type == config.ResourceBackendTypeK8s {
23 | return NewK8sServingScaler(ch)
24 | }
25 | return NewLocalDockerServingScaler(ch)
26 | }
27 | 
28 | func NewLocalDockerServingScaler(ch chan meta.TaskSpecInterface) *EnovaServingScaler {
29 | return &EnovaServingScaler{
30 | Queue: &queue.InnerChanTaskQueue{
31 | Ch: ch,
32 | },
33 | Client: resource.NewDockerResourceClient(),
34 | stopped: false,
35 | }
36 | }
37 | 
38 | func NewK8sServingScaler(ch chan meta.TaskSpecInterface) *EnovaServingScaler {
39 | return &EnovaServingScaler{
40 | Queue: &queue.InnerChanTaskQueue{
41 | Ch: ch,
42 | },
43 | Client: resource.NewK8sResourceClient(),
44 | }
45 | }
46 | 
47 | func NewZmqSubscriber() *zmq.ZmqSubscriber {
48 | sub := zmq.ZmqSubscriber{
49 | Host: config.GetEConfig().Zmq.Host,
50 | Port: config.GetEConfig().Zmq.Port,
51 | }
52 | sub.Init()
53 | return &sub
54 | }
55 | 
56 | func (s *EnovaServingScaler) Run() {
57 | for {
58 | // block until the next task arrives
59 | logger.Infof("enovaServingScaler waiting for next task")
60 | task, ok := s.Queue.Pop()
61 | if !ok {
62 | // the task channel was closed: no more work will arrive
63 | return
64 | }
65 | actualTask := task.(*meta.TaskSpec)
66 | 
67 | if actualTask.Replica == 0 {
68 | // zero replicas means the task should be removed
69 | s.Client.DeleteTask(*actualTask)
70 | } else {
71 | // deploy (or re-deploy) the task with the desired replica count
72 | s.Client.DeployTask(*actualTask)
73 | }
74 | }
75 | }
76 | 
77 | func (s *EnovaServingScaler) Stop() {
78 | 
79 | }
80 | 
81 | func (s *EnovaServingScaler) RunInWaitGroup(wg *sync.WaitGroup) {
82 | defer wg.Done()
83 | s.Run()
84 | }
85 | 
--------------------------------------------------------------------------------
/escaler/pkg/utils/cache.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import (
4 | "github.com/Emerging-AI/ENOVA/escaler/pkg/redis"
5 | )
6 | 
7 | type TTLCache interface {
8 | Set(key string, value string, timeout int64)
9 | Get(key string) string
10 | }
11 | 
12 | type RedisTTLCache struct {
13 | Redis *redis.RedisClient
14 | }
15 | 
16 | func NewRedisTTLCache(addr string, passwd string, db int) *RedisTTLCache {
17 | return &RedisTTLCache{
18 | redis.NewRedisClient(addr, passwd, db),
19 | }
20 | }
21 | 
22 | func (r *RedisTTLCache) Set(key string, value string, timeout int64) {
23 | r.Redis.Set(key, value, timeout)
24 | }
25 | 
26 | func (r *RedisTTLCache) Get(key string) string {
27 | return r.Redis.Get(key)
28 | }
29 | 
--------------------------------------------------------------------------------
/escaler/pkg/utils/utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 | 
3 | import "reflect"
4 | 
5 | func GetAllField(s interface{}) []reflect.StructField {
6 | ret := []reflect.StructField{}
7 | t := reflect.TypeOf(s)
8 | 
9 | for i := 0; i < t.NumField(); i++ {
10 | field := t.Field(i)
11 | ret = append(ret, field)
12 | }
13 | return ret
14 | }
15 | 
--------------------------------------------------------------------------------
/escaler/pkg/zmq/zmq.go:
--------------------------------------------------------------------------------
1 | package zmq
2 | 
3 | import (
4 | "fmt"
5 | "log"
6 | 
7 | zmq "github.com/pebbe/zmq4"
8 | )
9 | 
10 | type ZmqPublisher struct {
11 | Host string
12 | Port int
13 | publisher *zmq.Socket
14 | }
15 | 
16 | type ZmqSubscriber struct {
17 | Host string
18 | Port int
19 | subscriber *zmq.Socket
20 | }
21 | 
22 | func (p *ZmqPublisher) Init() {
23 | publisher, err := zmq.NewSocket(zmq.PUB)
24 | if err != nil {
25 | fmt.Printf("Failed to create publisher socket: %v\n", err)
26 | return
27 | }
28 | p.publisher = publisher
29 | address := fmt.Sprintf("tcp://%s:%d", p.Host, p.Port)
30 | if err = p.publisher.Bind(address); err != nil {
31 | p.Close()
32 | log.Fatal("ZmqPublisher init error: ", err)
33 | }
34 | }
35 | 
36 | func (p *ZmqPublisher) Send(msg string) (bool, error) {
37 | _, err := p.publisher.Send(msg, 0)
38 | 
39 | if err != nil {
40 | return false, err
41 | }
42 | return true, nil
43 | }
44 | 
45 | func (p *ZmqPublisher) Close() {
46 | if p.publisher != nil {
47 | p.publisher.Close()
48 | }
49 | }
50 | 
51 | func (s *ZmqSubscriber) Init() {
52 | subscriber, err := zmq.NewSocket(zmq.SUB)
53 | if err != nil {
54 | log.Fatal(err)
55 | }
56 | // subscribe to all messages
57 | if err = subscriber.SetSubscribe(""); err != nil {
58 | log.Fatal(err)
59 | }
60 | s.subscriber = subscriber
61 | address := fmt.Sprintf("tcp://%s:%d", s.Host, s.Port)
62 | if err = subscriber.Connect(address); err != nil {
63 | s.Close()
64 | log.Fatal(err)
65 | }
66 | }
67 | 
68 | func (s *ZmqSubscriber) Close() {
69 | if s.subscriber != nil {
70 | s.subscriber.Close()
71 | }
72 | }
73 | 
74 | func (s *ZmqSubscriber) Recv() (string, error) {
75 | msg, err := s.subscriber.Recv(0)
76 | return msg, err
77 | }
78 | 
--------------------------------------------------------------------------------
/escaler/scripts/build_swagger.sh:
--------------------------------------------------------------------------------
1 | export GOPATH=$(go env GOPATH | awk -F ':' '{print $1}')
2 | export PATH=$PATH:$GOPATH/bin
3 | swag init -g cmd/escaler/main.go -o cmd/escaler/docs --parseDependency --parseInternal
--------------------------------------------------------------------------------
/escaler/scripts/generate_mock_files.sh:
--------------------------------------------------------------------------------
1 | mockgen -source=vendor/github.com/docker/docker/client/interface.go -destination=cmd/escaler/mock_docker_client.go -package=main
2 | 
--------------------------------------------------------------------------------
/escaler/scripts/generate_ot_clientset.sh:
--------------------------------------------------------------------------------
1 | go install k8s.io/code-generator/cmd/client-gen
2 | export GOPATH=$(go env GOPATH | awk -F ':' '{print $1}')
3 | export PATH=$PATH:$GOPATH/bin
4 | client-gen \
5 | --input-base="/root/go/pkg/mod/github.com/open-telemetry/opentelemetry-operator@v1.51.0/apis/v1alpha1" \
6 | --input="" \
7 | --output-pkg="github.com/Emerging-AI/ENOVA/escaler/pkg/generated/ot/clientset" \
8 | --output-dir=./pkg/generated/ot/clientset \
9 | --clientset-name="versioned" \
10 | --go-header-file="./hack/boilerplate.go.txt"
11 | 
--------------------------------------------------------------------------------
/escaler/scripts/local_docker_run.sh:
--------------------------------------------------------------------------------
1 | redis-server &
2 | escaler "$@"
3 | 
--------------------------------------------------------------------------------
/front/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules
--------------------------------------------------------------------------------
/front/.env.development:
--------------------------------------------------------------------------------
1 | VITE_APP_BASE_URL="/"
--------------------------------------------------------------------------------
/front/.env.production:
--------------------------------------------------------------------------------
1 | VITE_APP_BASE_URL="/"
--------------------------------------------------------------------------------
/front/.eslintrc.cjs:
--------------------------------------------------------------------------------
1 | /* eslint-env node */
2 | require('@rushstack/eslint-patch/modern-module-resolution')
3 | 
4 | module.exports = {
5 | root: true,
6 | 'extends': [
7 | 'plugin:vue/vue3-essential',
8 | 'eslint:recommended',
9 | '@vue/eslint-config-typescript',
10 | '@vue/eslint-config-prettier/skip-formatting'
11 | ],
12 | parserOptions: {
13 | ecmaVersion: 'latest'
14 | },
15 | globals: {
16 | ElMessage: 'readonly',
17 | }
18 | }
19 | 
--------------------------------------------------------------------------------
/front/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 | 
10 | node_modules
11 | .DS_Store
12 | dist
13 | dist-ssr
14 | coverage
15 | *.local
16 | 
17 | /cypress/videos/
18 | /cypress/screenshots/
19 | 
20 | # Editor directories and files
21 | .vscode/*
22 | !.vscode/extensions.json
23 | .idea
24 | *.suo
25 | *.ntvs*
26 | *.njsproj
27 | *.sln
28 | *.sw?
29 | 
30 | *.tsbuildinfo
31 | 
--------------------------------------------------------------------------------
/front/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json.schemastore.org/prettierrc",
3 | "semi": false,
4 | "tabWidth": 2,
5 | "singleQuote": true,
6 | "printWidth": 100,
7 | "trailingComma": "none"
8 | }
--------------------------------------------------------------------------------
/front/README.md:
--------------------------------------------------------------------------------
1 | # enova-web
2 | 
3 | This template should help get you started developing with Vue 3 in Vite.
4 | 
5 | ## Recommended IDE Setup
6 | 
7 | [VSCode](https://code.visualstudio.com/) + [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) (and disable Vetur).
8 | 9 | ## Type Support for `.vue` Imports in TS 10 | 11 | TypeScript cannot handle type information for `.vue` imports by default, so we replace the `tsc` CLI with `vue-tsc` for type checking. In editors, we need [Volar](https://marketplace.visualstudio.com/items?itemName=Vue.volar) to make the TypeScript language service aware of `.vue` types. 12 | 13 | ## Customize configuration 14 | 15 | See [Vite Configuration Reference](https://vitejs.dev/config/). 16 | 17 | ## Project Setup 18 | 19 | ```sh 20 | npm install 21 | ``` 22 | 23 | ### Compile and Hot-Reload for Development 24 | 25 | ```sh 26 | npm run dev 27 | ``` 28 | 29 | ### Type-Check, Compile and Minify for Production 30 | 31 | ```sh 32 | npm run build 33 | ``` 34 | 35 | ### Lint with [ESLint](https://eslint.org/) 36 | 37 | ```sh 38 | npm run lint 39 | ``` 40 | -------------------------------------------------------------------------------- /front/auto-imports.d.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | /* prettier-ignore */ 3 | // @ts-nocheck 4 | // noinspection JSUnusedGlobalSymbols 5 | // Generated by unplugin-auto-import 6 | export {} 7 | declare global { 8 | const ElMessage: typeof import('element-plus/es')['ElMessage'] 9 | } 10 | -------------------------------------------------------------------------------- /front/env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /front/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Enova 9 | 10 | 11 | 12 |
13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /front/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "enova-web", 3 | "version": "1.0.0", 4 | "private": true, 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "run-p type-check \"build-only {@}\" --", 9 | "preview": "vite preview", 10 | "build-only": "vite build", 11 | "type-check": "vue-tsc --build --force", 12 | "lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs,.ts,.tsx,.cts,.mts --fix --ignore-path .gitignore", 13 | "format": "prettier --write src/" 14 | }, 15 | "dependencies": { 16 | "@types/axios": "^0.14.0", 17 | "@vueuse/core": "^10.9.0", 18 | "axios": "^1.6.8", 19 | "dayjs": "^1.11.11", 20 | "echarts": "^5.5.0", 21 | "element-plus": "^2.6.3", 22 | "pinia": "^2.1.7", 23 | "vue": "^3.4.21", 24 | "vue-i18n": "^9.13.1", 25 | "vue-router": "^4.3.0" 26 | }, 27 | "devDependencies": { 28 | "@rushstack/eslint-patch": "^1.8.0", 29 | "@tsconfig/node20": "^20.1.4", 30 | "@types/node": "^20.12.5", 31 | "@vitejs/plugin-vue": "^5.0.4", 32 | "@vue/eslint-config-prettier": "^9.0.0", 33 | "@vue/eslint-config-typescript": "^13.0.0", 34 | "@vue/tsconfig": "^0.5.1", 35 | "autoprefixer": "^10.4.19", 36 | "eslint": "^8.57.0", 37 | "eslint-plugin-vue": "^9.23.0", 38 | "npm-run-all2": "^6.1.2", 39 | "postcss": "^8.4.38", 40 | "prettier": "^3.2.5", 41 | "rollup-plugin-visualizer": "^5.12.0", 42 | "sass": "^1.75.0", 43 | "tailwindcss": "^3.4.3", 44 | "typescript": "~5.4.0", 45 | "unplugin-auto-import": "^0.17.5", 46 | "unplugin-vue-components": "^0.26.0", 47 | "vite": "^5.2.8", 48 | "vite-plugin-svg-icons": "^2.0.1", 49 | "vite-plugin-vue-devtools": "^7.0.25", 50 | "vue-tsc": "^2.0.11" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /front/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /front/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/public/favicon.ico -------------------------------------------------------------------------------- /front/src/App.vue: -------------------------------------------------------------------------------- 1 | 6 | 13 | -------------------------------------------------------------------------------- /front/src/api/instance.ts: -------------------------------------------------------------------------------- 1 | import service from '@/utils/request' 2 | enum API { 3 | ENODE = '/v1/serving', 4 | MONITOR = '/api/v1/query_range', 5 | PILOT = '/api/escaler/v1' 6 | } 7 | 8 | export const getServing = () => service({ 9 | url: API.ENODE, 10 | method: 'get', 11 | }); 12 | 13 | export const addServing = () => service({ 14 | url: API.ENODE, 15 | method: 'post', 16 | data: { 17 | "instance_name": "enova_test", 18 | "model": "THUDM/chatglm3-6b" 19 | }, 20 | }) 21 | 22 | export const deleteServing = (id: string) => service({ 23 | url: `${API.ENODE}/${id}`, 24 | method: 'delete', 25 | }); 26 | 27 | export const getExperiment = (params: string) => service({ 28 | url: `${API.ENODE}/instance/test?${params}`, 29 | method: 'get', 30 | }) 31 | 32 | export const createTest = (data: any) => 
service({ 33 | url: `${API.ENODE}/instance/test`, 34 | method: 'post', 35 | data 36 | }) 37 | 38 | const getPromUrl = (port: number) => { 39 | const { protocol, hostname } = window.location 40 | if (import.meta.env.MODE === 'development') return '/' 41 | return `${protocol}//${hostname}:${port}/` 42 | } 43 | 44 | export const getMonitorData = (params?: string) => service({ 45 | url: `${API.MONITOR}?${params}`, 46 | baseURL: getPromUrl(32826), 47 | method: 'get', 48 | }) 49 | 50 | export const getDetectHistory = (params?: string) => service({ 51 | url: `${API.PILOT}/task/detect/history?${params}`, 52 | baseURL: getPromUrl(8183), 53 | method: 'get', 54 | }) -------------------------------------------------------------------------------- /front/src/assets/empty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/src/assets/empty.png -------------------------------------------------------------------------------- /front/src/assets/filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/src/assets/filter.png -------------------------------------------------------------------------------- /front/src/assets/logo/emergingai_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/src/assets/logo/emergingai_b.png -------------------------------------------------------------------------------- /front/src/assets/logo/emergingai_w.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/front/src/assets/logo/emergingai_w.png -------------------------------------------------------------------------------- /front/src/assets/svg/auto.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /front/src/assets/svg/autoRefresh.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /front/src/assets/svg/cross.svg: -------------------------------------------------------------------------------- 1 | 3 | 5 | 8 | 9 | -------------------------------------------------------------------------------- /front/src/assets/svg/docker.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /front/src/assets/svg/earth.svg: -------------------------------------------------------------------------------- 1 | 3 | 5 | 8 | 9 | -------------------------------------------------------------------------------- /front/src/assets/svg/home.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /front/src/assets/svg/info.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /front/src/assets/svg/log.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /front/src/assets/svg/setup.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /front/src/assets/svg/toggle.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /front/src/assets/svg/user.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /front/src/components/Drawer.vue: -------------------------------------------------------------------------------- 1 | 20 | 44 | 54 | -------------------------------------------------------------------------------- /front/src/components/Language.vue: -------------------------------------------------------------------------------- 1 | 23 | 65 | 73 | -------------------------------------------------------------------------------- /front/src/components/Pagination.vue: -------------------------------------------------------------------------------- 1 | 16 | 17 | 62 | -------------------------------------------------------------------------------- /front/src/components/SearchInput.vue: -------------------------------------------------------------------------------- 1 | 12 | -------------------------------------------------------------------------------- /front/src/components/SummaryTip.vue: -------------------------------------------------------------------------------- 1 | 9 | 10 | 18 | -------------------------------------------------------------------------------- /front/src/components/SvgIcon.vue: -------------------------------------------------------------------------------- 1 | 6 | 7 | -------------------------------------------------------------------------------- /front/src/components/TimeRangePicker.vue: -------------------------------------------------------------------------------- 1 | 16 | 17 | 123 | -------------------------------------------------------------------------------- /front/src/components/experiment/TestDetail.vue: -------------------------------------------------------------------------------- 1 | 11 | 23 | -------------------------------------------------------------------------------- /front/src/components/instance/InstanceDetail.vue: -------------------------------------------------------------------------------- 1 | 11 | 23 | -------------------------------------------------------------------------------- /front/src/hooks/useInitQueryRange.ts: -------------------------------------------------------------------------------- 1 | import { useExperimentStore } from '@/stores/experiment' 2 | import { useInstanceStore } from '@/stores/instance' 3 | import dayjs from 'dayjs' 4 | import utc from 'dayjs/plugin/utc' 5 | import {storeToRefs} from 'pinia' 6 | 7 | const getTestDuration = (duration: number, unit: string): number => { 8 | switch (unit) { 9 | case 'hour': 10 | return duration * 60 * 60 11 | case 'min': 12 | return duration * 60 13 | case 'sec': 14 | return (Math.min(duration, 10)) 15 | default: 16 | return 0 17 | } 18 | } 19 | 20 | const useInitQueryRange = () => { 21 | const { activeExperiment } = storeToRefs(useExperimentStore()) 22 | const { chartTimeRange, searchTimePair } = 
storeToRefs(useInstanceStore()) 23 | dayjs.extend(utc) 24 | let startTime = new Date() 25 | let endTime = new Date() 26 | 27 | if (activeExperiment.value != null) { 28 | startTime = new Date(dayjs.utc(activeExperiment.value.create_time).toDate()) 29 | const { duration, duration_unit } = activeExperiment.value.test_spec 30 | const testDuration = getTestDuration(duration, duration_unit) 31 | startTime.setTime(startTime.getTime()) 32 | endTime.setTime(Math.min(startTime.getTime() + (testDuration + 180) * 1000, Date.now())) 33 | } else { 34 | startTime.setTime(startTime.getTime() - 3600 * 1000) 35 | } 36 | const _start = dayjs(startTime).format('YYYY-MM-DD HH:mm:ss') 37 | const _end = dayjs(endTime).format('YYYY-MM-DD HH:mm:ss') 38 | 39 | chartTimeRange.value = [_start, _end] 40 | searchTimePair.value = [_start, _end] 41 | return { start: _start, end: _end } 42 | } 43 | 44 | export { useInitQueryRange } -------------------------------------------------------------------------------- /front/src/layout/header/index.vue: -------------------------------------------------------------------------------- 1 | 20 | 21 | 28 | 29 | -------------------------------------------------------------------------------- /front/src/layout/index.vue: -------------------------------------------------------------------------------- 1 | 24 | 25 | 39 | -------------------------------------------------------------------------------- /front/src/layout/sidebar/index.vue: -------------------------------------------------------------------------------- 1 | 27 | 28 | 66 | 67 | 80 | -------------------------------------------------------------------------------- /front/src/locales/index.ts: -------------------------------------------------------------------------------- 1 | import { createI18n } from 'vue-i18n' 2 | import zhLoacles from './lang/zh' 3 | import enLocales from './lang/en' 4 | const getLocale = (): string => { 5 | let locale = localStorage.getItem('lang') 6 | if (!locale) { 7 | locale = navigator.language.split('-')[0] 8 | } 9 | if (!locale || locale === 'zh') { 10 | locale = 'zh_CN' 11 | } 12 | return locale 13 | } 14 | 15 | const i18n = createI18n({ 16 | locale: getLocale(), 17 | legacy: false, 18 | globalInjection: true, 19 | fallbackLocale: 'zh_CN', 20 | messages: { 21 | zh_CN: { ...zhLoacles }, 22 | en: { ...enLocales } 23 | } 24 | }) 25 | 26 | export default i18n 27 | -------------------------------------------------------------------------------- /front/src/main.ts: -------------------------------------------------------------------------------- 1 | import './styles/index.css' 2 | 3 | import { createApp } from 'vue' 4 | import { createPinia } from 'pinia' 5 | import 'virtual:svg-icons-register' 6 | import i18n from './locales' 7 | import App from './App.vue' 8 | import router from './router' 9 | import './styles/index.scss' 10 | 11 | const app = createApp(App) 12 | 13 | app.use(createPinia()) 14 | app.use(router) 15 | app.use(i18n) 16 | app.mount('#app') 17 | -------------------------------------------------------------------------------- /front/src/router/index.ts: -------------------------------------------------------------------------------- 1 | import { createRouter, createWebHistory } from 'vue-router' 2 | import Layout from '@/layout/index.vue' 3 | 4 | const router = createRouter({ 5 | history: createWebHistory(import.meta.env.BASE_URL), 6 | routes: [ 7 | { 8 | path: '/', 9 | name: 'home', 10 | component: Layout, 11 | redirect: '/instance', 12 | children: [ 13 | { 14 | path: '/instance', 15 | name: 
'instance',
16 | component: () => import('../views/Instance.vue'),
17 | meta: {
18 | title: 'service',
19 | icon: 'docker'
20 | }
21 | },
22 | {
23 | path: '/record',
24 | name: 'testRecord',
25 | component: () => import('../views/TestRecord.vue'),
26 | meta: {
27 | title: 'record',
28 | icon: 'log'
29 | }
30 | }
31 | ]
32 | }
33 | ]
34 | })
35 | 
36 | export default router
37 | 
--------------------------------------------------------------------------------
/front/src/stores/app.ts:
--------------------------------------------------------------------------------
1 | import { defineStore } from 'pinia'
2 | import navImg from '@/assets/logo/emergingai_w.png'
3 | import loginImg from '@/assets/logo/emergingai_b.png'
4 | 
5 | export const useAppStore = defineStore('app', {
6 | state: () => ({
7 | navLogo: {
8 | src: navImg,
9 | width: 'auto',
10 | height: '56px',
11 | alt: 'Emergingai'
12 | },
13 | loginLogo: {
14 | src: loginImg,
15 | width: '220px',
16 | height: 'auto',
17 | alt: 'Emergingai'
18 | },
19 | sidebarStatus: true
20 | }),
21 | actions: {
22 | toggleSideBar(): void {
23 | this.sidebarStatus = !this.sidebarStatus
24 | }
25 | }
26 | })
27 | 
--------------------------------------------------------------------------------
/front/src/stores/config.ts:
--------------------------------------------------------------------------------
1 | interface InstanceType {
2 | instance_id: string
3 | instance_name: string
4 | instance_spec: {
5 | cpu: {
6 | brand_name: string
7 | core_amount: number
8 | }
9 | gpu: {
10 | product: string
11 | video_memory: string
12 | card_amount: number
13 | }
14 | memory: string
15 | }
16 | startup_args: {
17 | exported_job: string
18 | dtype: string
19 | load_format: string
20 | max_num_batched_tokens: number
21 | max_num_seqs: number
22 | max_paddings: number
23 | max_seq_len: number
24 | model: string
25 | tokenizer: string
26 | pipeline_parallel_size: number
27 | tensor_parallel_size: number
28 | quantization: string | null
29 | }
30 | serving_id: string
31 | deploy_status: string
32 | create_time: string
33 | }
34 | 
35 | interface ExperimentType {
36 | test_id: string
37 | instance_id: string
38 | test_spec: {
39 | data_set: string
40 | duration: number
41 | duration_unit: string
42 | distribution: string
43 | tps_mean: number
44 | tps_std?: string
45 | }
46 | param_spec: {
47 | max_tokens: number
48 | temperature: number
49 | top_p: number
50 | others: string
51 | }
52 | test_status: string
53 | prompt_tps: number
54 | generation_tps: number
55 | result: {
56 | total: number
57 | success: number
58 | elasped_avg: number
59 | }
60 | create_time: string | Date
61 | }
62 | 
63 | export type { InstanceType, ExperimentType }
64 | 
--------------------------------------------------------------------------------
/front/src/stores/experiment.ts:
--------------------------------------------------------------------------------
1 | import { defineStore } from 'pinia'
2 | import { getExperiment } from '@/api/instance'
3 | import type { ExperimentType } from './config'
4 | import dayjs from 'dayjs'
5 | import utc from 'dayjs/plugin/utc'
6 | interface ExperimentStoreState {
7 | testList: ExperimentType[]
8 | currentId: string
9 | drawerVisible: boolean
10 | }
11 | 
12 | interface ExperimentRes {
13 | data: ExperimentType[]
14 | page: number
15 | size: number
16 | total_num: number
17 | total_page: number
18 | page_size: number
19 | }
20 | 
21 | export const useExperimentStore = defineStore('experiment', {
22 | state: (): ExperimentStoreState => ({
23 | testList: [],
24 | currentId: '',
25 | 
drawerVisible: false 26 | }), 27 | getters: { 28 | activeExperiment: (state): ExperimentType | undefined => { 29 | return state.testList.find((item) => item.test_id === state.currentId) || undefined 30 | } 31 | }, 32 | actions: { 33 | getTestList(params: string) { 34 | dayjs.extend(utc) 35 | return new Promise((resolve, reject) => { 36 | getExperiment(params) 37 | .then((res) => { 38 | this.testList = 39 | res.data.length > 0 40 | ? res.data.map((i: ExperimentType) => { 41 | return { 42 | ...i, 43 | create_time: dayjs.utc(i.create_time).toDate() 44 | } 45 | }) 46 | : [] 47 | resolve(res as unknown as ExperimentRes) 48 | }) 49 | .catch(() => { 50 | reject(null) 51 | }) 52 | }) 53 | } 54 | } 55 | }) 56 | -------------------------------------------------------------------------------- /front/src/stores/instance.ts: -------------------------------------------------------------------------------- 1 | import { defineStore } from 'pinia' 2 | import type { InstanceType } from './config' 3 | import { getServing } from '@/api/instance' 4 | interface instanceStoreState { 5 | instanceList: InstanceType[] 6 | currentId: string 7 | chartTimeRange: string[] 8 | tableLoading: boolean 9 | searchTimePair: string[] 10 | } 11 | interface chartQueryParams { 12 | start: string | number 13 | end: string | number 14 | step: string | number 15 | } 16 | 17 | export const useInstanceStore = defineStore('instance', { 18 | state: (): instanceStoreState => ({ 19 | instanceList: [], 20 | currentId: '', 21 | chartTimeRange: [], 22 | tableLoading: false, 23 | searchTimePair: [] 24 | }), 25 | getters: { 26 | activeInstance(): InstanceType | undefined { 27 | return this.instanceList.find((item: InstanceType) => item.instance_id === this.currentId) 28 | }, 29 | instanceNameMap(): Map { 30 | const res = new Map() 31 | this.instanceList.forEach((item: InstanceType) => { 32 | res.set(item.instance_id, item.instance_name) 33 | }) 34 | return res 35 | }, 36 | chartQuery(): chartQueryParams { 37 | const [start, end] = this.chartTimeRange 38 | const _start = start ? Math.floor(new Date(start).getTime() / 1000).toFixed(3) : '' 39 | const _end = end ? Math.floor(new Date(end).getTime() / 1000).toFixed(3) : '' 40 | return { 41 | start: _start, 42 | end: _end, 43 | step: '15s' 44 | } 45 | }, 46 | activeServingId(): string { 47 | return this.activeInstance != null ? this.activeInstance.serving_id : this.instanceList[0]?.serving_id ?? '' 48 | }, 49 | activeServingJob(): string { 50 | return this.activeInstance != null ? this.activeInstance.startup_args.exported_job : this.instanceList[0]?.startup_args.exported_job ?? 
'' 51 | }, 52 | }, 53 | actions: { 54 | getInstanceList(): void { 55 | this.tableLoading = true 56 | getServing().then((res) => { 57 | this.instanceList = res.data 58 | }).catch((err) => { 59 | console.error(err) 60 | }).finally(() => { 61 | this.tableLoading = false 62 | }) 63 | } 64 | } 65 | }) 66 | -------------------------------------------------------------------------------- /front/src/styles/element/index.scss: -------------------------------------------------------------------------------- 1 | @forward 'element-plus/theme-chalk/src/common/var.scss' with ( 2 | $colors: ( 3 | 'primary': ( 4 | 'base': #303133, 5 | ), 6 | ), 7 | $table: ( 8 | 'header-bg-color': #EBEDF0, 9 | 'header-text-color': #606266 10 | ), 11 | $collapse: ( 12 | 'header-height': 36px, 13 | 'header-bg-color': #F0F2F5 14 | ) 15 | 16 | ); -------------------------------------------------------------------------------- /front/src/styles/index.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | -------------------------------------------------------------------------------- /front/src/styles/index.scss: -------------------------------------------------------------------------------- 1 | @import './element-ui.scss'; -------------------------------------------------------------------------------- /front/src/utils/request.ts: -------------------------------------------------------------------------------- 1 | import axios, { type AxiosResponse } from 'axios' 2 | 3 | const service = axios.create({ 4 | baseURL: '/', 5 | timeout: 10000 6 | }) 7 | 8 | service.interceptors.request.use( 9 | (config) => { 10 | return config 11 | }, 12 | (error) => { 13 | return Promise.reject(error) 14 | } 15 | ) 16 | 17 | service.interceptors.response.use( 18 | (response: AxiosResponse) => { 19 | const res = response.data 20 | if (Number(res.code) === 0 || res.status === 'success') { 21 | return res.code === 0 ? 
res.result : res 22 | } else { 23 | ElMessage({ 24 | message: res.response?.data?.message || res.message || 'Error', 25 | type: 'error', 26 | duration: 5 * 1000 27 | }) 28 | return Promise.reject(res) 29 | } 30 | }, 31 | (error) => { 32 | ElMessage({ 33 | message: error.response?.data?.message || error.message || 'Error', 34 | type: 'error', 35 | duration: 5 * 1000 36 | }) 37 | return Promise.reject(error) 38 | } 39 | ) 40 | 41 | export default service 42 | -------------------------------------------------------------------------------- /front/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: [ 4 | "./index.html", 5 | "./src/**/*.{vue,js,ts,jsx,tsx}", 6 | ], 7 | theme: { 8 | extend: { 9 | colors: { 10 | primary: '#303133', 11 | secondary: '#1272FF', 12 | disabled: '#A8ABB2', 13 | regular: '#606266', 14 | gray1: '#EEF3FF', 15 | gray2: '#EBEEF5', 16 | gray3: '#F0F2F5', 17 | gray4: '#7588A3', 18 | gray5: '#909399', 19 | gray7: '#DCDFE6', 20 | gray8: '#F5F7FA', 21 | black1: '#1E252E' 22 | 23 | }, 24 | boxShadow: { 25 | tableShadow: 'inset 0px -1px 0px 0px #EBEEF5' 26 | }, 27 | backgroundImage: { 28 | 'filter-icon': 'url("../assets/filter.png")' 29 | } 30 | }, 31 | }, 32 | plugins: [], 33 | } 34 | 35 | -------------------------------------------------------------------------------- /front/tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@vue/tsconfig/tsconfig.dom.json", 3 | "include": ["env.d.ts", "src/**/*", "src/**/*.vue", "**/*.d.ts", "src/**/*.ts"], 4 | "exclude": ["src/**/__tests__/*"], 5 | "compilerOptions": { 6 | "composite": true, 7 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", 8 | 9 | "baseUrl": ".", 10 | "paths": { 11 | "@/*": ["./src/*"] 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /front/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { 5 | "path": "./tsconfig.node.json" 6 | }, 7 | { 8 | "path": "./tsconfig.app.json" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /front/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@tsconfig/node20/tsconfig.json", 3 | "include": [ 4 | "vite.config.*", 5 | "vitest.config.*", 6 | "cypress.config.*", 7 | "nightwatch.conf.*", 8 | "playwright.config.*" 9 | ], 10 | "compilerOptions": { 11 | "composite": true, 12 | "noEmit": true, 13 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", 14 | 15 | "module": "ESNext", 16 | "moduleResolution": "Bundler", 17 | "types": ["node"] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /front/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { fileURLToPath, URL } from 'node:url' 2 | import { defineConfig } from 'vite' 3 | import vue from '@vitejs/plugin-vue' 4 | import VueDevTools from 'vite-plugin-vue-devtools' 5 | import AutoImport from 'unplugin-auto-import/vite' 6 | import Components from 'unplugin-vue-components/vite' 7 | import { ElementPlusResolver } from 'unplugin-vue-components/resolvers' 8 | import { createSvgIconsPlugin } from 'vite-plugin-svg-icons' 9 | import path from 
10 | // import { visualizer } from 'rollup-plugin-visualizer'
11 | 
12 | export default defineConfig({
13 |   build: {
14 |     outDir: './dist',
15 |     rollupOptions: {
16 |       plugins: [
17 |         // visualizer({
18 |         //   open: true,
19 |         //   gzipSize: true
20 |         // })
21 |       ],
22 |       output: {
23 |         chunkFileNames: 'static/js/[name]-[hash].js',
24 |         entryFileNames: 'static/js/[name]-[hash].js',
25 |         assetFileNames: 'static/[ext]/[name]-[hash].[ext]',
26 |         manualChunks(id) {
27 |           if (id.includes('node_modules')) {
28 |             return id.toString().split('node_modules/')[1].split('/')[0].toString()
29 |           }
30 |         }
31 |       }
32 |     }
33 |   },
34 |   css: {
35 |     preprocessorOptions: {
36 |       scss: {
37 |         additionalData: `@use "~/styles/element/index.scss" as *;`
38 |       }
39 |     }
40 |   },
41 |   plugins: [
42 |     vue(),
43 |     VueDevTools(),
44 |     AutoImport({
45 |       resolvers: [ElementPlusResolver({ importStyle: 'sass' })]
46 |     }),
47 |     Components({
48 |       resolvers: [ElementPlusResolver({ importStyle: 'sass' })]
49 |     }),
50 |     createSvgIconsPlugin({
51 |       iconDirs: [path.resolve(process.cwd(), 'src/assets/svg')],
52 |       symbolId: 'icon-[name]'
53 |     })
54 |   ],
55 |   resolve: {
56 |     alias: {
57 |       '@': fileURLToPath(new URL('./src', import.meta.url)),
58 |       '~/': `${path.resolve(__dirname, 'src')}/`
59 |     }
60 |   }
61 | })
62 | 
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/README.md:
--------------------------------------------------------------------------------
 1 | ## Usage
 2 | Install the wheel package:
 3 | ```bash
 4 | pip install enova_instrumentation_llmo-0.0.8-py3-none-any.whl
 5 | ```
 6 | Then, in your vLLM application code, configure OpenTelemetry and enable injection:
 7 | ```python
 8 | 
 9 | # enable instrumentation
10 | from enova.llmo import start
11 | # point at the OTel collector address and set the service name
12 | start(otlp_exporter_endpoint="localhost:4317", service_name="service_name")
13 | 
14 | ####### original application code follows #######
15 | ```
16 | 
17 | ## Metrics
18 | - `avg_prompt_throughput` prompt ingestion rate, in tokens/s
19 | - `avg_generation_throughput` token generation rate, in tokens/s
20 | - `running_requests` number of currently running requests
21 | - `swapped_requests` number of currently swapped requests
22 | - `pending_requests` number of currently pending requests
23 | - `gpu_kv_cache_usage` GPU KV cache usage
24 | - `cpu_kv_cache_usage` CPU KV cache usage
25 | - `generated_tokens` number of generated tokens
26 | - `llm_engine_init_config` engine startup parameters, with the following attributes:
27 |   - `model`
28 |   - `tokenizer`
29 |   - `tokenizer_mode`
30 |   - `revision`
31 |   - `tokenizer_revision`
32 |   - `trust_remote_code`
33 |   - `dtype`
34 |   - `max_seq_len`
35 |   - `download_dir`
36 |   - `load_format`
37 |   - `tensor_parallel_size`
38 |   - `disable_custom_all_reduce`
39 |   - `quantization`
40 |   - `enforce_eager`
41 |   - `kv_cache_dtype`
42 |   - `seed`
43 |   - `max_num_batched_tokens`
44 |   - `max_num_seqs`
45 |   - `max_paddings`
46 |   - `pipeline_parallel_size`
47 |   - `worker_use_ray`
48 |   - `max_parallel_loading_workers`
49 | - `http.server.active_requests` number of HTTP requests FastAPI is currently handling
50 | - `http.server.duration` FastAPI server-side request processing time
51 | - `http.server.response.size` size of FastAPI HTTP response messages
52 | - `http.server.request.size` size of FastAPI HTTP request messages
53 | 
54 | 
55 | ## Trace spans
56 | - `POST /generate` a /generate request
57 | - `POST /generate prompt` carries the `prompt` attribute
58 | - `ModelRunner.execute_model` one model execution, corresponding to one generated token
59 | - `CUDAGraphRunner.forward` the CUDA Graph forward pass, called from `ModelRunner.execute_model`
60 | - `ChatGLMForCausalLM.forward` forward pass of the ChatGLM model
61 | - `LlamaForCausalLM.forward` forward pass of the Llama model
62 | 
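63 | For reference, below is a minimal end-to-end sketch of an instrumented entrypoint. It reuses the stock vLLM demo server (`vllm.entrypoints.api_server`, the server behind the `POST /generate` spans above); the model name and collector endpoint are placeholder assumptions, so substitute your own serving code and addresses. Note that `start()` must run before the FastAPI app is constructed, because the FastAPI instrumentor hooks `FastAPI.__init__`.
64 | ```python
65 | import runpy
66 | import sys
67 | 
68 | from enova.llmo import start
69 | 
70 | # Enable instrumentation first: the FastAPI instrumentor wraps FastAPI.__init__,
71 | # so it only takes effect for apps created after this call.
72 | start(otlp_exporter_endpoint="localhost:4317", service_name="vllm-demo")
73 | 
74 | # Run the stock vLLM demo API server as if invoked with
75 | # `python -m vllm.entrypoints.api_server --model facebook/opt-125m`.
76 | sys.argv = ["api_server", "--model", "facebook/opt-125m"]
77 | runpy.run_module("vllm.entrypoints.api_server", run_name="__main__")
78 | ```
79 | 
--------------------------------------------------------------------------------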
/llmo/enova-instrumentation-llmo/enova/llmo/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from opentelemetry import metrics
 4 | from opentelemetry import trace
 5 | from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
 6 | from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
 7 | from opentelemetry.sdk.metrics import MeterProvider
 8 | from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
 9 | from opentelemetry.sdk.resources import Resource
10 | from opentelemetry.sdk.trace import TracerProvider
11 | from opentelemetry.sdk.trace.export import BatchSpanProcessor
12 | from opentelemetry.semconv.resource import ResourceAttributes
13 | 
14 | 
15 | def start(otlp_exporter_endpoint: str = "localhost:4317", service_name: str = __name__):
16 |     otlp_exporter = OTLPSpanExporter(
17 |         otlp_exporter_endpoint,
18 |         insecure=True,
19 |     )
20 |     resource = Resource(
21 |         attributes={
22 |             ResourceAttributes.SERVICE_NAME: service_name,
23 |         }
24 |     )
25 |     tracer_provider = TracerProvider(resource=resource)
26 |     tracer_provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
27 |     trace.set_tracer_provider(tracer_provider)
28 | 
29 |     metric_exporter = OTLPMetricExporter(endpoint=otlp_exporter_endpoint, insecure=True)
30 |     metric_reader = PeriodicExportingMetricReader(metric_exporter, export_interval_millis=5000)
31 | 
32 |     meter_provider = MeterProvider(metric_readers=[metric_reader], resource=resource)
33 | 
34 |     metrics.set_meter_provider(meter_provider)
35 | 
36 |     from .instrumentation import EnovaFastAPIInstrumentor, EnovaVllmInstrumentor
37 | 
38 |     EnovaFastAPIInstrumentor().instrument()
39 |     EnovaVllmInstrumentor().instrument(service_name)
40 | 
41 |     from .metrics_adapter import VLLMLogMetricsAdapter
42 | 
43 |     metrics_log_handler = VLLMLogMetricsAdapter()
44 |     vllm_logger = logging.getLogger("vllm.engine.metrics")
45 |     vllm_logger.addHandler(metrics_log_handler)
46 | 
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/instrumentation/__init__.py:
--------------------------------------------------------------------------------
1 | from .vllm import EnovaVllmInstrumentor
2 | from .fastapi import EnovaFastAPIInstrumentor
3 | 
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/instrumentation/fastapi/__init__.py:
--------------------------------------------------------------------------------
 1 | from typing import Collection
 2 | from opentelemetry import trace, metrics
 3 | from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 4 | from opentelemetry.instrumentation.utils import unwrap
 5 | from opentelemetry.instrumentation.asgi import collect_request_attributes
 6 | from opentelemetry.util.http import _parse_active_request_count_attrs
 7 | from wrapt import wrap_function_wrapper
 8 | from starlette.types import ASGIApp, Scope, Receive, Send
 9 | from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
10 | 
11 | import json
12 | 
13 | _instruments = ("fastapi >= 0.1",)
14 | 
15 | 
16 | class EnovaMiddleware:
17 |     def __init__(self, app: ASGIApp) -> None:
18 |         self.app = app
19 |         self.meter = metrics.get_meter(__name__)
20 |         self.tracer = trace.get_tracer(__name__)
21 |         self.requests_counter = self.meter.create_counter(
22 |             name="http.server.requests",
23 |             unit="requests",
24 |             description="measures the number of HTTP requests received",
25 |         )
26 | 
27 |     async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
28 |         if scope["type"] != "http":
29 |             await self.app(scope, receive, send)
30 |             return
31 | 
32 |         attrs = collect_request_attributes(scope)
33 |         _request_count_attrs = _parse_active_request_count_attrs(attrs)
34 |         self.requests_counter.add(1, _request_count_attrs)
35 |         messages = []
36 | 
37 |         if scope["method"] == "POST" and scope["path"] in ["/generate", "/v1/completions", "/v1/chat/completions"]:
38 |             span_name = f"POST {scope['path']} params"
39 |             more_body = True
40 | 
41 |             try:
42 |                 while more_body:
43 |                     message = await receive()
44 |                     messages.append(message)
45 |                     more_body = message.get("more_body", False)
46 |                 body = b"".join([message.get("body", b"") for message in messages if message.get("body")])
47 |                 if body:
48 |                     with self.tracer.start_as_current_span(span_name) as generate_span:
49 |                         body_json = json.loads(body)
50 |                         for key in ["prompt", "messages", "model"]:
51 |                             if key in body_json:
52 |                                 generate_span.set_attribute(key, str(body_json[key]))
53 |             except Exception:
54 |                 pass  # the request body may be unreadable or not valid JSON; skip the span attributes
55 | 
56 |         async def wrapped_receive():  # replay the buffered body messages before delegating to the original receive
57 |             if messages:
58 |                 return messages.pop(0)
59 |             return await receive()
60 | 
61 |         await self.app(scope, wrapped_receive, send)
62 | 
63 | 
64 | class EnovaFastAPIInstrumentor(BaseInstrumentor):
65 |     def instrumentation_dependencies(self) -> Collection[str]:
66 |         return _instruments
67 | 
68 |     def _instrument(self, **kwargs):
69 |         def fastapi_init_wrapper(wrapped, instance, args, kwargs):
70 |             result = wrapped(*args, **kwargs)
71 |             instance.add_middleware(EnovaMiddleware)
72 |             FastAPIInstrumentor.instrument_app(instance)
73 |             return result
74 | 
75 |         wrap_function_wrapper("fastapi", "FastAPI.__init__", fastapi_init_wrapper)
76 | 
77 |     def _uninstrument(self, **kwargs):
78 |         unwrap("fastapi", "FastAPI.__init__")
79 | 
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/metrics_adapter/__init__.py:
--------------------------------------------------------------------------------
1 | from .vllm_logging_metrics import VLLMLogMetricsAdapter
2 | 
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/enova/llmo/metrics_adapter/vllm_logging_metrics.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import re
 3 | import threading
 4 | import time
 5 | from typing import Iterable
 6 | 
 7 | from opentelemetry import metrics
 8 | from opentelemetry.metrics import CallbackOptions, Observation
 9 | 
10 | meter = metrics.get_meter(__name__)
11 | 
12 | metric_info = {
13 |     "avg_prompt_throughput": {"value": 0.0, "unit": "tokens/s", "last_update": time.time()},
14 |     "avg_generation_throughput": {"value": 0.0, "unit": "tokens/s", "last_update": time.time()},
15 |     "running_requests": {"value": 0.0, "unit": "requests", "last_update": time.time()},
16 |     "swapped_requests": {"value": 0.0, "unit": "requests", "last_update": time.time()},
17 |     "pending_requests": {"value": 0.0, "unit": "requests", "last_update": time.time()},
18 |     "gpu_kv_cache_usage": {"value": 0.0, "unit": "%", "last_update": time.time()},
19 |     "cpu_kv_cache_usage": {"value": 0.0, "unit": "%", "last_update": time.time()},
20 | }
21 | 
22 | timeout_seconds = 15
23 | 
24 | for metric_name, info in metric_info.items():
25 |     def create_scrape_metric_callback(metric_name):
26 |         def scrape_metric_callback(options: CallbackOptions) -> Iterable[Observation]:
27 |             value = metric_info[metric_name]["value"]
28 |             yield Observation(value, attributes={})
29 | 
30 |         return scrape_metric_callback
31 | 
32 |     callback = create_scrape_metric_callback(metric_name)
33 |     unit = info["unit"]
34 | 
35 |     meter.create_observable_gauge(
36 |         name=metric_name,
37 |         callbacks=[callback],
38 |         description=f"The value of {metric_name}",
39 |         unit=unit
40 |     )
41 | 
42 | 
43 | def update_metric(name, value, current_time):
44 |     metric_info[name]["value"] = value
45 |     metric_info[name]["last_update"] = current_time
46 | 
47 | 
48 | class VLLMLogMetricsAdapter(logging.Handler):
49 |     def __init__(self):
50 |         super().__init__()
51 |         self.pattern = re.compile(
52 |             r".*?"
53 |             r"Avg prompt throughput: (?P<avg_prompt>\d+\.\d+) tokens/s, "
54 |             r"Avg generation throughput: (?P<avg_gen>\d+\.\d+) tokens/s, "
55 |             r"Running: (?P<running>\d+) reqs, "
56 |             r"Swapped: (?P<swapped>\d+) reqs, "
57 |             r"Pending: (?P<pending>\d+) reqs, "
58 |             r"GPU KV cache usage: (?P<gpu_cache>\d+\.\d+)%, "
59 |             r"CPU KV cache usage: (?P<cpu_cache>\d+\.\d+)%"
60 |         )
61 | 
62 |     def emit(self, record):
63 |         log_message = record.getMessage()
64 |         match = self.pattern.search(log_message)
65 |         if match:
66 |             current_time = time.time()
67 |             update_metric("avg_prompt_throughput", float(match.group("avg_prompt")), current_time)
68 |             update_metric("avg_generation_throughput", float(match.group("avg_gen")), current_time)
69 |             update_metric("running_requests", float(match.group("running")), current_time)
70 |             update_metric("swapped_requests", float(match.group("swapped")), current_time)
71 |             update_metric("pending_requests", float(match.group("pending")), current_time)
72 |             update_metric("gpu_kv_cache_usage", float(match.group("gpu_cache")), current_time)
73 |             update_metric("cpu_kv_cache_usage", float(match.group("cpu_cache")), current_time)
74 | 
75 | 
76 | def update_metrics_periodically():
77 |     while True:
78 |         for metric_name, info in metric_info.items():
79 |             current_time = time.time()
80 |             if current_time - info["last_update"] > timeout_seconds:
81 |                 metric_info[metric_name]["value"] = 0.0  # Reset the value if the data is stale
82 |         time.sleep(5)  # Update every 5 seconds
83 | 
84 | 
85 | # Start the background thread to update metrics periodically
86 | threading.Thread(target=update_metrics_periodically, daemon=True).start()
87 | 
--------------------------------------------------------------------------------
/llmo/enova-instrumentation-llmo/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "enova-instrumentation-llmo"
 3 | version = "0.0.8"
 4 | description = "enova-instrumentation-llmo"
 5 | requires-python = ">=3.10"
 6 | dynamic = [
 7 |     "dependencies"
 8 | ]
 9 | authors = [
10 |     { name="wenxinxie", email="wenxin@emergingai-tech.com" },
11 | ]
12 | readme = "README.md"
13 | 
14 | 
15 | [tool.coverage.run]
16 | branch = true
17 | source = [ "enova/llmo" ]
18 | 
19 | [build-system]
20 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2,<=7.1.0", "toml"]
21 | 
22 | [tool.poetry]
23 | name = "enova-instrumentation-llmo"
24 | version = "0.0.8"
25 | description = "llmo instrumentation for OpenTelemetry"
26 | authors = ["wenxinxie <wenxin@emergingai-tech.com>"]
27 | 
28 | [[tool.poetry.packages]]
29 | include = "enova/llmo"
30 | 
31 | [tool.poetry.dependencies]
32 | python = "^3.10"
33 | opentelemetry-api = "*"
34 | opentelemetry-sdk = "*"
35 | vllm = "0.6.3.post1"
36 | fastapi = "*"
37 | opentelemetry-exporter-otlp = "*"
38 | opentelemetry-distro = "*"
39 | opentelemetry-instrumentation-fastapi = "*"
40 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "enova"
 3 | description = "enova"
 4 | requires-python = ">=3.8"
 5 | dynamic = ["dependencies", "version"]
 6 | authors = [
 7 |     { name = "kyokagong", email = "kyokagong@emergingai-tech.com" },
 8 |     { name = "wenxinxie", email = "wenxin@emergingai-tech.com" },
 9 |     { name = "jockyhawk", email = "jockyhawk@emergingai-tech.com" },
10 |     { name = "kimzhao", email = "kimzhao@emergingai-tech.com" },
11 | ]
12 | readme = "README.md"
13 | 
14 | [project.scripts]
15 | enova = "enova.entry.cli:main"
16 | 
17 | [project.optional-dependencies]
18 | lint = ["black==23.12.0"]
19 | test = ["pytest", "pytest-cov", "responses", "respx"]
20 | 
21 | 
22 | [build-system]
23 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2,<=7.1.0", "toml"]
24 | build-backend = "setuptools.build_meta"
25 | 
26 | 
27 | [tool.setuptools.packages.find]
28 | where = ["."]
29 | include = ["enova.*"]
30 | namespaces = true
31 | 
32 | [tool.setuptools.package-data]
33 | "*" = ["*.csv", "docker-compose-*"]
34 | "enova.web_statics" = ["*", "*/*"]
35 | 
36 | [tool.setuptools.dynamic]
37 | dependencies = { file = ["requirements.txt"] }
38 | version = {file = ["VERSION"]}
39 | 
40 | [tool.coverage.run]
41 | omit = ["*/tests/test_*.py"]
42 | 
--------------------------------------------------------------------------------
/requirements-docker-no-deps.txt:
--------------------------------------------------------------------------------
1 | vllm==0.6.3.post1
2 | 
--------------------------------------------------------------------------------
/requirements-docker.txt:
--------------------------------------------------------------------------------
 1 | httpx==0.24.1
 2 | fastapi==0.108.0
 3 | vllm==0.6.3.post1
 4 | sglang==0.3.6
 5 | huggingface_hub
 6 | hf-transfer
 7 | transformers
 8 | locate
 9 | python-rapidjson
10 | opentelemetry-api
11 | opentelemetry-sdk
12 | opentelemetry-exporter-otlp
13 | opentelemetry-distro
14 | opentelemetry-instrumentation-fastapi
15 | streamlit
16 | pymysql==1.1.0
17 | aiomysql==0.2.0
18 | sqlalchemy==2.0.29
19 | sqlalchemy-utils
20 | aiosqlite
21 | greenlet
22 | uvicorn
23 | ulid-py
24 | pyopencl
25 | py-cpuinfo
26 | pytz
27 | tzlocal
28 | openai
29 | packaging
30 | ray
31 | python-multipart
32 | addict
33 | orjson
34 | siphash24
35 | # msgspec
36 | # compressed_tensors
37 | # gguf
38 | # sentencepiece
39 | # mistral_common
40 | 
41 | # filelock
42 | # lm-format-enforcer==0.10.3
43 | # ninja
44 | # nvidia-ml-py
45 | # outlines
46 | # pillow
47 | # prometheus-client
48 | # prometheus-fastapi-instrumentator
49 | # psutil
50 | # sentencepiece
51 | # tiktoken
52 | # tokenizers
53 | # typing-extensions
54 | # vllm-flash-attn==2.5.9.post1
55 | # xformers==0.0.27
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | httpx==0.24.1
 2 | fastapi==0.108.0
 3 | huggingface_hub
 4 | hf-transfer
 5 | transformers
 6 | locate
 7 | python-rapidjson
 8 | opentelemetry-api
 9 | opentelemetry-sdk
10 | opentelemetry-exporter-otlp
11 | opentelemetry-distro
12 | opentelemetry-instrumentation-fastapi
13 | streamlit
14 | pymysql==1.1.0
15 | aiomysql==0.2.0
16 | sqlalchemy==2.0.29
17 | sqlalchemy-utils
18 | aiosqlite
19 | greenlet
20 | uvicorn
21 | ulid-py
22 | pyopencl
23 | py-cpuinfo
24 | pytz
25 | tzlocal
26 | openai
27 | packaging
28 | ray
29 | enova-instrumentation-llmo==0.0.8
30 | addict
31 | sglang==0.3.6
32 | python-multipart
33 | orjson
34 | siphash24
--------------------------------------------------------------------------------
/scripts/pack_whl.enova.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | echo "Running wheel packaging from ${PWD}"
 5 | 
 6 | SCRIPT=$(realpath "$0")
 7 | BASEDIR=$(dirname "$SCRIPT")
 8 | BASEDIR=$(dirname "$BASEDIR")
 9 | 
10 | DOCKER_COMPOSE_BIN=enova/template/deployment/docker-compose/bin/docker-compose-linux-x86_64
11 | DOWNLOAD_URL=https://github.com/docker/compose/releases/download/v2.24.5/docker-compose-linux-x86_64
12 | 
13 | 
14 | if [ ! -f "$DOCKER_COMPOSE_BIN" ]; then
15 |     echo "PWD: $PWD"
16 |     mkdir -p enova/template/deployment/docker-compose/bin/
17 | 
18 |     echo "docker-compose binary $DOCKER_COMPOSE_BIN does not exist, starting download..."
19 |     cd enova/template/deployment/docker-compose/bin/
20 |     if wget "$DOWNLOAD_URL"; then
21 |         echo "downloaded successfully"
22 |     else
23 |         echo "failed to download"
24 |     fi
25 | 
26 |     chmod +x docker-compose-linux-x86_64
27 |     cd $BASEDIR
28 | fi
29 | 
30 | # pack
31 | cd $BASEDIR
32 | python -m build --no-isolation
33 | 
--------------------------------------------------------------------------------
/scripts/pack_whl.llmo.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | echo "Running wheel packaging of llmo from ${PWD}"
 5 | 
 6 | SCRIPT=$(realpath "$0")
 7 | BASEDIR=$(dirname "$SCRIPT")
 8 | BASEDIR=$(dirname "$BASEDIR")
 9 | 
10 | # pack
11 | cd $BASEDIR/llmo/enova-instrumentation-llmo
12 | poetry build
13 | 
--------------------------------------------------------------------------------
/tests/enova/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Emerging-AI/ENOVA/b3661d06064ba1a46417f24c9e8ee8dec7e2fa03/tests/enova/conftest.py
--------------------------------------------------------------------------------
/tests/enova/test_eapp.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from httpx import AsyncClient
 3 | from enova.app.server import get_app_api_server
 4 | from enova.common.config import CONFIG
 5 | 
 6 | 
 7 | @pytest.fixture
 8 | def eapp():
 9 |     api_server = get_app_api_server()
10 |     return api_server.app
11 | 
12 | 
13 | @pytest.mark.asyncio
14 | class TestEApp:
15 |     async def test_healthz(self, eapp):
16 |         async with AsyncClient(app=eapp, base_url="http://test") as ac:
17 |             response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/healthz")
18 |             assert response.status_code == 200
19 | 
20 | 
21 | @pytest.mark.asyncio
22 | class TestEServe:
23 |     async def test_list_serving(self, eapp):
24 |         async with AsyncClient(app=eapp, base_url="http://test") as ac:
25 |             response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/v1/serving")
26 |             assert response.status_code == 200
27 |             # TODO: some test of biz flow
28 | 
29 |     # async def test_create_serving_with_escalar(self, eapp):
30 |     #     post_params = {}
31 |     #     async with AsyncClient(app=eapp, base_url="http://test") as ac:
32 |     #         response = await ac.post(
33 |     #             f"/{CONFIG.enova_app['url_prefix']}/v1/serving",
34 |     #             json=post_params,
35 |     #         )
36 |     #         assert response.status_code == 200
37 |     #         # TODO: some test of biz flow
38 | 
39 |     # async def test_create_serving_missing_escalar(self, eapp):
40 |     #     post_params = {}
41 |     #     async with AsyncClient(app=eapp, base_url="http://test") as ac:
42 |     #         response = await ac.post(
43 |     #             f"/{CONFIG.enova_app['url_prefix']}/v1/serving",
44 |     #             json=post_params,
45 |     #         )
46 |     #         assert response.status_code == 200
47 |     #         # TODO: some test of biz flow
48 | 
49 |     # async def test_get_serving(self, eapp):
50 |     #     eserve_id = ""
51 |     #     async with AsyncClient(app=eapp, base_url="http://test") as ac:
52 |     #         response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/v1/serving/{eserve_id}")
53 |     #         assert response.status_code == 200
54 |     #         # TODO: some test of biz flow
55 | 
56 |     # async def test_delete_serving(self, eapp):
57 |     #     eserve_id = ""
58 |     #     async with AsyncClient(app=eapp, base_url="http://test") as ac:
59 |     #         response = await ac.delete(f"/{CONFIG.enova_app['url_prefix']}/v1/serving/{eserve_id}")
60 |     #         assert response.status_code == 200
61 |     #         # TODO: some test of biz flow
62 | 
63 | 
64 | @pytest.mark.asyncio
65 | class TestTInject:
66 |     async def test_list_injector(self, eapp):
67 |         async with AsyncClient(app=eapp, base_url="http://test") as ac:
68 |             response = await ac.get(f"/{CONFIG.enova_app['url_prefix']}/v1/instance/test")
69 |             assert response.status_code == 200
70 |             # TODO: some test of biz flow
71 | 
--------------------------------------------------------------------------------
/tests/enova/test_requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-asyncio
--------------------------------------------------------------------------------