├── .black.toml ├── .circleci ├── config.yml └── resources │ ├── .minikube-config-map │ ├── .minikube-registry-creds │ ├── postgres-k8s.yaml │ └── redis-k8s.yaml ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── custom.md │ └── feature_request.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .ruff.toml ├── LICENSE ├── NOTICE ├── README.md ├── charts ├── .helmignore └── model-engine │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── templates │ ├── _helpers.tpl │ ├── _istio-attribute-match-conditions.tpl │ ├── aws_config_map.yaml │ ├── balloon_cpu_deployment.yaml │ ├── balloon_deployments.yaml │ ├── cacher_deployment.yaml │ ├── cacher_vpa.yaml │ ├── celery_autoscaler_stateful_set.yaml │ ├── cluster_rolebinding.yaml │ ├── database_migration_job.yaml │ ├── endpoint_builder_deployment.yaml │ ├── endpoint_builder_vpa.yaml │ ├── gateway_deployment.yaml │ ├── gateway_hpa.yaml │ ├── gateway_service.yaml │ ├── gateway_vpa.yaml │ ├── inference_framework_config.yaml │ ├── istio-destinationrule.yaml │ ├── istio-metrics.yaml │ ├── istio-virtualservice.yaml │ ├── model_engine_default_priority_class.yaml │ ├── model_engine_high_priority_class.yaml │ ├── model_engine_low_priority_class.yaml │ ├── pod_disruption_budget.yaml │ ├── populate_fine_tuning_repository_job.yaml │ ├── proportional_a100_autoscaler_deployment.yaml │ ├── proportional_a10_autoscaler_deployment.yaml │ ├── proportional_t4_autoscaler_deployment.yaml │ ├── recommended_hardware_config_map.yaml │ ├── restart_keda_operator.yaml │ ├── service_account.yaml │ ├── service_account_image_builder.yaml │ ├── service_account_inference.yaml │ ├── service_config_map.yaml │ ├── service_template_config_map.yaml │ ├── spellbook_init_job.yaml │ └── trigger_authentication.yaml │ ├── values.yaml │ ├── values_circleci.yaml │ └── values_sample.yaml ├── clients └── python │ ├── Makefile │ ├── README.md │ ├── llmengine │ ├── __init__.py │ ├── api_engine.py │ ├── completion.py │ ├── data_types │ │ ├── __init__.py │ │ ├── batch_completion.py │ │ ├── chat_completion.py │ │ ├── completion.py │ │ ├── core.py │ │ ├── gen │ │ │ ├── __init__.py │ │ │ └── openai.py │ │ ├── model_endpoints.py │ │ ├── pydantic_types.py │ │ ├── rest.py │ │ └── vllm.py │ ├── errors.py │ ├── file.py │ ├── fine_tuning.py │ ├── model.py │ └── py.typed │ ├── mypy.ini │ ├── poetry.lock │ ├── pyproject.toml │ └── setup.py ├── docs ├── CNAME ├── _static │ ├── favicon-32x32.png │ ├── launch-logo.png │ └── launch-logo.svg ├── api │ ├── data_types.md │ ├── error_handling.md │ ├── langchain.md │ └── python_client.md ├── contributing.md ├── examples │ └── finetuning.ipynb ├── faq.md ├── getting_started.md ├── guides │ ├── completions.md │ ├── endpoint_creation.md │ ├── fine_tuning.md │ ├── rate_limits.md │ ├── self_hosting.md │ └── token_streaming.md ├── index.md ├── integrations.md ├── model_zoo.md ├── pricing.md └── stylesheets │ └── index.css ├── examples ├── download_a_finetuned_model.ipynb └── finetune_llama_2_on_science_qa.ipynb ├── integration_tests ├── __init__.py ├── rest_api_utils.py ├── test_batch_jobs.py ├── test_bundles.py ├── test_completions.py ├── test_endpoints.py ├── test_file.py └── test_fine_tunes.py ├── mkdocs.yml ├── model-engine ├── .coveragerc ├── Dockerfile ├── Dockerfile.fips ├── README.md ├── model_engine_server │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── app.py │ │ ├── batch_jobs_v1.py │ │ ├── dependencies.py │ │ ├── docker_image_batch_job_bundles_v1.py │ │ ├── files_v1.py │ │ ├── llms_v1.py │ │ ├── model_bundles_v1.py │ │ ├── model_bundles_v2.py │ │ ├── model_endpoints_docs_v1.py │ │ ├── model_endpoints_v1.py │ │ ├── tasks_v1.py │ │ ├── triggers_v1.py │ │ ├── v2 │ │ │ ├── __init__.py │ │ │ ├── batch_completion.py │ │ │ ├── chat_completion.py │ │ │ ├── common.py │ │ │ └── completion.py │ │ └── worker.py │ ├── common │ │ ├── __init__.py │ │ ├── aiohttp_sse_client.py │ │ ├── concurrency_limiter.py │ │ ├── config.py │ │ ├── constants.py │ │ ├── datadog_utils.py │ │ ├── dtos │ │ │ ├── __init__.py │ │ │ ├── batch_jobs.py │ │ │ ├── core.py │ │ │ ├── docker_repository.py │ │ │ ├── endpoint_builder.py │ │ │ ├── files.py │ │ │ ├── llms │ │ │ │ ├── __init__.py │ │ │ │ ├── batch_completion.py │ │ │ │ ├── chat_completion.py │ │ │ │ ├── completion.py │ │ │ │ ├── model_endpoints.py │ │ │ │ ├── sglang.py │ │ │ │ └── vllm.py │ │ │ ├── model_bundles.py │ │ │ ├── model_endpoints.py │ │ │ ├── resource_manager.py │ │ │ ├── tasks.py │ │ │ └── triggers.py │ │ ├── env_vars.py │ │ ├── errors.py │ │ ├── io.py │ │ ├── pydantic_types.py │ │ ├── resource_limits.py │ │ ├── serialization_utils.py │ │ ├── service_requests.py │ │ ├── settings.py │ │ └── types │ │ │ ├── __init__.py │ │ │ ├── endpoint.py │ │ │ └── gen │ │ │ └── openai.py │ ├── core │ │ ├── __init__.py │ │ ├── auth │ │ │ ├── __init__.py │ │ │ ├── authentication_repository.py │ │ │ └── fake_authentication_repository.py │ │ ├── aws │ │ │ ├── __init__.py │ │ │ ├── roles.py │ │ │ ├── secrets.py │ │ │ └── storage_client.py │ │ ├── celery │ │ │ ├── __init__.py │ │ │ ├── abs.py │ │ │ ├── app.py │ │ │ ├── celery_autoscaler.py │ │ │ └── s3.py │ │ ├── config.py │ │ ├── configmap.py │ │ ├── configs │ │ │ └── default.yaml │ │ ├── docker │ │ │ ├── __init__.py │ │ │ ├── docker_image.py │ │ │ ├── ecr.py │ │ │ ├── kaniko_template.yaml │ │ │ ├── kaniko_template_circleci.yaml │ │ │ └── remote_build.py │ │ ├── fake_notification_gateway.py │ │ ├── loggers.py │ │ ├── notification_gateway.py │ │ ├── tracing │ │ │ ├── __init__.py │ │ │ ├── live_tracing_gateway.py │ │ │ ├── span.py │ │ │ └── tracing_gateway.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── env.py │ │ │ ├── format.py │ │ │ ├── git.py │ │ │ ├── python_utils.py │ │ │ ├── timer.py │ │ │ └── url.py │ ├── db │ │ ├── __init__.py │ │ ├── base.py │ │ ├── endpoint_row_lock.py │ │ ├── local_setup.py │ │ ├── migrations │ │ │ ├── README │ │ │ ├── alembic.ini │ │ │ ├── alembic │ │ │ │ ├── env.py │ │ │ │ ├── script.py.mako │ │ │ │ └── versions │ │ │ │ │ ├── 2024_09_09_1736-fa3267c80731_initial.py │ │ │ │ │ ├── 2024_09_09_1831-b574e9711e35_chat_completion_add_extra_routes.py │ │ │ │ │ ├── 2024_09_24_1456-f55525c81eb5_multinode_bundle.py │ │ │ │ │ ├── 2025_09_16_1741-e580182d6bfd_add_passthrough_forwarder.py │ │ │ │ │ └── 2025_09_25_1940-221aa19d3f32_add_routes_column.py │ │ │ ├── initial.sql │ │ │ ├── run_database_migration.sh │ │ │ └── stamp_initial_schema.sh │ │ └── models │ │ │ ├── __init__.py │ │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── query.py │ │ │ └── record.py │ │ │ ├── constants.py │ │ │ ├── exceptions.py │ │ │ ├── hosted_model_inference.py │ │ │ ├── model.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── misc.py │ ├── domain │ │ ├── __init__.py │ │ ├── authorization │ │ │ ├── __init__.py │ │ │ └── live_authorization_module.py │ │ ├── entities │ │ │ ├── __init__.py │ │ │ ├── batch_job_entity.py │ │ │ ├── common_types.py │ │ │ ├── docker_image_batch_job_bundle_entity.py │ │ │ ├── file_entity.py │ │ │ ├── gpu_type.py │ │ │ ├── llm_entity.py │ │ │ ├── llm_fine_tune_entity.py │ │ │ ├── model_bundle_entity.py │ │ │ ├── model_endpoint_entity.py │ │ │ ├── owned_entity.py │ │ │ └── trigger_entity.py │ │ ├── exceptions.py │ │ ├── gateways │ │ │ ├── __init__.py │ │ │ ├── async_model_endpoint_inference_gateway.py │ │ │ ├── cron_job_gateway.py │ │ │ ├── docker_image_batch_job_gateway.py │ │ │ ├── file_storage_gateway.py │ │ │ ├── inference_autoscaling_metrics_gateway.py │ │ │ ├── llm_artifact_gateway.py │ │ │ ├── model_endpoints_schema_gateway.py │ │ │ ├── model_primitive_gateway.py │ │ │ ├── monitoring_metrics_gateway.py │ │ │ ├── streaming_model_endpoint_inference_gateway.py │ │ │ ├── sync_model_endpoint_inference_gateway.py │ │ │ └── task_queue_gateway.py │ │ ├── repositories │ │ │ ├── __init__.py │ │ │ ├── docker_image_batch_job_bundle_repository.py │ │ │ ├── docker_repository.py │ │ │ ├── llm_fine_tune_events_repository.py │ │ │ ├── model_bundle_repository.py │ │ │ ├── tokenizer_repository.py │ │ │ └── trigger_repository.py │ │ ├── services │ │ │ ├── __init__.py │ │ │ ├── batch_job_service.py │ │ │ ├── endpoint_builder_service.py │ │ │ ├── llm_batch_completions_service.py │ │ │ ├── llm_fine_tuning_service.py │ │ │ ├── llm_model_endpoint_service.py │ │ │ └── model_endpoint_service.py │ │ └── use_cases │ │ │ ├── __init__.py │ │ │ ├── async_inference_use_cases.py │ │ │ ├── batch_job_use_cases.py │ │ │ ├── docker_image_batch_job_bundle_use_cases.py │ │ │ ├── file_use_cases.py │ │ │ ├── llm_fine_tuning_use_cases.py │ │ │ ├── llm_model_endpoint_use_cases.py │ │ │ ├── model_bundle_use_cases.py │ │ │ ├── model_endpoint_use_cases.py │ │ │ ├── model_endpoints_schema_use_cases.py │ │ │ ├── streaming_inference_use_cases.py │ │ │ ├── sync_inference_use_cases.py │ │ │ └── trigger_use_cases.py │ ├── entrypoints │ │ ├── __init__.py │ │ ├── init_database.py │ │ ├── init_spellbook_models.py │ │ ├── k8s_cache.py │ │ ├── populate_llm_fine_tuning_job_repository.py │ │ ├── start_batch_job_orchestration.py │ │ ├── start_docker_image_batch_job_init_container.py │ │ └── start_fastapi_server.py │ ├── inference │ │ ├── __init__.py │ │ ├── async_inference │ │ │ ├── __init__.py │ │ │ ├── celery.py │ │ │ ├── tasks.py │ │ │ └── vpa.yaml │ │ ├── base.Dockerfile │ │ ├── batch_inference │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── dto.py │ │ │ ├── examples │ │ │ │ ├── generate_tool_sample_data.py │ │ │ │ ├── sample_config.json │ │ │ │ ├── sample_config_gemma.json │ │ │ │ ├── sample_config_mixtral.json │ │ │ │ ├── sample_config_tool.json │ │ │ │ ├── sample_data.json │ │ │ │ └── sample_data_tool.json │ │ │ ├── requirements.txt │ │ │ └── vllm_batch.py │ │ ├── common.py │ │ ├── configs │ │ │ ├── service--forwarder-runnable-img-converted-from-artifact.yaml │ │ │ ├── service--forwarder.yaml │ │ │ └── service--http_forwarder.yaml │ │ ├── domain │ │ │ └── gateways │ │ │ │ ├── inference_monitoring_metrics_gateway.py │ │ │ │ ├── streaming_storage_gateway.py │ │ │ │ └── usage_metrics_gateway.py │ │ ├── download_and_inject_bundle.py │ │ ├── forwarding │ │ │ ├── __init__.py │ │ │ ├── celery_forwarder.py │ │ │ ├── echo_server.py │ │ │ ├── forwarding.py │ │ │ └── http_forwarder.py │ │ ├── infra │ │ │ ├── __init__.py │ │ │ └── gateways │ │ │ │ ├── __init__.py │ │ │ │ ├── datadog_inference_monitoring_metrics_gateway.py │ │ │ │ ├── fake_usage_metrics_gateway.py │ │ │ │ └── firehose_streaming_storage_gateway.py │ │ ├── inject_bundle.Dockerfile │ │ ├── limits.conf │ │ ├── post_inference_hooks.py │ │ ├── pytorch_or_tf.base.Dockerfile │ │ ├── pytorch_or_tf.user.Dockerfile │ │ ├── requirements_base.txt │ │ ├── service_requests.py │ │ ├── sglang │ │ │ ├── Dockerfile.sglang │ │ │ ├── README.md │ │ │ └── sglang-startup-script.py │ │ ├── sync_inference │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ ├── destination_rule.yaml │ │ │ ├── fastapi_server.py │ │ │ ├── start_fastapi_server.py │ │ │ ├── virtual_service.yaml │ │ │ └── vpa.yaml │ │ ├── tensorrt-llm │ │ │ ├── Dockerfile │ │ │ ├── README.md │ │ │ ├── launch_triton_server.py │ │ │ ├── requirements.txt │ │ │ └── triton_model_repo │ │ │ │ ├── ensemble │ │ │ │ ├── 1 │ │ │ │ │ └── .tmp │ │ │ │ └── config.pbtxt │ │ │ │ ├── postprocessing │ │ │ │ ├── 1 │ │ │ │ │ └── model.py │ │ │ │ └── config.pbtxt │ │ │ │ ├── preprocessing │ │ │ │ ├── 1 │ │ │ │ │ └── model.py │ │ │ │ └── config.pbtxt │ │ │ │ ├── tensorrt_llm │ │ │ │ ├── 1 │ │ │ │ │ └── .gitkeep │ │ │ │ └── config.pbtxt │ │ │ │ └── tensorrt_llm_bls │ │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ │ └── config.pbtxt │ │ ├── tool_completion │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── tools.py │ │ │ └── utils.py │ │ ├── user.Dockerfile │ │ ├── utils.py │ │ └── vllm │ │ │ ├── Dockerfile.vllm │ │ │ ├── README.md │ │ │ ├── build_and_upload_image.sh │ │ │ ├── examples │ │ │ └── v2 │ │ │ │ ├── gemma │ │ │ │ ├── README.md │ │ │ │ ├── config.json │ │ │ │ ├── config_w_oai_chat_content.json │ │ │ │ ├── data_oai_chat.json │ │ │ │ └── data_oai_completion.json │ │ │ │ └── llama-3.2-vision │ │ │ │ ├── README.md │ │ │ │ ├── config.json │ │ │ │ ├── data_oai_chat.json │ │ │ │ └── output_oi_chat.json │ │ │ ├── gen_sample_data.py │ │ │ ├── init_ray.sh │ │ │ ├── init_ray_batch_inf_v2.py │ │ │ ├── requirements-batch.txt │ │ │ ├── requirements-dev.txt │ │ │ ├── requirements.txt │ │ │ ├── vllm_batch.py │ │ │ └── vllm_server.py │ ├── infra │ │ ├── __init__.py │ │ ├── gateways │ │ │ ├── __init__.py │ │ │ ├── abs_file_storage_gateway.py │ │ │ ├── abs_filesystem_gateway.py │ │ │ ├── abs_llm_artifact_gateway.py │ │ │ ├── asb_inference_autoscaling_metrics_gateway.py │ │ │ ├── batch_job_orchestration_gateway.py │ │ │ ├── batch_job_progress_gateway.py │ │ │ ├── celery_task_queue_gateway.py │ │ │ ├── datadog_monitoring_metrics_gateway.py │ │ │ ├── dns_resolver.py │ │ │ ├── fake_model_primitive_gateway.py │ │ │ ├── fake_monitoring_metrics_gateway.py │ │ │ ├── filesystem_gateway.py │ │ │ ├── k8s_resource_parser.py │ │ │ ├── live_async_model_endpoint_inference_gateway.py │ │ │ ├── live_batch_job_orchestration_gateway.py │ │ │ ├── live_batch_job_progress_gateway.py │ │ │ ├── live_cron_job_gateway.py │ │ │ ├── live_docker_image_batch_job_gateway.py │ │ │ ├── live_model_endpoint_infra_gateway.py │ │ │ ├── live_model_endpoints_schema_gateway.py │ │ │ ├── live_streaming_model_endpoint_inference_gateway.py │ │ │ ├── live_sync_model_endpoint_inference_gateway.py │ │ │ ├── model_endpoint_infra_gateway.py │ │ │ ├── redis_inference_autoscaling_metrics_gateway.py │ │ │ ├── resources │ │ │ │ ├── __init__.py │ │ │ │ ├── asb_queue_endpoint_resource_delegate.py │ │ │ │ ├── endpoint_resource_gateway.py │ │ │ │ ├── fake_queue_endpoint_resource_delegate.py │ │ │ │ ├── image_cache_gateway.py │ │ │ │ ├── k8s_endpoint_resource_delegate.py │ │ │ │ ├── k8s_resource_types.py │ │ │ │ ├── live_endpoint_resource_gateway.py │ │ │ │ ├── queue_endpoint_resource_delegate.py │ │ │ │ ├── sqs_queue_endpoint_resource_delegate.py │ │ │ │ └── templates │ │ │ │ │ └── service_template_config_map_circleci.yaml │ │ │ ├── s3_file_storage_gateway.py │ │ │ ├── s3_filesystem_gateway.py │ │ │ └── s3_llm_artifact_gateway.py │ │ ├── infra_utils.py │ │ ├── repositories │ │ │ ├── __init__.py │ │ │ ├── abs_file_llm_fine_tune_events_repository.py │ │ │ ├── abs_file_llm_fine_tune_repository.py │ │ │ ├── acr_docker_repository.py │ │ │ ├── batch_job_record_repository.py │ │ │ ├── db_batch_job_record_repository.py │ │ │ ├── db_docker_image_batch_job_bundle_repository.py │ │ │ ├── db_model_bundle_repository.py │ │ │ ├── db_model_endpoint_record_repository.py │ │ │ ├── db_repository_mixin.py │ │ │ ├── db_trigger_repository.py │ │ │ ├── ecr_docker_repository.py │ │ │ ├── fake_docker_repository.py │ │ │ ├── feature_flag_repository.py │ │ │ ├── live_tokenizer_repository.py │ │ │ ├── llm_fine_tune_repository.py │ │ │ ├── model_endpoint_cache_repository.py │ │ │ ├── model_endpoint_record_repository.py │ │ │ ├── redis_feature_flag_repository.py │ │ │ ├── redis_model_endpoint_cache_repository.py │ │ │ ├── s3_file_llm_fine_tune_events_repository.py │ │ │ └── s3_file_llm_fine_tune_repository.py │ │ └── services │ │ │ ├── __init__.py │ │ │ ├── batch_job_orchestration_service.py │ │ │ ├── docker_image_batch_job_llm_fine_tuning_service.py │ │ │ ├── fake_llm_batch_completions_service.py │ │ │ ├── image_cache_service.py │ │ │ ├── live_batch_job_orchestration_service.py │ │ │ ├── live_batch_job_service.py │ │ │ ├── live_endpoint_builder_service.py │ │ │ ├── live_llm_batch_completions_service.py │ │ │ ├── live_llm_model_endpoint_service.py │ │ │ ├── live_model_endpoint_service.py │ │ │ └── model_endpoint_cache_service.py │ └── service_builder │ │ ├── __init__.py │ │ ├── celery.py │ │ └── tasks_v1.py ├── mypy.ini ├── requirements-test.txt ├── requirements.in ├── requirements.txt ├── requirements_override.txt ├── service_configs │ └── service_config_circleci.yaml ├── setup.cfg ├── setup.py └── tests │ ├── README.md │ ├── __init__.py │ ├── integration │ ├── __init__.py │ └── inference │ │ ├── conftest.py │ │ └── test_async_inference.py │ └── unit │ ├── __init__.py │ ├── api │ ├── __init__.py │ ├── conftest.py │ ├── test_app.py │ ├── test_batch_jobs.py │ ├── test_dependencies.py │ ├── test_docker_image_batch_job_bundles.py │ ├── test_llms.py │ ├── test_model_bundles.py │ ├── test_model_endpoints.py │ ├── test_model_endpoints_docs.py │ ├── test_tasks.py │ └── test_triggers.py │ ├── common │ ├── __init__.py │ ├── test_batch_jobs_dtos.py │ └── test_settings.py │ ├── conftest.py │ ├── core │ └── utils │ │ └── test_timer.py │ ├── domain │ ├── __init__.py │ ├── conftest.py │ ├── test_async_inference_use_cases.py │ ├── test_docker_image_batch_job_bundle_use_cases.py │ ├── test_entities.py │ ├── test_llm_use_cases.py │ ├── test_model_bundle_use_cases.py │ ├── test_model_endpoint_use_cases.py │ ├── test_streaming_inference_use_cases.py │ └── test_sync_inference_use_cases.py │ ├── inference │ ├── conftest.py │ ├── test_forwarding.py │ ├── test_http_forwarder.py │ └── test_vllm_batch.py │ ├── infra │ ├── gateways │ │ ├── conftest.py │ │ ├── k8s_fake_objects.py │ │ ├── resources │ │ │ ├── example_lws_config.json │ │ │ ├── test_image_cache_gateway.py │ │ │ ├── test_k8s_endpoint_resource_delegate.py │ │ │ └── test_sqs_queue_endpoint_resource_delegate.py │ │ ├── test_datadog_inference_monitoring_metrics_gateway.py │ │ ├── test_datadog_monitoring_metrics_gateway.py │ │ ├── test_firehose_streaming_storage_gateway.py │ │ ├── test_k8s_resource_parser.py │ │ ├── test_live_async_model_inference_gateway.py │ │ ├── test_live_batch_job_progress_gateway.py │ │ ├── test_live_docker_image_batch_job_gateway.py │ │ ├── test_live_model_endpoint_infra_gateway.py │ │ ├── test_live_model_endpoints_schema_gateway.py │ │ ├── test_live_streaming_model_endpoint_inference_gateway.py │ │ ├── test_live_sync_model_endpoint_inference_gateway.py │ │ └── test_s3_llm_artifact_gateway.py │ ├── repositories │ │ ├── conftest.py │ │ ├── test_db_batch_job_record_repository.py │ │ ├── test_db_docker_image_batch_job_bundle_repository.py │ │ ├── test_db_model_bundle_repository.py │ │ ├── test_db_model_endpoint_record_repository.py │ │ ├── test_live_tokenizer_repository.py │ │ ├── test_redis_feature_flag_repository.py │ │ └── test_redis_model_endpoint_cache_repository.py │ └── services │ │ ├── conftest.py │ │ ├── test_docker_image_batch_job_llm_fine_tuning_service.py │ │ ├── test_image_cache_service.py │ │ ├── test_live_batch_job_orchestration_service.py │ │ ├── test_live_batch_job_service.py │ │ ├── test_live_endpoint_builder_service.py │ │ ├── test_live_model_endpoint_service.py │ │ └── test_model_endpoint_cache_service.py │ └── service_builder │ ├── test_celery.py │ └── test_init.py ├── requirements-dev.txt ├── requirements-docs.txt └── scripts ├── generate-openai-types.sh ├── openai-spec.yaml ├── requirements.txt └── throughput_benchmarks.py /.black.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.black.toml -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.circleci/config.yml -------------------------------------------------------------------------------- /.circleci/resources/.minikube-config-map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.circleci/resources/.minikube-config-map -------------------------------------------------------------------------------- /.circleci/resources/.minikube-registry-creds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.circleci/resources/.minikube-registry-creds -------------------------------------------------------------------------------- /.circleci/resources/postgres-k8s.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.circleci/resources/postgres-k8s.yaml -------------------------------------------------------------------------------- /.circleci/resources/redis-k8s.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.circleci/resources/redis-k8s.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.github/ISSUE_TEMPLATE/bug_report.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.github/ISSUE_TEMPLATE/custom.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.gitignore -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.isort.cfg -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.ruff.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/.ruff.toml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/LICENSE -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/NOTICE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/README.md -------------------------------------------------------------------------------- /charts/.helmignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/.helmignore -------------------------------------------------------------------------------- /charts/model-engine/.helmignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/.helmignore -------------------------------------------------------------------------------- /charts/model-engine/Chart.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/Chart.yaml -------------------------------------------------------------------------------- /charts/model-engine/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/README.md -------------------------------------------------------------------------------- /charts/model-engine/templates/_helpers.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/_helpers.tpl -------------------------------------------------------------------------------- /charts/model-engine/templates/_istio-attribute-match-conditions.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/_istio-attribute-match-conditions.tpl -------------------------------------------------------------------------------- /charts/model-engine/templates/aws_config_map.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/aws_config_map.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/balloon_cpu_deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/balloon_cpu_deployment.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/balloon_deployments.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/balloon_deployments.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/cacher_deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/cacher_deployment.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/cacher_vpa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/cacher_vpa.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/celery_autoscaler_stateful_set.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/celery_autoscaler_stateful_set.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/cluster_rolebinding.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/cluster_rolebinding.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/database_migration_job.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/database_migration_job.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/endpoint_builder_deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/endpoint_builder_deployment.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/endpoint_builder_vpa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/endpoint_builder_vpa.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/gateway_deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/gateway_deployment.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/gateway_hpa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/gateway_hpa.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/gateway_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/gateway_service.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/gateway_vpa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/gateway_vpa.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/inference_framework_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/inference_framework_config.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/istio-destinationrule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/istio-destinationrule.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/istio-metrics.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/istio-metrics.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/istio-virtualservice.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/istio-virtualservice.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/model_engine_default_priority_class.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/model_engine_default_priority_class.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/model_engine_high_priority_class.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/model_engine_high_priority_class.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/model_engine_low_priority_class.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/model_engine_low_priority_class.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/pod_disruption_budget.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/pod_disruption_budget.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/populate_fine_tuning_repository_job.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/populate_fine_tuning_repository_job.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/proportional_a100_autoscaler_deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/proportional_a100_autoscaler_deployment.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/proportional_a10_autoscaler_deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/proportional_a10_autoscaler_deployment.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/proportional_t4_autoscaler_deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/proportional_t4_autoscaler_deployment.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/recommended_hardware_config_map.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/recommended_hardware_config_map.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/restart_keda_operator.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/restart_keda_operator.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/service_account.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/service_account.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/service_account_image_builder.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/service_account_image_builder.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/service_account_inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/service_account_inference.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/service_config_map.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/service_config_map.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/service_template_config_map.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/service_template_config_map.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/spellbook_init_job.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/spellbook_init_job.yaml -------------------------------------------------------------------------------- /charts/model-engine/templates/trigger_authentication.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/templates/trigger_authentication.yaml -------------------------------------------------------------------------------- /charts/model-engine/values.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/values.yaml -------------------------------------------------------------------------------- /charts/model-engine/values_circleci.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/values_circleci.yaml -------------------------------------------------------------------------------- /charts/model-engine/values_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/charts/model-engine/values_sample.yaml -------------------------------------------------------------------------------- /clients/python/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/Makefile -------------------------------------------------------------------------------- /clients/python/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/README.md -------------------------------------------------------------------------------- /clients/python/llmengine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/__init__.py -------------------------------------------------------------------------------- /clients/python/llmengine/api_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/api_engine.py -------------------------------------------------------------------------------- /clients/python/llmengine/completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/completion.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/__init__.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/batch_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/batch_completion.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/chat_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/chat_completion.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/completion.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/core.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/gen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/gen/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/gen/openai.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/model_endpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/model_endpoints.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/pydantic_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/pydantic_types.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/rest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/rest.py -------------------------------------------------------------------------------- /clients/python/llmengine/data_types/vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/data_types/vllm.py -------------------------------------------------------------------------------- /clients/python/llmengine/errors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/errors.py -------------------------------------------------------------------------------- /clients/python/llmengine/file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/file.py -------------------------------------------------------------------------------- /clients/python/llmengine/fine_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/fine_tuning.py -------------------------------------------------------------------------------- /clients/python/llmengine/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/llmengine/model.py -------------------------------------------------------------------------------- /clients/python/llmengine/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /clients/python/mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/mypy.ini -------------------------------------------------------------------------------- /clients/python/poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/poetry.lock -------------------------------------------------------------------------------- /clients/python/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/pyproject.toml -------------------------------------------------------------------------------- /clients/python/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/clients/python/setup.py -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | llm-engine.scale.com 2 | -------------------------------------------------------------------------------- /docs/_static/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/_static/favicon-32x32.png -------------------------------------------------------------------------------- /docs/_static/launch-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/_static/launch-logo.png -------------------------------------------------------------------------------- /docs/_static/launch-logo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/_static/launch-logo.svg -------------------------------------------------------------------------------- /docs/api/data_types.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/api/data_types.md -------------------------------------------------------------------------------- /docs/api/error_handling.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/api/error_handling.md -------------------------------------------------------------------------------- /docs/api/langchain.md: -------------------------------------------------------------------------------- 1 | # 🦜 Langchain 2 | 3 | Coming soon! 4 | -------------------------------------------------------------------------------- /docs/api/python_client.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/api/python_client.md -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/contributing.md -------------------------------------------------------------------------------- /docs/examples/finetuning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/examples/finetuning.ipynb -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions 2 | -------------------------------------------------------------------------------- /docs/getting_started.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/getting_started.md -------------------------------------------------------------------------------- /docs/guides/completions.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/guides/completions.md -------------------------------------------------------------------------------- /docs/guides/endpoint_creation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/guides/endpoint_creation.md -------------------------------------------------------------------------------- /docs/guides/fine_tuning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/guides/fine_tuning.md -------------------------------------------------------------------------------- /docs/guides/rate_limits.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/guides/rate_limits.md -------------------------------------------------------------------------------- /docs/guides/self_hosting.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/guides/self_hosting.md -------------------------------------------------------------------------------- /docs/guides/token_streaming.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/guides/token_streaming.md -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/index.md -------------------------------------------------------------------------------- /docs/integrations.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/integrations.md -------------------------------------------------------------------------------- /docs/model_zoo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/model_zoo.md -------------------------------------------------------------------------------- /docs/pricing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/docs/pricing.md -------------------------------------------------------------------------------- /docs/stylesheets/index.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/download_a_finetuned_model.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/examples/download_a_finetuned_model.ipynb -------------------------------------------------------------------------------- /examples/finetune_llama_2_on_science_qa.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/examples/finetune_llama_2_on_science_qa.ipynb -------------------------------------------------------------------------------- /integration_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /integration_tests/rest_api_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/integration_tests/rest_api_utils.py -------------------------------------------------------------------------------- /integration_tests/test_batch_jobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/integration_tests/test_batch_jobs.py -------------------------------------------------------------------------------- /integration_tests/test_bundles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/integration_tests/test_bundles.py -------------------------------------------------------------------------------- /integration_tests/test_completions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/integration_tests/test_completions.py -------------------------------------------------------------------------------- /integration_tests/test_endpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/integration_tests/test_endpoints.py -------------------------------------------------------------------------------- /integration_tests/test_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/integration_tests/test_file.py -------------------------------------------------------------------------------- /integration_tests/test_fine_tunes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/integration_tests/test_fine_tunes.py -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/mkdocs.yml -------------------------------------------------------------------------------- /model-engine/.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/.coveragerc -------------------------------------------------------------------------------- /model-engine/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/Dockerfile -------------------------------------------------------------------------------- /model-engine/Dockerfile.fips: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/Dockerfile.fips -------------------------------------------------------------------------------- /model-engine/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/README.md -------------------------------------------------------------------------------- /model-engine/model_engine_server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/app.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/batch_jobs_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/batch_jobs_v1.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/dependencies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/dependencies.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/docker_image_batch_job_bundles_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/docker_image_batch_job_bundles_v1.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/files_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/files_v1.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/llms_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/llms_v1.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/model_bundles_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/model_bundles_v1.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/model_bundles_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/model_bundles_v2.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/model_endpoints_docs_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/model_endpoints_docs_v1.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/model_endpoints_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/model_endpoints_v1.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/tasks_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/tasks_v1.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/triggers_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/triggers_v1.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/v2/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/v2/batch_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/v2/batch_completion.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/v2/chat_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/v2/chat_completion.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/v2/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/v2/common.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/v2/completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/v2/completion.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/api/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/api/worker.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/aiohttp_sse_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/aiohttp_sse_client.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/concurrency_limiter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/concurrency_limiter.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/config.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/constants.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/datadog_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/datadog_utils.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/batch_jobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/batch_jobs.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/core.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/docker_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/docker_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/endpoint_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/endpoint_builder.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/files.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/llms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/llms/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/llms/batch_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/llms/batch_completion.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/llms/chat_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/llms/chat_completion.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/llms/completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/llms/completion.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/llms/model_endpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/llms/model_endpoints.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/llms/sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/llms/sglang.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/llms/vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/llms/vllm.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/model_bundles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/model_bundles.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/model_endpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/model_endpoints.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/resource_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/resource_manager.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/tasks.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/dtos/triggers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/dtos/triggers.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/env_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/env_vars.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/errors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/errors.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/io.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/pydantic_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/pydantic_types.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/resource_limits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/resource_limits.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/serialization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/serialization_utils.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/service_requests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/service_requests.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/settings.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/types/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/types/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/types/endpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/types/endpoint.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/common/types/gen/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/common/types/gen/openai.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/auth/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/auth/authentication_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/auth/authentication_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/auth/fake_authentication_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/auth/fake_authentication_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/aws/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/aws/roles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/aws/roles.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/aws/secrets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/aws/secrets.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/aws/storage_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/aws/storage_client.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/celery/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/celery/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/celery/abs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/celery/abs.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/celery/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/celery/app.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/celery/celery_autoscaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/celery/celery_autoscaler.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/celery/s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/celery/s3.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/config.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/configmap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/configmap.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/configs/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/configs/default.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/docker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/docker/docker_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/docker/docker_image.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/docker/ecr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/docker/ecr.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/docker/kaniko_template.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/docker/kaniko_template.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/docker/kaniko_template_circleci.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/docker/kaniko_template_circleci.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/docker/remote_build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/docker/remote_build.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/fake_notification_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/fake_notification_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/loggers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/loggers.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/notification_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/notification_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/tracing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/tracing/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/tracing/live_tracing_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/tracing/live_tracing_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/tracing/span.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/tracing/span.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/tracing/tracing_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/tracing/tracing_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/utils/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/utils/env.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/utils/format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/utils/format.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/utils/git.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/utils/git.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/utils/python_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/utils/python_utils.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/utils/timer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/utils/timer.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/core/utils/url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/core/utils/url.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/base.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/endpoint_row_lock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/endpoint_row_lock.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/local_setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/local_setup.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/README -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/alembic.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/alembic.ini -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/alembic/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/alembic/env.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/alembic/script.py.mako: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/alembic/script.py.mako -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/alembic/versions/2024_09_09_1736-fa3267c80731_initial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/alembic/versions/2024_09_09_1736-fa3267c80731_initial.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/alembic/versions/2024_09_09_1831-b574e9711e35_chat_completion_add_extra_routes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/alembic/versions/2024_09_09_1831-b574e9711e35_chat_completion_add_extra_routes.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/alembic/versions/2024_09_24_1456-f55525c81eb5_multinode_bundle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/alembic/versions/2024_09_24_1456-f55525c81eb5_multinode_bundle.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/alembic/versions/2025_09_16_1741-e580182d6bfd_add_passthrough_forwarder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/alembic/versions/2025_09_16_1741-e580182d6bfd_add_passthrough_forwarder.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/alembic/versions/2025_09_25_1940-221aa19d3f32_add_routes_column.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/alembic/versions/2025_09_25_1940-221aa19d3f32_add_routes_column.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/initial.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/initial.sql -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/run_database_migration.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/run_database_migration.sh -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/migrations/stamp_initial_schema.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/migrations/stamp_initial_schema.sh -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/models/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/common/query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/models/common/query.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/common/record.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/models/common/record.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/models/constants.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/models/exceptions.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/hosted_model_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/models/hosted_model_inference.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/models/model.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/db/models/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/db/models/utils/misc.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/authorization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/authorization/live_authorization_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/authorization/live_authorization_module.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/batch_job_entity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/batch_job_entity.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/common_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/common_types.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/docker_image_batch_job_bundle_entity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/docker_image_batch_job_bundle_entity.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/file_entity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/file_entity.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/gpu_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/gpu_type.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/llm_entity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/llm_entity.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/llm_fine_tune_entity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/llm_fine_tune_entity.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/model_bundle_entity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/model_bundle_entity.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/model_endpoint_entity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/model_endpoint_entity.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/owned_entity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/owned_entity.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/entities/trigger_entity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/entities/trigger_entity.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/exceptions.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/async_model_endpoint_inference_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/async_model_endpoint_inference_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/cron_job_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/cron_job_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/docker_image_batch_job_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/docker_image_batch_job_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/file_storage_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/file_storage_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/inference_autoscaling_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/inference_autoscaling_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/llm_artifact_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/llm_artifact_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/model_endpoints_schema_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/model_endpoints_schema_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/model_primitive_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/model_primitive_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/monitoring_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/monitoring_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/streaming_model_endpoint_inference_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/streaming_model_endpoint_inference_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/sync_model_endpoint_inference_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/sync_model_endpoint_inference_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/gateways/task_queue_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/gateways/task_queue_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/repositories/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/repositories/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/repositories/docker_image_batch_job_bundle_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/repositories/docker_image_batch_job_bundle_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/repositories/docker_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/repositories/docker_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/repositories/llm_fine_tune_events_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/repositories/llm_fine_tune_events_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/repositories/model_bundle_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/repositories/model_bundle_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/repositories/tokenizer_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/repositories/tokenizer_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/repositories/trigger_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/repositories/trigger_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/services/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/services/batch_job_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/services/batch_job_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/services/endpoint_builder_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/services/endpoint_builder_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/services/llm_batch_completions_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/services/llm_batch_completions_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/services/llm_fine_tuning_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/services/llm_fine_tuning_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/services/llm_model_endpoint_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/services/llm_model_endpoint_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/services/model_endpoint_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/services/model_endpoint_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/async_inference_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/async_inference_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/batch_job_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/batch_job_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/docker_image_batch_job_bundle_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/docker_image_batch_job_bundle_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/file_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/file_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/llm_fine_tuning_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/llm_fine_tuning_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/model_bundle_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/model_bundle_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/model_endpoints_schema_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/model_endpoints_schema_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/streaming_inference_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/streaming_inference_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/sync_inference_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/sync_inference_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/domain/use_cases/trigger_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/domain/use_cases/trigger_use_cases.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/entrypoints/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/entrypoints/init_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/entrypoints/init_database.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/entrypoints/init_spellbook_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/entrypoints/init_spellbook_models.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/entrypoints/k8s_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/entrypoints/k8s_cache.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/entrypoints/populate_llm_fine_tuning_job_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/entrypoints/populate_llm_fine_tuning_job_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/entrypoints/start_batch_job_orchestration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/entrypoints/start_batch_job_orchestration.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/entrypoints/start_docker_image_batch_job_init_container.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/entrypoints/start_docker_image_batch_job_init_container.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/entrypoints/start_fastapi_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/entrypoints/start_fastapi_server.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/async_inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/async_inference/celery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/async_inference/celery.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/async_inference/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/async_inference/tasks.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/async_inference/vpa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/async_inference/vpa.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/base.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/base.Dockerfile -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/README.md -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/dto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/dto.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/examples/generate_tool_sample_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/examples/generate_tool_sample_data.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/examples/sample_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/examples/sample_config.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/examples/sample_config_gemma.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/examples/sample_config_gemma.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/examples/sample_config_mixtral.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/examples/sample_config_mixtral.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/examples/sample_config_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/examples/sample_config_tool.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/examples/sample_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/examples/sample_data.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/examples/sample_data_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/examples/sample_data_tool.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/requirements.txt -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/batch_inference/vllm_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/batch_inference/vllm_batch.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/common.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/configs/service--forwarder-runnable-img-converted-from-artifact.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/configs/service--forwarder-runnable-img-converted-from-artifact.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/configs/service--forwarder.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/configs/service--forwarder.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/configs/service--http_forwarder.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/domain/gateways/inference_monitoring_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/domain/gateways/inference_monitoring_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/domain/gateways/streaming_storage_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/domain/gateways/streaming_storage_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/domain/gateways/usage_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/domain/gateways/usage_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/download_and_inject_bundle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/download_and_inject_bundle.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/forwarding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/forwarding/celery_forwarder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/forwarding/celery_forwarder.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/forwarding/echo_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/forwarding/echo_server.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/forwarding/forwarding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/forwarding/forwarding.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/forwarding/http_forwarder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/forwarding/http_forwarder.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/infra/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/infra/gateways/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/infra/gateways/datadog_inference_monitoring_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/infra/gateways/datadog_inference_monitoring_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/infra/gateways/fake_usage_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/infra/gateways/fake_usage_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/infra/gateways/firehose_streaming_storage_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/infra/gateways/firehose_streaming_storage_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/inject_bundle.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/inject_bundle.Dockerfile -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/limits.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/limits.conf -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/post_inference_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/post_inference_hooks.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/pytorch_or_tf.base.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/pytorch_or_tf.base.Dockerfile -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/pytorch_or_tf.user.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/pytorch_or_tf.user.Dockerfile -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/requirements_base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/requirements_base.txt -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/service_requests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/service_requests.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sglang/Dockerfile.sglang: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/sglang/Dockerfile.sglang -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sglang/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/sglang/README.md -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sglang/sglang-startup-script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/sglang/sglang-startup-script.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sync_inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sync_inference/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/sync_inference/constants.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sync_inference/destination_rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/sync_inference/destination_rule.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sync_inference/fastapi_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/sync_inference/fastapi_server.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/sync_inference/start_fastapi_server.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sync_inference/virtual_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/sync_inference/virtual_service.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/sync_inference/vpa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/sync_inference/vpa.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/Dockerfile -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/README.md -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/launch_triton_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/launch_triton_server.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece==0.1.99 2 | protobuf==4.24.4 3 | torch==2.2.2 -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/ensemble/1/.tmp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/ensemble/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/ensemble/config.pbtxt -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/postprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/postprocessing/1/model.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/postprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/postprocessing/config.pbtxt -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/preprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/preprocessing/1/model.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/preprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/preprocessing/config.pbtxt -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/tensorrt_llm/1/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/tensorrt_llm/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/tensorrt_llm/config.pbtxt -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/tensorrt_llm_bls/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/tensorrt_llm_bls/1/model.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/tensorrt_llm_bls/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tensorrt-llm/triton_model_repo/tensorrt_llm_bls/config.pbtxt -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tool_completion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tool_completion/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tool_completion/base.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tool_completion/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tool_completion/tools.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/tool_completion/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/tool_completion/utils.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/user.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/user.Dockerfile -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/utils.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/Dockerfile.vllm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/Dockerfile.vllm -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/README.md -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/examples/v2/gemma/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/examples/v2/gemma/README.md -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/examples/v2/gemma/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/examples/v2/gemma/config.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/examples/v2/gemma/config_w_oai_chat_content.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/examples/v2/gemma/config_w_oai_chat_content.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/examples/v2/gemma/data_oai_chat.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/examples/v2/gemma/data_oai_chat.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/examples/v2/gemma/data_oai_completion.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/examples/v2/gemma/data_oai_completion.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/examples/v2/llama-3.2-vision/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/examples/v2/llama-3.2-vision/README.md -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/examples/v2/llama-3.2-vision/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/examples/v2/llama-3.2-vision/config.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/examples/v2/llama-3.2-vision/data_oai_chat.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/examples/v2/llama-3.2-vision/data_oai_chat.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/examples/v2/llama-3.2-vision/output_oi_chat.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/examples/v2/llama-3.2-vision/output_oi_chat.json -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/gen_sample_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/gen_sample_data.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/init_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/init_ray.sh -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/init_ray_batch_inf_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/init_ray_batch_inf_v2.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/requirements-batch.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/requirements-batch.txt -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | vllm==0.11.0 2 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/requirements.txt: -------------------------------------------------------------------------------- 1 | pydantic>=2.0 2 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/vllm_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/vllm_batch.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/inference/vllm/vllm_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/inference/vllm/vllm_server.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/abs_file_storage_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/abs_file_storage_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/abs_filesystem_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/abs_filesystem_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/abs_llm_artifact_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/abs_llm_artifact_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/asb_inference_autoscaling_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/asb_inference_autoscaling_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/batch_job_orchestration_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/batch_job_orchestration_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/batch_job_progress_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/batch_job_progress_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/celery_task_queue_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/celery_task_queue_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/datadog_monitoring_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/dns_resolver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/dns_resolver.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/fake_model_primitive_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/fake_model_primitive_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/fake_monitoring_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/fake_monitoring_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/filesystem_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/filesystem_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/k8s_resource_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/k8s_resource_parser.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/live_async_model_endpoint_inference_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/live_async_model_endpoint_inference_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/live_batch_job_orchestration_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/live_batch_job_orchestration_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/live_batch_job_progress_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/live_batch_job_progress_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/live_cron_job_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/live_cron_job_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/live_docker_image_batch_job_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/live_docker_image_batch_job_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/live_model_endpoints_schema_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/live_model_endpoints_schema_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/live_streaming_model_endpoint_inference_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/live_streaming_model_endpoint_inference_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/live_sync_model_endpoint_inference_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/live_sync_model_endpoint_inference_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/model_endpoint_infra_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/model_endpoint_infra_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/redis_inference_autoscaling_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/redis_inference_autoscaling_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/asb_queue_endpoint_resource_delegate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/asb_queue_endpoint_resource_delegate.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/endpoint_resource_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/endpoint_resource_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/fake_queue_endpoint_resource_delegate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/fake_queue_endpoint_resource_delegate.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/image_cache_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/image_cache_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/live_endpoint_resource_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/live_endpoint_resource_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/queue_endpoint_resource_delegate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/queue_endpoint_resource_delegate.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/sqs_queue_endpoint_resource_delegate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/sqs_queue_endpoint_resource_delegate.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/resources/templates/service_template_config_map_circleci.yaml -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/s3_file_storage_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/s3_file_storage_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/s3_filesystem_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/s3_filesystem_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/gateways/s3_llm_artifact_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/gateways/s3_llm_artifact_gateway.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/infra_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/infra_utils.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/abs_file_llm_fine_tune_events_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/abs_file_llm_fine_tune_events_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/abs_file_llm_fine_tune_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/abs_file_llm_fine_tune_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/acr_docker_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/acr_docker_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/batch_job_record_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/batch_job_record_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/db_batch_job_record_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/db_batch_job_record_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/db_docker_image_batch_job_bundle_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/db_docker_image_batch_job_bundle_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/db_model_bundle_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/db_model_endpoint_record_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/db_model_endpoint_record_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/db_repository_mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/db_repository_mixin.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/db_trigger_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/db_trigger_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/fake_docker_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/fake_docker_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/feature_flag_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/feature_flag_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/live_tokenizer_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/live_tokenizer_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/llm_fine_tune_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/llm_fine_tune_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/model_endpoint_cache_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/model_endpoint_cache_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/model_endpoint_record_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/model_endpoint_record_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/redis_feature_flag_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/redis_feature_flag_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/redis_model_endpoint_cache_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/redis_model_endpoint_cache_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/s3_file_llm_fine_tune_events_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/s3_file_llm_fine_tune_events_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/repositories/s3_file_llm_fine_tune_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/repositories/s3_file_llm_fine_tune_repository.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/batch_job_orchestration_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/batch_job_orchestration_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/docker_image_batch_job_llm_fine_tuning_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/docker_image_batch_job_llm_fine_tuning_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/fake_llm_batch_completions_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/fake_llm_batch_completions_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/image_cache_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/image_cache_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/live_batch_job_orchestration_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/live_batch_job_orchestration_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/live_batch_job_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/live_batch_job_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/live_llm_batch_completions_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/live_llm_batch_completions_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/live_llm_model_endpoint_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/live_llm_model_endpoint_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/live_model_endpoint_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/infra/services/model_endpoint_cache_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/infra/services/model_endpoint_cache_service.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/service_builder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/service_builder/__init__.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/service_builder/celery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/service_builder/celery.py -------------------------------------------------------------------------------- /model-engine/model_engine_server/service_builder/tasks_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/model_engine_server/service_builder/tasks_v1.py -------------------------------------------------------------------------------- /model-engine/mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/mypy.ini -------------------------------------------------------------------------------- /model-engine/requirements-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/requirements-test.txt -------------------------------------------------------------------------------- /model-engine/requirements.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/requirements.in -------------------------------------------------------------------------------- /model-engine/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/requirements.txt -------------------------------------------------------------------------------- /model-engine/requirements_override.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/requirements_override.txt -------------------------------------------------------------------------------- /model-engine/service_configs/service_config_circleci.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/service_configs/service_config_circleci.yaml -------------------------------------------------------------------------------- /model-engine/setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/setup.cfg -------------------------------------------------------------------------------- /model-engine/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/setup.py -------------------------------------------------------------------------------- /model-engine/tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/README.md -------------------------------------------------------------------------------- /model-engine/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/tests/integration/inference/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/integration/inference/conftest.py -------------------------------------------------------------------------------- /model-engine/tests/integration/inference/test_async_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/integration/inference/test_async_inference.py -------------------------------------------------------------------------------- /model-engine/tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/tests/unit/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/tests/unit/api/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/conftest.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_app.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_batch_jobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_batch_jobs.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_dependencies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_dependencies.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_docker_image_batch_job_bundles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_docker_image_batch_job_bundles.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_llms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_llms.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_model_bundles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_model_bundles.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_model_endpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_model_endpoints.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_model_endpoints_docs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_model_endpoints_docs.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_tasks.py -------------------------------------------------------------------------------- /model-engine/tests/unit/api/test_triggers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/api/test_triggers.py -------------------------------------------------------------------------------- /model-engine/tests/unit/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/tests/unit/common/test_batch_jobs_dtos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/common/test_batch_jobs_dtos.py -------------------------------------------------------------------------------- /model-engine/tests/unit/common/test_settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/common/test_settings.py -------------------------------------------------------------------------------- /model-engine/tests/unit/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/conftest.py -------------------------------------------------------------------------------- /model-engine/tests/unit/core/utils/test_timer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/core/utils/test_timer.py -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/domain/conftest.py -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/test_async_inference_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/domain/test_async_inference_use_cases.py -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/test_docker_image_batch_job_bundle_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/domain/test_docker_image_batch_job_bundle_use_cases.py -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/test_entities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/domain/test_entities.py -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/test_llm_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/domain/test_llm_use_cases.py -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/test_model_bundle_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/domain/test_model_bundle_use_cases.py -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/test_model_endpoint_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/domain/test_model_endpoint_use_cases.py -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/test_streaming_inference_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/domain/test_streaming_inference_use_cases.py -------------------------------------------------------------------------------- /model-engine/tests/unit/domain/test_sync_inference_use_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/domain/test_sync_inference_use_cases.py -------------------------------------------------------------------------------- /model-engine/tests/unit/inference/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/inference/conftest.py -------------------------------------------------------------------------------- /model-engine/tests/unit/inference/test_forwarding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/inference/test_forwarding.py -------------------------------------------------------------------------------- /model-engine/tests/unit/inference/test_http_forwarder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/inference/test_http_forwarder.py -------------------------------------------------------------------------------- /model-engine/tests/unit/inference/test_vllm_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/inference/test_vllm_batch.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/conftest.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/k8s_fake_objects.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/k8s_fake_objects.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/resources/example_lws_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/resources/example_lws_config.json -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/resources/test_image_cache_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/resources/test_image_cache_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/resources/test_k8s_endpoint_resource_delegate.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/resources/test_sqs_queue_endpoint_resource_delegate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/resources/test_sqs_queue_endpoint_resource_delegate.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_datadog_inference_monitoring_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_datadog_inference_monitoring_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_datadog_monitoring_metrics_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_datadog_monitoring_metrics_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_firehose_streaming_storage_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_firehose_streaming_storage_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_k8s_resource_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_k8s_resource_parser.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_live_async_model_inference_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_live_async_model_inference_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_live_batch_job_progress_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_live_batch_job_progress_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_live_docker_image_batch_job_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_live_docker_image_batch_job_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_live_model_endpoint_infra_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_live_model_endpoint_infra_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_live_model_endpoints_schema_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_live_model_endpoints_schema_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_live_streaming_model_endpoint_inference_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_live_streaming_model_endpoint_inference_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_live_sync_model_endpoint_inference_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_live_sync_model_endpoint_inference_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/gateways/test_s3_llm_artifact_gateway.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/gateways/test_s3_llm_artifact_gateway.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/repositories/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/repositories/conftest.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/repositories/test_db_batch_job_record_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/repositories/test_db_batch_job_record_repository.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/repositories/test_db_docker_image_batch_job_bundle_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/repositories/test_db_docker_image_batch_job_bundle_repository.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/repositories/test_db_model_bundle_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/repositories/test_db_model_bundle_repository.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/repositories/test_db_model_endpoint_record_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/repositories/test_db_model_endpoint_record_repository.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/repositories/test_live_tokenizer_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/repositories/test_live_tokenizer_repository.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/repositories/test_redis_feature_flag_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/repositories/test_redis_feature_flag_repository.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/repositories/test_redis_model_endpoint_cache_repository.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/repositories/test_redis_model_endpoint_cache_repository.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/services/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/services/conftest.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/services/test_docker_image_batch_job_llm_fine_tuning_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/services/test_docker_image_batch_job_llm_fine_tuning_service.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/services/test_image_cache_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/services/test_image_cache_service.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/services/test_live_batch_job_orchestration_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/services/test_live_batch_job_orchestration_service.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/services/test_live_batch_job_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/services/test_live_batch_job_service.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/services/test_live_model_endpoint_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/services/test_live_model_endpoint_service.py -------------------------------------------------------------------------------- /model-engine/tests/unit/infra/services/test_model_endpoint_cache_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/infra/services/test_model_endpoint_cache_service.py -------------------------------------------------------------------------------- /model-engine/tests/unit/service_builder/test_celery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/service_builder/test_celery.py -------------------------------------------------------------------------------- /model-engine/tests/unit/service_builder/test_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/model-engine/tests/unit/service_builder/test_init.py -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/requirements-docs.txt -------------------------------------------------------------------------------- /scripts/generate-openai-types.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/scripts/generate-openai-types.sh -------------------------------------------------------------------------------- /scripts/openai-spec.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/scripts/openai-spec.yaml -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/scripts/requirements.txt -------------------------------------------------------------------------------- /scripts/throughput_benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scaleapi/llm-engine/HEAD/scripts/throughput_benchmarks.py --------------------------------------------------------------------------------