├── .gitignore ├── LICENSE ├── README.md ├── clearml_serving ├── __init__.py ├── __main__.py ├── engines │ ├── __init__.py │ └── triton │ │ ├── Dockerfile │ │ ├── Dockerfile.tr2207 │ │ ├── __init__.py │ │ ├── entrypoint.sh │ │ ├── requirements.txt │ │ └── triton_helper.py ├── preprocess │ └── preprocess_template.py ├── serving │ ├── Dockerfile │ ├── __init__.py │ ├── endpoints.py │ ├── entrypoint.sh │ ├── init.py │ ├── main.py │ ├── model_request_processor.py │ ├── preprocess_service.py │ ├── requirements.txt │ ├── utils.py │ └── uvicorn_mp_entrypoint.py ├── statistics │ ├── Dockerfile │ ├── __init__.py │ ├── entrypoint.sh │ ├── main.py │ ├── metrics.py │ └── requirements.txt └── version.py ├── docker ├── datasource.yml ├── docker-compose-triton-gpu.yml ├── docker-compose-triton.yml ├── docker-compose.yml ├── example.env └── prometheus.yml ├── docs ├── design_diagram.png ├── grafana_screenshot.png └── webapp_screenshots.gif ├── examples ├── custom │ ├── preprocess.py │ ├── readme.md │ ├── requirements.txt │ └── train_model.py ├── ensemble │ ├── preprocess.py │ ├── readme.md │ ├── requirements.txt │ └── train_ensemble.py ├── huggingface │ ├── docker-compose-override.yml │ ├── example_payload.json │ ├── preprocess.py │ ├── readme.md │ └── requirements.txt ├── keras │ ├── preprocess.py │ ├── readme.md │ ├── requirements.txt │ └── train_keras_mnist.py ├── lightgbm │ ├── preprocess.py │ ├── readme.md │ ├── requirements.txt │ └── train_model.py ├── pipeline │ ├── async_preprocess.py │ ├── preprocess.py │ └── readme.md ├── preprocess_template │ └── preprocess_template.py ├── pytorch │ ├── 5.jpg │ ├── preprocess.py │ ├── readme.md │ ├── requirements.txt │ └── train_pytorch_mnist.py ├── sklearn │ ├── preprocess.py │ ├── readme.md │ ├── requirements.txt │ └── train_model.py └── xgboost │ ├── preprocess.py │ ├── readme.md │ ├── requirements.txt │ └── train_model.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # python build 2 | dist/ 3 | build/ 4 | *.egg-info/ 5 | .tmp/ 6 | 7 | 8 | # Compiled Python bytecode 9 | *.py[cod] 10 | 11 | # Log files 12 | *.log 13 | 14 | # JetBrains IDE 15 | .idea/ 16 | .vscode/ 17 | 18 | tests/huggingface 19 | 20 | # Generated by MacOS 21 | .DS_Store 22 | 23 | # Generated by Windows 24 | Thumbs.db 25 | 26 | # Virtual environment 27 | .venv 28 | 29 | # Applications 30 | *.app 31 | *.exe 32 | *.war 33 | *.pkl 34 | *.pt 35 | *.pb 36 | data/ 37 | runs/ 38 | variables/ 39 | 40 | # Large media files 41 | *.mp4 42 | *.tiff 43 | *.avi 44 | *.flv 45 | *.mov 46 | *.wmv 47 | 48 | # models 49 | *.pbtxt 50 | *.h5 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2025 ClearML Inc 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |
").replace("\"", "").replace("\'", "").\ 397 | replace("", "\"").replace(" ", "\'") 398 | else: 399 | config_pbtxt = "" 400 | 401 | # merge the two 402 | final_config_pbtxt += config_pbtxt 403 | print("INFO: target config.pbtxt file for endpoint '{}':\n{}\n".format( 404 | endpoint.serving_url, final_config_pbtxt)) 405 | 406 | with open(target_pbtxt_file, "w") as config_file: 407 | config_file.write(final_config_pbtxt) 408 | 409 | return True 410 | 411 | @staticmethod 412 | def np_to_triton_dtype(np_dtype): 413 | # type (np.dtype) -> str 414 | """ 415 | copied from tritonclientutils import np_to_triton_dtype 416 | """ 417 | if np_dtype == bool: 418 | return "BOOL" 419 | elif np_dtype == np.int8: 420 | return "INT8" 421 | elif np_dtype == np.int16: 422 | return "INT16" 423 | elif np_dtype == np.int32: 424 | return "INT32" 425 | elif np_dtype == np.int64: 426 | return "INT64" 427 | elif np_dtype == np.uint8: 428 | return "UINT8" 429 | elif np_dtype == np.uint16: 430 | return "UINT16" 431 | elif np_dtype == np.uint32: 432 | return "UINT32" 433 | elif np_dtype == np.uint64: 434 | return "UINT64" 435 | elif np_dtype == np.float16: 436 | return "FP16" 437 | elif np_dtype == np.float32: 438 | return "FP32" 439 | elif np_dtype == np.float64: 440 | return "FP64" 441 | elif np_dtype == str: 442 | return "STRING" 443 | elif np_dtype == np.object_ or np_dtype.type == np.bytes_: 444 | return "BYTES" 445 | return None 446 | 447 | @staticmethod 448 | def triton_to_np_dtype(dtype): 449 | if dtype == "BOOL": 450 | return bool 451 | elif dtype == "INT8": 452 | return np.int8 453 | elif dtype == "INT16": 454 | return np.int16 455 | elif dtype == "INT32": 456 | return np.int32 457 | elif dtype == "INT64": 458 | return np.int64 459 | elif dtype == "UINT8": 460 | return np.uint8 461 | elif dtype == "UINT16": 462 | return np.uint16 463 | elif dtype == "UINT32": 464 | return np.uint32 465 | elif dtype == "UINT64": 466 | return np.uint64 467 | elif dtype == "FP16": 468 | return np.float16 469 | elif dtype == "FP32": 470 | return np.float32 471 | elif dtype == "FP64": 472 | return np.float64 473 | elif dtype == "BYTES": 474 | return np.object_ 475 | return None 476 | 477 | 478 | def main(): 479 | title = 'clearml-serving - Nvidia Triton Engine Controller' 480 | print(title) 481 | parser = ArgumentParser(prog='clearml-serving', description=title) 482 | parser.add_argument( 483 | '--serving-id', default=os.environ.get('CLEARML_SERVING_TASK_ID'), type=str, 484 | help='Specify main serving service Task ID') 485 | parser.add_argument( 486 | '--project', default=None, type=str, 487 | help='Optional specify project for the serving engine Task') 488 | parser.add_argument( 489 | '--name', default='triton engine', type=str, 490 | help='Optional specify task name for the serving engine Task') 491 | parser.add_argument( 492 | '--update-frequency', default=os.environ.get('CLEARML_TRITON_POLL_FREQ') or 10., type=float, 493 | help='Model update frequency in minutes') 494 | parser.add_argument( 495 | '--metric-frequency', default=os.environ.get('CLEARML_TRITON_METRIC_FREQ') or 1., type=float, 496 | help='Metric reporting update frequency in minutes') 497 | parser.add_argument( 498 | '--inference-task-id', default=None, type=str, 499 | help='Optional: Specify the inference Task ID to report to. 
default: create a new one') 500 | parser.add_argument( 501 | '--t-http-port', type=str, help='The port for the server to listen on for HTTP requests') 502 | parser.add_argument( 503 | '--t-http-thread-count', type=str, help=' Number of threads handling HTTP requests') 504 | parser.add_argument( 505 | '--t-allow-grpc', type=str, help=' Allow the server to listen for GRPC requests') 506 | parser.add_argument( 507 | '--t-grpc-port', type=str, help=' The port for the server to listen on for GRPC requests') 508 | parser.add_argument( 509 | '--t-grpc-infer-allocation-pool-size', type=str, 510 | help=' The maximum number of inference request/response objects that remain ' 511 | 'allocated for reuse. As long as the number of in-flight requests doesn\'t exceed ' 512 | 'this value there will be no allocation/deallocation of request/response objects') 513 | parser.add_argument( 514 | '--t-pinned-memory-pool-byte-size', type=str, 515 | help=' The total byte size that can be allocated as pinned system ' 516 | 'memory. If GPU support is enabled, the server will allocate pinned ' 517 | 'system memory to accelerate data transfer between host and devices ' 518 | 'until it exceeds the specified byte size. This option will not affect ' 519 | 'the allocation conducted by the backend frameworks. Default is 256 MB') 520 | parser.add_argument( 521 | '--t-cuda-memory-pool-byte-size', type=str, 522 | help='< : > The total byte size that can be allocated as CUDA memory for ' 523 | 'the GPU device. If GPU support is enabled, the server will allocate ' 524 | 'CUDA memory to minimize data transfer between host and devices ' 525 | 'until it exceeds the specified byte size. This option will not affect ' 526 | 'the allocation conducted by the backend frameworks. The argument ' 527 | 'should be 2 integers separated by colons in the format : . This option can be used multiple times, but only ' 529 | 'once per GPU device. Subsequent uses will overwrite previous uses for ' 530 | 'the same GPU device. Default is 64 MB') 531 | parser.add_argument( 532 | '--t-min-supported-compute-capability', type=str, 533 | help=' The minimum supported CUDA compute capability. GPUs that ' 534 | 'don\'t support this compute capability will not be used by the server') 535 | parser.add_argument( 536 | '--t-buffer-manager-thread-count', type=str, 537 | help=' The number of threads used to accelerate copies and other' 538 | 'operations required to manage input and output tensor contents.' 539 | 'Default is 0') 540 | parser.add_argument( 541 | '--t-log-verbose', type=str, 542 | help=' Triton server logging verbosity (default disabled)') 543 | parser.add_argument( 544 | '--t-exit-on-error', type=bool, default=True, 545 | help='Exits the inference server if any error occurs during initialization.' 546 | 'Recommended to set to True to catch any unanticipated errors.' 547 | 'False prevents single models breaking the whole tritonserver.' 
548 | ) 549 | 550 | args = parser.parse_args() 551 | 552 | # check Args OS overrides 553 | prefix = "CLEARML_TRITON_" 554 | for k, v in os.environ.items(): 555 | if not k.startswith(prefix): 556 | continue 557 | args_var = k.replace(prefix, "", 1).replace("-", "_").lower() 558 | if args_var in args.__dict__: 559 | # cast the environment override string to the argument's default type 560 | t = type(getattr(args, args_var, None)) 561 | setattr(args, args_var, t(v) if t not in (None, type(None)) else v) 562 | 563 | # noinspection PyProtectedMember 564 | serving_task = ModelRequestProcessor._get_control_plane_task(task_id=args.inference_task_id) 565 | 566 | task = Task.init( 567 | project_name=args.project or serving_task.get_project_name() or "serving", 568 | task_name="{} - {}".format(serving_task.name, args.name), 569 | task_type=Task.TaskTypes.inference, 570 | continue_last_task=args.inference_task_id or None 571 | ) 572 | print("configuration args: {}".format(args)) 573 | helper = TritonHelper(args, task, serving_id=args.serving_id) 574 | 575 | # safe casting 576 | try: 577 | update_frequency_sec = float(args.update_frequency) * 60.0 578 | except (ValueError, TypeError): 579 | update_frequency_sec = 600 580 | try: 581 | metric_frequency_sec = float(args.metric_frequency) * 60.0 582 | except (ValueError, TypeError): 583 | metric_frequency_sec = 60 584 | 585 | # this function will never return 586 | helper.maintenance_daemon( 587 | local_model_repo='/models', 588 | update_frequency_sec=update_frequency_sec, 589 | metric_frequency_sec=metric_frequency_sec, 590 | ) 591 | 592 | 593 | if __name__ == '__main__': 594 | main() 595 |
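For a quick sanity check of the dtype helpers defined in this file, the static mappings can be exercised directly. This is a minimal sketch, assuming the package is importable as `clearml_serving.engines.triton.triton_helper` and that the helpers shown above belong to the `TritonHelper` class instantiated in `main()`:

import numpy as np
from clearml_serving.engines.triton.triton_helper import TritonHelper

# numpy dtype -> Triton type string written into the generated config.pbtxt
assert TritonHelper.np_to_triton_dtype(np.float32) == "FP32"
assert TritonHelper.np_to_triton_dtype(np.uint8) == "UINT8"
# reverse mapping used when interpreting endpoint metadata
assert TritonHelper.triton_to_np_dtype("INT64") is np.int64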
-------------------------------------------------------------------------------- /clearml_serving/preprocess/preprocess_template.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional, Callable, Union 2 | 3 | 4 | # Preprocess class Must be named "Preprocess" 5 | # No need to inherit or to implement all methods 6 | class Preprocess(object): 7 | """ 8 | Preprocess class Must be named "Preprocess" 9 | Otherwise there are No limitations, No need to inherit or to implement all methods 10 | Notice! This is not thread safe! The same instance may be accessed from multiple threads simultaneously. 11 | To store data in a safe way, push it into the `state` dict argument of the preprocessing/postprocessing functions 12 | 13 | Notice the execution flow is synchronous, as follows: 14 | 15 | 1. RestAPI(...) -> body: Union[bytes, dict] 16 | 2. preprocess(body: Union[bytes, dict], ...) -> data: Any 17 | 3. process(data: Any, ...) -> data: Any 18 | 4. postprocess(data: Any, ...) -> result: dict 19 | 5. RestAPI(result: dict) -> returned request 20 | """ 21 | 22 | def __init__(self): 23 | # set internal state, this will be called only once. (i.e. not per request) 24 | # it will also set the internal model_endpoint to reference the specific model endpoint object being served 25 | self.model_endpoint = None # type: clearml_serving.serving.endpoints.ModelEndpoint 26 | 27 | def load(self, local_file_name: str) -> Any: # noqa 28 | """ 29 | OPTIONAL: provide loading method for the model 30 | useful if we need to load a model in a specific way for the prediction engine to work 31 | 32 | REMOVE FUNCTION IF NOT USED 33 | 34 | Notice! When used with specific engines (i.e. not Custom) 35 | The returned object will be passed as is to the inference engine, 36 | this means it must not be None, otherwise the endpoint will be ignored! 37 | 38 | :param local_file_name: file name / path to load the model from 39 | 40 | :return: Object that will be called with .predict() method for inference. 41 | """ 42 | pass 43 | 44 | def preprocess( 45 | self, 46 | body: Union[bytes, dict], 47 | state: dict, 48 | collect_custom_statistics_fn: Optional[Callable[[dict], None]], 49 | ) -> Any: # noqa 50 | """ 51 | Optional: do something with the request data, return any type of object. 52 | The returned object will be passed as is to the inference engine 53 | 54 | :param body: dictionary or bytes as received from the RestAPI 55 | :param state: Use state dict to store data passed to the post-processing function call. 56 | This is a per-request state dict (meaning a new empty dict will be passed per request) 57 | Usage example: 58 | >>> def preprocess(..., state): 59 | state['preprocess_aux_data'] = [1,2,3] 60 | >>> def postprocess(..., state): 61 | print(state['preprocess_aux_data']) 62 | :param collect_custom_statistics_fn: Optional, if provided allows to send a custom set of key/values 63 | to the statistics collector service. 64 | None is passed if the statistics collector is not configured, or if the current request should not be collected 65 | 66 | Usage example: 67 | >>> print(body) 68 | {"x0": 1, "x1": 2} 69 | >>> if collect_custom_statistics_fn: 70 | >>> collect_custom_statistics_fn({"x0": 1, "x1": 2}) 71 | 72 | :return: Object to be passed directly to the model inference 73 | """ 74 | return body 75 | 76 | def postprocess( 77 | self, 78 | data: Any, 79 | state: dict, 80 | collect_custom_statistics_fn: Optional[Callable[[dict], None]], 81 | ) -> dict: # noqa 82 | """ 83 | Optional: post process the data returned from the model inference engine 84 | returned dict will be passed back as the request result as is. 85 | 86 | :param data: object as received from the inference model function 87 | :param state: Use state dict to store data passed to the post-processing function call. 88 | This is a per-request state dict (meaning a dict instance per request) 89 | Usage example: 90 | >>> def preprocess(..., state): 91 | state['preprocess_aux_data'] = [1,2,3] 92 | >>> def postprocess(..., state): 93 | print(state['preprocess_aux_data']) 94 | :param collect_custom_statistics_fn: Optional, if provided allows to send a custom set of key/values 95 | to the statistics collector service. 96 | None is passed if the statistics collector is not configured, or if the current request should not be collected 97 | 98 | Usage example: 99 | >>> if collect_custom_statistics_fn: 100 | >>> collect_custom_statistics_fn({"y": 1}) 101 | 102 | :return: Dictionary passed directly as the returned result of the RestAPI 103 | """ 104 | return data 105 | 106 | def process( 107 | self, 108 | data: Any, 109 | state: dict, 110 | collect_custom_statistics_fn: Optional[Callable[[dict], None]], 111 | ) -> Any: # noqa 112 | """ 113 | OPTIONAL: do something with the actual data, return any type of object. 114 | The returned object will be passed as is to the postprocess function 115 | 116 | REMOVE FUNCTION IF NOT USED 117 | 118 | :param data: object as received from the preprocessing function 119 | :param state: Use state dict to store data passed to the post-processing function call. 120 | This is a per-request state dict (meaning a dict instance per request) 121 | Usage example: 122 | >>> def preprocess(..., state): 123 | state['preprocess_aux_data'] = [1,2,3] 124 | >>> def postprocess(..., state): 125 | print(state['preprocess_aux_data']) 126 | :param collect_custom_statistics_fn: Optional, if provided allows to send a custom set of key/values 127 | to the statistics collector service. 128 | None is passed if the statistics collector is not configured, or if the current request should not be collected 129 | 130 | Usage example: 131 | >>> if collect_custom_statistics_fn: 132 | >>> collect_custom_statistics_fn({"type": "classification"}) 133 | 134 | :return: Object to be passed to the post-processing function 135 | """ 136 | return data 137 | 138 | def send_request( # noqa 139 | self, 140 | endpoint: str, 141 | version: Optional[str] = None, 142 | data: Optional[dict] = None 143 | ) -> Optional[dict]: 144 | """ 145 | NOTICE! This method will be replaced at runtime by the inference service 146 | 147 | Helper method to send model inference requests to the inference service itself. 148 | This is designed to help with model ensemble, model pipelines, etc. 149 | On request error return None, otherwise the request result data dictionary 150 | 151 | Usage example: 152 | 153 | >>> x0, x1 = 1, 2 154 | >>> result = self.send_request(endpoint="test_model_sklearn", version="1", data={"x0": x0, "x1": x1}) 155 | >>> y = result["y"] 156 | """ 157 | pass 158 |
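As a concrete illustration of the template above, a user-supplied preprocess.py for an endpoint could look roughly like the following. This is a minimal sketch, assuming a model whose predict() accepts a 2D numpy array and a JSON request body of the form {"x0": ..., "x1": ...} (the field names are illustrative; the examples/sklearn folder in this repository follows the same pattern):

from typing import Any, Callable, Optional, Union

import numpy as np


# Must be named "Preprocess" (see the template above)
class Preprocess(object):
    def __init__(self):
        # called once per serving instance, not per request
        self.model_endpoint = None

    def preprocess(self, body: Union[bytes, dict], state: dict,
                   collect_custom_statistics_fn: Optional[Callable[[dict], None]]) -> Any:
        # convert the JSON body into the 2D array the model expects
        if collect_custom_statistics_fn:
            collect_custom_statistics_fn({"x0": body["x0"], "x1": body["x1"]})
        return np.array([[body["x0"], body["x1"]]], dtype=np.float64)

    def postprocess(self, data: Any, state: dict,
                    collect_custom_statistics_fn: Optional[Callable[[dict], None]]) -> dict:
        # wrap the raw prediction back into a JSON-serializable dict
        return {"y": data.tolist() if isinstance(data, np.ndarray) else data}

Per the execution flow documented in the template, the preprocess() output is handed to the configured inference engine (or to process() for the custom engine), and the dict returned by postprocess() becomes the RestAPI response.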
-------------------------------------------------------------------------------- /clearml_serving/serving/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-bullseye 2 | 3 | 4 | ENV LC_ALL=C.UTF-8 5 | 6 | # install base package 7 | RUN pip3 install --no-cache-dir clearml-serving 8 | 9 | # get latest execution code from the git repository 10 | # RUN cd $HOME && git clone https://github.com/allegroai/clearml-serving.git 11 | COPY clearml_serving /root/clearml/clearml_serving 12 | 13 | RUN pip3 install --no-cache-dir -r /root/clearml/clearml_serving/serving/requirements.txt 14 | 15 | # default serving port 16 | EXPOSE 8080 17 | 18 | # environment variables to load the Task from: CLEARML_SERVING_TASK_ID, CLEARML_SERVING_PORT 19 | 20 | WORKDIR /root/clearml/ 21 | ENTRYPOINT ["clearml_serving/serving/entrypoint.sh"] 22 | -------------------------------------------------------------------------------- /clearml_serving/serving/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-serving/1def0a6901617767687f2b747aaffdc060f96046/clearml_serving/serving/__init__.py -------------------------------------------------------------------------------- /clearml_serving/serving/endpoints.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from attr import attrib, attrs, asdict, validators 3 | 4 | 5 | def _engine_validator(inst, attr, value): # noqa 6 | from .preprocess_service import BasePreprocessRequest 7 | if not BasePreprocessRequest.validate_engine_type(value): 8 | raise TypeError("{} not supported engine type".format(value)) 9 | 10 | 11 | def _matrix_type_validator(inst, attr, value): # noqa 12 | if isinstance(value, (tuple, list)): 13 | for v in value: 14 | if v and not np.dtype(v): 15 | raise TypeError("{} not supported matrix type".format(v)) 16 | 17 | elif value and not np.dtype(value): 18 | raise TypeError("{} not supported matrix 
type".format(value)) 19 | 20 | 21 | def _list_type_convertor(inst): # noqa 22 | if inst is None: 23 | return None 24 | return inst if isinstance(inst, (tuple, list)) else [inst] 25 | 26 | 27 | def _nested_list_type_convertor(inst): # noqa 28 | if inst is None: 29 | return None 30 | if isinstance(inst, (tuple, list)) and all(not isinstance(i, (tuple, list)) for i in inst): 31 | return [inst] 32 | inst = inst if isinstance(inst, (tuple, list)) else [inst] 33 | return inst 34 | 35 | 36 | @attrs 37 | class BaseStruct(object): 38 | def as_dict(self, remove_null_entries=False): 39 | if not remove_null_entries: 40 | return asdict(self) 41 | return {k: v for k, v in asdict(self).items() if v is not None} 42 | 43 | 44 | @attrs 45 | class ModelMonitoring(BaseStruct): 46 | base_serving_url = attrib(type=str) # serving point url prefix (example: "detect_cat") 47 | engine_type = attrib(type=str, validator=_engine_validator) # engine type 48 | monitor_project = attrib(type=str, default=None) # monitor model project (for model auto update) 49 | monitor_name = attrib(type=str, default=None) # monitor model name (for model auto update, regexp selection) 50 | monitor_tags = attrib(type=list, default=[]) # monitor model tag (for model auto update) 51 | only_published = attrib(type=bool, default=False) # only select published models 52 | max_versions = attrib(type=int, default=None) # Maximum number of models to keep serving (latest X models) 53 | input_size = attrib(type=list, default=None, converter=_nested_list_type_convertor) # optional, model matrix size 54 | input_type = attrib(type=list, default=None, validator=_matrix_type_validator, converter=_list_type_convertor) 55 | input_name = attrib(type=list, default=None, converter=_list_type_convertor) # optional, input layer names 56 | output_size = attrib(type=list, default=None, converter=_nested_list_type_convertor) # optional, model matrix size 57 | output_type = attrib(type=list, default=None, validator=_matrix_type_validator, converter=_list_type_convertor) 58 | output_name = attrib(type=list, default=None, converter=_list_type_convertor) # optional, output layer names 59 | preprocess_artifact = attrib( 60 | type=str, default=None) # optional artifact name storing the model preprocessing code 61 | auxiliary_cfg = attrib(type=dict, default=None) # Auxiliary configuration (e.g. 
triton conf), Union[str, dict] 62 | 63 | 64 | @attrs 65 | class ModelEndpoint(BaseStruct): 66 | engine_type = attrib(type=str, validator=_engine_validator) # engine type 67 | serving_url = attrib(type=str) # full serving point url (including version) example: "detect_cat/v1" 68 | model_id = attrib(type=str, default=None) # model ID to serve (and download) 69 | version = attrib(type=str, default="") # key (version string), default no version 70 | preprocess_artifact = attrib( 71 | type=str, default=None) # optional artifact name storing the model preprocessing code 72 | input_size = attrib(type=list, default=None, converter=_nested_list_type_convertor) # optional, model matrix size 73 | input_type = attrib(type=list, default=None, validator=_matrix_type_validator, converter=_list_type_convertor) 74 | input_name = attrib(type=list, default=None, converter=_list_type_convertor) # optional, input layer names 75 | output_size = attrib(type=list, default=None, converter=_nested_list_type_convertor) # optional, model matrix size 76 | output_type = attrib(type=list, default=None, validator=_matrix_type_validator, converter=_list_type_convertor) 77 | output_name = attrib(type=list, default=None, converter=_list_type_convertor) # optional, output layer names 78 | auxiliary_cfg = attrib(type=dict, default=None) # Optional: Auxiliary configuration (e.g. triton conf), [str, dict] 79 | 80 | 81 | @attrs 82 | class CanaryEP(BaseStruct): 83 | endpoint = attrib(type=str) # load balancer endpoint 84 | weights = attrib(type=list) # list of weights (order should be matching fixed_endpoints or prefix) 85 | load_endpoints = attrib(type=list, default=[]) # list of endpoints to balance and route 86 | load_endpoint_prefix = attrib( 87 | type=str, default=None) # endpoint prefix to list 88 | # (any endpoint starting with this prefix will be listed, sorted lexicographically, or broken into / ) 89 | 90 | 91 | @attrs 92 | class EndpointMetricLogging(BaseStruct): 93 | @attrs 94 | class MetricType(BaseStruct): 95 | type = attrib(type=str, validator=validators.in_(("scalar", "enum", "value", "counter"))) 96 | buckets = attrib(type=list, default=None) 97 | 98 | endpoint = attrib(type=str) # Specific endpoint to log metrics w/ version (example: "model/1") 99 | # If endpoint name ends with a "*" any endpoint with a matching prefix will be selected 100 | 101 | log_frequency = attrib(type=float, default=None) # Specific endpoint to log frequency 102 | # (0.0 to 1.0, where 1.0 is 100% of all requests are logged) 103 | 104 | metrics = attrib( 105 | type=dict, default={}, 106 | converter=lambda x: { 107 | k: v if isinstance(v, EndpointMetricLogging.MetricType) 108 | else EndpointMetricLogging.MetricType(**v) for k, v in x.items() 109 | } 110 | ) # key=variable, value=MetricType 111 | 112 | # example: 113 | # {"x1": dict(type="scalar", buckets=[0,1,2,3]), 114 | # "y": dict(type="enum", buckets=["cat", "dog"]). 115 | # "latency": dict(type="value", buckets=[]). 
116 | # } 117 | 118 | def as_dict(self, remove_null_entries=False): 119 | if not remove_null_entries: 120 | return {k: v.as_dict(remove_null_entries) if isinstance(v, BaseStruct) else v 121 | for k, v in asdict(self).items()} 122 | 123 | return {k: v.as_dict(remove_null_entries) if isinstance(v, BaseStruct) else v 124 | for k, v in asdict(self).items() if v is not None} 125 | -------------------------------------------------------------------------------- /clearml_serving/serving/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # print configuration 4 | echo CLEARML_SERVING_TASK_ID="$CLEARML_SERVING_TASK_ID" 5 | echo CLEARML_INFERENCE_TASK_ID="$CLEARML_INFERENCE_TASK_ID" 6 | echo CLEARML_SERVING_PORT="$CLEARML_SERVING_PORT" 7 | echo CLEARML_USE_GUNICORN="$CLEARML_USE_GUNICORN" 8 | echo CLEARML_EXTRA_PYTHON_PACKAGES="$CLEARML_EXTRA_PYTHON_PACKAGES" 9 | echo CLEARML_SERVING_NUM_PROCESS="$CLEARML_SERVING_NUM_PROCESS" 10 | echo CLEARML_SERVING_POLL_FREQ="$CLEARML_SERVING_POLL_FREQ" 11 | echo CLEARML_DEFAULT_KAFKA_SERVE_URL="$CLEARML_DEFAULT_KAFKA_SERVE_URL" 12 | 13 | SERVING_PORT="${CLEARML_SERVING_PORT:-8080}" 14 | GUNICORN_NUM_PROCESS="${CLEARML_SERVING_NUM_PROCESS:-4}" 15 | GUNICORN_SERVING_TIMEOUT="${GUNICORN_SERVING_TIMEOUT:-600}" 16 | GUNICORN_MAX_REQUESTS="${GUNICORN_MAX_REQUESTS:-0}" 17 | UVICORN_SERVE_LOOP="${UVICORN_SERVE_LOOP:-uvloop}" 18 | UVICORN_LOG_LEVEL="${UVICORN_LOG_LEVEL:-warning}" 19 | 20 | # set default internal serve endpoint (for request pipelining) 21 | CLEARML_DEFAULT_BASE_SERVE_URL="${CLEARML_DEFAULT_BASE_SERVE_URL:-http://127.0.0.1:$SERVING_PORT/serve}" 22 | CLEARML_DEFAULT_TRITON_GRPC_ADDR="${CLEARML_DEFAULT_TRITON_GRPC_ADDR:-127.0.0.1:8001}" 23 | 24 | # print configuration 25 | echo WEB_CONCURRENCY="$WEB_CONCURRENCY" 26 | echo SERVING_PORT="$SERVING_PORT" 27 | echo GUNICORN_NUM_PROCESS="$GUNICORN_NUM_PROCESS" 28 | echo GUNICORN_SERVING_TIMEOUT="$GUNICORN_SERVING_TIMEOUT" 29 | echo GUNICORN_MAX_REQUESTS="$GUNICORN_MAX_REQUESTS" 30 | echo GUNICORN_EXTRA_ARGS="$GUNICORN_EXTRA_ARGS" 31 | echo UVICORN_SERVE_LOOP="$UVICORN_SERVE_LOOP" 32 | echo UVICORN_EXTRA_ARGS="$UVICORN_EXTRA_ARGS" 33 | echo UVICORN_LOG_LEVEL="$UVICORN_LOG_LEVEL" 34 | echo CLEARML_DEFAULT_BASE_SERVE_URL="$CLEARML_DEFAULT_BASE_SERVE_URL" 35 | echo CLEARML_DEFAULT_TRITON_GRPC_ADDR="$CLEARML_DEFAULT_TRITON_GRPC_ADDR" 36 | 37 | # runtime add extra python packages 38 | if [ ! 
-z "$CLEARML_EXTRA_PYTHON_PACKAGES" ] 39 | then 40 | python3 -m pip install $CLEARML_EXTRA_PYTHON_PACKAGES 41 | fi 42 | 43 | if [ -z "$CLEARML_USE_GUNICORN" ] 44 | then 45 | if [ -z "$CLEARML_SERVING_NUM_PROCESS" ] 46 | then 47 | echo "Starting Uvicorn server - single worker" 48 | PYTHONPATH=$(pwd) python3 -m uvicorn \ 49 | clearml_serving.serving.main:app --log-level $UVICORN_LOG_LEVEL --host 0.0.0.0 --port $SERVING_PORT --loop $UVICORN_SERVE_LOOP \ 50 | $UVICORN_EXTRA_ARGS 51 | else 52 | echo "Starting Uvicorn server - multi worker" 53 | PYTHONPATH=$(pwd) python3 clearml_serving/serving/uvicorn_mp_entrypoint.py \ 54 | clearml_serving.serving.main:app --log-level $UVICORN_LOG_LEVEL --host 0.0.0.0 --port $SERVING_PORT --loop $UVICORN_SERVE_LOOP \ 55 | --workers $CLEARML_SERVING_NUM_PROCESS $UVICORN_EXTRA_ARGS 56 | fi 57 | else 58 | echo "Starting Gunicorn server" 59 | # start service 60 | PYTHONPATH=$(pwd) python3 -m gunicorn \ 61 | --preload clearml_serving.serving.main:app \ 62 | --workers $GUNICORN_NUM_PROCESS \ 63 | --worker-class uvicorn.workers.UvicornWorker \ 64 | --max-requests $GUNICORN_MAX_REQUESTS \ 65 | --timeout $GUNICORN_SERVING_TIMEOUT \ 66 | --bind 0.0.0.0:$SERVING_PORT \ 67 | $GUNICORN_EXTRA_ARGS 68 | fi 69 | -------------------------------------------------------------------------------- /clearml_serving/serving/init.py: -------------------------------------------------------------------------------- 1 | import os 2 | from clearml import Task 3 | from clearml_serving.serving.model_request_processor import ModelRequestProcessor 4 | from clearml_serving.serving.preprocess_service import BasePreprocessRequest 5 | 6 | 7 | def setup_task(force_threaded_logging=None): 8 | serving_service_task_id = os.environ.get("CLEARML_SERVING_TASK_ID", None) 9 | inference_service_task_id = os.environ.get("CLEARML_INFERENCE_TASK_ID", False) # according Task.init() docs 10 | 11 | # always use background thread, it requires less memory 12 | if force_threaded_logging or os.environ.get("CLEARML_BKG_THREAD_REPORT") in ("1", "Y", "y", "true"): 13 | os.environ["CLEARML_BKG_THREAD_REPORT"] = "1" 14 | Task._report_subprocess_enabled = False 15 | 16 | # get the serving controller task 17 | # noinspection PyProtectedMember 18 | serving_task = ModelRequestProcessor._get_control_plane_task(task_id=serving_service_task_id) 19 | # set to running (because we are here) 20 | if serving_task.status != "in_progress": 21 | serving_task.started(force=True) 22 | 23 | # create a new serving instance (for visibility and monitoring) 24 | instance_task = Task.init( 25 | project_name=serving_task.get_project_name(), 26 | task_name="{} - serve instance".format(serving_task.name), 27 | task_type="inference", # noqa 28 | continue_last_task=inference_service_task_id, 29 | ) 30 | instance_task.set_system_tags(["service"]) 31 | # make sure we start logging thread/process 32 | instance_logger = instance_task.get_logger() # noqa 33 | # this will use the main thread/process 34 | session_logger = serving_task.get_logger() 35 | 36 | # preload modules into memory before forking 37 | BasePreprocessRequest.load_modules() 38 | 39 | return serving_service_task_id, session_logger, instance_task.id 40 | -------------------------------------------------------------------------------- /clearml_serving/serving/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shlex 3 | import traceback 4 | import gzip 5 | import asyncio 6 | 7 | from fastapi import FastAPI, Request, Response, 
APIRouter, HTTPException 8 | from fastapi.routing import APIRoute 9 | from fastapi.responses import PlainTextResponse 10 | from grpc.aio import AioRpcError 11 | 12 | from starlette.background import BackgroundTask 13 | 14 | from typing import Optional, Dict, Any, Callable, Union 15 | 16 | from clearml_serving.version import __version__ 17 | from clearml_serving.serving.init import setup_task 18 | from clearml_serving.serving.model_request_processor import ( 19 | ModelRequestProcessor, 20 | EndpointNotFoundException, 21 | EndpointBackendEngineException, 22 | EndpointModelLoadException, 23 | ServingInitializationException, 24 | ) 25 | from clearml_serving.serving.utils import parse_grpc_errors 26 | 27 | 28 | class GzipRequest(Request): 29 | async def body(self) -> bytes: 30 | if not hasattr(self, "_body"): 31 | body = await super().body() 32 | if "gzip" in self.headers.getlist("Content-Encoding"): 33 | body = gzip.decompress(body) 34 | self._body = body # noqa 35 | return self._body 36 | 37 | 38 | class GzipRoute(APIRoute): 39 | def get_route_handler(self) -> Callable: 40 | original_route_handler = super().get_route_handler() 41 | 42 | async def custom_route_handler(request: Request) -> Response: 43 | request = GzipRequest(request.scope, request.receive) 44 | return await original_route_handler(request) 45 | 46 | return custom_route_handler 47 | 48 | 49 | # process Lock, so that we can have only a single process doing the model reloading at a time 50 | singleton_sync_lock = None # Lock() 51 | # shared Model processor object 52 | processor = None # type: Optional[ModelRequestProcessor] 53 | 54 | # create clearml Task and load models 55 | serving_service_task_id, session_logger, instance_id = setup_task() 56 | # polling frequency 57 | model_sync_frequency_secs = 5 58 | try: 59 | model_sync_frequency_secs = float(os.environ.get("CLEARML_SERVING_POLL_FREQ", model_sync_frequency_secs)) 60 | except (ValueError, TypeError): 61 | pass 62 | 63 | 64 | grpc_aio_ignore_errors = parse_grpc_errors(shlex.split(os.environ.get("CLEARML_SERVING_AIO_RPC_IGNORE_ERRORS", ""))) 65 | grpc_aio_verbose_errors = parse_grpc_errors(shlex.split(os.environ.get("CLEARML_SERVING_AIO_RPC_VERBOSE_ERRORS", ""))) 66 | 67 | 68 | class CUDAException(Exception): 69 | def __init__(self, exception: str): 70 | self.exception = exception 71 | 72 | 73 | # start FastAPI app 74 | app = FastAPI(title="ClearML Serving Service", version=__version__, description="ClearML Service Service router") 75 | 76 | 77 | @app.on_event("startup") 78 | async def startup_event(): 79 | global processor 80 | 81 | if processor: 82 | print( 83 | "ModelRequestProcessor already initialized [pid={}] [service_id={}]".format( 84 | os.getpid(), serving_service_task_id 85 | ) 86 | ) 87 | else: 88 | print("Starting up ModelRequestProcessor [pid={}] [service_id={}]".format(os.getpid(), serving_service_task_id)) 89 | processor = ModelRequestProcessor( 90 | task_id=serving_service_task_id, 91 | update_lock_guard=singleton_sync_lock, 92 | ) 93 | print("ModelRequestProcessor [id={}] loaded".format(processor.get_id())) 94 | processor.launch(poll_frequency_sec=model_sync_frequency_secs * 60) 95 | 96 | 97 | @app.on_event("shutdown") 98 | def shutdown_event(): 99 | print("RESTARTING INFERENCE SERVICE!") 100 | 101 | 102 | async def exit_app(): 103 | loop = asyncio.get_running_loop() 104 | loop.stop() 105 | 106 | 107 | @app.exception_handler(CUDAException) 108 | async def cuda_exception_handler(request, exc): 109 | task = BackgroundTask(exit_app) 110 | return 
PlainTextResponse("CUDA out of memory. Restarting service", status_code=500, background=task) 111 | 112 | 113 | router = APIRouter( 114 | prefix="/serve", 115 | tags=["models"], 116 | responses={404: {"description": "Model Serving Endpoint Not found"}}, 117 | route_class=GzipRoute, # mark-out to remove support for GZip content encoding 118 | ) 119 | 120 | 121 | # cover all routing options for model version `/{model_id}`, `/{model_id}/123`, `/{model_id}?version=123` 122 | @router.post("/{model_id}/{version}") 123 | @router.post("/{model_id}/") 124 | @router.post("/{model_id}") 125 | async def serve_model(model_id: str, version: Optional[str] = None, request: Union[bytes, Dict[Any, Any]] = None): 126 | try: 127 | return_value = await processor.process_request(base_url=model_id, version=version, request_body=request) 128 | except EndpointNotFoundException as ex: 129 | raise HTTPException(status_code=404, detail="Error processing request, endpoint was not found: {}".format(ex)) 130 | except (EndpointModelLoadException, EndpointBackendEngineException) as ex: 131 | session_logger.report_text( 132 | "[{}] Exception [{}] {} while processing request: {}\n{}".format( 133 | instance_id, type(ex), ex, request, "".join(traceback.format_exc()) 134 | ) 135 | ) 136 | raise HTTPException(status_code=422, detail="Error [{}] processing request: {}".format(type(ex), ex)) 137 | except ServingInitializationException as ex: 138 | session_logger.report_text( 139 | "[{}] Exception [{}] {} while loading serving inference: {}\n{}".format( 140 | instance_id, type(ex), ex, request, "".join(traceback.format_exc()) 141 | ) 142 | ) 143 | raise HTTPException(status_code=500, detail="Error [{}] processing request: {}".format(type(ex), ex)) 144 | except ValueError as ex: 145 | session_logger.report_text( 146 | "[{}] Exception [{}] {} while processing request: {}\n{}".format( 147 | instance_id, type(ex), ex, request, "".join(traceback.format_exc()) 148 | ) 149 | ) 150 | if "CUDA out of memory. 
" in str(ex) or "NVML_SUCCESS == r INTERNAL ASSERT FAILED" in str(ex): 151 | raise CUDAException(exception=ex) 152 | else: 153 | raise HTTPException(status_code=422, detail="Error [{}] processing request: {}".format(type(ex), ex)) 154 | except AioRpcError as ex: 155 | if grpc_aio_verbose_errors and ex.code() in grpc_aio_verbose_errors: 156 | session_logger.report_text( 157 | "[{}] Exception [AioRpcError] {} while processing request: {}".format(instance_id, ex, request) 158 | ) 159 | elif not grpc_aio_ignore_errors or ex.code() not in grpc_aio_ignore_errors: 160 | session_logger.report_text("[{}] Exception [AioRpcError] status={} ".format(instance_id, ex.code())) 161 | raise HTTPException( 162 | status_code=500, detail="Error [AioRpcError] processing request: status={}".format(ex.code()) 163 | ) 164 | except Exception as ex: 165 | session_logger.report_text( 166 | "[{}] Exception [{}] {} while processing request: {}\n{}".format( 167 | instance_id, type(ex), ex, request, "".join(traceback.format_exc()) 168 | ) 169 | ) 170 | raise HTTPException(status_code=500, detail="Error [{}] processing request: {}".format(type(ex), ex)) 171 | return return_value 172 | 173 | 174 | app.include_router(router) 175 | -------------------------------------------------------------------------------- /clearml_serving/serving/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml>=1.10.1,<2 2 | attrs>=20.3.0,<24 3 | fastapi[all]>=0.109.1,<0.111 4 | uvicorn[standard] 5 | gunicorn>=20.1.0,<20.2 6 | asyncio>=3.4.3,<3.5 ; python_version < '3.10' 7 | aiocache>=0.12,<0.13 8 | tritonclient[grpc]>=2.32,<2.33 9 | starlette 10 | numpy>=1.24,<1.27 11 | scikit-learn>=1.2.2,<1.3 12 | pandas>=1.5.3,<1.6 13 | grpcio 14 | referencing>=0.31.0 15 | Pillow>=10.0.1 16 | xgboost>=1.7.5,<1.8 17 | lightgbm>=3.3.2,<3.4 18 | requests>=2.31.0 19 | kafka-python>=2.0.2,<2.1 20 | lz4>=4.0.0,<5 21 | -------------------------------------------------------------------------------- /clearml_serving/serving/utils.py: -------------------------------------------------------------------------------- 1 | from typing import List, Set 2 | 3 | import grpc 4 | 5 | 6 | def parse_grpc_errors(errors: List[str]) -> Set[grpc.StatusCode]: 7 | try: 8 | typed_errors = { 9 | int(e) if e.isdigit() else e.lower().replace("-", " ").replace("_", " ") 10 | for e in errors 11 | } 12 | if len(typed_errors) == 1 and next(iter(typed_errors)) in ("true", "false"): 13 | return set(grpc.StatusCode if next(iter(typed_errors)) == "true" else []) 14 | return {e for e in grpc.StatusCode if typed_errors.intersection(e.value)} 15 | except (ValueError, TypeError): 16 | pass 17 | return set() 18 | -------------------------------------------------------------------------------- /clearml_serving/serving/uvicorn_mp_entrypoint.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | from clearml_serving.serving.init import setup_task 3 | 4 | if __name__ == "__main__": 5 | setup_task(force_threaded_logging=True) 6 | uvicorn.main() 7 | -------------------------------------------------------------------------------- /clearml_serving/statistics/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-bullseye 2 | 3 | 4 | ENV LC_ALL=C.UTF-8 5 | 6 | # install base package 7 | RUN pip3 install --no-cache-dir clearml-serving 8 | 9 | # get latest execution code from the git repository 10 | # RUN cd $HOME && git clone 
https://github.com/allegroai/clearml-serving.git 11 | COPY clearml_serving /root/clearml/clearml_serving 12 | 13 | RUN pip3 install --no-cache-dir -r /root/clearml/clearml_serving/statistics/requirements.txt 14 | 15 | # default serving port 16 | EXPOSE 9999 17 | 18 | # environement variable to load Task from CLEARML_SERVING_TASK_ID, CLEARML_SERVING_PORT 19 | 20 | WORKDIR /root/clearml/ 21 | ENTRYPOINT ["clearml_serving/statistics/entrypoint.sh"] 22 | -------------------------------------------------------------------------------- /clearml_serving/statistics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-serving/1def0a6901617767687f2b747aaffdc060f96046/clearml_serving/statistics/__init__.py -------------------------------------------------------------------------------- /clearml_serving/statistics/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # print configuration 4 | echo CLEARML_SERVING_TASK_ID="$CLEARML_SERVING_TASK_ID" 5 | echo CLEARML_SERVING_PORT="$CLEARML_SERVING_PORT" 6 | echo CLEARML_EXTRA_PYTHON_PACKAGES="$CLEARML_EXTRA_PYTHON_PACKAGES" 7 | echo CLEARML_SERVING_POLL_FREQ="$CLEARML_SERVING_POLL_FREQ" 8 | echo CLEARML_DEFAULT_KAFKA_SERVE_URL="$CLEARML_DEFAULT_KAFKA_SERVE_URL" 9 | 10 | SERVING_PORT="${CLEARML_SERVING_PORT:-9999}" 11 | 12 | # set default internal serve endpoint (for request pipelining) 13 | CLEARML_DEFAULT_BASE_SERVE_URL="${CLEARML_DEFAULT_BASE_SERVE_URL:-http://127.0.0.1:$SERVING_PORT/serve}" 14 | CLEARML_DEFAULT_TRITON_GRPC_ADDR="${CLEARML_DEFAULT_TRITON_GRPC_ADDR:-127.0.0.1:8001}" 15 | 16 | # print configuration 17 | echo SERVING_PORT="$SERVING_PORT" 18 | 19 | # runtime add extra python packages 20 | if [ ! 
-z "$CLEARML_EXTRA_PYTHON_PACKAGES" ] 21 | then 22 | python3 -m pip install $CLEARML_EXTRA_PYTHON_PACKAGES 23 | fi 24 | 25 | echo "Starting Statistics Controller server" 26 | PYTHONPATH=$(pwd) python3 clearml_serving/statistics/main.py 27 | -------------------------------------------------------------------------------- /clearml_serving/statistics/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import prometheus_client 4 | from clearml import Task 5 | 6 | from clearml_serving.serving.model_request_processor import ModelRequestProcessor 7 | from clearml_serving.statistics.metrics import StatisticsController 8 | 9 | 10 | def main(): 11 | serving_service_task_id = os.environ.get("CLEARML_SERVING_TASK_ID", None) 12 | model_sync_frequency_secs = 5 13 | try: 14 | model_sync_frequency_secs = float(os.environ.get("CLEARML_SERVING_POLL_FREQ", model_sync_frequency_secs)) 15 | except (ValueError, TypeError): 16 | pass 17 | 18 | # noinspection PyProtectedMember 19 | serving_task = ModelRequestProcessor._get_control_plane_task(task_id=serving_service_task_id) 20 | # create a new serving instance (for visibility and monitoring) 21 | instance_task = Task.init( 22 | project_name=serving_task.get_project_name(), 23 | task_name="{} - statistics controller".format(serving_task.name), 24 | task_type="monitor", 25 | ) 26 | instance_task.set_system_tags(["service"]) 27 | # noinspection PyProtectedMember 28 | kafka_server_url = os.environ.get("CLEARML_DEFAULT_KAFKA_SERVE_URL", "localhost:9092") 29 | stats_controller = StatisticsController( 30 | task=instance_task, 31 | kafka_server_url=kafka_server_url, 32 | serving_id=serving_service_task_id, 33 | poll_frequency_min=model_sync_frequency_secs 34 | ) 35 | prometheus_client.start_http_server(int(os.environ.get("CLEARML_SERVING_PORT", 9999))) 36 | # we will never leave here 37 | stats_controller.start() 38 | 39 | 40 | if __name__ == '__main__': 41 | main() 42 | -------------------------------------------------------------------------------- /clearml_serving/statistics/metrics.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | from copy import deepcopy 5 | from functools import partial 6 | from threading import Event, Thread 7 | from time import time, sleep 8 | 9 | from clearml import Task 10 | from typing import Optional, Dict, Any, Iterable, Set 11 | 12 | from prometheus_client import Histogram, Enum, Gauge, Counter, values 13 | from kafka import KafkaConsumer 14 | from prometheus_client.metrics import MetricWrapperBase, _validate_exemplar 15 | from prometheus_client.registry import REGISTRY 16 | from prometheus_client.samples import Exemplar, Sample 17 | from prometheus_client.context_managers import Timer 18 | from prometheus_client.utils import floatToGoString 19 | 20 | from ..serving.endpoints import EndpointMetricLogging 21 | from ..serving.model_request_processor import ModelRequestProcessor 22 | 23 | 24 | class ScalarHistogram(Histogram): 25 | 26 | def __init__(self, *args, **kwargs): 27 | super().__init__(*args, **kwargs) 28 | 29 | def observe(self, amount, exemplar=None): 30 | """Observe the given amount. 31 | 32 | The amount is usually positive or zero. Negative values are 33 | accepted but prevent current versions of Prometheus from 34 | properly detecting counter resets in the sum of 35 | observations. See 36 | https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations 37 | for details. 
38 | """ 39 | self._raise_if_not_observable() 40 | if not isinstance(amount, (list, tuple)): 41 | amount = [amount] 42 | self._sum.inc(len(amount)) 43 | for v in amount: 44 | for i, bound in enumerate(self._upper_bounds): 45 | if v <= bound: 46 | self._buckets[i].inc(1) 47 | if exemplar: 48 | _validate_exemplar(exemplar) 49 | self._buckets[i].set_exemplar(Exemplar(exemplar, v, time())) 50 | break 51 | 52 | def _child_samples(self) -> Iterable[Sample]: 53 | samples = [] 54 | for i, bound in enumerate(self._upper_bounds): 55 | acc = self._buckets[i].get() 56 | samples.append( 57 | Sample('_bucket', {'le': floatToGoString(bound)}, acc, None, self._buckets[i].get_exemplar()) 58 | ) 59 | samples.append(Sample('_sum', {'le': floatToGoString(bound)}, self._sum.get(), None, None)) 60 | 61 | return tuple(samples) 62 | 63 | 64 | class EnumHistogram(MetricWrapperBase): 65 | """A Histogram tracks the size and number of events in buckets. 66 | 67 | You can use Histograms for aggregatable calculation of quantiles. 68 | 69 | Example use cases: 70 | - Response latency 71 | - Request size 72 | 73 | Example for a Histogram: 74 | 75 | from prometheus_client import Histogram 76 | 77 | h = Histogram('request_size_bytes', 'Request size (bytes)') 78 | h.observe(512) # Observe 512 (bytes) 79 | 80 | Example for a Histogram using time: 81 | 82 | from prometheus_client import Histogram 83 | 84 | REQUEST_TIME = Histogram('response_latency_seconds', 'Response latency (seconds)') 85 | 86 | @REQUEST_TIME.time() 87 | def create_response(request): 88 | '''A dummy function''' 89 | time.sleep(1) 90 | 91 | Example of using the same Histogram object as a context manager: 92 | 93 | with REQUEST_TIME.time(): 94 | pass # Logic to be timed 95 | 96 | The default buckets are intended to cover a typical web/rpc request from milliseconds to seconds. 97 | They can be overridden by passing `buckets` keyword argument to `Histogram`. 98 | """ 99 | _type = 'histogram' 100 | 101 | def __init__(self, 102 | name, 103 | documentation, 104 | buckets, 105 | labelnames=(), 106 | namespace='', 107 | subsystem='', 108 | unit='', 109 | registry=REGISTRY, 110 | _labelvalues=None, 111 | ): 112 | self._prepare_buckets(buckets) 113 | super().__init__( 114 | name=name, 115 | documentation=documentation, 116 | labelnames=labelnames, 117 | namespace=namespace, 118 | subsystem=subsystem, 119 | unit=unit, 120 | registry=registry, 121 | _labelvalues=_labelvalues, 122 | ) 123 | self._kwargs['buckets'] = buckets 124 | 125 | def _prepare_buckets(self, buckets): 126 | buckets = [str(b) for b in buckets] 127 | if buckets != sorted(buckets): 128 | # This is probably an error on the part of the user, 129 | # so raise rather than sorting for them. 130 | raise ValueError('Buckets not in sorted order') 131 | 132 | if len(buckets) < 2: 133 | raise ValueError('Must have at least two buckets') 134 | self._upper_bounds = buckets 135 | 136 | def _metric_init(self): 137 | self._buckets = {} 138 | self._created = time() 139 | bucket_labelnames = self._upper_bounds 140 | self._sum = values.ValueClass( 141 | self._type, self._name, self._name + '_sum', self._labelnames, self._labelvalues) 142 | for b in self._upper_bounds: 143 | self._buckets[b] = values.ValueClass( 144 | self._type, 145 | self._name, 146 | self._name + '_bucket', 147 | bucket_labelnames, 148 | self._labelvalues + (b,)) 149 | 150 | def observe(self, amount, exemplar=None): 151 | """Observe the given amount. 152 | 153 | The amount is usually positive or zero. 
Negative values are 154 | accepted but prevent current versions of Prometheus from 155 | properly detecting counter resets in the sum of 156 | observations. See 157 | https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations 158 | for details. 159 | """ 160 | self._raise_if_not_observable() 161 | if not isinstance(amount, (list, tuple)): 162 | amount = [amount] 163 | self._sum.inc(len(amount)) 164 | for v in amount: 165 | self._buckets[v].inc(1) 166 | if exemplar: 167 | _validate_exemplar(exemplar) 168 | self._buckets[v].set_exemplar(Exemplar(exemplar, 1, time())) 169 | 170 | def time(self): 171 | """Time a block of code or function, and observe the duration in seconds. 172 | 173 | Can be used as a function decorator or context manager. 174 | """ 175 | return Timer(self, 'observe') 176 | 177 | def _child_samples(self) -> Iterable[Sample]: 178 | samples = [] 179 | for i in self._buckets: 180 | acc = self._buckets[i].get() 181 | samples.append(Sample( 182 | '_bucket', {'enum': i}, acc, None, self._buckets[i].get_exemplar())) 183 | samples.append(Sample('_sum', {'enum': i}, self._sum.get(), None, None)) 184 | 185 | return tuple(samples) 186 | 187 | 188 | class StatisticsController(object): 189 | _reserved = { 190 | '_latency': partial(ScalarHistogram, buckets=(.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0)), 191 | '_count': Counter 192 | } 193 | _metric_type_class = {"scalar": ScalarHistogram, "enum": EnumHistogram, "value": Gauge, "counter": Counter} 194 | 195 | def __init__( 196 | self, 197 | task: Task, 198 | kafka_server_url: str, 199 | serving_id: Optional[str], 200 | poll_frequency_min: float = 5 201 | ): 202 | self.task = task 203 | self._serving_service_task_id = serving_id 204 | self._poll_frequency_min = float(poll_frequency_min) 205 | self._serving_service = None # type: Optional[ModelRequestProcessor] 206 | self._current_endpoints = {} # type: Optional[Dict[str, EndpointMetricLogging]] 207 | self._auto_added_endpoints = set() # type: Set[str] 208 | self._prometheus_metrics = {} # type: Optional[Dict[str, Dict[str, MetricWrapperBase]]] 209 | self._timestamp = time() 210 | self._sync_thread = None 211 | self._last_sync_time = time() 212 | self._dirty = False 213 | self._sync_event = Event() 214 | self._sync_threshold_sec = 30 215 | self._kafka_server = kafka_server_url 216 | # noinspection PyProtectedMember 217 | self._kafka_topic = ModelRequestProcessor._kafka_topic 218 | 219 | def start(self): 220 | self._serving_service = ModelRequestProcessor(task_id=self._serving_service_task_id) 221 | 222 | if not self._sync_thread: 223 | self._sync_thread = Thread(target=self._sync_daemon, daemon=True) 224 | self._sync_thread.start() 225 | 226 | # noinspection PyProtectedMember 227 | kafka_server = \ 228 | self._serving_service.get_configuration().get(ModelRequestProcessor._config_key_kafka_stats) or \ 229 | self._kafka_server 230 | 231 | print("Starting Kafka Statistics processing: {}".format(kafka_server)) 232 | 233 | while True: 234 | try: 235 | consumer = KafkaConsumer(self._kafka_topic, bootstrap_servers=kafka_server) 236 | break 237 | except Exception as ex: 238 | print("Error: failed opening Kafka consumer [{}]: {}".format(kafka_server, ex)) 239 | print("Retrying in 30 seconds") 240 | sleep(30) 241 | 242 | # we will never leave this loop 243 | while True: 244 | # noinspection PyBroadException 245 | try: 246 | message = next(consumer) 247 | except Exception: 248 | print("Warning: failed to pull kafka consumer pipe") 249 | sleep(5) 250 | continue 251 | 
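            # each message pulled from Kafka is expected to be a JSON-encoded list of per-request
            # statistics dictionaries pushed by the inference service; payloads that fail to decode
            # are skipped below so a single malformed message cannot stall the processing loop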
252 | # noinspection PyBroadException 253 | try: 254 | list_data = json.loads(message.value.decode("utf-8")) 255 | except Exception: 256 | print("Warning: failed to decode kafka stats message") 257 | continue 258 | 259 | for data in list_data: 260 | try: 261 | url = data.pop("_url", None) 262 | if not url: 263 | # should not happen 264 | continue 265 | endpoint_metric = self._current_endpoints.get(url) 266 | if not endpoint_metric: 267 | # add default one, we will just log the reserved valued: 268 | endpoint_metric = dict() 269 | self._current_endpoints[url] = EndpointMetricLogging(endpoint=url) 270 | self._auto_added_endpoints.add(url) 271 | # we should sync, 272 | if time()-self._last_sync_time > self._sync_threshold_sec: 273 | self._last_sync_time = time() 274 | self._sync_event.set() 275 | 276 | metric_url_log = self._prometheus_metrics.get(url) 277 | if not metric_url_log: 278 | # create a new one 279 | metric_url_log = dict() 280 | self._prometheus_metrics[url] = metric_url_log 281 | 282 | # check if we have the prometheus_logger 283 | for k, v in data.items(): 284 | prometheus_logger = metric_url_log.get(k) 285 | if not prometheus_logger: 286 | prometheus_logger = self._create_prometheus_logger_class(url, k, endpoint_metric) 287 | if not prometheus_logger: 288 | continue 289 | metric_url_log[k] = prometheus_logger 290 | 291 | self._report_value(prometheus_logger, v) 292 | 293 | except Exception as ex: 294 | print("Warning: failed to report stat to Prometheus: {}".format(ex)) 295 | continue 296 | 297 | @staticmethod 298 | def _report_value(prometheus_logger: Optional[MetricWrapperBase], v: Any) -> bool: 299 | if not prometheus_logger: 300 | # this means no one configured the variable to log 301 | return False 302 | elif isinstance(prometheus_logger, (Histogram, EnumHistogram)): 303 | prometheus_logger.observe(amount=v) 304 | elif isinstance(prometheus_logger, Gauge): 305 | prometheus_logger.set(value=v) 306 | elif isinstance(prometheus_logger, Counter): 307 | prometheus_logger.inc(amount=v) 308 | elif isinstance(prometheus_logger, Enum): 309 | prometheus_logger.state(state=v) 310 | else: 311 | # we should not get here 312 | return False 313 | 314 | return True 315 | 316 | def _create_prometheus_logger_class( 317 | self, 318 | url: str, 319 | variable_name: str, 320 | endpoint_config: EndpointMetricLogging 321 | ) -> Optional[MetricWrapperBase]: 322 | reserved_cls = self._reserved.get(variable_name) 323 | name = "{}:{}".format(url, variable_name) 324 | name = re.sub(r"[^(a-zA-Z0-9_:)]", "_", name) 325 | if reserved_cls: 326 | return reserved_cls(name=name, documentation="Built in {}".format(variable_name)) 327 | 328 | if not endpoint_config: 329 | # we should not end up here 330 | return None 331 | 332 | metric_ = endpoint_config.metrics.get(variable_name) 333 | if not metric_: 334 | return None 335 | metric_cls = self._metric_type_class.get(metric_.type) 336 | if not metric_cls: 337 | return None 338 | if metric_cls in (ScalarHistogram, EnumHistogram): 339 | return metric_cls( 340 | name=name, 341 | documentation="User defined metric {}".format(metric_.type), 342 | buckets=metric_.buckets 343 | ) 344 | return metric_cls(name=name, documentation="User defined metric {}".format(metric_.type)) 345 | 346 | def _sync_daemon(self): 347 | self._last_sync_time = time() 348 | poll_freq_sec = self._poll_frequency_min*60 349 | print("Instance [{}, pid={}]: Launching - configuration sync every {} sec".format( 350 | self.task.id, os.getpid(), poll_freq_sec)) 351 | while True: 352 | try: 353 | 
self._serving_service.reload() 354 | endpoint_metrics = self._serving_service.list_endpoint_logging() 355 | self._last_sync_time = time() 356 | # we might have added new urls (auto metric logging), we need to compare only configured keys 357 | current_endpoints = { 358 | k: v for k, v in self._current_endpoints.items() 359 | if k not in self._auto_added_endpoints} 360 | if current_endpoints == endpoint_metrics: 361 | self._sync_event.wait(timeout=poll_freq_sec) 362 | self._sync_event.clear() 363 | continue 364 | 365 | # update metrics: 366 | self._dirty = True 367 | self._auto_added_endpoints -= set(endpoint_metrics.keys()) 368 | # merge top level configuration (we might have auto logged url endpoints) 369 | self._current_endpoints.update(deepcopy(endpoint_metrics)) 370 | print("New configuration synced") 371 | except Exception as ex: 372 | print("Warning: failed to sync state from serving service Task: {}".format(ex)) 373 | continue 374 | -------------------------------------------------------------------------------- /clearml_serving/statistics/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml>=1.3.1 2 | numpy>=1.20,<1.24 3 | requests>=2.31.0 4 | kafka-python>=2.0.2,<2.1 5 | prometheus_client>=0.13.1,<0.14 6 | lz4>=4.0.0,<5 7 | pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability 8 | -------------------------------------------------------------------------------- /clearml_serving/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.3.2' 2 | -------------------------------------------------------------------------------- /docker/datasource.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: Prometheus 5 | type: prometheus 6 | # Access mode - proxy (server in the UI) or direct (browser in the UI). 
7 | access: proxy 8 | url: http://clearml-serving-prometheus:9090 9 | -------------------------------------------------------------------------------- /docker/docker-compose-triton-gpu.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | zookeeper: 5 | image: bitnami/zookeeper:3.7.0 6 | container_name: clearml-serving-zookeeper 7 | # ports: 8 | # - "2181:2181" 9 | environment: 10 | - ALLOW_ANONYMOUS_LOGIN=yes 11 | networks: 12 | - clearml-serving-backend 13 | 14 | kafka: 15 | image: bitnami/kafka:3.1.1 16 | container_name: clearml-serving-kafka 17 | # ports: 18 | # - "9092:9092" 19 | environment: 20 | - KAFKA_BROKER_ID=1 21 | - KAFKA_CFG_LISTENERS=PLAINTEXT://clearml-serving-kafka:9092 22 | - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://clearml-serving-kafka:9092 23 | - KAFKA_CFG_ZOOKEEPER_CONNECT=clearml-serving-zookeeper:2181 24 | - ALLOW_PLAINTEXT_LISTENER=yes 25 | - KAFKA_CREATE_TOPICS="topic_test:1:1" 26 | depends_on: 27 | - zookeeper 28 | networks: 29 | - clearml-serving-backend 30 | 31 | prometheus: 32 | image: prom/prometheus:v2.34.0 33 | container_name: clearml-serving-prometheus 34 | volumes: 35 | - ./prometheus.yml:/prometheus.yml 36 | command: 37 | - '--config.file=/prometheus.yml' 38 | - '--storage.tsdb.path=/prometheus' 39 | - '--web.console.libraries=/etc/prometheus/console_libraries' 40 | - '--web.console.templates=/etc/prometheus/consoles' 41 | - '--storage.tsdb.retention.time=200h' 42 | - '--web.enable-lifecycle' 43 | restart: unless-stopped 44 | # ports: 45 | # - "9090:9090" 46 | depends_on: 47 | - clearml-serving-statistics 48 | networks: 49 | - clearml-serving-backend 50 | 51 | alertmanager: 52 | image: prom/alertmanager:v0.23.0 53 | container_name: clearml-serving-alertmanager 54 | restart: unless-stopped 55 | # ports: 56 | # - "9093:9093" 57 | depends_on: 58 | - prometheus 59 | - grafana 60 | networks: 61 | - clearml-serving-backend 62 | 63 | grafana: 64 | image: grafana/grafana:8.4.4-ubuntu 65 | container_name: clearml-serving-grafana 66 | volumes: 67 | - './datasource.yml:/etc/grafana/provisioning/datasources/datasource.yaml' 68 | restart: unless-stopped 69 | ports: 70 | - "3000:3000" 71 | depends_on: 72 | - prometheus 73 | networks: 74 | - clearml-serving-backend 75 | 76 | 77 | clearml-serving-inference: 78 | image: allegroai/clearml-serving-inference:latest 79 | container_name: clearml-serving-inference 80 | restart: unless-stopped 81 | # optimize perforamnce 82 | security_opt: 83 | - seccomp:unconfined 84 | ports: 85 | - "8080:8080" 86 | environment: 87 | CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-https://app.clear.ml} 88 | CLEARML_API_HOST: ${CLEARML_API_HOST:-https://api.clear.ml} 89 | CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-https://files.clear.ml} 90 | CLEARML_API_ACCESS_KEY: ${CLEARML_API_ACCESS_KEY} 91 | CLEARML_API_SECRET_KEY: ${CLEARML_API_SECRET_KEY} 92 | CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-} 93 | CLEARML_SERVING_PORT: ${CLEARML_SERVING_PORT:-8080} 94 | CLEARML_SERVING_POLL_FREQ: ${CLEARML_SERVING_POLL_FREQ:-1.0} 95 | CLEARML_DEFAULT_BASE_SERVE_URL: ${CLEARML_DEFAULT_BASE_SERVE_URL:-http://127.0.0.1:8080/serve} 96 | CLEARML_DEFAULT_KAFKA_SERVE_URL: ${CLEARML_DEFAULT_KAFKA_SERVE_URL:-clearml-serving-kafka:9092} 97 | CLEARML_DEFAULT_TRITON_GRPC_ADDR: ${CLEARML_DEFAULT_TRITON_GRPC_ADDR:-clearml-serving-triton:8001} 98 | CLEARML_USE_GUNICORN: ${CLEARML_USE_GUNICORN:-} 99 | CLEARML_SERVING_NUM_PROCESS: ${CLEARML_SERVING_NUM_PROCESS:-} 100 | CLEARML_EXTRA_PYTHON_PACKAGES: 
${CLEARML_EXTRA_PYTHON_PACKAGES:-} 101 | AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-} 102 | AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-} 103 | AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-} 104 | GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-} 105 | AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-} 106 | AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-} 107 | depends_on: 108 | - kafka 109 | - clearml-serving-triton 110 | networks: 111 | - clearml-serving-backend 112 | 113 | clearml-serving-triton: 114 | image: allegroai/clearml-serving-triton:latest 115 | container_name: clearml-serving-triton 116 | restart: unless-stopped 117 | # optimize perforamnce 118 | security_opt: 119 | - seccomp:unconfined 120 | # ports: 121 | # - "8001:8001" 122 | environment: 123 | CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-https://app.clear.ml} 124 | CLEARML_API_HOST: ${CLEARML_API_HOST:-https://api.clear.ml} 125 | CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-https://files.clear.ml} 126 | CLEARML_API_ACCESS_KEY: ${CLEARML_API_ACCESS_KEY} 127 | CLEARML_API_SECRET_KEY: ${CLEARML_API_SECRET_KEY} 128 | CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-} 129 | CLEARML_TRITON_POLL_FREQ: ${CLEARML_TRITON_POLL_FREQ:-1.0} 130 | CLEARML_TRITON_METRIC_FREQ: ${CLEARML_TRITON_METRIC_FREQ:-1.0} 131 | CLEARML_EXTRA_PYTHON_PACKAGES: ${CLEARML_EXTRA_PYTHON_PACKAGES:-} 132 | AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-} 133 | AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-} 134 | AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-} 135 | GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-} 136 | AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-} 137 | AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-} 138 | depends_on: 139 | - kafka 140 | networks: 141 | - clearml-serving-backend 142 | deploy: 143 | resources: 144 | reservations: 145 | devices: 146 | - capabilities: [gpu] 147 | 148 | clearml-serving-statistics: 149 | image: allegroai/clearml-serving-statistics:latest 150 | container_name: clearml-serving-statistics 151 | restart: unless-stopped 152 | # optimize perforamnce 153 | security_opt: 154 | - seccomp:unconfined 155 | # ports: 156 | # - "9999:9999" 157 | environment: 158 | CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-https://app.clear.ml} 159 | CLEARML_API_HOST: ${CLEARML_API_HOST:-https://api.clear.ml} 160 | CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-https://files.clear.ml} 161 | CLEARML_API_ACCESS_KEY: ${CLEARML_API_ACCESS_KEY} 162 | CLEARML_API_SECRET_KEY: ${CLEARML_API_SECRET_KEY} 163 | CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-} 164 | CLEARML_DEFAULT_KAFKA_SERVE_URL: ${CLEARML_DEFAULT_KAFKA_SERVE_URL:-clearml-serving-kafka:9092} 165 | CLEARML_SERVING_POLL_FREQ: ${CLEARML_SERVING_POLL_FREQ:-1.0} 166 | depends_on: 167 | - kafka 168 | networks: 169 | - clearml-serving-backend 170 | 171 | 172 | networks: 173 | clearml-serving-backend: 174 | driver: bridge 175 | -------------------------------------------------------------------------------- /docker/docker-compose-triton.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | zookeeper: 5 | image: bitnami/zookeeper:3.7.0 6 | container_name: clearml-serving-zookeeper 7 | # ports: 8 | # - "2181:2181" 9 | environment: 10 | - ALLOW_ANONYMOUS_LOGIN=yes 11 | networks: 12 | - clearml-serving-backend 13 | 14 | kafka: 15 | image: bitnami/kafka:3.1.1 16 | container_name: clearml-serving-kafka 17 | # ports: 18 | # - "9092:9092" 19 | environment: 20 | - KAFKA_BROKER_ID=1 21 | - 
KAFKA_CFG_LISTENERS=PLAINTEXT://clearml-serving-kafka:9092 22 | - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://clearml-serving-kafka:9092 23 | - KAFKA_CFG_ZOOKEEPER_CONNECT=clearml-serving-zookeeper:2181 24 | - ALLOW_PLAINTEXT_LISTENER=yes 25 | - KAFKA_CREATE_TOPICS="topic_test:1:1" 26 | depends_on: 27 | - zookeeper 28 | networks: 29 | - clearml-serving-backend 30 | 31 | prometheus: 32 | image: prom/prometheus:v2.34.0 33 | container_name: clearml-serving-prometheus 34 | volumes: 35 | - ./prometheus.yml:/prometheus.yml 36 | command: 37 | - '--config.file=/prometheus.yml' 38 | - '--storage.tsdb.path=/prometheus' 39 | - '--web.console.libraries=/etc/prometheus/console_libraries' 40 | - '--web.console.templates=/etc/prometheus/consoles' 41 | - '--storage.tsdb.retention.time=200h' 42 | - '--web.enable-lifecycle' 43 | restart: unless-stopped 44 | # ports: 45 | # - "9090:9090" 46 | depends_on: 47 | - clearml-serving-statistics 48 | networks: 49 | - clearml-serving-backend 50 | 51 | alertmanager: 52 | image: prom/alertmanager:v0.23.0 53 | container_name: clearml-serving-alertmanager 54 | restart: unless-stopped 55 | # ports: 56 | # - "9093:9093" 57 | depends_on: 58 | - prometheus 59 | - grafana 60 | networks: 61 | - clearml-serving-backend 62 | 63 | grafana: 64 | image: grafana/grafana:8.4.4-ubuntu 65 | container_name: clearml-serving-grafana 66 | volumes: 67 | - './datasource.yml:/etc/grafana/provisioning/datasources/datasource.yaml' 68 | restart: unless-stopped 69 | ports: 70 | - "3000:3000" 71 | depends_on: 72 | - prometheus 73 | networks: 74 | - clearml-serving-backend 75 | 76 | 77 | clearml-serving-inference: 78 | image: allegroai/clearml-serving-inference:latest 79 | container_name: clearml-serving-inference 80 | restart: unless-stopped 81 | # optimize perforamnce 82 | security_opt: 83 | - seccomp:unconfined 84 | ports: 85 | - "8080:8080" 86 | environment: 87 | CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-https://app.clear.ml} 88 | CLEARML_API_HOST: ${CLEARML_API_HOST:-https://api.clear.ml} 89 | CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-https://files.clear.ml} 90 | CLEARML_API_ACCESS_KEY: ${CLEARML_API_ACCESS_KEY} 91 | CLEARML_API_SECRET_KEY: ${CLEARML_API_SECRET_KEY} 92 | CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-} 93 | CLEARML_SERVING_PORT: ${CLEARML_SERVING_PORT:-8080} 94 | CLEARML_SERVING_POLL_FREQ: ${CLEARML_SERVING_POLL_FREQ:-1.0} 95 | CLEARML_DEFAULT_BASE_SERVE_URL: ${CLEARML_DEFAULT_BASE_SERVE_URL:-http://127.0.0.1:8080/serve} 96 | CLEARML_DEFAULT_KAFKA_SERVE_URL: ${CLEARML_DEFAULT_KAFKA_SERVE_URL:-clearml-serving-kafka:9092} 97 | CLEARML_DEFAULT_TRITON_GRPC_ADDR: ${CLEARML_DEFAULT_TRITON_GRPC_ADDR:-clearml-serving-triton:8001} 98 | CLEARML_USE_GUNICORN: ${CLEARML_USE_GUNICORN:-} 99 | CLEARML_SERVING_NUM_PROCESS: ${CLEARML_SERVING_NUM_PROCESS:-} 100 | CLEARML_EXTRA_PYTHON_PACKAGES: ${CLEARML_EXTRA_PYTHON_PACKAGES:-} 101 | AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-} 102 | AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-} 103 | AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-} 104 | GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-} 105 | AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-} 106 | AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-} 107 | depends_on: 108 | - kafka 109 | - clearml-serving-triton 110 | networks: 111 | - clearml-serving-backend 112 | 113 | clearml-serving-triton: 114 | image: allegroai/clearml-serving-triton:latest 115 | container_name: clearml-serving-triton 116 | restart: unless-stopped 117 | # optimize perforamnce 118 | security_opt: 119 | - 
seccomp:unconfined 120 | # ports: 121 | # - "8001:8001" 122 | environment: 123 | CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-https://app.clear.ml} 124 | CLEARML_API_HOST: ${CLEARML_API_HOST:-https://api.clear.ml} 125 | CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-https://files.clear.ml} 126 | CLEARML_API_ACCESS_KEY: ${CLEARML_API_ACCESS_KEY} 127 | CLEARML_API_SECRET_KEY: ${CLEARML_API_SECRET_KEY} 128 | CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-} 129 | CLEARML_TRITON_POLL_FREQ: ${CLEARML_TRITON_POLL_FREQ:-1.0} 130 | CLEARML_TRITON_METRIC_FREQ: ${CLEARML_TRITON_METRIC_FREQ:-1.0} 131 | CLEARML_EXTRA_PYTHON_PACKAGES: ${CLEARML_EXTRA_PYTHON_PACKAGES:-} 132 | AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-} 133 | AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-} 134 | AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-} 135 | GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-} 136 | AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-} 137 | AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-} 138 | depends_on: 139 | - kafka 140 | networks: 141 | - clearml-serving-backend 142 | 143 | clearml-serving-statistics: 144 | image: allegroai/clearml-serving-statistics:latest 145 | container_name: clearml-serving-statistics 146 | restart: unless-stopped 147 | # optimize perforamnce 148 | security_opt: 149 | - seccomp:unconfined 150 | # ports: 151 | # - "9999:9999" 152 | environment: 153 | CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-https://app.clear.ml} 154 | CLEARML_API_HOST: ${CLEARML_API_HOST:-https://api.clear.ml} 155 | CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-https://files.clear.ml} 156 | CLEARML_API_ACCESS_KEY: ${CLEARML_API_ACCESS_KEY} 157 | CLEARML_API_SECRET_KEY: ${CLEARML_API_SECRET_KEY} 158 | CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-} 159 | CLEARML_DEFAULT_KAFKA_SERVE_URL: ${CLEARML_DEFAULT_KAFKA_SERVE_URL:-clearml-serving-kafka:9092} 160 | CLEARML_SERVING_POLL_FREQ: ${CLEARML_SERVING_POLL_FREQ:-1.0} 161 | depends_on: 162 | - kafka 163 | networks: 164 | - clearml-serving-backend 165 | 166 | 167 | networks: 168 | clearml-serving-backend: 169 | driver: bridge 170 | -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | zookeeper: 5 | image: bitnami/zookeeper:3.7.0 6 | container_name: clearml-serving-zookeeper 7 | # ports: 8 | # - "2181:2181" 9 | environment: 10 | - ALLOW_ANONYMOUS_LOGIN=yes 11 | networks: 12 | - clearml-serving-backend 13 | 14 | kafka: 15 | image: bitnami/kafka:3.1.1 16 | container_name: clearml-serving-kafka 17 | # ports: 18 | # - "9092:9092" 19 | environment: 20 | - KAFKA_BROKER_ID=1 21 | - KAFKA_CFG_LISTENERS=PLAINTEXT://clearml-serving-kafka:9092 22 | - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://clearml-serving-kafka:9092 23 | - KAFKA_CFG_ZOOKEEPER_CONNECT=clearml-serving-zookeeper:2181 24 | - ALLOW_PLAINTEXT_LISTENER=yes 25 | - KAFKA_CREATE_TOPICS="topic_test:1:1" 26 | depends_on: 27 | - zookeeper 28 | networks: 29 | - clearml-serving-backend 30 | 31 | prometheus: 32 | image: prom/prometheus:v2.34.0 33 | container_name: clearml-serving-prometheus 34 | volumes: 35 | - ./prometheus.yml:/prometheus.yml 36 | command: 37 | - '--config.file=/prometheus.yml' 38 | - '--storage.tsdb.path=/prometheus' 39 | - '--web.console.libraries=/etc/prometheus/console_libraries' 40 | - '--web.console.templates=/etc/prometheus/consoles' 41 | - '--storage.tsdb.retention.time=200h' 42 | - '--web.enable-lifecycle' 43 | restart: 
unless-stopped 44 | # ports: 45 | # - "9090:9090" 46 | depends_on: 47 | - clearml-serving-statistics 48 | networks: 49 | - clearml-serving-backend 50 | 51 | alertmanager: 52 | image: prom/alertmanager:v0.23.0 53 | container_name: clearml-serving-alertmanager 54 | restart: unless-stopped 55 | # ports: 56 | # - "9093:9093" 57 | depends_on: 58 | - prometheus 59 | - grafana 60 | networks: 61 | - clearml-serving-backend 62 | 63 | grafana: 64 | image: grafana/grafana:8.4.4-ubuntu 65 | container_name: clearml-serving-grafana 66 | volumes: 67 | - './datasource.yml:/etc/grafana/provisioning/datasources/datasource.yaml' 68 | restart: unless-stopped 69 | ports: 70 | - "3000:3000" 71 | depends_on: 72 | - prometheus 73 | networks: 74 | - clearml-serving-backend 75 | 76 | 77 | clearml-serving-inference: 78 | image: allegroai/clearml-serving-inference:latest 79 | container_name: clearml-serving-inference 80 | restart: unless-stopped 81 | # optimize perforamnce 82 | security_opt: 83 | - seccomp:unconfined 84 | ports: 85 | - "8080:8080" 86 | environment: 87 | CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-https://app.clear.ml} 88 | CLEARML_API_HOST: ${CLEARML_API_HOST:-https://api.clear.ml} 89 | CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-https://files.clear.ml} 90 | CLEARML_API_ACCESS_KEY: ${CLEARML_API_ACCESS_KEY} 91 | CLEARML_API_SECRET_KEY: ${CLEARML_API_SECRET_KEY} 92 | CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-} 93 | CLEARML_SERVING_PORT: ${CLEARML_SERVING_PORT:-8080} 94 | CLEARML_SERVING_POLL_FREQ: ${CLEARML_SERVING_POLL_FREQ:-1.0} 95 | CLEARML_DEFAULT_BASE_SERVE_URL: ${CLEARML_DEFAULT_BASE_SERVE_URL:-http://127.0.0.1:8080/serve} 96 | CLEARML_DEFAULT_KAFKA_SERVE_URL: ${CLEARML_DEFAULT_KAFKA_SERVE_URL:-clearml-serving-kafka:9092} 97 | CLEARML_DEFAULT_TRITON_GRPC_ADDR: ${CLEARML_DEFAULT_TRITON_GRPC_ADDR:-} 98 | CLEARML_USE_GUNICORN: ${CLEARML_USE_GUNICORN:-} 99 | CLEARML_SERVING_NUM_PROCESS: ${CLEARML_SERVING_NUM_PROCESS:-} 100 | CLEARML_EXTRA_PYTHON_PACKAGES: ${CLEARML_EXTRA_PYTHON_PACKAGES:-} 101 | AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-} 102 | AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-} 103 | AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-} 104 | GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-} 105 | AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-} 106 | AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-} 107 | depends_on: 108 | - kafka 109 | networks: 110 | - clearml-serving-backend 111 | 112 | clearml-serving-statistics: 113 | image: allegroai/clearml-serving-statistics:latest 114 | container_name: clearml-serving-statistics 115 | restart: unless-stopped 116 | # optimize perforamnce 117 | security_opt: 118 | - seccomp:unconfined 119 | # ports: 120 | # - "9999:9999" 121 | environment: 122 | CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-https://app.clear.ml} 123 | CLEARML_API_HOST: ${CLEARML_API_HOST:-https://api.clear.ml} 124 | CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-https://files.clear.ml} 125 | CLEARML_API_ACCESS_KEY: ${CLEARML_API_ACCESS_KEY} 126 | CLEARML_API_SECRET_KEY: ${CLEARML_API_SECRET_KEY} 127 | CLEARML_SERVING_TASK_ID: ${CLEARML_SERVING_TASK_ID:-} 128 | CLEARML_DEFAULT_KAFKA_SERVE_URL: ${CLEARML_DEFAULT_KAFKA_SERVE_URL:-clearml-serving-kafka:9092} 129 | CLEARML_SERVING_POLL_FREQ: ${CLEARML_SERVING_POLL_FREQ:-1.0} 130 | depends_on: 131 | - kafka 132 | networks: 133 | - clearml-serving-backend 134 | 135 | 136 | networks: 137 | clearml-serving-backend: 138 | driver: bridge 139 | -------------------------------------------------------------------------------- /docker/example.env: 
-------------------------------------------------------------------------------- 1 | CLEARML_WEB_HOST="https://app.clear.ml" 2 | CLEARML_API_HOST="https://api.clear.ml" 3 | CLEARML_FILES_HOST="https://files.clear.ml" 4 | CLEARML_API_ACCESS_KEY=" " 5 | CLEARML_API_SECRET_KEY=" " 6 | CLEARML_SERVING_TASK_ID=" " 7 | -------------------------------------------------------------------------------- /docker/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s # By default, scrape targets every 15 seconds. 3 | evaluation_interval: 15s # By default, scrape targets every 15 seconds. 4 | external_labels: 5 | monitor: 'clearml-serving' 6 | 7 | scrape_configs: 8 | # The job name is added as a label `job= ` to any timeseries scraped from this config. 9 | - job_name: 'prometheus' 10 | 11 | scrape_interval: 5s 12 | 13 | static_configs: 14 | - targets: ['localhost:9090'] 15 | 16 | # The job name is added as a label `job= ` to any timeseries scraped from this config. 17 | - job_name: 'clearml-inference-stats' 18 | 19 | scrape_interval: 5s 20 | 21 | static_configs: 22 | - targets: ['clearml-serving-statistics:9999'] 23 | -------------------------------------------------------------------------------- /docs/design_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-serving/1def0a6901617767687f2b747aaffdc060f96046/docs/design_diagram.png -------------------------------------------------------------------------------- /docs/grafana_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-serving/1def0a6901617767687f2b747aaffdc060f96046/docs/grafana_screenshot.png -------------------------------------------------------------------------------- /docs/webapp_screenshots.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-serving/1def0a6901617767687f2b747aaffdc060f96046/docs/webapp_screenshots.gif -------------------------------------------------------------------------------- /examples/custom/preprocess.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Optional 2 | 3 | import joblib 4 | import numpy as np 5 | 6 | 7 | # Notice Preprocess class Must be named "Preprocess" 8 | class Preprocess(object): 9 | """ 10 | Notice the execution flows is synchronous as follows: 11 | 12 | 1. RestAPI(...) -> body: dict 13 | 2. preprocess(body: dict, ...) -> data: Any 14 | 3. process(data: Any, ...) -> data: Any 15 | 4. postprocess(data: Any, ...) -> result: dict 16 | 5. RestAPI(result: dict) -> returned request 17 | """ 18 | def __init__(self): 19 | """ 20 | Set any initial property on the Task (usually model object) 21 | Notice these properties will be accessed from multiple threads. 22 | If you need a stateful (per request) data, use the `state` dict argument passed to pre/post/process functions 23 | """ 24 | # set internal state, this will be called only once. (i.e. 
not per request) 25 | self._model = None 26 | 27 | def load(self, local_file_name: str) -> Optional[Any]: # noqa 28 | """ 29 | Optional: provide loading method for the model 30 | useful if we need to load a model in a specific way for the prediction engine to work 31 | :param local_file_name: file name / path to load the model from 32 | :return: Object that will be called with .predict() method for inference 33 | """ 34 | 35 | # Example: now let's load the actual model 36 | 37 | self._model = joblib.load(local_file_name) 38 | 39 | def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any: 40 | """ 41 | Optional: do something with the request data, return any type of object. 42 | The returned object will be passed as is to the inference engine 43 | 44 | :param body: dictionary as received from the RestAPI 45 | :param state: Use state dict to store data passed to the post-processing function call. 46 | This is a per-request state dict (meaning a new empty dict will be passed per request) 47 | Usage example: 48 | >>> def preprocess(..., state): 49 | state['preprocess_aux_data'] = [1,2,3] 50 | >>> def postprocess(..., state): 51 | print(state['preprocess_aux_data']) 52 | :param collect_custom_statistics_fn: Optional, if provided allows sending a custom set of key/values 53 | to the statistics collector service. 54 | None is passed if the statistics collector is not configured, or if the current request should not be 55 | collected 56 | 57 | Usage example: 58 | >>> print(body) 59 | {"x0": 1, "x1": 2} 60 | >>> if collect_custom_statistics_fn: 61 | >>> collect_custom_statistics_fn({"x0": 1, "x1": 2}) 62 | 63 | :return: Object to be passed directly to the model inference 64 | """ 65 | 66 | # we expect to get a feature vector on the `features` entry of the dict 67 | return np.array(body.get("features", []), dtype=float) 68 | 69 | def process( 70 | self, 71 | data: Any, 72 | state: dict, 73 | collect_custom_statistics_fn: Optional[Callable[[dict], None]], 74 | ) -> Any: # noqa 75 | """ 76 | Optional: do something with the actual data, return any type of object. 77 | The returned object will be passed as is to the postprocess function 78 | 79 | :param data: object as received from the preprocessing function 80 | :param state: Use state dict to store data passed to the post-processing function call. 81 | This is a per-request state dict (meaning a dict instance per request) 82 | Usage example: 83 | >>> def preprocess(..., state): 84 | state['preprocess_aux_data'] = [1,2,3] 85 | >>> def postprocess(..., state): 86 | print(state['preprocess_aux_data']) 87 | :param collect_custom_statistics_fn: Optional, if provided allows sending a custom set of key/values 88 | to the statistics collector service. 89 | None is passed if the statistics collector is not configured, or if the current request should not be collected 90 | 91 | Usage example: 92 | >>> if collect_custom_statistics_fn: 93 | >>> collect_custom_statistics_fn({"type": "classification"}) 94 | 95 | :return: Object to be passed to the post-processing function 96 | """ 97 | 98 | # this is where we do the heavy lifting, i.e. run our model.
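        # note: np.atleast_2d() below promotes a single 1-D feature vector into a batch of one sample, so the same predict() call handles both single and batched requests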
99 | # notice we know data is a numpy array of type float, because this is what we prepared in the preprocessing function 100 | data = self._model.predict(np.atleast_2d(data)) 101 | # data is also a numpy array, as returned from our fit function 102 | return data 103 | 104 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 105 | """ 106 | Optional: post process the data returned from the model inference engine 107 | returned dict will be passed back as the request result as is. 108 | 109 | :param data: object as received from the inference model function 110 | :param state: Use state dict to store data passed to the post-processing function call. 111 | This is a per-request state dict (meaning a dict instance per request) 112 | Usage example: 113 | >>> def preprocess(..., state): 114 | state['preprocess_aux_data'] = [1,2,3] 115 | >>> def postprocess(..., state): 116 | print(state['preprocess_aux_data']) 117 | :param collect_custom_statistics_fn: Optional, if provided allows sending a custom set of key/values 118 | to the statistics collector service. 119 | None is passed if the statistics collector is not configured, or if the current request should not be 120 | collected 121 | 122 | Usage example: 123 | >>> if collect_custom_statistics_fn: 124 | >>> collect_custom_statistics_fn({"y": 1}) 125 | 126 | :return: Dictionary passed directly as the returned result of the RestAPI 127 | """ 128 | 129 | # Now we take the resulting numpy array (the prediction) and create a list of values to 130 | # send back as the restapi return value 131 | # data is the return value from model.predict, we will return it as a list under the `predict` key 132 | return dict(predict=data.tolist()) 133 | -------------------------------------------------------------------------------- /examples/custom/readme.md: -------------------------------------------------------------------------------- 1 | # Train and Deploy custom model 2 | 3 | ## training mock custom model 4 | 5 | Run the mock python training code 6 | ```bash 7 | pip install -r examples/custom/requirements.txt 8 | python examples/custom/train_model.py 9 | ``` 10 | 11 | The output will be a model created on the project "serving examples", by the name "custom train model" 12 | 13 | ## setting up the serving service 14 | 15 | 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID) 16 | 2. Make sure to add any required additional packages (for your custom model) to the [docker-compose.yml](https://github.com/allegroai/clearml-serving/blob/826f503cf4a9b069b89eb053696d218d1ce26f47/docker/docker-compose.yml#L97) (or as an environment variable for the `clearml-serving-inference` container), for example: `CLEARML_EXTRA_PYTHON_PACKAGES="scikit-learn numpy"` 17 | 3. Create model endpoint: 18 | `clearml-serving --id model add --engine custom --endpoint "test_model_custom" --preprocess "examples/custom/preprocess.py" --name "custom train model" --project "serving examples"` 19 | 20 | Or auto update 21 | 22 | `clearml-serving --id model auto-update --engine custom --endpoint "test_model_custom_auto" --preprocess "examples/custom/preprocess.py" --name "custom train model" --project "serving examples" --max-versions 2` 23 | 24 | Or add Canary endpoint 25 | 26 | `clearml-serving --id model canary --endpoint "test_model_custom_auto" --weights 0.1 0.9 --input-endpoint-prefix test_model_custom_auto` 27 | 28 | 4. 
If you already have the `clearml-serving` docker-compose running, it might take it a minute or two to sync with the new endpoint. 29 | 30 | Or you can run the clearml-serving container independently `docker run -v ~/clearml.conf:/root/clearml.conf -p 8080:8080 -e CLEARML_SERVING_TASK_ID= clearml-serving:latest` 31 | 32 | 5. Test new endpoint (do notice the first call will trigger the model pulling, so it might take longer, from here on, it's all in memory): `curl -X POST "http://127.0.0.1:8080/serve/test_model_custom" -H "accept: application/json" -H "Content-Type: application/json" -d '{"features": [1, 2, 3]}'` 33 | 34 | 35 | > **_Notice:_** You can also change the serving service while it is already running! 36 | This includes adding/removing endpoints, adding canary model routing etc. 37 | by default new endpoints/models will be automatically updated after 1 minute 38 | -------------------------------------------------------------------------------- /examples/custom/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml >= 1.1.6 2 | scikit-learn 3 | pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability 4 | -------------------------------------------------------------------------------- /examples/custom/train_model.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LogisticRegression 2 | from sklearn.datasets import make_blobs 3 | from joblib import dump 4 | from clearml import Task 5 | 6 | task = Task.init(project_name="serving examples", task_name="custom train model", output_uri=True) 7 | 8 | # generate 2d classification dataset 9 | X, y = make_blobs(n_samples=100, centers=2, n_features=3, random_state=1) 10 | # fit final model 11 | model = LogisticRegression() 12 | model.fit(X, y) 13 | 14 | dump(model, filename="custom-model.pkl", compress=9) 15 | 16 | -------------------------------------------------------------------------------- /examples/ensemble/preprocess.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | 5 | 6 | # Notice Preprocess class Must be named "Preprocess" 7 | class Preprocess(object): 8 | def __init__(self): 9 | # set internal state, this will be called only once. (i.e. 
not per request) 10 | pass 11 | 12 | def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any: 13 | # we expect to get two valid on the dict x0, and x1 14 | return [[body.get("x0", None), body.get("x1", None)], ] 15 | 16 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 17 | # post process the data returned from the model inference engine 18 | # data is the return value from model.predict we will put is inside a return value as Y 19 | return dict(y=data.tolist() if isinstance(data, np.ndarray) else data) 20 | -------------------------------------------------------------------------------- /examples/ensemble/readme.md: -------------------------------------------------------------------------------- 1 | # Train and Deploy Scikit-Learn model ensemble 2 | 3 | ## training mock voting regression model 4 | 5 | Run the mock python training code 6 | ```bash 7 | pip install -r examples/ensemble/requirements.txt 8 | python examples/ensemble/train_ensemble.py 9 | ``` 10 | 11 | The output will be a model created on the project "serving examples", by the name "train model ensemble" 12 | 13 | ## setting up the serving service 14 | 15 | 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID) 16 | 2. Create model endpoint: 17 | `clearml-serving --id model add --engine sklearn --endpoint "test_model_ensemble" --preprocess "examples/ensemble/preprocess.py" --name "train model ensemble" --project "serving examples"` 18 | 19 | Or auto update 20 | 21 | `clearml-serving --id model auto-update --engine sklearn --endpoint "test_model_ensemble_auto" --preprocess "examples/ensemble/preprocess.py" --name "train model ensemble" --project "serving examples" --max-versions 2` 22 | 23 | Or add Canary endpoint 24 | 25 | `clearml-serving --id model canary --endpoint "test_model_ensemble_auto" --weights 0.1 0.9 --input-endpoint-prefix test_model_ensemble_auto` 26 | 27 | 3. If you already have the `clearml-serving` docker-compose running, it might take it a minute or two to sync with the new endpoint. 28 | 29 | Or you can run the clearml-serving container independently `docker run -v ~/clearml.conf:/root/clearml.conf -p 8080:8080 -e CLEARML_SERVING_TASK_ID= clearml-serving:latest` 30 | 31 | 4. Test new endpoint (do notice the first call will trigger the model pulling, so it might take longer, from here on, it's all in memory): `curl -X POST "http://127.0.0.1:8080/serve/test_model_ensemble" -H "accept: application/json" -H "Content-Type: application/json" -d '{"x0": 1, "x1": 2}'` 32 | 33 | > **_Notice:_** You can also change the serving service while it is already running! 34 | This includes adding/removing endpoints, adding canary model routing etc. 
35 | by default new endpoints/models will be automatically updated after 1 minute 36 | -------------------------------------------------------------------------------- /examples/ensemble/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml >= 1.1.6 2 | scikit-learn >= 1.0.2 3 | numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability 4 | pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability 5 | -------------------------------------------------------------------------------- /examples/ensemble/train_ensemble.py: -------------------------------------------------------------------------------- 1 | from sklearn.neighbors import KNeighborsRegressor 2 | from sklearn.ensemble import RandomForestRegressor 3 | from sklearn.ensemble import VotingRegressor 4 | from sklearn.datasets import make_blobs 5 | from joblib import dump 6 | from clearml import Task 7 | 8 | task = Task.init(project_name="serving examples", task_name="train model ensemble", output_uri=True) 9 | 10 | # generate 2d classification dataset 11 | X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1) 12 | 13 | knn = KNeighborsRegressor(n_neighbors=5) 14 | knn.fit(X, y) 15 | 16 | rf = RandomForestRegressor(n_estimators=50) 17 | rf.fit(X, y) 18 | 19 | estimators = [("knn", knn), ("rf", rf), ] 20 | ensemble = VotingRegressor(estimators) 21 | ensemble.fit(X, y) 22 | 23 | dump(ensemble, filename="ensemble-vr.pkl", compress=9) 24 | -------------------------------------------------------------------------------- /examples/huggingface/docker-compose-override.yml: -------------------------------------------------------------------------------- 1 | services: 2 | clearml-serving-triton: 3 | image: allegroai/clearml-serving-triton:1.2.0-22.07 -------------------------------------------------------------------------------- /examples/huggingface/example_payload.json: -------------------------------------------------------------------------------- 1 | {"text": "This is a ClearML example to show how Triton binaries are deployed."} -------------------------------------------------------------------------------- /examples/huggingface/preprocess.py: -------------------------------------------------------------------------------- 1 | """Hugginface preprocessing module for ClearML Serving.""" 2 | from typing import Any 3 | from transformers import AutoTokenizer, PreTrainedTokenizer, TensorType 4 | 5 | 6 | # Notice Preprocess class Must be named "Preprocess" 7 | class Preprocess: 8 | """Processing class will be run by the ClearML inference services before and after each request.""" 9 | 10 | def __init__(self): 11 | """Set internal state, this will be called only once. (i.e. 
not per request).""" 12 | self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained("philschmid/MiniLM-L6-H384-uncased-sst2") 13 | 14 | def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any: 15 | """Will be run when a request comes into the ClearML inference service.""" 16 | tokens = self.tokenizer( 17 | text=body['text'], 18 | max_length=16, 19 | truncation=True, 20 | return_tensors=TensorType.NUMPY, 21 | ) 22 | 23 | return [tokens["input_ids"].tolist(), tokens["token_type_ids"].tolist(), tokens["attention_mask"].tolist()] 24 | 25 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 26 | """Will be run when a request comes back from the Triton Engine.""" 27 | # post process the data returned from the model inference engine 28 | # data is the return value from the model, we will return it as a list under the `data` key 29 | return {'data': data.tolist()} 30 | -------------------------------------------------------------------------------- /examples/huggingface/readme.md: -------------------------------------------------------------------------------- 1 | # Example Huggingface on ClearML Serving 2 | 3 | Technically, the underlying NVIDIA Triton inference engine can handle almost any type of model, including PyTorch models, which is how many Huggingface models are shipped out of the box. 4 | 5 | But in order to get better serving speeds, check out this [repository](https://github.com/ELS-RD/transformer-deploy), their [docs](https://els-rd.github.io/transformer-deploy/) and the excellent accompanying [blogpost](https://medium.com/towards-data-science/hugging-face-transformer-inference-under-1-millisecond-latency-e1be0057a51c) to convert Huggingface models first into ONNX and then into TensorRT optimized binaries. 6 | 7 | ## Model vs Tokenizer 8 | 9 | Most Huggingface NLP models ship with a tokenizer as well. We don’t want to leave it to the end user to embed their own inputs. The blogpost above uses an ensemble endpoint in Triton that first runs some python code that contains the tokenizer and then sends the result to a second endpoint which contains the actual model. 10 | 11 | This is a good approach, but the tokenizer is CPU based and not independently scalable from the GPU based transformer model. With ClearML serving, we can move the tokenization step to the preprocessing script that we provide to the ClearML serving inference container, which makes this step completely autoscalable. 12 | 13 | ## Getting the right TensorRT <> Triton versions 14 | 15 | Chances are very high that the transformer-deploy image has a different Triton version than what ClearML serving uses, which will cause issues later on. Triton is very strict about its version requirements. Please check the Triton version we are using in `clearml_serving/engines/triton/Dockerfile` and compare it to the main Dockerfile from the `transformer-deploy` repo. Check [this](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html) page for more information about which TensorRT version is shipped in which Triton container. 16 | 17 | If they don't match up, either rebuild the ClearML triton image locally with the right Triton version and make sure it is picked up by compose, or build the `transformer-deploy` image locally with the correct version and use it to run the model conversion. Your model has to be optimized using the exact same TensorRT version or it will not serve!
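If you want to double-check which Triton build is actually running, you can query the server's KServe v2 metadata endpoint. This is a minimal sketch, assuming you have mapped Triton's HTTP port (8000 by default) to the host in the compose file; that port mapping is not enabled out of the box:

```python
import requests

# Triton reports its server metadata (including the exact version) on the KServe v2 REST API
meta = requests.get("http://127.0.0.1:8000/v2", timeout=5).json()
print(meta["name"], meta["version"])  # e.g. "triton" and the release it was built from
```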
18 | 19 | ## Setting up for the example 20 | 21 | At the time of this writing, compiling a Huggingface model from the `transformer-deploy` main branch means it is compiled using TensorRT version 8.4.1, which corresponds to Triton version 22.07. 22 | 23 | To get ClearML running on 22.07, all we need to do is change the base image tag in the `docker-compose-triton-gpu.yml` file to the correct version. 24 | 25 | ```diff 26 | ... 27 | clearml-serving-triton: 28 | - image: allegroai/clearml-serving-triton:latest 29 | + image: allegroai/clearml-serving-triton:1.2.0-22.07 30 | container_name: clearml-serving-triton 31 | restart: unless-stopped 32 | # optimize perforamnce 33 | security_opt: 34 | - seccomp:unconfined 35 | ... 36 | ``` 37 | Or you can build your own correct version by adapting the Dockerfile in `clearml_serving/engines/triton/Dockerfile`, building it and making sure the Triton compose yaml uses it instead. 38 | 39 | 40 | ## Setting up the serving service 41 | 42 | ### Get the repository (with the example) 43 | 44 | Clone the serving repository if you haven’t already. 45 | 46 | ```bash 47 | git clone https://github.com/allegroai/clearml-serving.git 48 | cd clearml-serving 49 | ``` 50 | 51 | ### Launch the serving task to clearml 52 | 53 | Install `clearml-serving` either via pip or from the repository. Create the serving Service: 54 | 55 | ```bash 56 | clearml-serving create --name "huggingface serving example" 57 | ``` 58 | 59 | (write down the service ID, this is the same service ID that goes in your env file as well) 60 | 61 | ### Setting up the docker-compose serving stack 62 | Set up the `docker/example.env` file with your ClearML credentials, then add an extra line to install 3rd party packages. In this case, we want to also install the `transformers` package because we’re going to run the tokenizer in the inference container. 63 | 64 | ```bash 65 | CLEARML_WEB_HOST="https://app.clear.ml" 66 | CLEARML_API_HOST="https://api.clear.ml" 67 | CLEARML_FILES_HOST="https://files.clear.ml" 68 | CLEARML_API_ACCESS_KEY="<>" 69 | CLEARML_API_SECRET_KEY="<>" 70 | CLEARML_SERVING_TASK_ID="<>" 71 | # Add this to install necessary packages 72 | CLEARML_EXTRA_PYTHON_PACKAGES=transformers 73 | # Change this depending on your machine and performance needs 74 | CLEARML_USE_GUNICORN=1 75 | CLEARML_SERVING_NUM_PROCESS=8 76 | ``` 77 | 78 | Huggingface models require Triton engine support, please use `docker-compose-triton.yml` / `docker-compose-triton-gpu.yml` or, if running on Kubernetes, the matching helm chart to set things up. Check the repository main readme documentation if you need help. 79 | 80 | To run with the correct version of Triton for this example, do: 81 | ```bash 82 | docker compose --env-file docker/example.env -f docker/docker-compose-triton-gpu.yml -f examples/huggingface/docker-compose-override.yml up --force-recreate 83 | ``` 84 | This should get you a running ClearML stack with Triton, reporting to a ClearML task in a project called `DevOps`. 85 | 86 | ### Getting the sample model 87 | 88 | If you didn’t use the `transformer-deploy` repository on your own model, you can run this single command to get a TensorRT binary of an example classification model. 89 | 90 | Please make sure you have properly installed docker and nvidia-container-toolkit, so it can be run on GPU. The command will download a `model.bin` file to the local directory for you to serve. 
91 | 92 | ```bash 93 | curl https://clearml-public.s3.amazonaws.com/models/model_onnx.bin -o model.bin 94 | ``` 95 | 96 | ### Setup 97 | 98 | 1. Upload the TensorRT model (write down the model ID) 99 | 100 | ```bash 101 | clearml-serving --id model upload --name "Transformer ONNX" --project "Hugginface Serving" --path model.bin 102 | ``` 103 | 104 | 2. Create a model endpoint: 105 | 106 | ```bash 107 | # Without dynamic batching 108 | clearml-serving --id model add --engine triton --endpoint "transformer_model" --model-id --preprocess examples/huggingface/preprocess.py --input-size "[-1, -1]" "[-1, -1]" "[-1, -1]" --input-type int32 int32 int32 --input-name "input_ids" "token_type_ids" "attention_mask" --output-size "[-1, 2]" --output-type float32 --output-name "output" --aux-config platform=\"tensorrt_plan\" default_model_filename=\"model.bin\" 109 | 110 | # With dynamic batching 111 | clearml-serving --id model add --engine triton --endpoint "transformer_model" --model-id --preprocess examples/huggingface/preprocess.py --input-size "[-1]" "[-1]" "[-1]" --input-type int32 int32 int32 --input-name "input_ids" "token_type_ids" "attention_mask" --output-size "[2]" --output-type float32 --output-name "output" --aux-config platform=\"onnxruntime_onnx\" default_model_filename=\"model.bin\" dynamic_batching.preferred_batch_size="[1,2,4,8,16,32,64]" dynamic_batching.max_queue_delay_microseconds=5000000 max_batch_size=64 112 | ``` 113 | 114 | > Note the backslashes for string values! `platform=\"tensorrt_plan\" default_model_filename=\"model.bin\"` 115 | 116 | > **INFO**: the model input and output parameters are usually in a `config.pbtxt` file next to the model itself. 117 | 118 | 1. Make sure you have the `clearml-serving` `docker-compose-triton.yml` (or `docker-compose-triton-gpu.yml`) running, it might take it a minute or two to sync with the new endpoint. 119 | 2. Test new endpoint (do notice the first call will trigger the model pulling, so it might take longer, from here on, it's all in memory): 120 | 121 | > ***Notice:*** 122 | You can also change the serving service while it is already running! This includes adding/removing endpoints, adding canary model routing etc. by default new endpoints/models will be automatically updated after 1 minute 123 | > 124 | 125 | ## Running Inference 126 | 127 | After waiting a little bit for the stack to detect your new endpoint and load it, you can use curl to send a request: 128 | 129 | ```bash 130 | curl -X POST "http://127.0.0.1:8080/serve/transformer_model" -H "accept: application/json" -H "Content-Type: application/json" -d '{"text": "This is a ClearML example to show how Triton binaries are deployed."}' 131 | ``` 132 | 133 | Or use the notebook in this example folder to run it using python `requests` 134 | 135 | The inference request will be sent to the ClearML inference service first, which will run the raw request through the `preprocessing.py` file, which takes out the `text` value, runs it through the tokenizer and then sends the result to Triton, which runs the model and sends the output back to the same `preprocessing.py` file but in the postprocessing function this time, whose result is returned to the user. 
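For reference, here is a minimal Python `requests` sketch equivalent to the curl call above (it assumes the default local port mapping and uses the example payload shipped in this folder):

```python
import requests

# same payload as examples/huggingface/example_payload.json
payload = {"text": "This is a ClearML example to show how Triton binaries are deployed."}

response = requests.post(
    "http://127.0.0.1:8080/serve/transformer_model",
    json=payload,
    timeout=30,
)
response.raise_for_status()
print(response.json())  # e.g. {"data": [[...]]} with the raw model outputs
```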
136 | 137 | ## Benchmarking 138 | 139 | To run a load test on your endpoint to check its performance, use the following commands: 140 | ```bash 141 | ab -l -n 8000 -c 128 -H "accept: application/json" -H "Content-Type: application/json" -T "application/json" -p examples/huggingface/example_payload.json "http://127.0.0.1:8080/serve/transformer_model" 142 | ``` -------------------------------------------------------------------------------- /examples/huggingface/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml-serving 2 | pillow>=10.2.0 # not directly required, pinned by Snyk to avoid a vulnerability -------------------------------------------------------------------------------- /examples/keras/preprocess.py: -------------------------------------------------------------------------------- 1 | import io 2 | from typing import Any, Union 3 | 4 | import numpy as np 5 | from PIL import Image, ImageOps 6 | 7 | 8 | from clearml import StorageManager 9 | 10 | 11 | # Notice Preprocess class Must be named "Preprocess" 12 | class Preprocess(object): 13 | def __init__(self): 14 | # set internal state, this will be called only once. (i.e. not per request) 15 | pass 16 | 17 | def preprocess(self, body: Union[bytes, dict], state: dict, collect_custom_statistics_fn=None) -> Any: 18 | # we expect to get two valid on the dict x0, and x1 19 | if isinstance(body, bytes): 20 | # we expect to get a stream of encoded image bytes 21 | try: 22 | image = Image.open(io.BytesIO(body)).convert("RGB") 23 | except Exception: 24 | # value error would return 404, we want to return 500 so any other exception 25 | raise RuntimeError("Image could not be decoded") 26 | 27 | if isinstance(body, dict) and "url" in body.keys(): 28 | # image is given as url, and is fetched 29 | url = body.get("url") 30 | local_file = StorageManager.get_local_copy(remote_url=url) 31 | image = Image.open(local_file) 32 | 33 | image = ImageOps.grayscale(image).resize((28, 28)) 34 | return np.array([np.array(image)]) 35 | 36 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 37 | # post process the data returned from the model inference engine 38 | # data is the return value from model.predict we will put is inside a return value as Y 39 | if not isinstance(data, np.ndarray): 40 | # this should not happen 41 | return dict(digit=-1) 42 | 43 | # data is returned as probability per class (10 class/digits) 44 | return dict(digit=int(data.flatten().argmax())) 45 | -------------------------------------------------------------------------------- /examples/keras/readme.md: -------------------------------------------------------------------------------- 1 | # Train and Deploy Keras model with Nvidia Triton Engine 2 | 3 | ## training mnist digit classifier model 4 | 5 | Run the mock python training code 6 | ```bash 7 | pip install -r examples/keras/requirements.txt 8 | python examples/keras/train_keras_mnist.py 9 | ``` 10 | 11 | The output will be a model created on the project "serving examples", by the name "train keras model" 12 | 13 | ## setting up the serving service 14 | 15 | Prerequisites, Keras/Tensorflow models require Triton engine support, please use `docker-compose-triton.yml` / `docker-compose-triton-gpu.yml` or if running on Kubernetes, the matching helm chart. 16 | 17 | 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID) 18 | 2. 
Create model endpoint: 19 | 20 | `clearml-serving --id model add --engine triton --endpoint "test_model_keras" --preprocess "examples/keras/preprocess.py" --name "train keras model - serving_model" --project "serving examples" --input-size 1 784 --input-name "dense_input" --input-type float32 --output-size -1 10 --output-name "activation_2" --output-type float32 21 | ` 22 | 23 | Or auto update 24 | 25 | `clearml-serving --id model auto-update --engine triton --endpoint "test_model_auto" --preprocess "examples/keras/preprocess.py" --name "train keras model - serving_model" --project "serving examples" --max-versions 2 26 | --input-size 1 784 --input-name "dense_input" --input-type float32 27 | --output-size -1 10 --output-name "activation_2" --output-type float32` 28 | 29 | Or add Canary endpoint 30 | 31 | `clearml-serving --id model canary --endpoint "test_model_auto" --weights 0.1 0.9 --input-endpoint-prefix test_model_auto` 32 | 33 | 3. Make sure you have the `clearml-serving` `docker-compose-triton.yml` (or `docker-compose-triton-gpu.yml`) running, it might take it a minute or two to sync with the new endpoint. 34 | 35 | 4. Test new endpoint (do notice the first call will trigger the model pulling, so it might take longer, from here on, it's all in memory): \ 36 | `curl -X POST "http://127.0.0.1:8080/serve/test_model_keras" -H "accept: application/json" -H "Content-Type: application/json" -d '{"url": "https://raw.githubusercontent.com/allegroai/clearml-serving/main/examples/pytorch/5.jpg"}'` 37 | \ 38 | or send a local file to be classified with \ 39 | `curl -X POST "http://127.0.0.1:8080/serve/test_model_keras" -H "Content-Type: image/jpeg" --data-binary "@5.jpg"` 40 | 41 | > **_Notice:_** You can also change the serving service while it is already running! 42 | This includes adding/removing endpoints, adding canary model routing etc. 43 | by default new endpoints/models will be automatically updated after 1 minute -------------------------------------------------------------------------------- /examples/keras/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=2.0 2 | clearml 3 | Pillow -------------------------------------------------------------------------------- /examples/keras/train_keras_mnist.py: -------------------------------------------------------------------------------- 1 | # ClearML - Keras with Tensorboard example code, automatic logging model and Tensorboard outputs 2 | # 3 | # Train a simple deep NN on the MNIST dataset. 
4 | # Then store a model to be served by clearml-serving 5 | import argparse 6 | import os 7 | import tempfile 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | from pathlib import Path 12 | from tensorflow.keras import utils as np_utils 13 | from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard 14 | from tensorflow.keras.datasets import mnist 15 | from tensorflow.keras.layers import Activation, Dense 16 | from tensorflow.keras.models import Sequential 17 | from tensorflow.keras.optimizers import RMSprop 18 | 19 | from clearml import Task 20 | 21 | 22 | class TensorBoardImage(TensorBoard): 23 | @staticmethod 24 | def make_image(tensor): 25 | from PIL import Image 26 | import io 27 | tensor = np.stack((tensor, tensor, tensor), axis=2) 28 | height, width, channels = tensor.shape 29 | image = Image.fromarray(tensor) 30 | output = io.BytesIO() 31 | image.save(output, format='PNG') 32 | image_string = output.getvalue() 33 | output.close() 34 | return tf.Summary.Image(height=height, 35 | width=width, 36 | colorspace=channels, 37 | encoded_image_string=image_string) 38 | 39 | def on_epoch_end(self, epoch, logs=None): 40 | if logs is None: 41 | logs = {} 42 | super(TensorBoardImage, self).on_epoch_end(epoch, logs) 43 | images = self.validation_data[0] # 0 - data; 1 - labels 44 | img = (255 * images[0].reshape(28, 28)).astype('uint8') 45 | 46 | image = self.make_image(img) 47 | summary = tf.Summary(value=[tf.Summary.Value(tag='image', image=image)]) 48 | self.writer.add_summary(summary, epoch) 49 | 50 | 51 | def main(): 52 | parser = argparse.ArgumentParser(description='Keras MNIST Example - training CNN classification model') 53 | parser.add_argument('--batch-size', type=int, default=128, help='input batch size for training (default: 128)') 54 | parser.add_argument('--epochs', type=int, default=1, help='number of epochs to train (default: 6)') 55 | args = parser.parse_args() 56 | 57 | # the data, shuffled and split between train and test sets 58 | nb_classes = 10 59 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 60 | 61 | X_train = X_train.reshape(60000, 784).astype('float32') / 255. 62 | X_test = X_test.reshape(10000, 784).astype('float32') / 255. 
63 | print(X_train.shape[0], 'train samples') 64 | print(X_test.shape[0], 'test samples') 65 | 66 | # convert class vectors to binary class matrices 67 | Y_train = np_utils.to_categorical(y_train, nb_classes) 68 | Y_test = np_utils.to_categorical(y_test, nb_classes) 69 | 70 | model = Sequential() 71 | model.add(Dense(512, input_shape=(784,))) 72 | model.add(Activation('relu')) 73 | # model.add(Dropout(0.2)) 74 | model.add(Dense(512)) 75 | model.add(Activation('relu')) 76 | # model.add(Dropout(0.2)) 77 | model.add(Dense(10)) 78 | model.add(Activation('softmax')) 79 | 80 | model2 = Sequential() 81 | model2.add(Dense(512, input_shape=(784,))) 82 | model2.add(Activation('relu')) 83 | 84 | model.summary() 85 | 86 | model.compile( 87 | loss='categorical_crossentropy', 88 | optimizer=RMSprop(), 89 | metrics=['accuracy'] 90 | ) 91 | 92 | # Connecting ClearML with the current process, 93 | # from here on everything is logged automatically 94 | task = Task.init(project_name='serving examples', task_name='train keras model', output_uri=True) 95 | 96 | # Advanced: setting model class enumeration 97 | labels = dict(('digit_%d' % i, i) for i in range(10)) 98 | task.set_model_label_enumeration(labels) 99 | 100 | output_folder = os.path.join(tempfile.gettempdir(), 'keras_example_new_temp_now') 101 | 102 | board = TensorBoard(histogram_freq=1, log_dir=output_folder, write_images=False) 103 | model_store = ModelCheckpoint(filepath=os.path.join(output_folder, 'weight.{epoch}.hdf5')) 104 | 105 | # load previous model, if it is there 106 | # noinspection PyBroadException 107 | try: 108 | model.load_weights(os.path.join(output_folder, 'weight.1.hdf5')) 109 | except Exception: 110 | pass 111 | 112 | model.fit( 113 | X_train, Y_train, 114 | batch_size=args.batch_size, epochs=args.epochs, 115 | callbacks=[board, model_store], 116 | verbose=1, validation_data=(X_test, Y_test) 117 | ) 118 | score = model.evaluate(X_test, Y_test, verbose=0) 119 | 120 | # store the model in a format that can be served 121 | model.save('serving_model', include_optimizer=False) 122 | 123 | print('Test score: {}'.format(score[0])) 124 | print('Test accuracy: {}'.format(score[1])) 125 | 126 | 127 | if __name__ == '__main__': 128 | main() 129 | -------------------------------------------------------------------------------- /examples/lightgbm/preprocess.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | 5 | 6 | # Notice Preprocess class Must be named "Preprocess" 7 | class Preprocess(object): 8 | def __init__(self): 9 | # set internal state, this will be called only once. (i.e. 
not per request) 10 | pass 11 | 12 | def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any: 13 | # we expect to get four valid numbers on the dict: x0, x1, x2, x3 14 | return np.array( 15 | [[body.get("x0", None), body.get("x1", None), body.get("x2", None), body.get("x3", None)], ], 16 | dtype=np.float32 17 | ) 18 | 19 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 20 | # post process the data returned from the model inference engine 21 | # data is the return value from model.predict we will put is inside a return value as Y 22 | # we pick the most probably class and return the class index (argmax) 23 | return dict(y=int(np.argmax(data)) if isinstance(data, np.ndarray) else data) 24 | -------------------------------------------------------------------------------- /examples/lightgbm/readme.md: -------------------------------------------------------------------------------- 1 | # Train and Deploy LightGBM model 2 | 3 | ## training iris classifier model 4 | 5 | Run the mock python training code 6 | ```bash 7 | pip install -r examples/lightgbm/requirements.txt 8 | python examples/lightgbm/train_model.py 9 | ``` 10 | 11 | The output will be a model created on the project "serving examples", by the name "train lightgbm model" 12 | 13 | ## setting up the serving service 14 | 15 | 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID) 16 | 17 | 2. Create model endpoint: 18 | 19 | `clearml-serving --id model add --engine lightgbm --endpoint "test_model_lgbm" --preprocess "examples/lightgbm/preprocess.py" --name "train lightgbm model - lgbm_model" --project "serving examples"` 20 | 21 | Or auto-update 22 | 23 | `clearml-serving --id model auto-update --engine lightgbm --endpoint "test_model_auto" --preprocess "examples/lightgbm/preprocess.py" --name "train lightgbm model - lgbm_model" --project "serving examples" --max-versions 2` 24 | 25 | Or add Canary endpoint 26 | 27 | `clearml-serving --id model canary --endpoint "test_model_auto" --weights 0.1 0.9 --input-endpoint-prefix test_model_auto` 28 | 29 | 3. If you already have the `clearml-serving` docker-compose running, it might take it a minute or two to sync with the new endpoint. 30 | 31 | Or you can run the clearml-serving container independently `docker run -v ~/clearml.conf:/root/clearml.conf -p 8080:8080 -e CLEARML_SERVING_TASK_ID= clearml-serving:latest` 32 | 33 | 4. Test new endpoint (do notice the first call will trigger the model pulling, so it might take longer, from here on, it's all in memory): `curl -X POST "http://127.0.0.1:8080/serve/test_model_lgbm" -H "accept: application/json" -H "Content-Type: application/json" -d '{"x0": 1, "x1": 2, "x2": 3, "x3": 4}'` 34 | 35 | > **_Notice:_** You can also change the serving service while it is already running! 36 | This includes adding/removing endpoints, adding canary model routing etc. 
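As an optional sanity check, you can also load the booster saved by `examples/lightgbm/train_model.py` locally and reproduce the endpoint's prediction outside the serving stack (a minimal sketch; it assumes the `lgbm_model` file is available in the working directory and mirrors the argmax logic in `preprocess.py`):

```python
import lightgbm as lgb
import numpy as np

# Load the booster produced by examples/lightgbm/train_model.py
booster = lgb.Booster(model_file="lgbm_model")

# Same feature values as the curl test above (x0..x3)
features = np.array([[1, 2, 3, 4]], dtype=np.float32)

# With the multiclass objective, predict() returns one probability per class
probabilities = booster.predict(features)

# The serving postprocess() step returns the argmax as "y"
print({"y": int(np.argmax(probabilities))})
```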
37 | -------------------------------------------------------------------------------- /examples/lightgbm/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml >= 1.1.6 2 | lightgbm 3 | 4 | pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability 5 | -------------------------------------------------------------------------------- /examples/lightgbm/train_model.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | from sklearn.datasets import load_iris 3 | from sklearn.model_selection import train_test_split 4 | 5 | from clearml import Task 6 | 7 | task = Task.init(project_name="serving examples", task_name="train lightgbm model", output_uri=True) 8 | 9 | iris = load_iris() 10 | y = iris['target'] 11 | X = iris['data'] 12 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1) 13 | dtrain = lgb.Dataset(X_train, label=y_train) 14 | 15 | params = { 16 | 'objective': 'multiclass', 17 | 'metric': 'softmax', 18 | 'num_class': 3 19 | } 20 | lgb_model = lgb.train(params=params, train_set=dtrain) 21 | 22 | lgb_model.save_model("lgbm_model") 23 | -------------------------------------------------------------------------------- /examples/pipeline/async_preprocess.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | 3 | 4 | # register with --engine custom_async 5 | # Notice Preprocess class Must be named "Preprocess" 6 | class Preprocess(object): 7 | def __init__(self): 8 | pass 9 | 10 | async def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any: 11 | # we expect to get two valid on the dict x0, and x1 12 | return body 13 | 14 | async def postprocess(self, data: List[dict], state: dict, collect_custom_statistics_fn=None) -> dict: 15 | # we will here average the results and return the new value 16 | # assume data is a list of dicts greater than 1 17 | 18 | # average result 19 | return dict(y=0.5 * data[0]['y'][0] + 0.5 * data[1]['y'][0]) 20 | 21 | async def process(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> Any: 22 | """ 23 | do something with the actual data, return any type of object. 24 | The returned object will be passed as is to the postprocess function engine 25 | """ 26 | predict_a = self.send_request(endpoint="/test_model_sklearn_a/", version=None, data=data) 27 | predict_b = self.send_request(endpoint="/test_model_sklearn_b/", version=None, data=data) 28 | 29 | predict_a = await predict_a 30 | predict_b = await predict_b 31 | 32 | if not predict_b or not predict_a: 33 | raise ValueError("Error requesting inference endpoint test_model_sklearn a/b") 34 | 35 | return [predict_a, predict_b] 36 | 37 | async def send_request(self, endpoint, version, data) -> List[dict]: 38 | # Mock Function! 39 | # replaced by real send request function when constructed by the inference service 40 | pass 41 | -------------------------------------------------------------------------------- /examples/pipeline/preprocess.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | from typing import Any, List 3 | 4 | 5 | # Notice Preprocess class Must be named "Preprocess" 6 | class Preprocess(object): 7 | def __init__(self): 8 | # set internal state, this will be called only once. (i.e. 
not per request) 9 | self.executor = ThreadPoolExecutor(max_workers=32) 10 | 11 | def postprocess(self, data: List[dict], state: dict, collect_custom_statistics_fn=None) -> dict: 12 | # we will here average the results and return the new value 13 | # assume data is a list of dicts greater than 1 14 | 15 | # average result 16 | return dict(y=0.5 * data[0]['y'][0] + 0.5 * data[1]['y'][0]) 17 | 18 | def process(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> Any: 19 | """ 20 | do something with the actual data, return any type of object. 21 | The returned object will be passed as is to the postprocess function engine 22 | """ 23 | predict_a = self.executor.submit(self.send_request, endpoint="/test_model_sklearn_a/", version=None, data=data) 24 | predict_b = self.executor.submit(self.send_request, endpoint="/test_model_sklearn_b/", version=None, data=data) 25 | 26 | predict_a = predict_a.result() 27 | predict_b = predict_b.result() 28 | 29 | if not predict_b or not predict_a: 30 | raise ValueError("Error requesting inference endpoint test_model_sklearn a/b") 31 | 32 | return [predict_a, predict_b] 33 | 34 | def send_request(self, endpoint, version, data) -> List[dict]: 35 | # Mock Function! 36 | # replaced by real send request function when constructed by the inference service 37 | pass 38 | -------------------------------------------------------------------------------- /examples/pipeline/readme.md: -------------------------------------------------------------------------------- 1 | # Deploy a model inference pipeline 2 | 3 | ## prerequisites 4 | 5 | Training a scikit-learn model (see example/sklearn) 6 | 7 | ## setting up the serving service 8 | 9 | 1. Create serving Service (if not already running): 10 | `clearml-serving create --name "serving example"` (write down the service ID) 11 | 12 | 2. Create model base two endpoints: 13 | `clearml-serving --id model add --engine sklearn --endpoint "test_model_sklearn_a" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model" --project "serving examples"` 14 | 15 | `clearml-serving --id model add --engine sklearn --endpoint "test_model_sklearn_b" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model" --project "serving examples"` 16 | 17 | 3. Create pipeline model endpoint: 18 | 19 | Threaded version 20 | 21 | `clearml-serving --id model add --engine custom --endpoint "test_model_pipeline" --preprocess "examples/pipeline/preprocess.py"` 22 | 23 | AsyncIO version 24 | 25 | `clearml-serving --id model add --engine custom_async --endpoint "test_model_pipeline" --preprocess "examples/pipeline/async_preprocess.py"` 26 | 27 | 4. If you already have the `clearml-serving` docker-compose running, it might take it a minute or two to sync with the new endpoint. 28 | 29 | Or you can run the clearml-serving container independently `docker run -v ~/clearml.conf:/root/clearml.conf -p 8080:8080 -e CLEARML_SERVING_TASK_ID= clearml-serving:latest` 30 | 31 | 5. Test new endpoint (do notice the first call will trigger the model pulling, so it might take longer, from here on, it's all in memory): `curl -X POST "http://127.0.0.1:8080/serve/test_model_pipeline" -H "accept: application/json" -H "Content-Type: application/json" -d '{"x0": 1, "x1": 2}'` 32 | 33 | 34 | > **_Notice:_** You can also change the serving service while it is already running! 35 | This includes adding/removing endpoints, adding canary model routing etc. 
36 | by default new endpoints/models will be automatically updated after 1 minute 37 | -------------------------------------------------------------------------------- /examples/preprocess_template/preprocess_template.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional, Callable, Union 2 | 3 | 4 | # Preprocess class Must be named "Preprocess" 5 | # No need to inherit or to implement all methods 6 | class Preprocess(object): 7 | """ 8 | Preprocess class Must be named "Preprocess" 9 | Otherwise there are No limitations, No need to inherit or to implement all methods 10 | Notice! This is not thread safe! The same instance may be accessed from multiple threads simultaneously; 11 | to store data in a safe way, push it into the `state` dict argument of the preprocessing/postprocessing functions 12 | 13 | Notice the execution flow is synchronous, as follows: 14 | 15 | 1. RestAPI(...) -> body: Union[bytes, dict] 16 | 2. preprocess(body: Union[bytes, dict], ...) -> data: Any 17 | 3. process(data: Any, ...) -> data: Any 18 | 4. postprocess(data: Any, ...) -> result: dict 19 | 5. RestAPI(result: dict) -> returned request 20 | """ 21 | 22 | def __init__(self): 23 | # set internal state, this will be called only once. (i.e. not per request) 24 | # it will also set the internal model_endpoint to reference the specific model endpoint object being served 25 | self.model_endpoint = None # type: clearml_serving.serving.endpoints.ModelEndpoint 26 | 27 | def load(self, local_file_name: str) -> Any: # noqa 28 | """ 29 | Optional: provide loading method for the model 30 | useful if we need to load a model in a specific way for the prediction engine to work 31 | 32 | Notice! When used with specific engines (i.e. not Custom) 33 | The returned object will be passed as is to the inference engine, 34 | this means it must not be None, otherwise the endpoint will be ignored! 35 | 36 | :param local_file_name: file name / path to load the model from 37 | 38 | :return: Object that will be called with .predict() method for inference. 39 | """ 40 | pass 41 | 42 | def preprocess( 43 | self, 44 | body: Union[bytes, dict], 45 | state: dict, 46 | collect_custom_statistics_fn: Optional[Callable[[dict], None]], 47 | ) -> Any: # noqa 48 | """ 49 | Optional: do something with the request data, return any type of object. 50 | The returned object will be passed as is to the inference engine 51 | 52 | :param body: dictionary or bytes as received from the RestAPI 53 | :param state: Use state dict to store data passed to the post-processing function call. 54 | This is a per-request state dict (meaning a new empty dict will be passed per request) 55 | Usage example: 56 | >>> def preprocess(..., state): 57 | state['preprocess_aux_data'] = [1,2,3] 58 | >>> def postprocess(..., state): 59 | print(state['preprocess_aux_data']) 60 | :param collect_custom_statistics_fn: Optional, if provided allows sending a custom set of key/values 61 | to the statistics collector service.
62 | None is passed if the statistics collector is not configured, or if the current request should not be collected 63 | 64 | Usage example: 65 | >>> print(body) 66 | {"x0": 1, "x1": 2} 67 | >>> if collect_custom_statistics_fn: 68 | >>> collect_custom_statistics_fn({"x0": 1, "x1": 2}) 69 | 70 | :return: Object to be passed directly to the model inference 71 | """ 72 | return body 73 | 74 | def postprocess( 75 | self, 76 | data: Any, 77 | state: dict, 78 | collect_custom_statistics_fn: Optional[Callable[[dict], None]], 79 | ) -> dict: # noqa 80 | """ 81 | Optional: post process the data returned from the model inference engine 82 | the returned dict will be passed back as the request result as is. 83 | 84 | :param data: object as received from the inference model function 85 | :param state: Use state dict to store data passed to the post-processing function call. 86 | This is a per-request state dict (meaning a dict instance per request) 87 | Usage example: 88 | >>> def preprocess(..., state): 89 | state['preprocess_aux_data'] = [1,2,3] 90 | >>> def postprocess(..., state): 91 | print(state['preprocess_aux_data']) 92 | :param collect_custom_statistics_fn: Optional, if provided allows sending a custom set of key/values 93 | to the statistics collector service. 94 | None is passed if the statistics collector is not configured, or if the current request should not be collected 95 | 96 | Usage example: 97 | >>> if collect_custom_statistics_fn: 98 | >>> collect_custom_statistics_fn({"y": 1}) 99 | 100 | :return: Dictionary passed directly as the returned result of the RestAPI 101 | """ 102 | return data 103 | 104 | def process( 105 | self, 106 | data: Any, 107 | state: dict, 108 | collect_custom_statistics_fn: Optional[Callable[[dict], None]], 109 | ) -> Any: # noqa 110 | """ 111 | Optional: do something with the actual data, return any type of object. 112 | The returned object will be passed as is to the postprocess function engine 113 | 114 | :param data: object as received from the preprocessing function 115 | :param state: Use state dict to store data passed to the post-processing function call. 116 | This is a per-request state dict (meaning a dict instance per request) 117 | Usage example: 118 | >>> def preprocess(..., state): 119 | state['preprocess_aux_data'] = [1,2,3] 120 | >>> def postprocess(..., state): 121 | print(state['preprocess_aux_data']) 122 | :param collect_custom_statistics_fn: Optional, if provided allows sending a custom set of key/values 123 | to the statistics collector service. 124 | None is passed if the statistics collector is not configured, or if the current request should not be collected 125 | 126 | Usage example: 127 | >>> if collect_custom_statistics_fn: 128 | >>> collect_custom_statistics_fn({"type": "classification"}) 129 | 130 | :return: Object to be passed to the post-processing function 131 | """ 132 | return data 133 | 134 | def send_request( # noqa 135 | self, 136 | endpoint: str, 137 | version: Optional[str] = None, 138 | data: Optional[dict] = None 139 | ) -> Optional[dict]: 140 | """ 141 | NOTICE: This method will be replaced at runtime by the inference service 142 | 143 | Helper method to send model inference requests to the inference service itself. 144 | This is designed to help with model ensembles, model pipelines, etc.
145 | On request error return None, otherwise the request result data dictionary 146 | 147 | Usage example: 148 | 149 | >>> x0, x1 = 1, 2 150 | >>> result = self.send_request(endpoint="test_model_sklearn", version="1", data={"x0": x0, "x1": x1}) 151 | >>> y = result["y"] 152 | """ 153 | pass 154 | -------------------------------------------------------------------------------- /examples/pytorch/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clearml/clearml-serving/1def0a6901617767687f2b747aaffdc060f96046/examples/pytorch/5.jpg -------------------------------------------------------------------------------- /examples/pytorch/preprocess.py: -------------------------------------------------------------------------------- 1 | import io 2 | from typing import Any, Union 3 | 4 | import numpy as np 5 | from PIL import Image, ImageOps 6 | 7 | 8 | from clearml import StorageManager 9 | 10 | 11 | # Notice Preprocess class Must be named "Preprocess" 12 | class Preprocess(object): 13 | def __init__(self): 14 | # set internal state, this will be called only once. (i.e. not per request) 15 | pass 16 | 17 | def preprocess(self, body: Union[bytes, dict], state: dict, collect_custom_statistics_fn=None) -> Any: 18 | # we expect to get two valid on the dict x0, and x1 19 | if isinstance(body, bytes): 20 | # we expect to get a stream of encoded image bytes 21 | try: 22 | image = Image.open(io.BytesIO(body)).convert("RGB") 23 | except Exception: 24 | # value error would return 404, we want to return 500 so any other exception 25 | raise RuntimeError("Image could not be decoded") 26 | 27 | if isinstance(body, dict) and "url" in body.keys(): 28 | # image is given as url, and is fetched 29 | url = body.get("url") 30 | local_file = StorageManager.get_local_copy(remote_url=url) 31 | image = Image.open(local_file) 32 | 33 | image = ImageOps.grayscale(image).resize((28, 28)) 34 | return np.array([np.array(image)]) 35 | 36 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 37 | # post process the data returned from the model inference engine 38 | # data is the return value from model.predict we will put is inside a return value as Y 39 | if not isinstance(data, np.ndarray): 40 | # this should not happen 41 | return dict(digit=-1) 42 | 43 | # data is returned as probability per class (10 class/digits) 44 | return dict(digit=int(data.flatten().argmax())) 45 | -------------------------------------------------------------------------------- /examples/pytorch/readme.md: -------------------------------------------------------------------------------- 1 | # Train and Deploy Pytorch model with Nvidia Triton Engine 2 | 3 | ## training mnist digit classifier model 4 | 5 | Run the mock python training code 6 | ```bash 7 | pip install -r examples/pytorch/requirements.txt 8 | python examples/pytorch/train_pytorch_mnist.py 9 | ``` 10 | 11 | The output will be a model created on the project "serving examples", by the name "train pytorch model" 12 | *Notice* Only TorchScript models are supported by Triton server 13 | 14 | ## setting up the serving service 15 | 16 | 17 | Prerequisites, PyTorch models require Triton engine support, please use `docker-compose-triton.yml` / `docker-compose-triton-gpu.yml` or if running on Kubernetes, the matching helm chart. 18 | 19 | 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID) 20 | 2. 
Create model endpoint: 21 | 22 | `clearml-serving --id model add --engine triton --endpoint "test_model_pytorch" --preprocess "examples/pytorch/preprocess.py" --name "train pytorch model" --project "serving examples" 23 | --input-size 1 28 28 --input-name "INPUT__0" --input-type float32 24 | --output-size -1 10 --output-name "OUTPUT__0" --output-type float32 25 | ` 26 | 27 | Or auto update 28 | 29 | `clearml-serving --id model auto-update --engine triton --endpoint "test_model_pytorch_auto" --preprocess "examples/pytorch/preprocess.py" --name "train pytorch model" --project "serving examples" --max-versions 2 30 | --input-size 1 28 28 --input-name "INPUT__0" --input-type float32 31 | --output-size -1 10 --output-name "OUTPUT__0" --output-type float32` 32 | 33 | Or add Canary endpoint 34 | 35 | `clearml-serving --id model canary --endpoint "test_model_pytorch_auto" --weights 0.1 0.9 --input-endpoint-prefix test_model_pytorch_auto` 36 | 37 | 3. Make sure you have the `clearml-serving` `docker-compose-triton.yml` (or `docker-compose-triton-gpu.yml`) running, it might take it a minute or two to sync with the new endpoint. 38 | 39 | 4. Test new endpoint (do notice the first call will trigger the model pulling, so it might take longer, from here on, it's all in memory): \ 40 | `curl -X POST "http://127.0.0.1:8080/serve/test_model_pytorch" -H "accept: application/json" -H "Content-Type: application/json" -d '{"url": "https://raw.githubusercontent.com/allegroai/clearml-serving/main/examples/pytorch/5.jpg"}'` \ 41 | or send a local file to be classified with \ 42 | `curl -X POST "http://127.0.0.1:8080/serve/test_model_pytorch" -H "Content-Type: image/jpeg" --data-binary "@5.jpg"` 43 | 44 | > **_Notice:_** You can also change the serving service while it is already running! 45 | This includes adding/removing endpoints, adding canary model routing etc. 
46 | by default new endpoints/models will be automatically updated after 1 minute 47 | -------------------------------------------------------------------------------- /examples/pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | torchvision 2 | torch 3 | clearml 4 | Pillow 5 | tensorboard 6 | -------------------------------------------------------------------------------- /examples/pytorch/train_pytorch_mnist.py: -------------------------------------------------------------------------------- 1 | # ClearML - Example of pytorch with tensorboard>=v1.14 2 | # 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import os 7 | from tempfile import gettempdir 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import torch.optim as optim 13 | from torchvision import datasets, transforms 14 | from torch.autograd import Variable 15 | from torch.utils.tensorboard import SummaryWriter 16 | 17 | from clearml import Task, OutputModel 18 | 19 | 20 | class Net(nn.Module): 21 | 22 | def __init__(self): 23 | super(Net, self).__init__() 24 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 25 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 26 | self.conv2_drop = nn.Dropout2d() 27 | self.fc1 = nn.Linear(320, 50) 28 | self.fc2 = nn.Linear(50, 10) 29 | 30 | def forward(self, x): 31 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 32 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 33 | x = x.view(-1, 320) 34 | x = F.relu(self.fc1(x)) 35 | x = F.dropout(x, training=self.training) 36 | x = self.fc2(x) 37 | return F.log_softmax(x, dim=1) 38 | 39 | 40 | def train(model, epoch, train_loader, args, optimizer, writer): 41 | model.train() 42 | for batch_idx, (data, target) in enumerate(train_loader): 43 | if args.cuda: 44 | data, target = data.cuda(), target.cuda() 45 | data, target = Variable(data), Variable(target) 46 | optimizer.zero_grad() 47 | output = model(data) 48 | loss = F.nll_loss(output, target) 49 | loss.backward() 50 | optimizer.step() 51 | if batch_idx % args.log_interval == 0: 52 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 53 | epoch, batch_idx * len(data), len(train_loader.dataset), 54 | 100. * batch_idx / len(train_loader), loss.data.item())) 55 | niter = epoch*len(train_loader)+batch_idx 56 | writer.add_scalar('Train/Loss', loss.data.item(), niter) 57 | 58 | 59 | def test(model, test_loader, args, optimizer, writer): 60 | model.eval() 61 | test_loss = 0 62 | correct = 0 63 | for niter, (data, target) in enumerate(test_loader): 64 | if args.cuda: 65 | data, target = data.cuda(), target.cuda() 66 | data, target = Variable(data), Variable(target) 67 | output = model(data) 68 | test_loss += F.nll_loss(output, target, reduction='sum').data.item() # sum up batch loss 69 | pred = output.data.max(1)[1] # get the index of the max log-probability 70 | pred = pred.eq(target.data).cpu().sum() 71 | writer.add_scalar('Test/Loss', pred, niter) 72 | correct += pred 73 | if niter % 100 == 0: 74 | writer.add_image('test', data[0, :, :, :], niter) 75 | 76 | test_loss /= len(test_loader.dataset) 77 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 78 | test_loss, correct, len(test_loader.dataset), 79 | 100. 
* correct / len(test_loader.dataset))) 80 | 81 | 82 | def main(): 83 | # Training settings 84 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 85 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 86 | help='input batch size for training (default: 64)') 87 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 88 | help='input batch size for testing (default: 1000)') 89 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 90 | help='number of epochs to train (default: 10)') 91 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR', 92 | help='learning rate (default: 0.01)') 93 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M', 94 | help='SGD momentum (default: 0.5)') 95 | parser.add_argument('--no-cuda', action='store_true', default=False, 96 | help='disables CUDA training') 97 | parser.add_argument('--seed', type=int, default=1, metavar='S', 98 | help='random seed (default: 1)') 99 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 100 | help='how many batches to wait before logging training status') 101 | args = parser.parse_args() 102 | 103 | # Connecting ClearML with the current process, 104 | # from here on everything is logged automatically 105 | task = Task.init(project_name='serving examples', task_name='train pytorch model', output_uri=True) # noqa: F841 106 | writer = SummaryWriter('runs') 107 | writer.add_text('TEXT', 'This is some text', 0) 108 | args.cuda = not args.no_cuda and torch.cuda.is_available() 109 | 110 | torch.manual_seed(args.seed) 111 | if args.cuda: 112 | torch.cuda.manual_seed(args.seed) 113 | 114 | kwargs = {'num_workers': 4, 'pin_memory': True} if args.cuda else {} 115 | train_loader = torch.utils.data.DataLoader(datasets.MNIST('./data', train=True, download=True, 116 | transform=transforms.Compose([ 117 | transforms.ToTensor(), 118 | transforms.Normalize((0.1307,), (0.3081,))])), 119 | batch_size=args.batch_size, shuffle=True, **kwargs) 120 | test_loader = torch.utils.data.DataLoader(datasets.MNIST('./data', train=False, 121 | transform=transforms.Compose([ 122 | transforms.ToTensor(), 123 | transforms.Normalize((0.1307,), (0.3081,))])), 124 | batch_size=args.test_batch_size, shuffle=True, **kwargs) 125 | 126 | model = Net() 127 | if args.cuda: 128 | model.cuda() 129 | 130 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) 131 | 132 | for epoch in range(1, args.epochs + 1): 133 | train(model, epoch, train_loader, args, optimizer, writer) 134 | 135 | # store in a way we can easily load into triton without having to have the model class 136 | torch.jit.script(model).save('serving_model.pt') 137 | OutputModel().update_weights('serving_model.pt') 138 | test(model, test_loader, args, optimizer, writer) 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | -------------------------------------------------------------------------------- /examples/sklearn/preprocess.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | 5 | 6 | # Notice Preprocess class Must be named "Preprocess" 7 | class Preprocess(object): 8 | def __init__(self): 9 | # set internal state, this will be called only once. (i.e. 
not per request) 10 | pass 11 | 12 | def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any: 13 | # we expect to get two valid on the dict x0, and x1 14 | return [[body.get("x0", None), body.get("x1", None)], ] 15 | 16 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 17 | # post process the data returned from the model inference engine 18 | # data is the return value from model.predict we will put is inside a return value as Y 19 | return dict(y=data.tolist() if isinstance(data, np.ndarray) else data) 20 | -------------------------------------------------------------------------------- /examples/sklearn/readme.md: -------------------------------------------------------------------------------- 1 | # Train and Deploy Scikit-Learn model 2 | 3 | ## training mock logistic regression model 4 | 5 | Run the mock python training code 6 | ```bash 7 | pip install -r examples/sklearn/requirements.txt 8 | python examples/sklearn/train_model.py 9 | ``` 10 | 11 | The output will be a model created on the project "serving examples", by the name "train sklearn model" 12 | 13 | ## setting up the serving service 14 | 15 | 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID) 16 | 2. Create model endpoint: 17 | `clearml-serving --id model add --engine sklearn --endpoint "test_model_sklearn" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model - sklearn-model" --project "serving examples"` 18 | 19 | Or auto update 20 | 21 | `clearml-serving --id model auto-update --engine sklearn --endpoint "test_model_sklearn_auto" --preprocess "examples/sklearn/preprocess.py" --name "train sklearn model - sklearn-model" --project "serving examples" --max-versions 2` 22 | 23 | Or add Canary endpoint 24 | 25 | `clearml-serving --id model canary --endpoint "test_model_sklearn_auto" --weights 0.1 0.9 --input-endpoint-prefix test_model_sklearn_auto` 26 | 27 | 3. If you already have the `clearml-serving` docker-compose running, it might take it a minute or two to sync with the new endpoint. 28 | 29 | Or you can run the clearml-serving container independently `docker run -v ~/clearml.conf:/root/clearml.conf -p 8080:8080 -e CLEARML_SERVING_TASK_ID= clearml-serving:latest` 30 | 31 | 4. Test new endpoint (do notice the first call will trigger the model pulling, so it might take longer, from here on, it's all in memory): `curl -X POST "http://127.0.0.1:8080/serve/test_model_sklearn" -H "accept: application/json" -H "Content-Type: application/json" -d '{"x0": 1, "x1": 2}'` 32 | 33 | 34 | > **_Notice:_** You can also change the serving service while it is already running! 35 | This includes adding/removing endpoints, adding canary model routing etc. 
36 | by default new endpoints/models will be automatically updated after 1 minute 37 | -------------------------------------------------------------------------------- /examples/sklearn/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml >= 1.1.6 2 | scikit-learn 3 | pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability 4 | -------------------------------------------------------------------------------- /examples/sklearn/train_model.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LogisticRegression 2 | from sklearn.datasets import make_blobs 3 | from joblib import dump 4 | from clearml import Task 5 | 6 | task = Task.init(project_name="serving examples", task_name="train sklearn model", output_uri=True) 7 | 8 | # generate 2d classification dataset 9 | X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1) 10 | # fit final model 11 | model = LogisticRegression() 12 | model.fit(X, y) 13 | 14 | dump(model, filename="sklearn-model.pkl", compress=9) 15 | 16 | -------------------------------------------------------------------------------- /examples/xgboost/preprocess.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | import xgboost as xgb 5 | 6 | 7 | # Notice Preprocess class Must be named "Preprocess" 8 | class Preprocess(object): 9 | def __init__(self): 10 | # set internal state, this will be called only once. (i.e. not per request) 11 | pass 12 | 13 | def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any: 14 | # we expect to get four valid numbers on the dict: x0, x1, x2, x3 15 | return xgb.DMatrix( 16 | [[body.get("x0", None), body.get("x1", None), body.get("x2", None), body.get("x3", None)]]) 17 | 18 | def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict: 19 | # post process the data returned from the model inference engine 20 | # data is the return value from model.predict we will put is inside a return value as Y 21 | return dict(y=data.tolist() if isinstance(data, np.ndarray) else data) 22 | -------------------------------------------------------------------------------- /examples/xgboost/readme.md: -------------------------------------------------------------------------------- 1 | # Train and Deploy XGBoost model 2 | 3 | ## training iris classifier model 4 | 5 | Run the mock python training code 6 | ```bash 7 | pip install -r examples/xgboost/requirements.txt 8 | python examples/xgboost/train_model.py 9 | ``` 10 | 11 | The output will be a model created on the project "serving examples", by the name "train xgboost model" 12 | 13 | ## setting up the serving service 14 | 15 | 1. Create serving Service: `clearml-serving create --name "serving example"` (write down the service ID) 16 | 2. Create model endpoint: 17 | 18 | 3. 
`clearml-serving --id model add --engine xgboost --endpoint "test_model_xgb" --preprocess "examples/xgboost/preprocess.py" --name "train xgboost model - xgb_model" --project "serving examples"` 19 | 20 | Or auto update 21 | 22 | `clearml-serving --id model auto-update --engine xgboost --endpoint "test_model_xgb_auto" --preprocess "examples/xgboost/preprocess.py" --name "train xgboost model - xgb_model" --project "serving examples" --max-versions 2` 23 | 24 | Or add Canary endpoint 25 | 26 | `clearml-serving --id model canary --endpoint "test_model_xgb_auto" --weights 0.1 0.9 --input-endpoint-prefix test_model_xgb_auto` 27 | 28 | 3. If you already have the `clearml-serving` docker-compose running, it might take it a minute or two to sync with the new endpoint. 29 | 30 | Or you can run the clearml-serving container independently `docker run -v ~/clearml.conf:/root/clearml.conf -p 8080:8080 -e CLEARML_SERVING_TASK_ID= clearml-serving:latest` 31 | 32 | 4. Test new endpoint (do notice the first call will trigger the model pulling, so it might take longer, from here on, it's all in memory): `curl -X POST "http://127.0.0.1:8080/serve/test_model_xgb" -H "accept: application/json" -H "Content-Type: application/json" -d '{"x0": 1, "x1": 2, "x2": 3, "x3": 4}'` 33 | 34 | > **_Notice:_** You can also change the serving service while it is already running! 35 | This includes adding/removing endpoints, adding canary model routing etc. 36 | -------------------------------------------------------------------------------- /examples/xgboost/requirements.txt: -------------------------------------------------------------------------------- 1 | clearml >= 1.1.6 2 | xgboost 3 | 4 | pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability 5 | -------------------------------------------------------------------------------- /examples/xgboost/train_model.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | from sklearn.datasets import load_iris 3 | from sklearn.model_selection import train_test_split 4 | 5 | from clearml import Task 6 | 7 | task = Task.init(project_name="serving examples", task_name="train xgboost model", output_uri=True) 8 | 9 | X, y = load_iris(return_X_y=True) 10 | X_train, X_test, y_train, y_test = train_test_split( 11 | X, y, test_size=0.2, random_state=100 12 | ) 13 | 14 | dtrain = xgb.DMatrix(X_train, label=y_train) 15 | dtest = xgb.DMatrix(X_test, label=y_test) 16 | 17 | params = {"objective": "reg:squarederror", "eval_metric": "rmse"} 18 | 19 | 20 | bst = xgb.train( 21 | params, 22 | dtrain, 23 | num_boost_round=100, 24 | evals=[(dtrain, "train"), (dtest, "test")], 25 | verbose_eval=0, 26 | ) 27 | 28 | bst.save_model("xgb_model") 29 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | clearml >= 1.3.1 2 | pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | `clearml-serving` - Model-Serving Orchestration and Repository Solution 3 | https://github.com/clearml/clearml-serving 4 | """ 5 | 6 | import os.path 7 | # Always prefer setuptools over distutils 8 | from setuptools import setup, find_packages 9 | 10 | 11 | def read_text(filepath): 12 | with open(filepath, "r", 
encoding="utf-8") as f: 13 | return f.read() 14 | 15 | 16 | here = os.path.dirname(__file__) 17 | # Get the long description from the README file 18 | long_description = read_text(os.path.join(here, 'README.md')) 19 | 20 | 21 | def read_version_string(version_file): 22 | for line in read_text(version_file).splitlines(): 23 | if line.startswith('__version__'): 24 | delim = '"' if '"' in line else "'" 25 | return line.split(delim)[1] 26 | else: 27 | raise RuntimeError("Unable to find version string.") 28 | 29 | 30 | version = read_version_string("clearml_serving/version.py") 31 | 32 | requirements = read_text(os.path.join(here, 'requirements.txt')).splitlines() 33 | 34 | setup( 35 | name='clearml-serving', 36 | version=version, 37 | description='clearml-serving - Model-Serving Orchestration and Repository Solution', 38 | long_description=long_description, 39 | long_description_content_type='text/markdown', 40 | # The project's main homepage. 41 | url='https://github.com/clearml/clearml-serving.git', 42 | author='ClearML', 43 | author_email='support@clear.ml', 44 | license='Apache License 2.0', 45 | classifiers=[ 46 | 'Development Status :: 5 - Production/Stable', 47 | 'Intended Audience :: Developers', 48 | 'Intended Audience :: Science/Research', 49 | 'Operating System :: POSIX :: Linux', 50 | 'Operating System :: MacOS :: MacOS X', 51 | 'Operating System :: Microsoft', 52 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 53 | 'Topic :: Software Development', 54 | 'Topic :: Software Development :: Version Control', 55 | 'Topic :: System :: Logging', 56 | 'Topic :: System :: Monitoring', 57 | 'Programming Language :: Python :: 3.6', 58 | 'Programming Language :: Python :: 3.7', 59 | 'Programming Language :: Python :: 3.8', 60 | 'Programming Language :: Python :: 3.9', 61 | 'Programming Language :: Python :: 3.10', 62 | 'Programming Language :: Python :: 3.11', 63 | 'License :: OSI Approved :: Apache Software License', 64 | ], 65 | keywords='clearml mlops devops trains development machine deep learning version control machine-learning ' 66 | 'machinelearning deeplearning deep-learning model-serving', 67 | packages=find_packages(exclude=['contrib', 'docs', 'data', 'examples', 'tests']), 68 | install_requires=requirements, 69 | # To provide executable scripts, use entry points in preference to the 70 | # "scripts" keyword. Entry points provide cross-platform support and allow 71 | # pip to create the appropriate form of executable for the target platform. 72 | entry_points={ 73 | 'console_scripts': [ 74 | 'clearml-serving = clearml_serving.__main__:main', 75 | ], 76 | }, 77 | ) 78 | --------------------------------------------------------------------------------