├── .gitignore ├── README.md ├── __init__.py ├── api.py ├── automl.py ├── legacy └── pipeline.py ├── pipeline.py ├── pymlpipeUI.py ├── queue.py ├── requirements.txt ├── samples ├── runner_pipeline_server.py ├── runner_pymlpipeUI.py ├── test_MLpipeline.py ├── test_api.py ├── test_automl_run.py ├── test_cases_pipeline.py ├── test_create_pipeline.py ├── test_dl_torch_train.py └── test_mltrain.py ├── static ├── Screenshot 2022-07-04 at 1.42.35 PM.png ├── Screenshot 2022-07-04 at 1.42.52 PM.png ├── Screenshot 2022-07-04 at 1.43.03 PM.png ├── Screenshot 2022-07-04 at 1.43.52 PM.png ├── Screenshot 2022-07-04 at 1.44.05 PM.png ├── Screenshot 2022-07-16 at 8.03.29 PM.png ├── Screenshot 2022-07-16 at 8.03.50 PM.png ├── Screenshot 2022-07-16 at 8.04.00 PM.png ├── Screenshot 2022-07-16 at 8.04.08 PM.png ├── Screenshot 2022-07-16 at 8.04.21 PM.png ├── XAI.png ├── download.png ├── favicon.ico ├── filter.svg ├── logo.svg ├── pipelineUI 2.png ├── pipelineUI 3.png ├── pipelineUI.png ├── pipelineUI_1.png ├── pipelineUI_2.png ├── start.png ├── start.svg └── start1.svg ├── tabular.py ├── templates ├── check_deployment.html ├── deployments.html ├── index.html ├── job_view.html ├── jobs.html ├── run.html └── template.html └── utils ├── __init__.py ├── _sklearn_prediction.py ├── _torch_prediction.py ├── _xai.py ├── change2graph.py ├── database.py ├── factory.py ├── getschema.py ├── uiutils.py └── yamlio.py /.gitignore: -------------------------------------------------------------------------------- 1 | */__pycache__ 2 | *.pkl 3 | modelrun 4 | *.csv 5 | *.pyc 6 | 7 | # Created by https://www.toptal.com/developers/gitignore/api/python 8 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 9 | 10 | ### Python ### 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | cover/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | .pybuilder/ 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | # For a library or package, you might want to ignore these files since the code is 97 | # intended to run in multiple environments; otherwise, check them in: 98 | # .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # poetry 108 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 109 | # This is especially recommended for binary packages to ensure reproducibility, and is more 110 | # commonly ignored for libraries. 111 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 112 | #poetry.lock 113 | 114 | # pdm 115 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 116 | #pdm.lock 117 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 118 | # in version control. 119 | # https://pdm.fming.dev/#use-with-ide 120 | .pdm.toml 121 | 122 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 123 | __pypackages__/ 124 | 125 | # Celery stuff 126 | celerybeat-schedule 127 | celerybeat.pid 128 | 129 | # SageMath parsed files 130 | *.sage.py 131 | 132 | # Environments 133 | .env 134 | .venv 135 | env/ 136 | venv/ 137 | ENV/ 138 | env.bak/ 139 | venv.bak/ 140 | 141 | # Spyder project settings 142 | .spyderproject 143 | .spyproject 144 | 145 | # Rope project settings 146 | .ropeproject 147 | 148 | # mkdocs documentation 149 | /site 150 | 151 | # mypy 152 | .mypy_cache/ 153 | .dmypy.json 154 | dmypy.json 155 | 156 | # Pyre type checker 157 | .pyre/ 158 | 159 | # pytype static type analyzer 160 | .pytype/ 161 | 162 | # Cython debug symbols 163 | cython_debug/ 164 | 165 | # PyCharm 166 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 167 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 168 | # and can be added to the global gitignore or merged into this file. For a more nuclear 169 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 170 | #.idea/ 171 | 172 | # End of https://www.toptal.com/developers/gitignore/api/python -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/main/static/logo.svg?raw=true) 4 | 5 | 6 | 7 | [![Downloads](https://static.pepy.tech/personalized-badge/pymlpipe?period=total&units=international_system&left_color=black&right_color=green&left_text=Downloads)](https://pepy.tech/project/pymlpipe) 8 | 9 | [![Downloads](https://pepy.tech/badge/pymlpipe/month)](https://pepy.tech/project/pymlpipe) 10 | 11 | ![alt text](https://badgen.net/badge/version/0.2.7/red?icon=github) 12 | 13 | ![](https://badgen.net/pypi/python/black) 14 | 15 | ![](https://badgen.net/badge/pypi/0.2.6/orange?icon=pypi) 16 | 17 | ![](https://badgen.net/pypi/license/pip) 18 | 19 | # PyMLpipe 20 | 21 | 22 | 23 | PyMLpipe is a Python library for ease Machine Learning Model monitoring and Deployment. 24 | 25 | * Simple 26 | * Intuative 27 | * Easy to use 28 | 29 | **What's New in 0.2.7** 30 | 1. Explainable AI 31 | 3. Data Pipeline 32 | 4. AutoML support 33 | 34 | Please Find the Full [documentation](https://neelindresh.github.io/pymlpipe.documentation.io/) here! 35 | 36 | ## Installation 37 | 38 | Use the package manager [pip](https://pypi.org/project/pymlpipe/) to install PyMLpipe. 39 | 40 | 41 | 42 | ```bash 43 | 44 | pip install pymlpipe 45 | 46 | ``` 47 | 48 | or 49 | 50 | ```bash 51 | 52 | pip3 install pymlpipe 53 | 54 | ``` 55 | 56 | ## Frame Work Supports 57 | 58 | - [X] Scikit-Learn 59 | 60 | - [X] XGBoost 61 | 62 | - [X] LightGBM 63 | 64 | - [X] Pytorch 65 | 66 | - [ ] Tensorflow 67 | 68 | - [ ] Keras 69 | 70 | 71 | 72 | 73 | 74 | ## Tutorial (Scikit-Learn|XGBoost|LightGBM) 75 | 76 | 77 | 78 | * Load the python package 79 | 80 | 81 | 82 | ```python 83 | 84 | from pymlpipe.tabular import PyMLPipe 85 | 86 | ``` 87 | 88 | 89 | 90 | * Initiate the `PyMLPipe` class 91 | 92 | 93 | 94 | ```python 95 | 96 | mlp=PyMLPipe() 97 | 98 | ``` 99 | 100 | 101 | 102 | * Set an Experiment Name `[Optional]`-Default experiment name is `'0'` 103 | 104 | 105 | 106 | ```python 107 | 108 | mlp.set_experiment("IrisDataV2") 109 | 110 | ``` 111 | 112 | 113 | 114 | * Set a version `[Optional]`-Default there is no version 115 | 116 | 117 | 118 | ```python 119 | 120 | mlp.set_version(0.1) 121 | 122 | ``` 123 | 124 | 125 | 126 | * Initiate the context manager - This is create a unique ID for each model run. 127 | 128 | - when `.run()` is used - Automatic unique ID is generated 129 | 130 | - you can also provide `runid` argument in the `.run()` this will the use the given `runid` for next storing. 131 | 132 | 133 | 134 | ```python 135 | 136 | with mlp.run(): 137 | 138 | ``` 139 | 140 | Or 141 | 142 | 143 | 144 | ```python 145 | 146 | with mlp.run(runid='mlopstest'): 147 | 148 | ``` 149 | 150 | 151 | 152 | * Set a Tag `[Optional]` by using `set_tag()`-Default there is no tags 153 | 154 | 155 | 156 | ```python 157 | 158 | mlp.set_tag('tag') 159 | 160 | ``` 161 | 162 | Or 163 | 164 | 165 | 166 | * Set multiple Tags `[Optional]` by using `set_tags()`-Default there is no tags 167 | 168 | ```python 169 | 170 | mlp.set_tags(["Classification","test run","logisticRegression"]) 171 | 172 | 173 | 174 | ``` 175 | 176 | 177 | 178 | * Set Metrics values `[Optional]` by using `log_matric(metric_name,metric_value)`-Default there is no metrics 179 | 180 | This will help in comparing performance of different models and model versions 181 | 182 | ```python 183 | 184 | mlp.log_metric("Accuracy", accuracy_score(testy,predictions)) 185 | 186 | 187 | 188 | 189 | mlp.log_metric("Accuracy", .92) 190 | 191 | 192 | 193 | ``` 194 | 195 | 196 | 197 | * Set multiple Metrics values `[Optional]` by using `log_matrics({metric_name:metric_value})`-Default there is no metrics 198 | 199 | ```python 200 | 201 | mlp.log_metrics( 202 | { 203 | "Accuracy": accuracy_score(testy,predictions), 204 | "Precision": precision_score(testy,predictions,average='macro'), 205 | "Recall": recall_score(testy,predictions,average='macro'), 206 | } 207 | ) 208 | 209 | mlp.log_metrics({ 210 | "Accuracy": .92, 211 | "Precision": .87, 212 | "Recall": .98, 213 | } 214 | ) 215 | ``` 216 | 217 | * Save an artifact `[Optional]` - You can save training/testing/validation/dev/prod data for monitoring and comparison 218 | 219 | - This will also help in generating `DATA SCHEMA` 220 | 221 | - `register_artifact()` -takes 3 arguments 222 | 223 | - name of artifact 224 | 225 | - Pandas Dataframe 226 | 227 | - type of artifact - `[training, testing, validation, dev, prod]` 228 | 229 | - You can also use `register_artifact_with_path()` - This will save the artifact from the disk. 230 | 231 | - Path for the file 232 | 233 | - type of artifact - `[training, testing, validation, dev, prod]` 234 | 235 | 236 | 237 | ```python 238 | mlp.register_artifact("train.csv", trainx) 239 | mlp.register_artifact("train.csv", trainx) 240 | ``` 241 | 242 | * Register Model `[Optional]` - You can register the model. This will help in Quick deployment 243 | 244 | 245 | ```python 246 | mlp.scikit_learn.register_model("logistic regression", model) 247 | ``` 248 | 249 | #### XAI 250 | 251 | To get model explaination , feature importance we can use `explainer()` 252 | explainer takes two objects 253 | - model - the model used for training 254 | - trainx - the training data 255 | 256 | ```python 257 | mlp.explainer(model,trainx) 258 | ``` 259 | 260 | ## Quick Start (Scikit-Learn|XGBoost|LightGBM) 261 | 262 | 263 | 264 | ```python 265 | 266 | from sklearn.datasets import load_iris 267 | import pandas as pd 268 | from sklearn.model_selection import train_test_split 269 | from sklearn.linear_model import LogisticRegression 270 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score 271 | #import PyMLPipe from tabular 272 | from pymlpipe.tabular import PyMLPipe 273 | 274 | # Initiate the class 275 | mlp=PyMLPipe() 276 | # Set experiment name 277 | mlp.set_experiment("IrisDataV2") 278 | # Set Version name 279 | mlp.set_version(0.2) 280 | 281 | iris_data=load_iris() 282 | 283 | data=iris_data["data"] 284 | 285 | target=iris_data["target"] 286 | 287 | df=pd.DataFrame(data,columns=iris_data["feature_names"]) 288 | 289 | trainx,testx,trainy,testy=train_test_split(df,target) 290 | 291 | # to start monitering use mlp.run() 292 | 293 | with mlp.run(): 294 | # set tags 295 | mlp.set_tags(["Classification","test run","logisticRegression"]) 296 | model=LogisticRegression() 297 | model.fit(trainx, trainy) 298 | predictions=model.predict(testx) 299 | # log performace metrics 300 | mlp.log_metric("Accuracy", accuracy_score(testy,predictions)) 301 | mlp.log_metric("Precision", precision_score(testy,predictions,average='macro')) 302 | mlp.log_metric("Recall", recall_score(testy,predictions,average='macro')) 303 | mlp.log_metric("F1", f1_score(testy,predictions,average='macro')) 304 | # Save train data and test data 305 | mlp.register_artifact("train", trainx) 306 | mlp.register_artifact("test", testx,artifact_type="testing") 307 | # Save the model 308 | mlp.scikit_learn.register_model("logistic regression", model) 309 | # Model explainer 310 | mlp.explainer(model,trainx) 311 | ``` 312 | 313 | 314 | 315 | ## Launch UI 316 | To start the UI 317 | 318 | ```bash 319 | 320 | pymlpipeui 321 | 322 | ``` 323 | 324 | or 325 | 326 | ```python 327 | 328 | from pymlpipe.pymlpipeUI import start_ui 329 | 330 | start_ui(host='0.0.0.0', port=8085) 331 | 332 | ``` 333 | 334 | #### Sample UI 335 | 336 | 337 | 338 | 339 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.42.35%20PM.png?raw=true) 340 | 341 | 342 | 343 | --- 344 | 345 | 346 | 347 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.42.52%20PM.png?raw=true) 348 | 349 | --- 350 | XAI 351 | 352 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/XAI.png) 353 | 354 | --- 355 | 356 | #### One Click Deployment -click the deploy button to deploy the model and get a endpoint 357 | 358 | 359 | 360 | 361 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.43.03%20PM.png?raw=true) 362 | 363 | 364 | 365 | --- 366 | 367 | 368 | 369 | 370 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.43.52%20PM.png?raw=true) 371 | 372 | 373 | 374 | --- 375 | 376 | 377 | 378 | ## Send the data to the Prediction end point in the format 379 | 380 | 381 | 382 | - Each list is a row of data 383 | 384 | ```python 385 | 386 | { 387 | "data":[ 388 | [5.6,3.0,4.5,1.5], 389 | [5.6,3.0,4.5,1.5] 390 | ] 391 | } 392 | 393 | ``` 394 | 395 | 396 | 397 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.44.05%20PM.png?raw=true) 398 | 399 | 400 | 401 | --- 402 | 403 | ## Tutorial (Pytorch) 404 | 405 | #### The previous methods can be used as it is. New methods are shown below 406 | 407 | * Log continious Metrics `.log_metrics_continious(dict)--> dict of metrics`\ 408 | 409 | - logs the metrics in a continious manner for each epoch 410 | 411 | 412 | 413 | ```pytorch 414 | 415 | mlp.log_metrics_continious({ 416 | 417 | "accuracy": .9, 418 | 419 | "precision": .8, 420 | 421 | "recall": .7 422 | 423 | }) 424 | 425 | ``` 426 | 427 | 428 | 429 | * To register a pytorch model use `.pytorch.register_model(modelname, modelobject)` 430 | 431 | - this will Save the model in a .pt file as a `torch.jit` format for serveing and prediction 432 | 433 | 434 | 435 | ```python 436 | mlp.pytorch.register_model("pytorch_example1", model) 437 | ``` 438 | 439 | * To register a pytorch model use `.pytorch.register_model_with_runtime(modelname, modelobject, train_data_sample)` 440 | 441 | 442 | 443 | - `train_data_sample`- is a sample of input data. it can be random numbers but needs tensor dimension 444 | 445 | - This method is `preferred` as in `future releases` this models can be then converted to other formats as well ex: "onnx", "hd5" 446 | 447 | 448 | 449 | ```python 450 | mlp.pytorch.register_model_with_runtime("pytorch_example1", model, train_x) 451 | ``` 452 | 453 | 454 | 455 | ## Quick Start (Pytorch) 456 | 457 | ```python 458 | 459 | import torch 460 | import pandas as pd 461 | from sklearn.preprocessing import LabelEncoder 462 | from sklearn.model_selection import train_test_split 463 | from sklearn.metrics import accuracy_score,f1_score 464 | from pymlpipe.tabular import PyMLPipe 465 | 466 | df=pd.read_csv("train.csv") 467 | 468 | encoders=["area_code","state","international_plan","voice_mail_plan","churn"] 469 | 470 | for i in encoders: 471 | 472 | le=LabelEncoder() 473 | 474 | df[i]=le.fit_transform(df[i]) 475 | 476 | trainy=df["churn"] 477 | 478 | trainx=df[['state', 'account_length', 'area_code', 'international_plan', 479 | 'voice_mail_plan', 'number_vmail_messages', 'total_day_minutes', 480 | 'total_day_calls', 'total_day_charge', 'total_eve_minutes', 481 | 'total_eve_calls', 'total_eve_charge', 'total_night_minutes', 482 | 'total_night_calls', 'total_night_charge', 'total_intl_minutes', 483 | 'total_intl_calls', 'total_intl_charge', 484 | 'number_customer_service_calls']] 485 | 486 | 487 | class Model(torch.nn.Module): 488 | 489 | def __init__(self,col_size): 490 | 491 | super().__init__() 492 | 493 | # using sequencial 494 | 495 | self.seq=torch.nn.Sequential( 496 | torch.nn.Linear(col_size,15), 497 | torch.nn.ReLU(), 498 | torch.nn.Linear(15,10), 499 | torch.nn.ReLU(), 500 | torch.nn.Linear(10,1) 501 | ) 502 | 503 | #using torch layers 504 | 505 | def forward(self,x): 506 | out=self.seq(x) 507 | 508 | return torch.sigmoid(out) 509 | 510 | model=Model(len(trainx.columns)) 511 | train_x,test_x,train_y,test_y=train_test_split(trainx,trainy) 512 | train_x=torch.from_numpy(train_x.values) 513 | train_x=train_x.type(torch.FloatTensor) 514 | train_y=torch.from_numpy(train_y.values) 515 | train_y=train_y.type(torch.FloatTensor) 516 | test_x=torch.from_numpy(test_x.values) 517 | test_x=test_x.type(torch.FloatTensor) 518 | test_y=torch.from_numpy(test_y.values) 519 | test_y=test_y.type(torch.FloatTensor) 520 | 521 | optimizer=torch.optim.SGD(model.parameters(),lr=0.001) 522 | criterion=torch.nn.BCELoss() 523 | 524 | def validate(model,testx,testy): 525 | 526 | prediction=model(testx) 527 | 528 | prediction=torch.where(prediction>.5,1,0 529 | accu=accuracy_score( 530 | prediction.detach().numpy(),test_y.unsqueeze(1).detach().numpy() 531 | ) 532 | 533 | f1=f1_score(prediction.detach().numpy(),test_y.unsqueeze(1).detach().numpy()) 534 | 535 | return {"accuracy":accu,"f1":f1} 536 | 537 | 538 | 539 | 540 | epochs=100 541 | 542 | batch_size=1000 543 | 544 | 545 | 546 | mlp=PyMLPipe() 547 | 548 | mlp.set_experiment("Pytorch") 549 | 550 | mlp.set_version(0.2) 551 | 552 | 553 | 554 | with mlp.run(): 555 | 556 | mlp.register_artifact("churndata.csv",df) 557 | 558 | mlp.log_params({ 559 | 560 | "lr":0.01, 561 | 562 | "optimizer":"SGD", 563 | 564 | "loss_fuction":"BCEloss" 565 | 566 | }) 567 | 568 | for epoch in range(epochs): 569 | 570 | loss_batch=0 571 | 572 | for batch in range(1000,5000,1000): 573 | optimizer.zero_grad() 574 | train_data=train_x[batch-1000:batch] 575 | output=model(train_data) 576 | loss=criterion(output,train_y[batch-1000:batch].unsqueeze(1)) 577 | loss.backward() 578 | optimizer.step() 579 | loss_batch+=loss.item() 580 | 581 | metrics=validate(model,test_x,test_y) 582 | metrics["loss"]=loss_batch 583 | metrics["epoch"]=epoch 584 | mlp.log_metrics_continious(metrics) 585 | mlp.pytorch.register_model("pytorch_example1", model) 586 | 587 | ``` 588 | 589 | 590 | 591 | ## UI for Pytorch Models 592 | 593 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.03.29%20PM.png?raw=true) 594 | 595 | 596 | 597 | ###### Visualize the Model details 598 | 599 | 600 | 601 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.03.50%20PM.png?raw=true) 602 | 603 | 604 | 605 | ###### Visualize the Model Architecture 606 | 607 | 608 | 609 | 610 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.04.00%20PM.png?raw=true) 611 | 612 | 613 | 614 | ###### View Training Logs 615 | 616 | 617 | 618 | 619 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.04.08%20PM.png?raw=true) 620 | 621 | 622 | 623 | ###### Visualize Training Logs 624 | 625 | 626 | 627 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.04.21%20PM.png?raw=true) 628 | 629 | 630 | 631 | 632 | ### Sample input for prediction 633 | 634 | `GET REQUEST` - to get info for the model 635 | 636 | - `info` : Contains model information 637 | 638 | - `request_body`: Sample post Request 639 | 640 | ```python 641 | 642 | { 643 | 644 | "info": { 645 | 646 | "experiment_id": "Pytorch", 647 | 648 | "model_deployment_number": "51c186ddd125386c", 649 | 650 | "model_mode": "non_runtime", 651 | 652 | "model_type": "torch", 653 | 654 | "model_url": "/predict/51c186ddd125386c", 655 | 656 | "run_id": "3fffe458-9676-4bc7-a6c0-a3b4cf38e277", 657 | 658 | "status": "running" 659 | 660 | }, 661 | 662 | "request_body": { 663 | "data": [ 664 | [ 665 | 42.0,120.0,1.0,0.0,0.0,0.0,185.7,133.0,31.57,235.1,149.0,19.98, 666 | 256.4,78.0,11.54,16.9,6.0,4.56,0.0 667 | ] 668 | ], 669 | "dtype": "float" 670 | } 671 | } 672 | 673 | ``` 674 | 675 | 676 | 677 | For `POST REQUEST` 678 | 679 | -`data`--> list: contains data rows for prediction supports both batch prediction and single instance ex: data --> [ [ 0,1,2,3],[3,4,56 ] ] 680 | 681 | -`dtype`--> str: for type conversion converts the data into required data type tensor 682 | 683 | 684 | 685 | 686 | ``` 687 | 688 | { 689 | "data": [ 690 | [ 691 | 42.0,120.0,1.0,0.0,0.0,0.0,185.7,133.0,31.57,235.1,149.0,19.98, 692 | 256.4,78.0,11.54,16.9,6.0,4.56,0.0 693 | ] 694 | ], 695 | "dtype": "float" 696 | } 697 | 698 | ``` 699 | 700 | ## Quick Start (AutoML) 701 | 702 | 703 | 704 | ```python 705 | 706 | from automl import AutoMLPipe 707 | from sklearn.datasets import load_iris,load_diabetes 708 | import pandas as pd 709 | import numpy as np 710 | 711 | def main(): 712 | 713 | load_data=load_diabetes() 714 | data=load_data["data"] 715 | target=load_data["target"] 716 | 717 | df=pd.DataFrame(data,columns=load_data["feature_names"]) 718 | automl_obj=AutoMLPipe( 719 | exp_name="DiabAutoMLV1", 720 | task="regression", 721 | metric="RMSE", 722 | data=df, 723 | label=target, 724 | tags=["new_data","reg"], 725 | test_size=0.2, 726 | version=1.0, 727 | transform=True, 728 | scale='normalize', 729 | cols_to_scale=[], 730 | categorical_cols=[], 731 | register_model=True, 732 | explain=True,exclude=[] 733 | ) 734 | preds,result=automl_obj.run_automl(tune=True,tune_best=False) 735 | #DataFrame with comparative metrics of all the models 736 | print(result) 737 | #Dictionary with model names and the predictions 738 | print(preds) 739 | if __name__ == '__main__': 740 | 741 | main() 742 | 743 | ``` 744 | 745 | The AutoML class is simple to run and with the help of few lines of code you'll be able to run several models on your data. You can even choose to hyperparameter tune every model or you can just tune the best model based on the metric that you provide. Below are the simple steps to start your AutoML experiment. 746 | 747 | - Load the data 748 | 749 | - Transform it into X & y datasets. 750 | 751 | - Instanciate the AutoMLPipe class: 752 | 753 | - `exp_name`: name of experiment 754 | 755 | - `task`: regression/classification 756 | 757 | - `metric`: for classification -> accuracy,recall,precision,f1/ for regression -> MAE,MSE,RMSE,R2 Score 758 | 759 | - `data`: data on which the model to be fit 760 | 761 | - `label`: target variable 762 | 763 | - `tags`: list of custom-tags for the run 764 | 765 | - `test_size`: size of test dataset 766 | 767 | - `version`: experiment version 768 | 769 | - `transform`: If transformation is to be applied on the dataset. 770 | 771 | - `scale`: 'standard'/'minmax'/'normalize' 772 | 773 | - `cols_to_scale`: list of columns to scale. Should be numeric or float 774 | 775 | - `categorical_cols`: columns to one-hot encode 776 | 777 | - `register_model`: register experiement model 778 | 779 | - `register_artifacts`: register experiment artifacts 780 | 781 | - `explain`: xai implementation 782 | 783 | - `exclude`: models to be excluded during autoML runs 784 | 785 | - run the experiment by calling the `run_automl` function. 786 | 787 | - `tune=True`: Every autoML models will be hyperparameter tuned. 788 | 789 | - `tune_best=True`: Only the best model will be hyperparameter tuned. 790 | 791 | - Now you can see the experiment running in the ui page and also in the console. 792 | 793 | - Once it is completed you will get results and predictions of the runs. 794 | 795 | - If `tune_best=False`: The `result` will have the dataframe with metrics of each model. The `pred` will contain the dictionary of all the prediction values of all the models. 796 | 797 | - If `tune_best=True`: The `result` will have the dataframe with metrics of each model. The `pred` will contain the a list of prediction values of the hyperparameter tuned best model. 798 | 799 | 800 | 801 | 802 | ## Quick Start (Data Pipeline) 803 | 804 | This is a sample code for data pipeline. 805 | **Please don't take the code too seriously** 806 | 807 | 808 | ```python 809 | #filename : sample.py 810 | from pymlpipe import pipeline 811 | 812 | 813 | 814 | pl=pipeline.PipeLine("TestCase") 815 | 816 | 817 | # Just some random functions 818 | def fetch_data(): 819 | 820 | dict_data={ 821 | 822 | "var":"this is a random string:", 823 | 824 | "path":"this is some random path" 825 | 826 | } 827 | 828 | return dict_data 829 | 830 | def get_dict_values(data_dict): 831 | 832 | new_var=[v for k,v in data_dict.items()] 833 | 834 | return new_var 835 | 836 | 837 | 838 | def get_dict_keys(data_dict): 839 | 840 | new_var=[k for k,v in data_dict.items()] 841 | 842 | return new_var 843 | 844 | def a_edge_node(values): 845 | 846 | print(values) 847 | 848 | def dump_data(keys,values): 849 | 850 | dict_data_rev={k:v for k,v in zip(keys,values)} 851 | 852 | print(dict_data_rev) 853 | 854 | 855 | 856 | pl.add_node("fetch_data",fetch_data,entry_node=True) 857 | 858 | pl.add_node("get_dict_values",get_dict_values,input_nodes=["fetch_data"]) 859 | 860 | pl.add_node("get_dict_keys",get_dict_keys,input_nodes=["fetch_data"]) 861 | 862 | pl.add_node("a_edge_node",a_edge_node,input_nodes=["get_dict_values"]) 863 | 864 | 865 | 866 | pl.add_node("dump_data",dump_data,input_nodes=["get_dict_keys","get_dict_values"]) 867 | 868 | 869 | 870 | pl.register_dag() 871 | 872 | 873 | 874 | ``` 875 | 876 | To define a pipeline Object we can use: 877 | We are nameing the pipeline `TestCase` 878 | 879 | ```python 880 | from pymlpipe import pipeline 881 | pl=pipeline.PipeLine("TestCase") 882 | ``` 883 | 884 | The `add_node` function takes 885 | 886 | ``` 887 | 888 | node_name (str): Name of the node 889 | 890 | function (_type_): Python function you want to execute 891 | 892 | input_nodes (list, optional): List of nodes that are connected to this node. The connected nodes should return a value which will act as an input to the node . Defaults to None. 893 | 894 | entry_node (bool, optional): boolean flag indicating if this is the starting node(first node). Defaults to False. 895 | 896 | args (list, optional): Run time arguments . Defaults to None. 897 | ``` 898 | 899 | 900 | ```python 901 | 902 | pl.add_node("fetch_data",fetch_data,entry_node=True) 903 | ``` 904 | 905 | The `register_dag` function creates a Dag 906 | 907 | ```python 908 | pl.register_dag() 909 | ``` 910 | 911 | StepRun : Once done you can run the file using `python3 sample.py` 912 | 913 | To test the Code you can run 914 | ```python 915 | from pymlpipe import pipeline 916 | 917 | 918 | 919 | ppl=pipeline.PipeLine("TestCase") 920 | 921 | ppl.load_pipeline() 922 | 923 | ppl.run() 924 | ``` 925 | 926 | 927 | The `load_pipeline` will load the pipeline dag saved after *StepRun* 928 | The `run` function will run the given pipeline. 929 | ```python 930 | ppl.run() 931 | ``` 932 | 933 | 934 | or you can go to web browser by running the command 935 | 936 | ```bash 937 | ~ pymlpipeui 938 | ``` 939 | 940 | Or starting the UI with 941 | 942 | ```python 943 | from pymlpipe.pymlpipeUI import start_ui 944 | 945 | start_ui(host='0.0.0.0', port=8085,debug=True) 946 | ``` 947 | 948 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI.png) 949 | 950 | This is a sample control page for the pipeline 951 | 952 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI%202.png) 953 | 954 | Sample Dag 955 | 956 | Node in GREEN.--> Completed Node 957 | Node in RED. --> Failed Node 958 | 959 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI_1.png) 960 | 961 | 962 | # Integrate with Model monitering 963 | 964 | ```python 965 | from pymlpipe import pipeline 966 | import pandas as pd 967 | from sklearn.datasets import load_iris 968 | import pandas as pd 969 | from sklearn.model_selection import train_test_split 970 | from pymlpipe.tabular import PyMLPipe 971 | from sklearn.linear_model import LogisticRegression 972 | from sklearn.ensemble import RandomForestClassifier 973 | from sklearn.tree import DecisionTreeClassifier 974 | from xgboost import XGBClassifier 975 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score 976 | import time 977 | 978 | ppl=pipeline.PipeLine("IrisData") 979 | mlp=PyMLPipe() 980 | mlp.set_experiment("pipelinecheck") 981 | mlp.set_version(0.1) 982 | 983 | def get_data(): 984 | iris_data=load_iris() 985 | data=iris_data["data"] 986 | target=iris_data["target"] 987 | df=pd.DataFrame(data,columns=iris_data["feature_names"]) 988 | trainx,testx,trainy,testy=train_test_split(df,target) 989 | return {"trainx":trainx,"trainy":trainy,"testx":testx,"testy":testy} 990 | 991 | def get_model(model): 992 | if model==0: 993 | return LogisticRegression() 994 | elif model==1: 995 | return RandomForestClassifier() 996 | 997 | def train_model(data,model_name): 998 | with mlp.run(): 999 | trainx,trainy=data["trainx"],data["trainy"] 1000 | mlp.set_tags(["Classification","test run","logisticRegression"]) 1001 | model=get_model(model_name) 1002 | model.fit(trainx, trainy) 1003 | mlp.scikit_learn.register_model(str(model_name), model) 1004 | return model 1005 | 1006 | def evaluate(data,model): 1007 | 1008 | testx,testy=data["testx"],data["testy"] 1009 | 1010 | print(model.predict(testx)) 1011 | 1012 | 1013 | ppl.add_node("data", get_data,entry_node=True) 1014 | 1015 | for idx,model in enumerate([0,1]): 1016 | 1017 | ppl.add_node( 1018 | f"model_train{str(idx)}", 1019 | train_model, 1020 | input_nodes=["data"], 1021 | args={"model_name":model}, 1022 | ) 1023 | ppl.add_node( 1024 | f"eval_train{str(idx)}", 1025 | evaluate, 1026 | input_nodes=["data", f"model_train{str(idx)}"], 1027 | ) 1028 | 1029 | ppl.register_dag() 1030 | 1031 | ``` 1032 | 1033 | 1034 | You can integrate the pipeline with model monitering using the same format as we did for `pymlpipe.tabular` 1035 | 1036 | ```python 1037 | mlp=PyMLPipe() 1038 | mlp.set_experiment("pipelinecheck") 1039 | mlp.set_version(0.1) 1040 | . 1041 | . 1042 | . 1043 | with mlp.run(): 1044 | trainx,trainy=data["trainx"],data["trainy"] 1045 | mlp.set_tags(["Classification","test run","logisticRegression"]) 1046 | model=get_model(model_name) 1047 | model.fit(trainx, trainy) 1048 | mlp.scikit_learn.register_model(str(model_name), model) 1049 | ``` 1050 | 1051 | 1052 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI_2.png) 1053 | 1054 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI%203.png) 1055 | 1056 | --- 1057 | 1058 | 1059 | 1060 | ## Contributing 1061 | 1062 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. 1063 | 1064 | 1065 | 1066 | Please make sure to update tests as appropriate. 1067 | 1068 | 1069 | 1070 | ## License 1071 | 1072 | [MIT](https://choosealicense.com/licenses/mit/) -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | #load balancer :https://gist.github.com/zhouchangxun/5750b4636cc070ac01385d89946e0a7b -------------------------------------------------------------------------------- /api.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pymlpipe.utils import yamlio 3 | from pymlpipe.utils import factory 4 | import pandas as pd 5 | class Client: 6 | def __init__(self,path:str=None): 7 | if path: 8 | self.path=path 9 | elif "modelrun" in os.listdir(): 10 | self.path=os.path.join(os.getcwd(),"modelrun") 11 | 12 | print(f"No Path specified, defaulting to current path {self.path}") 13 | 14 | def get_all_experiments(self): 15 | all_experiments=yamlio.read_yaml( 16 | os.path.join(self.path, factory.DEFAULT["ModelRunInfo"]) 17 | ) 18 | return list(all_experiments.keys()) 19 | 20 | def get_all_run_ids(self,experiment_name): 21 | all_tunids=yamlio.read_yaml( 22 | os.path.join(self.path, factory.DEFAULT["ModelRunInfo"]) 23 | ) 24 | return all_tunids[experiment_name]["runs"] 25 | 26 | def get_run_details(self,experiment_name,runid): 27 | return yamlio.read_yaml( 28 | os.path.join( 29 | self.path, experiment_name, runid, factory.DEFAULT["RunInfo"] 30 | ) 31 | ) 32 | def get_all_run_details(self,experiment_name): 33 | all_runids=yamlio.read_yaml( 34 | os.path.join(self.path, factory.DEFAULT["ModelRunInfo"]) 35 | ) 36 | all_paths={id : os.path.join( 37 | self.path, experiment_name, id, factory.DEFAULT["RunInfo"] 38 | ) for id in all_runids[experiment_name]["runs"]} 39 | return { 40 | 41 | id: yamlio.read_yaml(path) for id,path in all_paths.items() 42 | } 43 | def get_metrics_comparison(self,experiment_name:str,format:str=None,sort_by:str=None,with_version=False): 44 | all_runids=yamlio.read_yaml( 45 | os.path.join(self.path, factory.DEFAULT["ModelRunInfo"]) 46 | ) 47 | all_paths={id : os.path.join( 48 | self.path, experiment_name, id, factory.DEFAULT["RunInfo"] 49 | ) for id in all_runids[experiment_name]["runs"]} 50 | data={ 51 | 52 | id: yamlio.read_yaml(path) for id,path in all_paths.items() 53 | } 54 | comparison={} 55 | for id,d in data.items(): 56 | comparison[id]={ 57 | "model": d["model"]["model_class"], 58 | 59 | } 60 | if with_version: comparison[id]["version"]=d["version"] 61 | comparison[id].update(d["metrics"]) 62 | if format: 63 | if sort_by: 64 | return pd.DataFrame(comparison).T.sort_values(by=sort_by,ascending=False) 65 | return pd.DataFrame(comparison).T 66 | else: 67 | comparison 68 | 69 | def get_model_details(self,experiment_name,runid,format:str=None): 70 | data=yamlio.read_yaml( 71 | os.path.join( 72 | self.path, experiment_name, runid, factory.DEFAULT["RunInfo"] 73 | ) 74 | ) 75 | model_details=data["model"] 76 | _exceptions_=["model_params","model_tags"] 77 | print(model_details.keys()) 78 | model_info=[] 79 | model_info.extend( 80 | {"name": model_detail, "value": model_details[model_detail]} 81 | for model_detail in model_details 82 | if model_detail not in _exceptions_ 83 | ) 84 | model_params = [ 85 | {"name": params, "value": model_details["model_params"][params]} 86 | for params in model_details["model_params"] 87 | ] 88 | model_tags = [ 89 | {"name": params, "value": model_details["model_tags"][params]} 90 | for params in model_details["model_tags"] 91 | ] 92 | if format: 93 | return pd.DataFrame(model_info),pd.DataFrame(model_params),pd.DataFrame(model_tags) 94 | else: 95 | return model_info,model_params,model_tags 96 | 97 | -------------------------------------------------------------------------------- /automl.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from tqdm import tqdm 4 | from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier, RandomForestClassifier,BaggingRegressor,AdaBoostRegressor, ExtraTreesRegressor, RandomForestRegressor,GradientBoostingRegressor 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.linear_model import LogisticRegression,PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier,LinearRegression, Lasso, Ridge, ElasticNet, BayesianRidge, HuberRegressor, PoissonRegressor,PassiveAggressiveRegressor 7 | from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor 8 | from sklearn.svm import LinearSVC,SVR 9 | from sklearn.neural_network import MLPClassifier, MLPRegressor 10 | from xgboost import XGBClassifier, XGBRegressor 11 | from catboost import CatBoostClassifier, CatBoostRegressor 12 | from lightgbm import LGBMClassifier, LGBMRegressor 13 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,r2_score,mean_absolute_error,mean_squared_error,make_scorer 14 | from pymlpipe.tabular import PyMLPipe 15 | #from tabular import PyMLPipe 16 | from sklearn.model_selection import GridSearchCV 17 | from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer, RobustScaler 18 | from itertools import chain 19 | 20 | 21 | class AutoMLPipe(): 22 | def __init__(self,exp_name,task,metric,data,label,tags=[],test_size=0.20,version=1.0,transform=False,scale='standard',cols_to_scale=[],categorical_cols=[],register_model=False,register_artifacts=False,explain=False,exclude=[]): 23 | ''' 24 | exp_name: name of experiment 25 | task: regression/classification 26 | metric: for classification -> accuracy,recall,precision,f1/ for regression -> MAE,MSE,RMSE,R2 Score 27 | data: data on which the model to be fit 28 | label: target variable 29 | tags: list of custom-tags for the run 30 | test_size: size of test dataset 31 | version: experiment version 32 | transform:bool 33 | scale: 'standard'/'minmax'/'normalize' 34 | cols_to_scale: list of columns to scale. Should be numeric or float 35 | categorical_cols: columns to one-hot encode 36 | register_model: register experiement model 37 | register_artifacts: register experiment artifacts 38 | explain= xai implementation 39 | exclude: models to be excluded during autoML runs 40 | ''' 41 | self.exp_name=exp_name 42 | self.task=task 43 | self.metric=metric 44 | self.data=data 45 | self.label=label 46 | self.test_size=test_size 47 | self.version=version 48 | self.exclude=exclude 49 | self.transform=transform 50 | self.scale=scale 51 | self.cols_to_scale=cols_to_scale 52 | self.categorical_cols=categorical_cols 53 | self.mlp=PyMLPipe() 54 | self.register_model=register_model 55 | self.register=register_artifacts 56 | self.explain=explain 57 | self.tags=tags 58 | self.classification_models={ 59 | 'LogisticRegression': LogisticRegression(), 60 | 'AdaBoostClassifier':AdaBoostClassifier(), 61 | 'BaggingClassifier': BaggingClassifier(), 62 | 'ExtraTreesClassifier' : ExtraTreesClassifier(), 63 | 'GradientBoostingClassifier' : GradientBoostingClassifier(), 64 | 'RandomForestClassifier': RandomForestClassifier(), 65 | 'DecisionTreeClassifier': DecisionTreeClassifier(), 66 | 'RidgeClassifier': RidgeClassifier(), 67 | 'SGDClassifier':SGDClassifier(), 68 | 'PassiveAggressiveClassifier':PassiveAggressiveClassifier(), 69 | 'LinearSVC': LinearSVC(), 70 | 'MLPClassifier': MLPClassifier(), 71 | 'XGBClassifier': XGBClassifier(n_jobs=-1), 72 | 'LGBMClassifier': LGBMClassifier(n_jobs=-1), 73 | 'CatBoostClassifier': CatBoostClassifier()} 74 | self.regression_models={ 75 | 'LinearRegression': LinearRegression(), 76 | 'SVR' : SVR(), 77 | 'AdaBoostRegressor' : AdaBoostRegressor(), 78 | 'DecisionTreeRegressor' : DecisionTreeRegressor(), 79 | 'Lasso' : Lasso(), 80 | 'Ridge' : Ridge(), 81 | 'MLPRegressor' : MLPRegressor(), 82 | 'RandomForestRegressor' : RandomForestRegressor(), 83 | 'ExtraTreesRegressor' : ExtraTreesRegressor(), 84 | 'GradientBoostingRegressor' : GradientBoostingRegressor(), 85 | 'BaggingRegressor' : BaggingRegressor(), 86 | 'ElasticNet' : ElasticNet(), 87 | 'PassiveAggressiveRegressor': PassiveAggressiveRegressor(), 88 | 'BayesianRidge' : BayesianRidge(), 89 | 'HuberRegressor' : HuberRegressor(), 90 | 'PoissonRegressor' : PoissonRegressor(), 91 | 'XGBRegressor': XGBRegressor(n_jobs=-1), 92 | 'LGBMRegressor': LGBMRegressor(n_jobs=-1), 93 | 'CatBoostRegressor': CatBoostRegressor() 94 | 95 | } 96 | self.explain_exclude=['AdaBoostClassifier','BaggingClassifier','GradientBoostingClassifier','MLPClassifier','LinearSVC','AdaBoostRegressor','BaggingRegressor','SVR','MLPRegressor'] 97 | self.param_grid=dict() 98 | self.param_grid['LogisticRegression']={ 99 | 'penalty': ['l1','l2'], 100 | 'C': [0.1,1], 101 | 'solver': ['liblinear', 'newton-cg'], 102 | } 103 | self.param_grid['PassiveAggressiveClassifier']={ 104 | 'C': [0.01,0.1,1], 105 | 106 | } 107 | self.param_grid['PassiveAggressiveRegressor']={ 108 | 'C': [0.1,0.5,1], 109 | 110 | } 111 | self.param_grid['RidgeClassifier']={ 112 | 'alpha':[0.01,0.1,1], 113 | 'solver': ['auto','sag','cholesky'] 114 | } 115 | self.param_grid['SGDClassifier']={ 116 | 'loss': ['hinge','log_loss', 'modified_huber','squared_error'], 117 | 'penalty': ['l1','l2'] 118 | } 119 | self.param_grid['DecisionTreeClassifier']={ 120 | 'criterion': ['gini','entropy'], 121 | 'max_depth': [None,2,3], 122 | 'min_samples_split': [2,3,4] 123 | } 124 | self.param_grid['AdaBoostClassifier']={ 125 | 'n_estimators': [10,100,500], 126 | 'learning_rate': [0.01,0.1,1], 127 | } 128 | self.param_grid['BaggingClassifier']={ 129 | 'n_estimators': [10,100,500], 130 | } 131 | self.param_grid['BaggingRegressor']={ 132 | 'n_estimators': [10,100,500], 133 | } 134 | self.param_grid['ExtraTreesClassifier']={ 135 | 'n_estimators': [10,100,500], 136 | 'max_depth': [None,2,3], 137 | 'min_samples_split': [2,3,4] 138 | } 139 | self.param_grid['ExtraTreesRegressor']={ 140 | 'n_estimators': [10,100,500], 141 | 'max_depth': [None,2,3], 142 | 'min_samples_split': [2,3,4] 143 | } 144 | self.param_grid['GradientBoostingClassifier']={ 145 | 'n_estimators': [10,100,500], 146 | 'learning_rate': [0.01,0.1], 147 | 'criterion': ['friedman_mse','squared_error'] 148 | } 149 | self.param_grid['GradientBoostingRegressor']={ 150 | 'n_estimators': [10,100,500], 151 | 'learning_rate': [0.01,0.1], 152 | 'criterion': ['friedman_mse','squared_error'] 153 | } 154 | self.param_grid['RandomForestClassifier']={ 155 | 'n_estimators': [10,100,500], 156 | 'max_depth': [None,2,3], 157 | 'min_samples_split': [2,3,4] 158 | } 159 | self.param_grid['RandomForestRegressor']={ 160 | 'n_estimators': [10,100,500], 161 | 'max_depth': [None,2,3], 162 | 'min_samples_split': [2,3,4] 163 | } 164 | self.param_grid['LinearSVC']={ 165 | 'loss': ['hinge','log_loss', 'modified_huber','squared_error'], 166 | 'C': [0.1,0.5,1] 167 | } 168 | self.param_grid['MLPClassifier']={ 169 | 'activation': ['tanh','relu'], 170 | 'solver': ['sgd','adam'] 171 | } 172 | self.param_grid['MLPRegressor']={ 173 | 'activation': ['tanh','relu'], 174 | 'solver': ['sgd','adam'] 175 | } 176 | self.param_grid['LinearRegression']={ 177 | 'n_jobs' : [-1] 178 | } 179 | self.param_grid['SVR']={ 180 | 'kernel' : ['linear', 'poly', 'rbf', 'sigmoid'] 181 | ,'gamma' : ['scale','auto'] 182 | , 'C': [0.1,0.5,1] 183 | } 184 | self.param_grid['AdaBoostRegressor']={ 185 | 'n_estimators': [10,100,500], 186 | 'learning_rate': [0.01,0.1,1], 187 | 'loss' : ['linear','square','exponential'] 188 | } 189 | self.param_grid['DecisionTreeRegressor']={ 190 | #'criterion': ['gini','entropy'], 191 | 'splitter' : ['best'], 192 | 'max_depth': [None,2,3], 193 | 'min_samples_split': [2,3,4] 194 | } 195 | self.param_grid['Lasso']={ 196 | 'selection' : ['cyclic', 'random'] 197 | } 198 | self.param_grid['Ridge']={ 199 | 'solver' : ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'] 200 | } 201 | self.param_grid['PoissonRegressor']={ 202 | 'alpha': [0.5,1,1.5] 203 | } 204 | self.param_grid['HuberRegressor']={ 205 | 'epsilon': [1.35,1.5,1.75,2] 206 | } 207 | self.param_grid['ElasticNet']={ 208 | 'l1_ratio': [0.3,0.5,0.6,0.7] 209 | } 210 | self.param_grid['BayesianRidge']={ 211 | 'n_iter': [100,300,500] 212 | } 213 | self.param_grid['XGBClassifier']={ 214 | 'n_estimators': [10,100,500], 215 | 'max_depth': [None,2,3], 216 | 'learning_rate': [0.01,0.1] 217 | } 218 | self.param_grid['LGBMClassifier']={ 219 | 'n_estimators': [10,100,500], 220 | 'max_depth': [None,2,3], 221 | 'num_leaves': [20,30,40] 222 | } 223 | self.param_grid['CatBoostClassifier']={ 224 | 'n_estimators': [10,100,500], 225 | 'max_depth': [2,3], 226 | 'learning_rate': [0.01,0.1] 227 | } 228 | self.param_grid['XGBRegressor']={ 229 | 'n_estimators': [10,100,500], 230 | 'max_depth': [None,2,3], 231 | 'learning_rate': [0.01,0.1] 232 | } 233 | self.param_grid['LGBMRegressor']={ 234 | 'n_estimators': [10,100,500], 235 | 'max_depth': [None,2,3], 236 | 'num_leaves': [20,30,40] 237 | } 238 | self.param_grid['CatBoostRegressor']={ 239 | 'n_estimators': [10,100,500], 240 | 'max_depth': [2,3], 241 | 'learning_rate': [0.01,0.1] 242 | } 243 | 244 | def run_automl(self,tune=False,tune_best=False): 245 | ''' 246 | tune: param tune all the models 247 | tune_best: param tune the best model 248 | ''' 249 | 250 | # Set Experiment name 251 | self.mlp.set_experiment(self.exp_name) 252 | # Set Version name 253 | self.mlp.set_version(self.version) 254 | 255 | if self.transform==True: 256 | numeric_cols=self.data.select_dtypes(include=[np.number]).columns 257 | numeric_cols=[item for item in numeric_cols if item not in self.categorical_cols] 258 | cols_to_scale=self.cols_to_scale 259 | if cols_to_scale==[]: 260 | cols_to_scale=numeric_cols 261 | check = all(item in numeric_cols for item in cols_to_scale) 262 | if check==True: 263 | if self.scale=='standard': 264 | scaler = StandardScaler() 265 | elif self.scale=='minmax': 266 | scaler = MinMaxScaler() 267 | elif self.scale=='normalize': 268 | scaler = Normalizer() 269 | elif self.scale=='robust': 270 | scaler = RobustScaler() 271 | scaler.fit(self.data[cols_to_scale]) 272 | self.data[cols_to_scale] = scaler.transform(self.data[cols_to_scale]) 273 | else: 274 | print('Scaling operation cannot be completed as column type is not int/float') 275 | 276 | if self.categorical_cols!=[]: 277 | self.data = pd.get_dummies(self.data, columns = self.categorical_cols) 278 | self.exclude=[x.lower() for x in self.exclude] 279 | trainx,testx,trainy,testy=train_test_split(self.data,self.label,test_size=self.test_size) 280 | result=pd.DataFrame() 281 | prediction_set={} 282 | if self.task=='classification': 283 | for model_name,model in tqdm(self.classification_models.items()): 284 | model_ex=model_name.lower() 285 | if model_ex not in self.exclude: 286 | if tune==True: 287 | try: 288 | predictions,result_set=self.param_tune_model(model_name,trainx,testx,trainy,testy) 289 | predictions=predictions.tolist() 290 | if model_name=='CatBoostClassifier': 291 | predictions=list(chain(*predictions)) 292 | fin=dict() 293 | fin['name']=model_name 294 | fin['accuracy']=result_set['accuracy'] 295 | fin['precision']=result_set['precision'] 296 | fin['recall']=result_set['recall'] 297 | fin['f1']=result_set['f1_score'] 298 | except Exception as e: 299 | print (e) 300 | continue 301 | 302 | else: 303 | try: 304 | with self.mlp.run(): 305 | print(model_name) 306 | default_tags=[model_name,"Classification"] 307 | tag_list=default_tags+self.tags 308 | self.mlp.set_tags(tag_list) 309 | 310 | model=model 311 | model.fit(trainx, trainy) 312 | predictions=model.predict(testx) 313 | predictions=predictions.tolist() 314 | if model_name=='CatBoostClassifier': 315 | predictions=list(chain(*predictions)) 316 | 317 | self.mlp.log_metric("accuracy", accuracy_score(testy,predictions)) 318 | self.mlp.log_metric("precision", precision_score(testy,predictions,average='macro')) 319 | self.mlp.log_metric("recall", recall_score(testy,predictions,average='macro')) 320 | self.mlp.log_metric("f1_score", f1_score(testy,predictions,average='macro')) 321 | if self.explain==True: 322 | if model_name not in self.explain_exclude: 323 | self.mlp.explainer(model,trainx) 324 | else: print('XAI is not available for ',model_name) 325 | if self.register==True: 326 | self.mlp.register_artifact("train", trainx) 327 | self.mlp.register_artifact("test", testx,artifact_type="testing") 328 | if self.register_model==True: 329 | self.mlp.scikit_learn.register_model(model_name, model) 330 | 331 | result1=self.mlp.get_info() 332 | fin=dict() 333 | fin['name']=model_name 334 | fin['accuracy']=result1['metrics']['accuracy'] 335 | fin['precision']=result1['metrics']['precision'] 336 | fin['recall']=result1['metrics']['recall'] 337 | fin['f1']=result1['metrics']['f1_score'] 338 | except Exception as e: 339 | print(e) 340 | continue 341 | 342 | prediction_set[model_name]=predictions 343 | result=result.append(fin,ignore_index=True) 344 | elif self.task=='regression': 345 | for model_name,model in tqdm(self.regression_models.items()): 346 | model_ex=model_name.lower() 347 | if model_ex not in self.exclude: 348 | if tune==True: 349 | try: 350 | predictions,result_set=self.param_tune_model(model_name,trainx,testx,trainy,testy) 351 | predictions=predictions.tolist() 352 | fin=dict() 353 | fin['name']=model_name 354 | fin['MAE']=result_set['MAE'] 355 | fin['MSE']=result_set['MSE'] 356 | fin['R2 Score']=result_set['R2 Score'] 357 | fin['RMSE']=result_set['RMSE'] 358 | except Exception as e: 359 | print(e) 360 | continue 361 | else: 362 | try: 363 | with self.mlp.run(): 364 | default_tags=[model_name,"Regression"] 365 | tag_list=default_tags+self.tags 366 | self.mlp.set_tags(tag_list) 367 | model=model 368 | model.fit(trainx, trainy) 369 | predictions=model.predict(testx) 370 | predictions=predictions.tolist() 371 | 372 | # log performace metrics 373 | self.mlp.log_metric("R2 Score", r2_score(testy,predictions)) 374 | self.mlp.log_metric("MAE", mean_absolute_error(testy,predictions)) 375 | self.mlp.log_metric("MSE", mean_squared_error(testy,predictions)) 376 | self.mlp.log_metric("RMSE", mean_squared_error(testy,predictions,squared=False)) 377 | if self.explain==True: 378 | if model_name not in self.explain_exclude: 379 | self.mlp.explainer(model,trainx) 380 | else: print('XAI is not available for ',model_name) 381 | if self.register==True: 382 | # Save train data and test data 383 | self.mlp.register_artifact("train", trainx) 384 | self.mlp.register_artifact("test", testx,artifact_type="testing") 385 | # Save the model 386 | if self.register_model==True: 387 | self.mlp.scikit_learn.register_model(model_name, model) 388 | result1=self.mlp.get_info() 389 | 390 | fin=dict() 391 | fin['name']=model_name 392 | fin['MAE']=result1['metrics']['MAE'] 393 | fin['MSE']=result1['metrics']['MSE'] 394 | fin['RMSE']=result1['metrics']['RMSE'] 395 | fin['R2 Score']=result1['metrics']['R2 Score'] 396 | except Exception as e: 397 | print (e) 398 | continue 399 | 400 | prediction_set[model_name]=predictions 401 | result=result.append(fin, ignore_index=True) 402 | 403 | if self.task=='classification' or self.metric=='R2 Score': 404 | result.sort_values(by=self.metric,ascending=False,inplace=True) 405 | else: 406 | result.sort_values(by=self.metric,ascending=True,inplace=True) 407 | 408 | if tune_best==False: 409 | return prediction_set,result 410 | else: 411 | result=result.head(1) 412 | best_model_name=str(result.name.values[0]) 413 | 414 | prediction_set,result=self.param_tune_model(trainx=trainx,testx=testx,trainy=trainy,testy=testy,model_tune=best_model_name) 415 | return prediction_set,result 416 | 417 | def param_tune_model(self,model_tune,trainx,testx,trainy,testy): 418 | 419 | self.mlp.set_experiment(self.exp_name) 420 | best_model_name=model_tune 421 | 422 | with self.mlp.run(): 423 | if self.task=="classification": 424 | default_tags=["Hyper-param-tuning-clf",best_model_name] 425 | tag_list=default_tags+self.tags 426 | self.mlp.set_tags(tag_list) 427 | final_model=self.classification_models[best_model_name].fit(trainx, trainy) 428 | if self.metric=='accuracy': score= make_scorer(accuracy_score,average='weighted') 429 | elif self.metric=='recall': score=make_scorer(recall_score,average='weighted') 430 | elif self.metric=='precision': score=make_scorer(precision_score,average='weighted') 431 | else: score=make_scorer(f1_score,average='weighted') 432 | CV_cfl = GridSearchCV(estimator = final_model, param_grid = self.param_grid[best_model_name], scoring= score, cv=3, verbose = 2) 433 | CV_cfl.fit(trainx, trainy) 434 | self.mlp.log_params(CV_cfl.best_params_) 435 | predictions=CV_cfl.best_estimator_.predict(testx) 436 | 437 | if self.explain==True: 438 | if best_model_name not in self.explain_exclude: 439 | self.mlp.explainer(CV_cfl.best_estimator_,trainx) 440 | else: print('XAI is not available for ',best_model_name) 441 | 442 | result_set={ 443 | "accuracy": accuracy_score(testy,predictions), 444 | "precision": precision_score(testy,predictions,average='macro'), 445 | "recall": recall_score(testy,predictions,average='macro'), 446 | "f1_score": f1_score(testy,predictions,average='macro')} 447 | self.mlp.log_metrics(result_set) 448 | elif self.task=="regression": 449 | default_tags=["Hyper-param-tuning-reg",best_model_name] 450 | tag_list=default_tags+self.tags 451 | self.mlp.set_tags(tag_list) 452 | final_model=self.regression_models[best_model_name].fit(trainx, trainy) 453 | if self.metric=='MSE': score= 'neg_mean_squared_error' 454 | elif self.metric=='MAE': score='neg_mean_absolute_error' 455 | elif self.metric=='R2 Score': score='r2' 456 | else: score='neg_root_mean_squared_error' 457 | CV_cfl = GridSearchCV(estimator = final_model, param_grid = self.param_grid[best_model_name], scoring=score,cv=3,verbose = 2) 458 | CV_cfl.fit(trainx, trainy) 459 | self.mlp.log_params(CV_cfl.best_params_) 460 | predictions=CV_cfl.best_estimator_.predict(testx) 461 | 462 | result_set={ 463 | "MSE": mean_squared_error(testy,predictions), 464 | "MAE": mean_absolute_error(testy,predictions), 465 | "R2 Score": r2_score(testy,predictions), 466 | "RMSE": mean_squared_error(testy,predictions,squared=False)} 467 | self.mlp.log_metrics(result_set) 468 | if self.explain==True: 469 | if best_model_name not in self.explain_exclude: 470 | self.mlp.explainer(CV_cfl.best_estimator_,trainx) 471 | else: print('XAI is not available for ',best_model_name) 472 | if self.register_model==True: 473 | self.mlp.scikit_learn.register_model(best_model_name, CV_cfl.best_estimator_) 474 | if self.register==True: 475 | # Save train data and test data 476 | self.mlp.register_artifact("train", trainx) 477 | self.mlp.register_artifact("test", testx,artifact_type="testing") 478 | 479 | return predictions,result_set 480 | 481 | -------------------------------------------------------------------------------- /legacy/pipeline.py: -------------------------------------------------------------------------------- 1 | import dill 2 | import cloudpickle 3 | from pymlpipe.utils import database,yamlio 4 | import os 5 | import datetime 6 | import traceback,sys 7 | #checking 8 | 9 | class Node: 10 | def __init__(self,name, func,path): 11 | self.name=name 12 | self.func=func 13 | self.path=path 14 | self.save(name, func) 15 | 16 | def save(self,name,func): 17 | #mainify(func) 18 | #dill.dump(func, open(name+".pkl", "wb")) 19 | 20 | self.filename=os.path.join(self.path,name+".mld") 21 | cloudpickle.dump(func, open(self.filename, "wb")) 22 | 23 | 24 | class Pipeline: 25 | def __init__(self,name): 26 | path=os.getcwd() 27 | self.PIPELINE_FOLDER="ML_pipelines" 28 | database.create_folder(path) 29 | self.name = name 30 | self.base_path=database.create_folder(path,self.PIPELINE_FOLDER) 31 | self.path_pipe=database.create_folder(self.base_path,self.name) 32 | 33 | self.dag={"sequence":[],"nodes":{},"edges":[],"node_order":{},"node_details":{}} 34 | self.sequence=[] 35 | self.node_order={} 36 | self.is_entry_node=False 37 | 38 | def _make_edges(self,node,edges): 39 | """Create the DAG edges for the nodes, in a src--> trg format 40 | 41 | Args: 42 | node (_type_): _description_ 43 | edges (_type_): _description_ 44 | 45 | Returns: 46 | _type_: _description_ 47 | """ 48 | edge_list=[] 49 | for edge in edges: 50 | edge_list.append({"src":edge,"target":node}) 51 | return edge_list 52 | 53 | 54 | def add_node(self,name,func,node_input=None,entry_node=False,args=None): 55 | if name in self.sequence: 56 | raise ValueError(f"Node Name {name} already exists! Please provide different Name") 57 | self.sequence.append(name) 58 | node=Node(name,func,self.path_pipe) 59 | self.dag["nodes"][name]={"path":node.filename,"mould_file":node.name+".mld","entry":entry_node,"args":args,"folder":self.PIPELINE_FOLDER,"subfolder":self.name,} 60 | self.dag["node_details"][name]={"status":"Queued","start_time":"-","end_time":"-","log":""} 61 | if entry_node: 62 | self.is_entry_node=True 63 | if node_input!=None: 64 | self.node_order[name]=node_input 65 | return node 66 | 67 | def load(self,name): 68 | #return dill.load(open(name+".pkl", "rb")) 69 | #return cloudpickle.load(open(os.path.join(self.path_pipe,self.name+".yaml"), "rb")) 70 | return cloudpickle.load(open(name,'rb')) 71 | 72 | 73 | def add_edge(self,node_1,node_2): 74 | if not isinstance(node_1,Node) and isinstance(node_2,Node): 75 | raise TypeError("node_1 or node_2 is not type Node") 76 | self.dag["edges"].append({"src":node_1.name,"target":node_2.name}) 77 | 78 | 79 | def register(self): 80 | already_exist=False 81 | exists_idx=None 82 | if not self.is_entry_node: 83 | raise ValueError("Entry Node is not defined!!! Please 'entry_node'=True for the starting node") 84 | self.dag["sequence"]=self.sequence 85 | self.dag["node_order"]=self.node_order 86 | #self.dag["graph"]= 87 | graph={} 88 | ''' 89 | for seq in self.sequence: 90 | if seq in graph: 91 | graph[seq].append([{"edges":i,"status":None} for i in self.dag["edges"] if i["src"]==seq]) 92 | else: 93 | graph[seq]=[{"edges":i,"status":None} for i in self.dag["edges"] if i["src"]==seq] 94 | print(graph) 95 | ''' 96 | data=yamlio.read_yaml(os.path.join(self.base_path,"info.yaml")) 97 | 98 | for idx,d in enumerate(data): 99 | if d["pipelinename"]==self.name: 100 | already_exist=True 101 | exists_idx=idx 102 | if not already_exist: 103 | data.append({ 104 | "pipelinename":self.name, 105 | "path":self.path_pipe, 106 | "folder":self.PIPELINE_FOLDER, 107 | "subfolder":self.name, 108 | "created_at": datetime.datetime.now(), 109 | "status":"-", 110 | "jobtime":"", 111 | "jobtime":"-" 112 | }) 113 | else: 114 | data[idx].update({ 115 | "pipelinename":self.name, 116 | "path":self.path_pipe, 117 | "folder":self.PIPELINE_FOLDER, 118 | "subfolder":self.name, 119 | "created_at": datetime.datetime.now(), 120 | "status":"-", 121 | "jobtime":"", 122 | "jobtime":"-" 123 | }) 124 | 125 | yamlio.write_to_yaml(os.path.join(self.base_path,"info.yaml"), data) 126 | yamlio.write_to_yaml(os.path.join(self.path_pipe,self.name+".yaml"), self.dag) 127 | 128 | def load_pipeline(self): 129 | self.dag=yamlio.read_yaml(os.path.join(self.path_pipe,self.name+".yaml")) 130 | 131 | def _find_next_node(self,node_name): 132 | return self.dag["graph"][node_name] 133 | 134 | def _create_graph(self,edges): 135 | graph={} 136 | for edge in edges: 137 | if edge["src"] in graph: 138 | graph[edge["src"]].append(edge["target"]) 139 | else: 140 | graph[edge["src"]]=[edge["target"]] 141 | return graph 142 | def _make_previous_output(self,_prev_outputs,neighbor,functions_args): 143 | inp=[] 144 | #print(functions_args) 145 | for n in neighbor: 146 | #when output --> tuple,list 147 | if isinstance(_prev_outputs[n], tuple) or isinstance(_prev_outputs[n], list): 148 | inp.extend(list(_prev_outputs[n])) 149 | 150 | if functions_args!=None: 151 | inp.extend(functions_args) 152 | #make output --> dict 153 | else: # or isinstance(_prev_outputs[n], str) or isinstance(_prev_outputs[n], int) or isinstance(_prev_outputs[n], float): 154 | inp.append(_prev_outputs[n]) 155 | 156 | if functions_args!=None: 157 | inp.extend(functions_args) 158 | #make output --> str,float,int 159 | 160 | print("input-->",inp) 161 | return inp 162 | 163 | def _change_status(self,node,status,info=None): 164 | dag=yamlio.read_yaml(os.path.join(self.path_pipe,self.name+".yaml")) 165 | if status=="Started": 166 | 167 | dag["node_details"][node]["status"]=status 168 | 169 | dag["node_details"][node]["start_time"]=str(datetime.datetime.now()) 170 | dag["node_details"][node]["log"]="======"+status.upper()+"======"+str(datetime.datetime.now())+"\n" 171 | elif status=="Completed" or status=="Failed": 172 | dag["node_details"][node]["status"]=status 173 | dag["node_details"][node]["end_time"]=str(datetime.datetime.now()) 174 | dag["node_details"][node]["log"]+="======"+status.upper()+"======"+str(datetime.datetime.now())+"\n" 175 | if info!=None: 176 | dag["node_details"][node]["log"]+="\n"+str(info)+"======" 177 | 178 | 179 | dag=yamlio.write_to_yaml(os.path.join(self.path_pipe,self.name+".yaml"),dag) 180 | 181 | 182 | 183 | def bfs(self,graph,entry_node,_prev_outputs,_functions,_node_order,functions_args,job_name,flag_variable_path): 184 | visited = [entry_node] # List to keep track of visited nodes. 185 | queue = [entry_node] #Initialize a queue 186 | while queue: 187 | s = queue.pop(0) 188 | 189 | if s in graph: 190 | for neighbour in graph[s]: 191 | if neighbour not in visited: 192 | func=_functions[neighbour] 193 | self._change_status(neighbour,"Started") 194 | if not self._check_for_job_status(job_name,flag_variable_path): sys.exit() 195 | #print(self._make_previous_output(_prev_outputs,_node_order[neighbour])) 196 | print(_prev_outputs) 197 | try: 198 | _prev_outputs[neighbour]=func(*self._make_previous_output(_prev_outputs,_node_order[neighbour],functions_args[neighbour])) 199 | 200 | self._change_status(neighbour,"Completed") 201 | 202 | #func() 203 | except Exception as e: 204 | #print(neighbour) 205 | print(traceback.format_exc()) 206 | #raceback.print_exception(*sys.exc_info()) 207 | self._change_status(neighbour,"Failed",info=traceback.format_exc()) 208 | 209 | visited.append(neighbour) 210 | queue.append(neighbour) 211 | return _prev_outputs 212 | 213 | def _get_path(self,base_path,folder): 214 | return os.path.join(base_path,folder) 215 | def _check_for_job_status(self,jobname,queue_name): 216 | all_jobs=yamlio.read_yaml(queue_name) 217 | status=[j["status"] for j in all_jobs if j["pipelinename"]==jobname] 218 | return False if status[0]=="Stopped" else True 219 | 220 | def run(self,*args,**kwargs): 221 | if len(self.dag["sequence"])==0: 222 | raise ValueError("Error!!! No Dag Provided!!!!") 223 | #if not self.is_entry_node: 224 | # raise ValueError("Error!!! Entry Node Not defined please provide and entry node with entry_node=True!!!!") 225 | entrynode=[] 226 | functions={} 227 | output_nodes={} 228 | functions_args={} 229 | for node in self.dag["nodes"]: 230 | #print(node,self.dag["nodes"][node]) 231 | if self.dag["nodes"][node]["entry"]: 232 | entrynode.append(node) 233 | functions[node]=self.load(self._get_path(self.path_pipe, self.dag["nodes"][node]["mould_file"]))#self.dag["nodes"][node]["path"]) 234 | functions_args[node]=self.dag["nodes"][node]["args"] 235 | graph=self._create_graph(self.dag["edges"]) 236 | self.dag["node_details"]={node:{"status":"Queued","start_time":"-","end_time":"-","log":""} for node in self.dag["node_details"]} 237 | yamlio.write_to_yaml(os.path.join(self.path_pipe,self.name+".yaml"), self.dag) 238 | for node in entrynode: 239 | func=functions[node] 240 | self._change_status(node,"Started") 241 | try: 242 | print("node", node) 243 | output_nodes[node]=func(*args,**kwargs) 244 | self._change_status(node,"Completed") 245 | except Exception as e: 246 | print(node) 247 | 248 | traceback.print_exception(*sys.exc_info()) 249 | self._change_status(node,"Failed") 250 | output_nodes=self.bfs(graph,node,output_nodes,functions,self.dag["node_order"],functions_args) 251 | return output_nodes 252 | 253 | 254 | 255 | 256 | def run_serialized(self,flag_variable_path,job_name,*args,**kwargs): 257 | if len(self.dag["sequence"])==0: 258 | raise ValueError("Error!!! No Dag Provided!!!!") 259 | #if not self.is_entry_node: 260 | # raise ValueError("Error!!! Entry Node Not defined please provide and entry node with entry_node=True!!!!") 261 | entrynode=[] 262 | functions={} 263 | output_nodes={} 264 | functions_args={} 265 | for node in self.dag["nodes"]: 266 | #print(node,self.dag["nodes"][node]) 267 | if self.dag["nodes"][node]["entry"]: 268 | entrynode.append(node) 269 | functions[node]=self.load(self._get_path(self.path_pipe, self.dag["nodes"][node]["mould_file"]))#self.dag["nodes"][node]["path"]) 270 | functions_args[node]=self.dag["nodes"][node]["args"] 271 | graph=self._create_graph(self.dag["edges"]) 272 | self.dag["node_details"]={node:{"status":"Queued","start_time":"-","end_time":"-","log":""} for node in self.dag["node_details"]} 273 | yamlio.write_to_yaml(os.path.join(self.path_pipe,self.name+".yaml"), self.dag) 274 | for node in entrynode: 275 | func=functions[node] 276 | self._change_status(node,"Started") 277 | try: 278 | print("node", node,self._check_for_job_status(job_name,flag_variable_path)) 279 | if not self._check_for_job_status(job_name,flag_variable_path): 280 | sys.exit() 281 | output_nodes[node]=func(*args,**kwargs) 282 | self._change_status(node,"Completed") 283 | except Exception as e: 284 | 285 | 286 | traceback.print_exception(*sys.exc_info()) 287 | self._change_status(node,"Failed") 288 | output_nodes=self.bfs(graph,node,output_nodes,functions,self.dag["node_order"],functions_args,job_name,flag_variable_path) 289 | return output_nodes 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | -------------------------------------------------------------------------------- /pipeline.py: -------------------------------------------------------------------------------- 1 | 2 | import cloudpickle 3 | from pymlpipe.utils import database,yamlio 4 | import os 5 | import datetime 6 | import traceback,sys 7 | import inspect 8 | ##---To Be moved into separate file--## 9 | 10 | __FOLDER__="ML_pipelines" 11 | 12 | 13 | class Node: 14 | def __init__(self,name, func,path): 15 | self.name=name 16 | self.func=func 17 | self.path=path 18 | self.save(name, func) 19 | 20 | def save(self,name: str,func) -> None: 21 | """_summary_: Saves a python Function into Mould file 22 | 23 | Args: 24 | name (str): Name of the function 25 | func (Object): Actual python function 26 | """ 27 | 28 | self.name_of_file = f"{self.name}.mld" 29 | self.filename=os.path.join(self.path,self.name_of_file) 30 | cloudpickle.dump(func, open(self.filename, "wb")) 31 | 32 | class PipeLine: 33 | def __init__(self,pipeline_name,pipeline_path=None): 34 | self.path=os.getcwd() 35 | self.pipeline_name=pipeline_name 36 | self.pipeline_path = __FOLDER__ if pipeline_path is None else pipeline_path 37 | database.create_folder(self.path) 38 | self.base_path=database.create_folder(self.path,self.pipeline_path) 39 | self.path_pipe=database.create_folder(self.base_path,self.pipeline_name) 40 | 41 | self.dag={"nodes":{},"graph":{},"args_map":{},"node_details":{}} 42 | self.status_code={0:"Started",1:"Completed",2:"Queued",3:"Failed"} 43 | self._args_tag="args@" 44 | 45 | def add_node(self,node_name:str,function,input_nodes:list =None,args:dict=None,entry_node:bool=False) -> None: 46 | """_summary_ 47 | 48 | Args: 49 | node_name (str): Name of the node 50 | function (_type_): Python function you want to execute 51 | input_nodes (list, optional): List of nodes that are connected to this node. The connected nodes should return a value which will act as an input to the node . Defaults to None. 52 | entry_node (bool, optional): boolean flag indicating if this is the starting node(first node). Defaults to False. 53 | args (list, optional): Run time arguments . Defaults to None. 54 | 55 | Raises: 56 | ValueError: _description_ 57 | TypeError: _description_ 58 | TypeError: _description_ 59 | TypeError: _description_ 60 | """ 61 | '''Exception Start''' 62 | if entry_node and "root" in self.dag["graph"]: 63 | raise ValueError("Error!!! entry_node is already set. Two nodes cannot be Entry Node in DAG.") 64 | if input_nodes != None and not isinstance(input_nodes,list): 65 | raise TypeError( 66 | f"Error!!! 'input_node' expected to be type:list got {type(input_nodes)}" 67 | ) 68 | if not isinstance(entry_node,bool): 69 | raise TypeError( 70 | f"Error!!! 'entry_node' expected to be type:bool got {type(input_nodes)}" 71 | ) 72 | if not isinstance(node_name,str): 73 | raise TypeError( 74 | f"Error!!! 'node_name' expected to be type:str got {type(input_nodes)}" 75 | ) 76 | 77 | '''Exception End''' 78 | 79 | 80 | node=Node(node_name,function,self.path_pipe) 81 | self.dag["nodes"][node_name]={ 82 | 'filename':node.name_of_file, 83 | 'root_path':self.pipeline_path, 84 | 'sub_path':self.pipeline_name, 85 | "edge_nodes":input_nodes, 86 | "args":args 87 | } 88 | 89 | _arg_names=inspect.getfullargspec(function).args 90 | ## Mapping args to the input node 91 | if args !=None: 92 | for arg_name in _arg_names: 93 | if arg_name in args: 94 | input_nodes.append(f"{self._args_tag}{arg_name}") 95 | _mapper = dict(zip(input_nodes,_arg_names)) if len(_arg_names)!=0 else {} 96 | #print("_mapper:",_mapper) 97 | self.dag["args_map"][node_name]=_mapper 98 | if entry_node: 99 | self.dag["graph"]["root"]=[node_name] 100 | else: 101 | for ipnode in input_nodes: 102 | if ipnode.startswith(self._args_tag): 103 | continue 104 | if ipnode in self.dag["graph"]: 105 | self.dag["graph"][ipnode].append(node_name) 106 | else: 107 | self.dag["graph"][ipnode]=[node_name] 108 | if node_name not in self.dag["graph"]: 109 | self.dag["graph"][node_name]=[] 110 | self.dag["node_details"][node_name]={"status":self.status_code[2],"start_time":"-","end_time":"-","log":""} 111 | 112 | 113 | def register_dag(self): 114 | """_summary_: Registers the pipeline as an Dag Object 115 | """ 116 | path_to_yaml=self.path_pipe 117 | file_name = f"{self.pipeline_name}.yaml" 118 | 119 | data=yamlio.read_yaml(os.path.join(self.base_path,"info.yaml")) 120 | info={ 121 | "pipelinename":self.pipeline_name, 122 | "folder":self.pipeline_path, 123 | "subfolder":self.pipeline_name, 124 | "created_at": datetime.datetime.now(), 125 | "status":"-", 126 | "jobtime":"", 127 | "jobtime":"-" 128 | } 129 | already_exist=False 130 | 131 | for idx,d in enumerate(data): 132 | if d["pipelinename"]==self.pipeline_name: 133 | already_exist=True 134 | exists_idx=idx 135 | 136 | if not already_exist: 137 | data.append(info) 138 | else: 139 | data[idx].update(info) 140 | 141 | 142 | yamlio.write_to_yaml(os.path.join(self.base_path,"info.yaml"), data) 143 | yamlio.write_to_yaml(os.path.join(path_to_yaml,file_name),self.dag) 144 | 145 | def __load__mld_file(self,info:dict)->object: 146 | """_summary_: Load Mould File with all the injected dependencies 147 | 148 | Args: 149 | info (dict): dictinary containing the location of the mould file 150 | 151 | Returns: 152 | object: returns a python object 153 | """ 154 | loader_path=os.path.join(self.path,info["root_path"],info["sub_path"],info['filename']) 155 | return cloudpickle.load(open(loader_path,'rb')) 156 | 157 | def load_pipeline(self): 158 | """_summary_: Load pipeline from specific location 159 | """ 160 | dag=yamlio.read_yaml(os.path.join(self.pipeline_path,self.pipeline_name,f'{self.pipeline_name}.yaml')) 161 | self.dag=dag 162 | 163 | 164 | 165 | 166 | def _get_input_for_func(self,dag_states:dict,node_dict:dict,out_put_nodes:dict)-> dict: 167 | """_summary_: get the inputs for each node 168 | 169 | Args: 170 | dag_states (dict): contains mapped variable : [the is the argument name as defined in the function] 171 | out_put_nodes (dict): contains the previous outputs for the functions that have completed running 172 | node_info (dict): dictinary containing the location of the mould file and input nodes connected to the given node 173 | 174 | Returns: 175 | dict: returns a dictionary for : mapping that can be used in the next node 176 | """ 177 | # sourcery skip: assign-if-exp, reintroduce-else, swap-if-expression 178 | 179 | ##if no args are there 180 | 181 | if not dag_states: return dag_states 182 | input_dict={} 183 | for func_name_,map_name_ in dag_states.items(): 184 | ##if there are any external arguments 185 | if not func_name_.startswith(self._args_tag): 186 | input_dict[map_name_]=out_put_nodes[func_name_] 187 | else: 188 | input_dict[map_name_]=node_dict["args"][map_name_] 189 | return input_dict 190 | 191 | 192 | def __change_status__(self,status:str,node_name:str,log:str=None): 193 | """_summary_: Change Node status 194 | 195 | Args: 196 | status (str): What is the status for the Node 197 | node_name (str): Name of the Node 198 | log (str, optional): Any Log files to be added. Defaults to None. 199 | """ 200 | if status==0: 201 | self.dag["node_details"][node_name] = { 202 | "start_time": str(datetime.datetime.now()), 203 | "log": f"======{self.status_code[status].upper()}======{str(datetime.datetime.now())}\n", 204 | "status":self.status_code[status] 205 | } 206 | elif status in {1, 3}: 207 | self.dag["node_details"][node_name] = { 208 | "end_time": str(datetime.datetime.now()), 209 | "log": f"======{self.status_code[status].upper()}======{str(datetime.datetime.now())}\n", 210 | "status":self.status_code[status] 211 | 212 | } 213 | if log!=None: 214 | self.dag["node_details"][node_name]["log"] += "\n" + log + "======" 215 | 216 | def _check_for_job_status(self,jobname,queue_name): 217 | all_jobs=yamlio.read_yaml(queue_name) 218 | status=[j["status"] for j in all_jobs if j["pipelinename"]==jobname] 219 | return status[0] != "Stopped" 220 | 221 | 222 | def bfs(self, graph:dict, entry_node:str,node_info:dict,dag_states:dict): #function for BFS 223 | """_summary_: Breadth-first search 224 | 225 | Args: 226 | graph (dict): contains DAG structure of the nodes "root" is the starting node {root : [nodeA],nodeA :[nodeB, nodeC]} 227 | entery_node (str): the entry node is the "root" node 228 | node_info (dict): dictinary containing the location of the mould file and input nodes connected to the given node 229 | dag_states (dict): ontains mapped variable : [the is the argument name as defined in the function] 230 | """ 231 | 232 | visited = [entry_node] # List for visited nodes. 233 | queue = [entry_node] 234 | 235 | output_list={} 236 | while queue: # Creating loop to visit each node 237 | m = queue.pop(0) 238 | for neighbour in graph[m]: 239 | if neighbour not in visited: 240 | try: 241 | print("Node-->",neighbour) 242 | function_=self.__load__mld_file(node_info[neighbour]) 243 | 244 | output_list[neighbour]=function_(**self._get_input_for_func(dag_states[neighbour],node_info[neighbour],output_list)) 245 | self.__change_status__(1,neighbour) 246 | except Exception as e: 247 | print(traceback.format_exc()) 248 | self.__change_status__(3,neighbour,traceback.format_exc()) 249 | 250 | visited.append(neighbour) 251 | queue.append(neighbour) 252 | yamlio.write_to_yaml(os.path.join(self.path_pipe,f"{self.pipeline_name}.yaml"),self.dag) 253 | 254 | 255 | 256 | def run(self): 257 | #Initialize a queue 258 | dag=self.dag 259 | self.bfs(dag["graph"],"root",node_info=dag["nodes"],dag_states=dag["args_map"]) 260 | yamlio.write_to_yaml(os.path.join(self.path_pipe,f"{self.pipeline_name}.yaml"),self.dag) 261 | 262 | def run_serialized(self,flag_variable_path,job_name): 263 | #Initialize a queue 264 | dag=self.dag 265 | print("node", self._check_for_job_status(job_name,flag_variable_path)) 266 | if not self._check_for_job_status(job_name,flag_variable_path): 267 | sys.exit() 268 | 269 | #RESET status 270 | for node_name in dag["node_details"]: 271 | self.dag["node_details"][node_name]={"status":self.status_code[2],"start_time":"-","end_time":"-","log":""} 272 | yamlio.write_to_yaml(os.path.join(self.path_pipe,f"{self.pipeline_name}.yaml"),self.dag) 273 | 274 | 275 | # After RUn complete write status code 276 | self.bfs(dag["graph"],"root",node_info=dag["nodes"],dag_states=dag["args_map"]) 277 | 278 | 279 | def __get_dag__(self): 280 | return self.dag -------------------------------------------------------------------------------- /pymlpipeUI.py: -------------------------------------------------------------------------------- 1 | import flask 2 | import os 3 | from pymlpipe.utils import yamlio 4 | from pymlpipe.utils import uiutils 5 | from pymlpipe.utils import change2graph 6 | from pymlpipe.utils import database 7 | 8 | from flask_api import FlaskAPI 9 | import numpy as np 10 | import json 11 | import uuid 12 | from datetime import datetime 13 | 14 | import pandas as pd 15 | 16 | app = FlaskAPI(__name__) 17 | 18 | 19 | BASE_DIR=os.getcwd() 20 | MODEL_FOLDER_NAME="modelrun" 21 | PIPELINE_FOLDER_NAME="ML_pipelines" 22 | MODEL_DIR=os.path.join(BASE_DIR,MODEL_FOLDER_NAME) 23 | PIPELINE_DIR=os.path.join(BASE_DIR,PIPELINE_FOLDER_NAME) 24 | 25 | EXPERIMENT_FILE="experiment.yaml" 26 | DEPLOYMENT_FILE="deployment.yaml" 27 | QUEUE_NAME="queue.yaml" 28 | 29 | #ALL_DEPLOYED_MODELS=[] 30 | PREDICTORS={} 31 | app.secret_key="PYMLPIPE_SEC_KEY" 32 | 33 | 34 | @app.route("/") 35 | def index(): 36 | ''' 37 | if "status" in flask.request.args: 38 | 39 | if flask.request.args["status"]=="501": 40 | deploy_status=False 41 | 42 | ''' 43 | metric_filters={} 44 | tag_filters=[] 45 | if len(flask.request.args): 46 | if "metrics" in flask.request.args: 47 | metric_filters[flask.request.args['metrics']]=flask.request.args["metricsfilter"] 48 | elif "tags" in flask.request.args: 49 | tag_filters=flask.request.args["tags"].split(",") 50 | 51 | experiment_lists=yamlio.read_yaml(os.path.join(MODEL_DIR,EXPERIMENT_FILE)) 52 | if len(experiment_lists)==0: 53 | return flask.render_template("index.html", 54 | runs=[], 55 | run_details={}, 56 | metrics=[], 57 | current_experiment=None 58 | ) 59 | info={} 60 | metrics=[] 61 | exp_wise_metrics={} 62 | tags=[] 63 | error="" 64 | for experiment,run_data in experiment_lists.items(): 65 | exp_wise_metrics[experiment]=[] 66 | for run_id in run_data["runs"]: 67 | print(run_data['experiment_path'],run_id,"info.yaml") 68 | run_folder=os.path.join(run_data['experiment_path'],run_id,"info.yaml") 69 | run_details=yamlio.read_yaml(run_folder) 70 | info[run_id]=run_details 71 | if 'tags' in run_details: 72 | tags.extend(run_details["tags"]) 73 | if "metrics" in run_details: 74 | metrics.extend(list(run_details["metrics"].keys())) 75 | mm=[i for i in list(run_details["metrics"].keys()) if i not in exp_wise_metrics[experiment]] 76 | exp_wise_metrics[experiment].extend(mm) 77 | 78 | #filter emmpty runs: 79 | info={run:info[run] for run in info if len(info[run])>0} 80 | 81 | 82 | if len(metric_filters)>0: 83 | newinfo={} 84 | for run_id,details in info.items(): 85 | 86 | for mfilter in metric_filters: 87 | if mfilter in details["metrics"]: 88 | fv=details["metrics"][mfilter] 89 | try: 90 | if eval(str(fv)+metric_filters[mfilter]): 91 | newinfo[run_id]=details 92 | except Exception as e: 93 | error=e 94 | else: 95 | newinfo[run_id]=details 96 | info=newinfo 97 | elif len(tag_filters)>0: 98 | newinfo={} 99 | for run_id,details in info.items(): 100 | if len(set(tag_filters).intersection(set(details["tags"])))>0: 101 | newinfo[run_id]=details 102 | info=newinfo 103 | 104 | exp_names=list(experiment_lists.keys()) 105 | 106 | 107 | return flask.render_template("index.html", 108 | runs=experiment_lists, 109 | run_details=info, 110 | metrics=list(set(metrics)), 111 | current_experiment=exp_names, 112 | tags=list(set(tags)), 113 | exp_wise_metrics=exp_wise_metrics, 114 | error=error 115 | ) 116 | @app.route("/run//") 117 | def runpage(run_id): 118 | deploy_status=True 119 | if "status" in flask.request.args: 120 | 121 | if flask.request.args["status"]=="501": 122 | deploy_status=False 123 | 124 | experiments,run_id=run_id.split("@") 125 | experiment_lists=yamlio.read_yaml(os.path.join(MODEL_DIR,EXPERIMENT_FILE)) 126 | run_details=yamlio.read_yaml(os.path.join(MODEL_DIR,experiments,run_id,'info.yaml')) 127 | 128 | model_type="" 129 | metrics_log={} 130 | metrics_log_plot={} 131 | graph_dict={} 132 | expertiment_details={ 133 | "RUN_ID":run_id, 134 | "EXPERIMENT NAME":experiments, 135 | "EXECUTION DATE TIME":run_details["execution_time"] 136 | } 137 | if 'tags' in run_details: 138 | expertiment_details["TAGS"]=run_details['tags'] 139 | else: 140 | expertiment_details["TAGS"]="-" 141 | if 'version' in run_details: 142 | expertiment_details["VERSION"]=run_details['version'] 143 | else: 144 | expertiment_details["VERSION"]="-" 145 | 146 | if "metrics_log" in run_details and len(run_details["metrics_log"])>0: 147 | metrics_log["data"]=run_details["metrics_log"] 148 | metrics_log["cols"]=list(run_details["metrics_log"][0].keys()) 149 | last_key=None 150 | for m in metrics_log["data"]: 151 | for k,v in m.items(): 152 | if k in metrics_log_plot: 153 | metrics_log_plot[k].append(v) 154 | else: 155 | metrics_log_plot[k]=[v] 156 | last_key=k 157 | 158 | metrics_log_plot["range"]=list(range(len(metrics_log_plot[last_key]))) 159 | 160 | 161 | if "model" in run_details and "model_type" in run_details["model"]: 162 | model_type=run_details["model"]["model_type"] 163 | #print(run_details["model"]["model_ops"]) 164 | 165 | if "model_ops" in run_details["model"]: 166 | graph_dict=change2graph.makegraph(run_details["model"]["model_ops"],run_details["model"]["model_architecture"]) 167 | XAI="" 168 | if "XAI" in run_details: 169 | XAI_temp=run_details["XAI"] 170 | XAI_feature_map=pd.read_csv(XAI_temp["feature_explainer"]) 171 | XAI_feature_map=XAI_feature_map.round(3) 172 | print(XAI_temp) 173 | XAI={ 174 | "table":{ 175 | "columns":XAI_feature_map.columns, 176 | "rows":XAI_feature_map.values 177 | }, 178 | "image": flask.Markup(open(XAI_temp["shap"]).read()) if XAI_temp["shap"]!="" else "" 179 | } 180 | #print(XAI_feature_map.values) 181 | return flask.render_template('run.html', 182 | run_id=run_id, 183 | experiments=experiments, 184 | expertiment_details=expertiment_details, 185 | artifact_details=run_details["artifact"], 186 | metrics_details=run_details["metrics"], 187 | model_details=run_details["model"], 188 | param_details=run_details["params"], 189 | schema_details=run_details["artifact_schema"], 190 | is_deployed=True if "model_path" in run_details["model"] else False, 191 | deploy_status=deploy_status, 192 | metrics_log=metrics_log, 193 | metrics_log_plot=metrics_log_plot, 194 | model_type=model_type, 195 | graph_dict=graph_dict, 196 | XAI=XAI 197 | ) 198 | @app.route("/download_artifact/") 199 | def download_artifact(uid): 200 | experiments,run_id,filename=uid.split("@") 201 | #run_details=yamlio.read_yaml(os.path.join(MODEL_DIR,experiments,run_id,'info.yaml')) 202 | return flask.send_from_directory(os.path.join(MODEL_DIR,experiments,run_id,"artifacts"), filename,as_attachment=True) 203 | 204 | @app.route("/download_model/") 205 | def download_model(uid): 206 | experiments,run_id,filename,model_type=uid.split("@") 207 | if model_type=="scikit-learn": 208 | filename=filename+".pkl" 209 | elif model_type=="torch": 210 | filename=filename+".pt" 211 | #run_details=yamlio.read_yaml(os.path.join(MODEL_DIR,experiments,run_id,'info.yaml')) 212 | return flask.send_from_directory(os.path.join(MODEL_DIR,experiments,run_id,"models"), filename,as_attachment=True) 213 | 214 | @app.route("/deployments//") 215 | def deployments(run_id): 216 | 217 | experiments,runid=run_id.split("@") 218 | run_details=yamlio.read_yaml(os.path.join(MODEL_DIR,experiments,runid,'info.yaml')) 219 | deployed=uiutils.deployment_handler(run_details["model"]["model_path"], 220 | run_details["model"]["model_type"], 221 | run_details["model"]["model_mode"]) 222 | run_hash= str(uuid.uuid3(uuid.NAMESPACE_DNS, run_id)).replace("-", "")[:16] 223 | if run_hash not in PREDICTORS: 224 | PREDICTORS[run_hash]=deployed 225 | ALL_DEPLOYED_MODELS=yamlio.read_yaml(os.path.join(MODEL_DIR,DEPLOYMENT_FILE)) 226 | ALL_DEPLOYED_MODELS.append( 227 | { 228 | "run_id":runid, 229 | "experiment_id":experiments, 230 | "model_path":run_details["model"]["model_path"], 231 | "model_type":run_details["model"]["model_type"], 232 | "model_deployment_number": run_hash, 233 | "model_url":"/predict/"+run_hash, 234 | "status":'running', 235 | "model_mode": run_details["model"]["model_mode"] 236 | } 237 | ) 238 | yamlio.write_to_yaml(os.path.join(MODEL_DIR,DEPLOYMENT_FILE),ALL_DEPLOYED_MODELS) 239 | return flask.redirect(flask.url_for("show_deployments")) 240 | return flask.redirect("/run/"+run_id+"?status=501") 241 | 242 | @app.route("/show_deployments/") 243 | def show_deployments(): 244 | ALL_DEPLOYED_MODELS=yamlio.read_yaml(os.path.join(MODEL_DIR,DEPLOYMENT_FILE)) 245 | return flask.render_template('deployments.html', 246 | ALL_DEPLOYED_MODELS=ALL_DEPLOYED_MODELS 247 | ) 248 | 249 | 250 | @app.route("/predict/",methods=["GET","POST"]) 251 | def predict(hashno): 252 | 253 | ALL_DEPLOYED_MODELS=yamlio.read_yaml(os.path.join(MODEL_DIR,DEPLOYMENT_FILE)) 254 | info_dict={} 255 | model_type=None 256 | for model in ALL_DEPLOYED_MODELS: 257 | if model["model_deployment_number"]==hashno and model["status"]=="running": 258 | del model['model_path'] 259 | info_dict=model 260 | model_type=model["model_type"] 261 | break 262 | 263 | if len(info_dict)==0 or hashno not in PREDICTORS: 264 | return {"info":{ 265 | "error":404, 266 | "msg":"No such endpoint present" 267 | } 268 | } 269 | if flask.request.method=="POST": 270 | 271 | data=flask.request.data 272 | dtype=None 273 | if "dtype" in data: 274 | dtype=data["dtype"] 275 | predictions,status=PREDICTORS[hashno].predict(np.array(data['data']),dtype) 276 | if status==1: 277 | return { 278 | "deployment no":hashno, 279 | "error": predictions 280 | } 281 | return { 282 | "deployment no":hashno, 283 | "predictions":[float(p) for p in predictions] 284 | } 285 | if model_type=="scikit-learn": 286 | return { 287 | "info":info_dict, 288 | "request_body":{ 289 | "data":[ 290 | [ 291 | 5.6, 292 | 3.0, 293 | 4.5, 294 | 1.5 295 | ], 296 | [ 297 | 5.6, 298 | 3.0, 299 | 4.5, 300 | 1.5 301 | ] 302 | ] 303 | }} 304 | else: 305 | return { 306 | "info":info_dict, 307 | "request_body":{ 308 | "data": [ 309 | [ 42.0, 310 | 120.0, 311 | 1.0, 312 | 0.0, 313 | 0.0, 314 | 0.0, 315 | 185.7, 316 | 133.0, 317 | 31.57, 318 | 235.1, 319 | 149.0, 320 | 19.98, 321 | 256.4, 322 | 78.0, 323 | 11.54, 324 | 16.9, 325 | 6.0, 326 | 4.56, 327 | 0.0 328 | ] 329 | ], 330 | "dtype": "float" 331 | } 332 | 333 | } 334 | @app.route("/deployment/stop/",methods=["GET"]) 335 | def stop_deployment(deployment_no): 336 | global PREDICTORS 337 | ALL_DEPLOYED_MODELS=yamlio.read_yaml(os.path.join(MODEL_DIR,DEPLOYMENT_FILE)) 338 | for idx,d in enumerate(ALL_DEPLOYED_MODELS): 339 | 340 | if d['model_deployment_number']==deployment_no: 341 | print("here here") 342 | ALL_DEPLOYED_MODELS[idx]['status']="stopped" 343 | yamlio.write_to_yaml(os.path.join(MODEL_DIR,DEPLOYMENT_FILE),ALL_DEPLOYED_MODELS) 344 | PREDICTORS={i:j for i,j in PREDICTORS.items() if i!=deployment_no} 345 | return {"status":200} 346 | 347 | 348 | @app.route("/deployment/start/",methods=["GET"]) 349 | def start_deployment(deployment_no): 350 | global PREDICTORS 351 | ALL_DEPLOYED_MODELS=yamlio.read_yaml(os.path.join(MODEL_DIR,DEPLOYMENT_FILE)) 352 | for idx,d in enumerate(ALL_DEPLOYED_MODELS): 353 | 354 | if d['model_deployment_number']==deployment_no: 355 | 356 | ALL_DEPLOYED_MODELS[idx]['status']="running" 357 | model_type=ALL_DEPLOYED_MODELS[idx]["model_type"] 358 | 359 | PREDICTORS[deployment_no]=uiutils.deployment_handler(d["model_path"], model_type, d["model_mode"]) 360 | yamlio.write_to_yaml(os.path.join(MODEL_DIR,DEPLOYMENT_FILE),ALL_DEPLOYED_MODELS) 361 | 362 | return {"status":200} 363 | 364 | 365 | @app.route("/jobs/") 366 | def jobs(): 367 | all_pipelines=yamlio.read_yaml(os.path.join(PIPELINE_DIR,"info.yaml")) 368 | 369 | return flask.render_template("jobs.html", 370 | pipeline=all_pipelines 371 | ) 372 | 373 | @app.route("/jobs/run/") 374 | def runjobs(runid): 375 | #all_pipelines=yamlio.read_yaml(os.path.join(PIPELINE_DIR,QUEUE_NAME)) 376 | all_pipelines=yamlio.read_yaml(os.path.join(PIPELINE_DIR,"info.yaml")) 377 | ''' 378 | all_pipelines.append({ 379 | "pipelinename":runid, 380 | "datetime": datetime.now(), 381 | "status":"Queued", 382 | "ops":{} 383 | }) 384 | ''' 385 | for idx,p in enumerate(all_pipelines): 386 | if p["pipelinename"]==runid: 387 | if all_pipelines[idx]["status"]=="Started": 388 | all_pipelines[idx]["status"]="Stopped" 389 | all_pipelines[idx]["jobtime"]=datetime.now() 390 | else: 391 | all_pipelines[idx]["status"]="Queued" 392 | all_pipelines[idx]["jobtime"]=datetime.now() 393 | 394 | 395 | 396 | yamlio.write_to_yaml(os.path.join(PIPELINE_DIR,"info.yaml"),all_pipelines) 397 | return flask.redirect(flask.url_for("jobs")) 398 | 399 | @app.route("/jobs/view/") 400 | def viewjobs(runid): 401 | #all_pipelines=yamlio.read_yaml(os.path.join(PIPELINE_DIR,QUEUE_NAME)) 402 | all_pipelines=yamlio.read_yaml(os.path.join(PIPELINE_DIR,runid,runid+".yaml")) 403 | 404 | grapg_dict=change2graph.makegraph_pipeline(all_pipelines["graph"],all_pipelines["node_details"]) 405 | nodes_logs={k:all_pipelines["node_details"][k]["log"] for k in all_pipelines["node_details"]} 406 | #nodes_logs={} 407 | return flask.render_template("job_view.html", 408 | pipelinename=runid, 409 | grapg_dict=grapg_dict, 410 | nodes=nodes_logs, 411 | initital_node=nodes_logs[list(nodes_logs.keys())[0]] 412 | ) 413 | 414 | 415 | 416 | def start_ui(host=None,port=None,debug=False): 417 | '''Implemet logic for try catch''' 418 | 419 | ALL_DEPLOYED_MODELS=yamlio.read_yaml(os.path.join(MODEL_DIR,DEPLOYMENT_FILE)) 420 | for i in ALL_DEPLOYED_MODELS: 421 | model_type=i["model_type"] 422 | 423 | deployed=uiutils.deployment_handler(i["model_path"], model_type, i["model_mode"]) 424 | PREDICTORS[i['model_deployment_number']]=deployed 425 | if host==None and port==None: 426 | app.run(debug=debug) 427 | elif host==None: 428 | app.run(port=port,debug=debug) 429 | elif port==None: 430 | app.run(host=host,debug=debug) 431 | else: 432 | app.run(host=host,port=port,debug=debug) 433 | 434 | 435 | 436 | if __name__ == '__main__': 437 | app.run() -------------------------------------------------------------------------------- /queue.py: -------------------------------------------------------------------------------- 1 | from pymlpipe.utils import yamlio 2 | import os 3 | import time 4 | import pymlpipe.pipeline as pipeline 5 | 6 | BASE_DIR=os.getcwd() 7 | queue_store="ML_pipelines" 8 | queue_name="info.yaml" 9 | def execute_from_queue(name,path): 10 | print(f"Start execution :{name}") 11 | ppl=pipeline.PipeLine(name) 12 | ppl.load_pipeline() 13 | ppl.run_serialized(flag_variable_path=path,job_name=name) 14 | print("End execution :") 15 | 16 | 17 | 18 | def change_status(queue,status,job_id=None): 19 | for idx,job in enumerate(queue): 20 | if job["status"] == "Queued" and job_id is None: 21 | queue[idx]["status"]=status 22 | return job["pipelinename"],queue 23 | elif job["status"] == "Started" and job_id is None: 24 | return None,queue 25 | elif job["status"] == "Started": 26 | if job["pipelinename"]==job_id: 27 | queue[idx]["status"]=status 28 | return job["pipelinename"],queue 29 | 30 | return None,queue 31 | 32 | 33 | def start_server(check_in:int=5): 34 | if not isinstance(check_in,int): 35 | raise ValueError( 36 | f"ERROR!!! 'check_in' should be in sec [int] found {type(check_in)}" 37 | ) 38 | 39 | while True: 40 | print('-- START--') 41 | queue=yamlio.read_yaml(os.path.join(BASE_DIR,queue_store,queue_name)) 42 | job_name,queue=change_status(queue,"Started") 43 | yamlio.write_to_yaml(os.path.join(BASE_DIR,queue_store,queue_name),queue) 44 | if job_name!=None: 45 | execute_from_queue(job_name,path=os.path.join(BASE_DIR,queue_store,queue_name)) 46 | job_name,queue=change_status(queue,"Completed",job_name) 47 | yamlio.write_to_yaml(os.path.join(BASE_DIR,queue_store,queue_name),queue) 48 | time.sleep(5) 49 | print('-- END--') -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | atomicwrites==1.4.0 2 | attrs==21.4.0 3 | catboost==1.1.1 4 | click==8.1.3 5 | cloudpickle==2.2.1 6 | contourpy==1.0.7 7 | cycler==0.11.0 8 | Flask==2.1.2 9 | Flask-API==3.0.post1 10 | fonttools==4.38.0 11 | graphviz==0.20.1 12 | importlib-metadata==4.12.0 13 | itsdangerous==2.1.2 14 | Jinja2==3.1.2 15 | joblib==1.1.0 16 | kiwisolver==1.4.4 17 | lightgbm==3.3.5 18 | llvmlite==0.39.1 19 | MarkupSafe==2.1.1 20 | matplotlib==3.6.3 21 | more-itertools==8.13.0 22 | numba==0.56.4 23 | numpy==1.23.0 24 | packaging==21.3 25 | pandas==1.4.3 26 | Pillow==9.4.0 27 | plotly==5.13.0 28 | pluggy==0.13.1 29 | py==1.11.0 30 | pyparsing==3.0.9 31 | pytest==5.2.0 32 | python-dateutil==2.8.2 33 | pytz==2022.1 34 | PyYAML==6.0 35 | scikit-learn==1.1.1 36 | scipy==1.8.1 37 | shap==0.41.0 38 | six==1.16.0 39 | sklearn==0.0 40 | slicer==0.0.7 41 | tenacity==8.2.1 42 | threadpoolctl==3.1.0 43 | torch==1.12.0 44 | tqdm==4.64.1 45 | typing-extensions==4.3.0 46 | wcwidth==0.2.5 47 | Werkzeug==2.1.2 48 | xgboost==1.7.3 49 | zipp==3.8.0 50 | -------------------------------------------------------------------------------- /samples/runner_pipeline_server.py: -------------------------------------------------------------------------------- 1 | from pymlpipe import queue 2 | 3 | queue.start_server() -------------------------------------------------------------------------------- /samples/runner_pymlpipeUI.py: -------------------------------------------------------------------------------- 1 | from pymlpipe.pymlpipeUI import start_ui 2 | 3 | 4 | start_ui(host='0.0.0.0', port=8085,debug=True) 5 | -------------------------------------------------------------------------------- /samples/test_MLpipeline.py: -------------------------------------------------------------------------------- 1 | from pymlpipe import pipeline 2 | import pandas as pd 3 | from sklearn.datasets import load_iris 4 | import pandas as pd 5 | from sklearn.model_selection import train_test_split 6 | from pymlpipe.tabular import PyMLPipe 7 | from sklearn.linear_model import LogisticRegression 8 | from sklearn.ensemble import RandomForestClassifier 9 | from sklearn.tree import DecisionTreeClassifier 10 | from xgboost import XGBClassifier 11 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score 12 | import time 13 | 14 | ppl=pipeline.PipeLine("IrisData") 15 | mlp=PyMLPipe() 16 | mlp.set_experiment("pipelinecheck") 17 | mlp.set_version(0.1) 18 | 19 | def get_data(): 20 | iris_data=load_iris() 21 | data=iris_data["data"] 22 | target=iris_data["target"] 23 | df=pd.DataFrame(data,columns=iris_data["feature_names"]) 24 | #df["target"]=target 25 | trainx,testx,trainy,testy=train_test_split(df,target) 26 | 27 | return {"trainx":trainx,"trainy":trainy,"testx":testx,"testy":testy} 28 | 29 | def get_model(model): 30 | if model==0: 31 | return LogisticRegression() 32 | elif model==1: 33 | return RandomForestClassifier() 34 | 35 | def train_model(data,model_name): 36 | with mlp.run(): 37 | trainx,trainy=data["trainx"],data["trainy"] 38 | mlp.set_tags(["Classification","test run","logisticRegression"]) 39 | model=get_model(model_name) 40 | model.fit(trainx, trainy) 41 | 42 | mlp.scikit_learn.register_model(str(model_name), model) 43 | 44 | #print(model) 45 | #model.fit(trainx, trainy) 46 | time.sleep(60) 47 | return model 48 | 49 | def evaluate(data,model): 50 | testx,testy=data["testx"],data["testy"] 51 | print(model.predict(testx)) 52 | 53 | 54 | n1=ppl.add_node("data", get_data,entry_node=True) 55 | for idx,model in enumerate([0,1]): 56 | ppl.add_node( 57 | f"model_train{str(idx)}", 58 | train_model, 59 | input_nodes=["data"], 60 | args={"model_name":model}, 61 | ) 62 | ppl.add_node( 63 | f"eval_train{str(idx)}", 64 | evaluate, 65 | input_nodes=["data", f"model_train{str(idx)}"], 66 | ) 67 | 68 | #ppl.add_edge(n1, n2) 69 | #ppl.add_edge(n2, n3) 70 | 71 | #n1>>[n2,n3] 72 | ppl.register_dag() 73 | #ppl.run() 74 | 75 | -------------------------------------------------------------------------------- /samples/test_api.py: -------------------------------------------------------------------------------- 1 | from pymlpipe.api import Client 2 | 3 | ml_connect=Client() 4 | print(ml_connect.get_all_experiments()) 5 | print(ml_connect.get_all_run_ids("IrisAutoML")) 6 | #print(ml_connect.get_run_details("Pytorch","01d9d974-284c-4775-95bc-792491267d05")) 7 | #print(ml_connect.get_all_run_details("IrisAutoML")) 8 | #print(ml_connect.get_metrics_comparison("Pytorch",format="pandas",sort_by="f1")) 9 | print(ml_connect.get_model_details("IrisAutoML","680f5dcf-e207-4cb5-adb9-cc6d7fbb8b16",format="pandas")) 10 | -------------------------------------------------------------------------------- /samples/test_automl_run.py: -------------------------------------------------------------------------------- 1 | from pymlpipe.automl import AutoMLPipe 2 | from sklearn.datasets import load_iris 3 | import pandas as pd 4 | import numpy as np 5 | 6 | def main(): 7 | 8 | iris_data=load_iris() 9 | data=iris_data["data"] 10 | target=iris_data["target"] 11 | 12 | df=pd.DataFrame(data,columns=iris_data["feature_names"]) 13 | automl_obj=AutoMLPipe("IrisAutoML","classification", 14 | "precision", 15 | df, 16 | target, 17 | tags=["new_data","clf"], 18 | transform=True, 19 | scale='normalize', 20 | register_model=True, 21 | version=1.0, 22 | exclude=['log_reg',"lgbmc"]) 23 | preds,result=automl_obj.run_automl(tune=False,tune_best=False) 24 | 25 | #DataFrame with comparative metrics of all the models 26 | print(result) 27 | #Dictionary with model names and the predictions 28 | print(preds) 29 | 30 | if __name__ == '__main__': 31 | main() 32 | 33 | -------------------------------------------------------------------------------- /samples/test_cases_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/samples/test_cases_pipeline.py -------------------------------------------------------------------------------- /samples/test_create_pipeline.py: -------------------------------------------------------------------------------- 1 | #from pymlpipe import pipeline 2 | from pymlpipe import pipeline 3 | import pandas as pd 4 | 5 | #ppl=pipeline.Pipeline("PIPELINEV2") 6 | 7 | ppl=pipeline.PipeLine("PIPELINEV2") 8 | def node1(): 9 | path="train.csv" 10 | df=pd.read_csv(path) 11 | return df 12 | def node2(df): 13 | stats=df.describe() 14 | stats.columns=[col+"_node2" for col in stats.columns] 15 | return stats 16 | def node3(df): 17 | stats=df.describe() 18 | stats.columns=[col+"_node3" for col in stats.columns] 19 | return stats 20 | 21 | def node4(node1_df,node2_df): 22 | print(node1_df.append(node2_df)) 23 | def node5(node1_df,node2_df,node3df): 24 | print(node1_df.append(node2_df)) 25 | 26 | ppl.add_node("node1", node1,entry_node=True) 27 | ppl.add_node("node2", node3,input_nodes=["node1"]) 28 | ppl.add_node("node3", node2,input_nodes=["node1"]) 29 | ppl.add_node("node5", node2,input_nodes=["node1"]) 30 | ppl.add_node("node6", node2,input_nodes=["node1"]) 31 | ppl.add_node("node4", node4,input_nodes=["node2","node3"]) 32 | ppl.add_node("node7", node5,input_nodes=["node5","node6"]) 33 | 34 | 35 | 36 | 37 | #n1>>[n2,n3] 38 | ppl.register_dag() 39 | ppl.run() -------------------------------------------------------------------------------- /samples/test_dl_torch_train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pandas as pd 3 | from sklearn.preprocessing import LabelEncoder 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import accuracy_score,f1_score 6 | from pymlpipe.tabular import PyMLPipe 7 | df=pd.read_csv("train.csv") 8 | encoders=["area_code","state","international_plan","voice_mail_plan","churn"] 9 | 10 | for i in encoders: 11 | le=LabelEncoder() 12 | df[i]=le.fit_transform(df[i]) 13 | 14 | 15 | trainy=df["churn"] 16 | trainx=df[['state', 'account_length', 'area_code', 'international_plan', 17 | 'voice_mail_plan', 'number_vmail_messages', 'total_day_minutes', 18 | 'total_day_calls', 'total_day_charge', 'total_eve_minutes', 19 | 'total_eve_calls', 'total_eve_charge', 'total_night_minutes', 20 | 'total_night_calls', 'total_night_charge', 'total_intl_minutes', 21 | 'total_intl_calls', 'total_intl_charge', 22 | 'number_customer_service_calls']] 23 | 24 | 25 | class Model(torch.nn.Module): 26 | def __init__(self,col_size): 27 | super().__init__() 28 | self.seq=torch.nn.Sequential( 29 | torch.nn.Linear(col_size,15), 30 | torch.nn.ReLU(), 31 | torch.nn.Linear(15,10), 32 | torch.nn.ReLU(), 33 | torch.nn.Linear(10,1) 34 | ) 35 | ''' 36 | self.linear_layer_1= 37 | self.relu_1=torch.nn.ReLU() 38 | self.linear_layer_2=torch.nn.Linear(15,10) 39 | self.relu_2=torch.nn.ReLU() 40 | self.linear_layer_3=torch.nn.Linear(10,1) 41 | self.linear_layer_4=torch.nn.Linear(10,1) 42 | ''' 43 | 44 | 45 | def forward(self,x): 46 | out=self.seq(x) 47 | 48 | return torch.sigmoid(out) 49 | 50 | model=Model(len(trainx.columns)) 51 | 52 | train_x,test_x,train_y,test_y=train_test_split(trainx,trainy) 53 | 54 | train_x=torch.from_numpy(train_x.values) 55 | train_x=train_x.type(torch.FloatTensor) 56 | train_y=torch.from_numpy(train_y.values) 57 | train_y=train_y.type(torch.FloatTensor) 58 | 59 | test_x=torch.from_numpy(test_x.values) 60 | test_x=test_x.type(torch.FloatTensor) 61 | test_y=torch.from_numpy(test_y.values) 62 | test_y=test_y.type(torch.FloatTensor) 63 | 64 | 65 | optimizer=torch.optim.SGD(model.parameters(),lr=0.001) 66 | 67 | criterion=torch.nn.BCELoss() 68 | 69 | 70 | def validate(model,testx,testy): 71 | prediction=model(testx) 72 | prediction=torch.where(prediction>.5,1,0) 73 | accu=accuracy_score(prediction.detach().numpy(),test_y.unsqueeze(1).detach().numpy()) 74 | f1=f1_score(prediction.detach().numpy(),test_y.unsqueeze(1).detach().numpy()) 75 | return {"accuracy":accu,"f1":f1} 76 | 77 | 78 | epochs=100 79 | batch_size=1000 80 | 81 | mlp=PyMLPipe() 82 | mlp.set_experiment("Pytorch") 83 | mlp.set_version(0.2) 84 | 85 | with mlp.run(): 86 | mlp.register_artifact("churndata.csv",df) 87 | mlp.log_params({ 88 | "lr":0.01, 89 | "optimizer":"SGD", 90 | "loss_fuction":"BCEloss" 91 | }) 92 | for epoch in range(epochs): 93 | loss_batch=0 94 | for batch in range(1000,5000,1000): 95 | optimizer.zero_grad() 96 | train_data=train_x[batch-1000:batch] 97 | output=model(train_data) 98 | loss=criterion(output,train_y[batch-1000:batch].unsqueeze(1)) 99 | loss.backward() 100 | optimizer.step() 101 | loss_batch+=loss.item() 102 | 103 | metrics=validate(model,test_x,test_y) 104 | metrics["loss"]=loss_batch 105 | metrics["epoch"]=epoch 106 | mlp.log_metrics_continious(metrics) 107 | mlp.pytorch.register_model("pytorch_example1", model) 108 | -------------------------------------------------------------------------------- /samples/test_mltrain.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_iris 2 | import pandas as pd 3 | from sklearn.model_selection import train_test_split 4 | from pymlpipe.tabular import PyMLPipe 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier 7 | from sklearn.tree import DecisionTreeClassifier 8 | from xgboost import XGBClassifier 9 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score 10 | 11 | 12 | 13 | mlp=PyMLPipe() 14 | mlp.set_experiment("IrisDataV2") 15 | mlp.set_version(0.1) 16 | 17 | iris_data=load_iris() 18 | data=iris_data["data"] 19 | target=iris_data["target"] 20 | df=pd.DataFrame(data,columns=iris_data["feature_names"]) 21 | #df["target"]=target 22 | trainx,testx,trainy,testy=train_test_split(df,target) 23 | 24 | with mlp.run(): 25 | mlp.set_tags(["Classification","test run","logisticRegression"]) 26 | model=LogisticRegression() 27 | model.fit(trainx, trainy) 28 | predictions=model.predict(testx) 29 | mlp.log_metrics({"Accuracy":accuracy_score(testy,predictions), 30 | "Precision": precision_score(testy,predictions,average='macro'), 31 | "Recall": recall_score(testy,predictions,average='macro'), 32 | "F1": f1_score(testy,predictions,average='macro') 33 | }) 34 | mlp.register_artifact("train.csv", trainx) 35 | mlp.register_artifact("test.csv", testx,artifact_type="testing") 36 | mlp.scikit_learn.register_model("logistic regression", model) 37 | mlp.explainer(model,trainx) 38 | 39 | 40 | 41 | 42 | 43 | 44 | with mlp.run(): 45 | mlp.set_tags(["Classification","test run","dtree"]) 46 | model=DecisionTreeClassifier() 47 | model.fit(trainx, trainy) 48 | predictions=model.predict(testx) 49 | 50 | mlp.log_metrics({"Accuracy":accuracy_score(testy,predictions),"Precision": precision_score(testy,predictions,average='macro')}) 51 | 52 | mlp.log_metric("Recall", recall_score(testy,predictions,average='macro')) 53 | mlp.log_metric("F1", f1_score(testy,predictions,average='macro')) 54 | 55 | #mlp.log_metrics({"r2":0.1,"mse":1.1}) 56 | mlp.register_artifact("train.csv", trainx) 57 | mlp.register_artifact("test.csv", testx,artifact_type="testing") 58 | mlp.scikit_learn.register_model("dtree", model) 59 | mlp.explainer(model,trainx) 60 | 61 | with mlp.run(): 62 | mlp.set_tags(["Classification","test run","rf"]) 63 | model=RandomForestClassifier() 64 | model.fit(trainx, trainy) 65 | predictions=model.predict(testx) 66 | 67 | mlp.log_metric("Accuracy", accuracy_score(testy,predictions)) 68 | mlp.log_metric("Precision", precision_score(testy,predictions,average='macro')) 69 | mlp.log_metric("Recall", recall_score(testy,predictions,average='macro')) 70 | mlp.log_metric("F1", f1_score(testy,predictions,average='macro')) 71 | mlp.register_artifact("train.csv", trainx,) 72 | mlp.register_artifact("test.csv", testx,artifact_type="testing") 73 | mlp.scikit_learn.register_model("randomForest", model) 74 | mlp.explainer(model,trainx) 75 | 76 | with mlp.run(): 77 | mlp.set_tags(["Classification","test run","xgb"]) 78 | model=XGBClassifier() 79 | model.fit(trainx, trainy) 80 | predictions=model.predict(testx) 81 | 82 | mlp.log_metric("Accuracy", accuracy_score(testy,predictions)) 83 | mlp.log_metric("Precision", precision_score(testy,predictions,average='macro')) 84 | mlp.log_metric("Recall", recall_score(testy,predictions,average='macro')) 85 | mlp.log_metric("F1", f1_score(testy,predictions,average='macro')) 86 | mlp.register_artifact("train.csv", trainx) 87 | mlp.register_artifact("test.csv", testx,artifact_type="testing") 88 | mlp.scikit_learn.register_model("xgboost", model) 89 | mlp.explainer(model,trainx) 90 | 91 | with mlp.run(): 92 | mlp.set_tags(["Classification","test run","xgb"]) 93 | model=AdaBoostClassifier() 94 | model.fit(trainx, trainy) 95 | predictions=model.predict(testx) 96 | 97 | mlp.log_metric("Accuracy", accuracy_score(testy,predictions)) 98 | mlp.log_metric("Precision", precision_score(testy,predictions,average='macro')) 99 | mlp.log_metric("Recall", recall_score(testy,predictions,average='macro')) 100 | mlp.log_metric("F1", f1_score(testy,predictions,average='macro')) 101 | mlp.register_artifact("train.csv", trainx) 102 | mlp.register_artifact("test.csv", testx,artifact_type="testing") 103 | mlp.scikit_learn.register_model("adaboost", model) 104 | mlp.explainer(model,trainx) 105 | 106 | -------------------------------------------------------------------------------- /static/Screenshot 2022-07-04 at 1.42.35 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.42.35 PM.png -------------------------------------------------------------------------------- /static/Screenshot 2022-07-04 at 1.42.52 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.42.52 PM.png -------------------------------------------------------------------------------- /static/Screenshot 2022-07-04 at 1.43.03 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.43.03 PM.png -------------------------------------------------------------------------------- /static/Screenshot 2022-07-04 at 1.43.52 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.43.52 PM.png -------------------------------------------------------------------------------- /static/Screenshot 2022-07-04 at 1.44.05 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.44.05 PM.png -------------------------------------------------------------------------------- /static/Screenshot 2022-07-16 at 8.03.29 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.03.29 PM.png -------------------------------------------------------------------------------- /static/Screenshot 2022-07-16 at 8.03.50 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.03.50 PM.png -------------------------------------------------------------------------------- /static/Screenshot 2022-07-16 at 8.04.00 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.04.00 PM.png -------------------------------------------------------------------------------- /static/Screenshot 2022-07-16 at 8.04.08 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.04.08 PM.png -------------------------------------------------------------------------------- /static/Screenshot 2022-07-16 at 8.04.21 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.04.21 PM.png -------------------------------------------------------------------------------- /static/XAI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/XAI.png -------------------------------------------------------------------------------- /static/download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/download.png -------------------------------------------------------------------------------- /static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/favicon.ico -------------------------------------------------------------------------------- /static/filter.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /static/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /static/pipelineUI 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI 2.png -------------------------------------------------------------------------------- /static/pipelineUI 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI 3.png -------------------------------------------------------------------------------- /static/pipelineUI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI.png -------------------------------------------------------------------------------- /static/pipelineUI_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI_1.png -------------------------------------------------------------------------------- /static/pipelineUI_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI_2.png -------------------------------------------------------------------------------- /static/start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/start.png -------------------------------------------------------------------------------- /static/start.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /static/start1.svg: -------------------------------------------------------------------------------- 1 | 17.Power -------------------------------------------------------------------------------- /tabular.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pymlpipe.utils.database import create_folder 3 | from pymlpipe.utils.getschema import schema_ 4 | from pymlpipe.utils import _xai as xai 5 | import uuid 6 | import yaml 7 | from contextlib import contextmanager 8 | import pandas as pd 9 | import shutil 10 | import pickle 11 | import sklearn 12 | import datetime 13 | import torch 14 | import torch.fx 15 | 16 | 17 | _COLOR_MAP = { 18 | "placeholder": "AliceBlue", 19 | "call_module": "LemonChiffon1", 20 | "get_param": "Yellow2", 21 | "get_attr": "LightGrey", 22 | "output": "PowderBlue", 23 | } 24 | 25 | 26 | class Context_Manager: 27 | """_summary_: Context Manager for with statement 28 | 1. creates folders and subfolders 29 | 2. creates runid for a run instance 30 | """ 31 | def __init__(self,name,feature_store,run_id=None): 32 | super(PyMLPipe) 33 | if run_id==None: 34 | self.runid = str(uuid.uuid4()) 35 | else: 36 | self.runid=run_id 37 | self.name = name 38 | self.feature_store=feature_store 39 | self.exp_path=os.path.join(self.feature_store,self.name,self.runid) 40 | self.folders={"artifacts":os.path.join(self.exp_path,"artifacts"), 41 | "metrics":os.path.join(self.exp_path,"metrics"), 42 | "models":os.path.join(self.exp_path,"models"), 43 | "params":os.path.join(self.exp_path,"params")} 44 | self.info_dict=[] 45 | def get_path(self): 46 | """_summary_ 47 | 48 | Returns: 49 | _type_: _description_ 50 | """ 51 | return self.exp_path 52 | 53 | def structure(self): 54 | """_summary_ 55 | 56 | Returns: 57 | _type_: _description_ 58 | """ 59 | self.exp_path=create_folder(self.feature_store,self.name) 60 | self.exp_path=create_folder(self.exp_path,self.runid) 61 | self._create_all_folders(self.exp_path) 62 | return self.exp_path 63 | 64 | def _create_all_folders(self,exp_path): 65 | """_summary_ 66 | 67 | Args: 68 | exp_path (_type_): _description_ 69 | """ 70 | for i in self.folders: 71 | create_folder(exp_path,i) 72 | def write_to_yaml(self,info): 73 | with open(os.path.join(self.exp_path,"info.yaml"), 'w') as file: 74 | documents = yaml.dump(info, file) 75 | 76 | 77 | 78 | class PyMLPipe: 79 | def __init__(self): 80 | 81 | self.feature_store=create_folder(os.getcwd()) 82 | self.experiment_name='0' 83 | self.folders=None 84 | self.experiment_path=None 85 | self.info={} 86 | self.info["tags"]=[] 87 | self.info["metrics"]={} 88 | self.info["params"]={} 89 | self.info["artifact"]=[] 90 | self.info["model"]={} 91 | self.info["artifact_schema"]=[] 92 | self.info["metrics_log"]=[] 93 | self._is_continious_logging=False 94 | 95 | 96 | def __reset__(self): 97 | self.feature_store=create_folder(os.getcwd()) 98 | self.folders=None 99 | self.experiment_path=None 100 | 101 | self.info["tags"]=[] 102 | self.info["metrics"]={} 103 | self.info["params"]={} 104 | self.info["artifact"]=[] 105 | self.info["model"]={} 106 | self.info["artifact_schema"]=[] 107 | self.info["metrics_log"]=[] 108 | 109 | @contextmanager 110 | def run(self,experiment_name=None,runid=None): 111 | """_summary_: start a context manager for with statement 112 | 1. When run is started it will create 113 | a. RUN ID 114 | b. EXPERIMENT ID 115 | c. FOLDERS for storing the details 116 | 117 | Args: 118 | experiment_name (str, optional): gives a experiment name. Defaults to None. 119 | runid (str, optional): gives a runid. Defaults to None. 120 | 121 | Returns: 122 | class context_run(object): object for the context manager 123 | """ 124 | if experiment_name!=None: 125 | self.experiment_name=experiment_name 126 | r=Context_Manager(self.experiment_name, 127 | self.feature_store,runid) 128 | 129 | self._write_info_run(self.experiment_name, r.runid) 130 | r.structure() 131 | self.context_manager=r 132 | #initialize models 133 | self.scikit_learn=ScikitLearn(self.context_manager.folders) 134 | self.pytorch=Pytorch(self.context_manager.folders) 135 | yield r 136 | self.info["execution_time"]=str(datetime.datetime.now()).split(".")[0] 137 | if self.scikit_learn.registered: 138 | self.info["model"]={"model_name":self.scikit_learn.model_name, 139 | "model_path":self.scikit_learn.model_path, 140 | "model_params": self.scikit_learn.model_params, 141 | "model_class":self.scikit_learn.model_class, 142 | "model_type":self.scikit_learn.model_type, 143 | "model_tags":self.scikit_learn.model_tags, 144 | "registered":self.scikit_learn.registered, 145 | "model_mode":self.scikit_learn.model_mode 146 | } 147 | 148 | elif self.pytorch.registered: 149 | self.info["model"]={"model_name":self.pytorch.model_name, 150 | "model_path":self.pytorch.model_path, 151 | "model_architecture":self.pytorch.model_architecture, 152 | "model_class":self.pytorch.model_class, 153 | "model_type":self.pytorch.model_type, 154 | "model_ops":self.pytorch.model_ops, 155 | "registered":self.pytorch.registered, 156 | "model_mode":self.pytorch.model_mode 157 | } 158 | #print(self.info) 159 | if len(self.info["metrics"])==0 and self._is_continious_logging: 160 | self.info["metrics"]=self.info["metrics_log"][-1] 161 | 162 | self.context_manager.write_to_yaml(self.info) 163 | self.__reset__() 164 | def explainer(self,model,trainx): 165 | """_summary_: This is an explainer API that do global explainibilty. 166 | 167 | Args: 168 | model (scikit-learn): Model Object 169 | trainx (Pandas DataFrame): Data Frame for Global Explainability 170 | 171 | Raises: 172 | TypeError: _description_ 173 | """ 174 | if not isinstance(trainx, pd.DataFrame): 175 | raise TypeError("Error: Please provide a valid data pd.Dataframe or correct artifact Name") 176 | model_type=str(type(model)) 177 | if ('sklearn' not in model_type) and ("catboost" not in model_type): 178 | raise TypeError("Error: Scikit-learn or Catboost or Xgboost Expected got {model_type}".format(model_type=model_type) ) 179 | explainer_instance=xai.Explainer(model,trainx,self.context_manager.folders["artifacts"]) 180 | artifacts=explainer_instance.explain() 181 | self.info["XAI"]=artifacts 182 | 183 | def set_experiment(self,name): 184 | """_summary_: sets the experiment name 185 | 186 | Args: 187 | name (str): name of the experiment 188 | """ 189 | self.experiment_name=name 190 | exp_path=create_folder(self.feature_store,self.experiment_name) 191 | self._write_info_experiment(name,exp_path) 192 | 193 | 194 | def set_tag(self,tag_value): 195 | """_summary_: sets a tag for a perticular run 196 | Args: 197 | name (str or int or float): tag name 198 | Raises: 199 | TypeError: Supported type 'str','int','float' 200 | """ 201 | 202 | 203 | if isinstance(tag_value,dict) or isinstance(tag_value,list) or isinstance(tag_value,set): 204 | raise TypeError("unsupported type, Expected 'str','int','float' got "+str(type(tag_value))) 205 | self.info["tags"].append(tag_value) 206 | 207 | 208 | 209 | def set_tags(self,tag_dict:list): 210 | """_summary_:sets N no of tags for a perticular run 211 | 212 | Args: 213 | tag_dict (list): tag names in list format 214 | 215 | Raises: 216 | TypeError: Expected 'list' 217 | """ 218 | 219 | if isinstance(tag_dict,list): 220 | self.info["tags"].extend(tag_dict) 221 | else: 222 | raise TypeError("unsupported type, Expected 'list' got "+str(type(tag_dict))) 223 | 224 | def get_tags(self): 225 | """_summary_: get all the tags that are associated with the run 226 | 227 | Returns: 228 | list: tags that are associated with the run 229 | """ 230 | return self.info["tags"] 231 | 232 | def set_version(self,version): 233 | """_summary_:sets version number for the perticular run 234 | 235 | Args: 236 | version (str or int or float): version number 237 | 238 | Raises: 239 | TypeError: Expected 'str','int','float' 240 | """ 241 | if isinstance(version,dict) or isinstance(version,list) or isinstance(version,set): 242 | raise TypeError("unsupported type, Expected 'str','int','float' got "+str(type(tag_dict))) 243 | self.info["version"]=version 244 | 245 | 246 | def get_version(self): 247 | """_summary_:get the version number associated with the run 248 | 249 | 250 | Returns: 251 | _type_: version number 252 | """ 253 | return self.info["version"] 254 | 255 | def log_metrics(self,metric_dict:dict): 256 | """_summary_: log metrics for the model run 257 | 258 | Args: 259 | metric_dict (dict): key value pair with metric name and metric value 260 | 261 | Raises: 262 | TypeError: Expected 'dict' 263 | """ 264 | 265 | if isinstance(metric_dict,dict): 266 | self.info["metrics"].update({i:float("{0:.2f}".format(j)) for i,j in metric_dict.items()}) 267 | else: 268 | raise TypeError("unsupported type, Expected 'dict' got "+str(type(metric_dict))) 269 | 270 | def log_metrics_continious(self,metric_dict:dict): 271 | """_summary_ 272 | 273 | Args: 274 | metric_dict (dict): key value pair with metric name and metric value 275 | 276 | Raises: 277 | TypeError: Expected 'dict' 278 | """ 279 | if isinstance(metric_dict,dict): 280 | self.info["metrics_log"].append({i:float("{0:.2f}".format(j)) for i,j in metric_dict.items()}) 281 | else: 282 | raise TypeError("Expected Type dict got " +type(metric_dict)) 283 | self._is_continious_logging=True 284 | 285 | def log_metric(self,metric_name,metric_value): 286 | """_summary_: log single metric for the model run 287 | 288 | Args: 289 | metric_name (str): name of the metric 290 | metric_value (int or float): value of the metric 291 | 292 | Raises: 293 | TypeError: metric_name expected to be str 294 | TypeError: metric_value expected to be int or float 295 | """ 296 | 297 | mv=None 298 | if not isinstance(metric_value,int) and not isinstance(metric_value,float): 299 | raise TypeError("unsupported type, 'metric_value' Expected 'int','float' got "+str(type(metric_value))) 300 | if not isinstance(metric_name,str): 301 | raise TypeError("unsupported type, 'metric_value' Expected 'str' got "+str(type(metric_name))) 302 | 303 | 304 | self.info["metrics"][metric_name]=float("{0:.2f}".format(metric_value)) 305 | 306 | 307 | def log_params(self,param_dict:dict): 308 | """_summary_: log parameters for the model run 309 | 310 | Args: 311 | param_dict (dict): key value pair with parameter name and parameter value 312 | 313 | Raises: 314 | TypeError: Expected 'dict' 315 | """ 316 | 317 | 318 | if isinstance(param_dict,dict): 319 | self.info["params"].update(param_dict) 320 | else: 321 | raise TypeError("unsupported type, Expected 'dict' got "+str(type(metric_dict))) 322 | 323 | 324 | def log_param(self,param_name,param_value): 325 | """_summary_:log single parameter for the model run 326 | 327 | Args: 328 | param_name (str): _description_ 329 | param_value (int or float or str): _description_ 330 | 331 | Raises: 332 | TypeError: param_name Expected 'str' 333 | TypeError: param_value Expected 'int','float','str' 334 | """ 335 | 336 | mv=None 337 | if not isinstance(param_value,int) and not isinstance(param_value,float) and not isinstance(param_value,str): 338 | raise TypeError("unsupported type, 'param_value' Expected 'int','float','str' got "+str(type(metric_value))) 339 | if not isinstance(param_name,str): 340 | raise TypeError("unsupported type, 'param_name' Expected 'str' got "+str(type(metric_name))) 341 | self.info["params"][param_name]=param_value 342 | 343 | def register_artifact(self,artifact_name,artifact,artifact_type="training"): 344 | """_summary_: Save Artifact as part of data verion control 345 | 346 | Args: 347 | artifact_name (str): name of the artifact 348 | artifact (pandas DataFrame): pandas DataFrame object with the data 349 | artifact_type (str, optional): Defaults to "training". artifact_type can be [training,testing,validation,dev,prod] 350 | 351 | Raises: 352 | TypeError: Expected DataFrame object 353 | ValueError: artifact_name should have a string value 354 | """ 355 | if not isinstance(artifact, pd.DataFrame): 356 | raise TypeError("Please provide DataFrame in 'artifact'") 357 | if artifact_name=="" or artifact_name==None: 358 | raise ValueError("Please provide a name in 'artifact_name' which is not '' or None") 359 | path=os.path.join(self.context_manager.folders["artifacts"],artifact_name) 360 | dataschema=artifact.describe(include='all') 361 | 362 | artifact.to_csv(path,index=False) 363 | 364 | 365 | 366 | 367 | self.info["artifact"].append({ 368 | "name":artifact_name, 369 | "path":path, 370 | "tag":artifact_type 371 | }) 372 | schema_data,schema_details=schema_(artifact) 373 | self.info["artifact_schema"].append({ 374 | "name":artifact_name, 375 | "schema":schema_data, 376 | "details":schema_details 377 | } 378 | ) 379 | 380 | 381 | def register_artifact_with_path(self,artifact,artifact_type="training"): 382 | """_summary_ 383 | 384 | Args: 385 | artifact (str): path of the artifact 386 | artifact_type (str, optional): _description_. Defaults to "training".artifact_type can be [training,testing,validation,dev,prod] 387 | 388 | Raises: 389 | TypeError: artifact path should be str 390 | ValueError: artifact path should be correct 391 | """ 392 | if not isinstance(artifact, str): 393 | raise TypeError("Please provide full path of artifact") 394 | if not os.path.exists(artifact): 395 | raise ValueError("Please provide correct path of artifact") 396 | 397 | shutil.copy(artifact, self.context_manager.folders["artifacts"]) 398 | 399 | path=os.path.join(self.context_manager.folders["artifacts"],os.path.basename(artifact)) 400 | self.info["artifact"].append({ 401 | "name":os.path.basename(path), 402 | "path":path, 403 | "tag":artifact_type 404 | }) 405 | filename=os.path.basename(artifact) 406 | if filename.endswith('.csv'): 407 | artifact=pd.read_csv(path) 408 | elif filename.endswith('.xlxs'): 409 | artifact=pd.read_excel(path) 410 | elif filename.endswith('.parquet'): 411 | artifact=pd.read_parquet(path) 412 | else: 413 | print("Error: Unknown file type cannot generate Schema!!!!") 414 | return 415 | 416 | schema_data,schema_details=schema_(artifact) 417 | self.info["artifact_schema"].append({ 418 | "name":filename, 419 | "schema":schema_data, 420 | "details":schema_details 421 | } 422 | ) 423 | 424 | def get_info(self): 425 | """_summary_: get the whole run details 426 | 427 | Returns: 428 | dict: information about the whole run 429 | """ 430 | return self.info 431 | 432 | 433 | def get_artifact(self): 434 | """_summary_: get the artifact details 435 | 436 | Returns: 437 | dict: returns the artifact detail 438 | """ 439 | return self.info["artifact"] 440 | 441 | def _write_info_experiment(self,experiment_name,path): 442 | """_summary_: writes to the experiment schema 443 | 444 | Args: 445 | experiment_name (str): name of the experiment 446 | path (str): path to save the run details 447 | """ 448 | fulllist={} 449 | if os.path.exists(os.path.join(self.feature_store,"experiment.yaml")): 450 | with open(os.path.join(self.feature_store,"experiment.yaml")) as file: 451 | fulllist = yaml.load(file, Loader=yaml.FullLoader) 452 | if experiment_name not in fulllist: 453 | fulllist[experiment_name]={"experiment_path":path, 454 | "runs":[], 455 | "execution_time":str(datetime.datetime.now()).split(".")[0] 456 | } 457 | else: 458 | fulllist[experiment_name]["execution_time"]=str(datetime.datetime.now()).split(".")[0] 459 | else: 460 | fulllist[experiment_name]={"experiment_path":path, 461 | "runs":[], 462 | "execution_time":str(datetime.datetime.now()).split(".")[0] 463 | } 464 | 465 | 466 | with open(os.path.join(self.feature_store,"experiment.yaml"), 'w') as file: 467 | documents = yaml.dump(fulllist, file) 468 | 469 | 470 | def _write_info_run(self,experiment_name,run_id): 471 | """_summary_:writes to the run schema 472 | 473 | Args: 474 | experiment_name (str): name of the experiment 475 | run_id (str): ID for the running instance 476 | """ 477 | fulllist={} 478 | 479 | with open(os.path.join(self.feature_store,"experiment.yaml")) as file: 480 | fulllist = yaml.load(file, Loader=yaml.FullLoader) 481 | fulllist[experiment_name]["runs"].append(run_id) 482 | 483 | 484 | 485 | with open(os.path.join(self.feature_store,"experiment.yaml"), 'w') as file: 486 | documents = yaml.dump(fulllist, file) 487 | 488 | 489 | def set_uri(self): 490 | pass 491 | 492 | 493 | 494 | #explainer* 495 | #https://github.com/SauceCat/PDPbox 496 | #https://github.com/AustinRochford/PyCEbox 497 | 498 | 499 | class ScikitLearn: 500 | def __init__(self,folders): 501 | self.folders=folders 502 | self.model_name="" 503 | self.model_path="" 504 | self.model_class="" 505 | self.model_type="" 506 | self.model_params={} 507 | self.model_tags={} 508 | self.registered=False 509 | self.model_mode="" 510 | 511 | 512 | def register_model(self,model_name,model): 513 | if "sklearn" in str(type(model)) or "catboost" in str(type(model)): 514 | 515 | pickle.dump(model, open(os.path.join(self.folders["models"],model_name+'.pkl'), 'wb')) 516 | self.model_type="scikit-learn" 517 | else: 518 | raise TypeError("Error:Expected ScikitLearn Module!!!!") 519 | self.model=model 520 | self.model_name=model_name 521 | self.model_path=os.path.join(self.folders["models"],model_name+'.pkl') 522 | self.model_class=type(model).__name__ 523 | self.model_params=model.get_params() 524 | self.model_tags={tag:str(value) for tag,value in model._get_tags().items()} 525 | self.registered=True 526 | 527 | 528 | class Pytorch: 529 | def __init__(self,folders): 530 | self.folders=folders 531 | self.model_name="" 532 | self.model_path="" 533 | self.model_class="" 534 | self.model_type="" 535 | self.model_architecture=[] 536 | self.model_ops=[] 537 | self.registered=False 538 | self.model_mode="" 539 | 540 | def register_model(self,model_name,model): 541 | """_summary_: Save the model as an aritifact object 542 | 543 | Args: 544 | model_name (str): name of file to be saved 545 | model (Pytorch Model): the model 546 | 547 | Raises: 548 | Exception: 549 | """ 550 | try: 551 | model_scripted = torch.jit.script(model) 552 | model_scripted.save(os.path.join(self.folders["models"],model_name+'.pt')) 553 | self.model_type="torch" 554 | except Exception as e: 555 | raise Exception(e) 556 | self.model_name=model_name 557 | self.model_path=os.path.join(self.folders["models"],model_name+'.pt') 558 | self.model_class=type(model).__name__ 559 | self.registered=True 560 | self.model_architecture=self._get_model_arch(model) 561 | self.model_ops=self._get_model_ops(model) 562 | self.model_mode="non_runtime" 563 | 564 | def register_model_with_runtime(self,model_name,model,data): 565 | """_summary_: Save the model as an aritifact object with runtime details. 566 | This helps in Saving the model for model conversion 567 | 568 | Args: 569 | model_name (str): name of file to be saved 570 | model (Pytorch Model): the model 571 | data (TorchTensor): Data used for training. 1 row of data is enogh 572 | 573 | Raises: 574 | Exception: _description_ 575 | """ 576 | try: 577 | traced_cell = torch.jit.trace(model, data) 578 | torch.jit.save(traced_cell, os.path.join(self.folders["models"],model_name+".pt")) 579 | except Exception as e: 580 | raise Exception(e) 581 | self.model=model 582 | self.model_name=model_name 583 | self.model_path=os.path.join(self.folders["models"],model_name+'.pt') 584 | self.model_class=type(model).__name__ 585 | self.registered=True 586 | self.model_architecture=self._get_model_arch(model) 587 | self.model_ops=self._get_model_ops(model) 588 | self.model_mode="runtime" 589 | 590 | def _load_model(self,model_name): 591 | model = torch.jit.load(model_name) 592 | return 593 | 594 | def _load_model_with_runtime(self,model_name): 595 | loaded_trace = torch.jit.load(model_name) 596 | return loaded_trace 597 | 598 | def _get_model_ops(self,model): 599 | """_summary_: get forward operations in for pytorch model 600 | 601 | Args: 602 | model (Pytorch Model): Pytorch model 603 | 604 | Returns: 605 | list: all tensor operations 606 | """ 607 | gm = torch.fx.symbolic_trace(model) 608 | ops_data={} 609 | for idx, n in enumerate(gm.graph.nodes): 610 | ops_data[f"op_{idx}"]={ 611 | "name":str(n), 612 | "op":n.__dict__["op"], 613 | "input_node":{str(k): str(v) for k,v in n.__dict__['_input_nodes'].items()}, 614 | "args":[str(i) for i in n.__dict__["_args"]], 615 | "prev":str(n.__dict__["_prev"]), 616 | "next":str(n.__dict__["_next"]), 617 | "users":{str(k): str(v) for k,v in n.__dict__['users'].items()}, 618 | } 619 | return ops_data 620 | 621 | def _get_model_arch(self,model): 622 | """_summary_: get forward operations in for pytorch model 623 | 624 | Args: 625 | model (Pytorch Model): Pytorch model 626 | 627 | Returns: 628 | list: all Layers in model 629 | """ 630 | arch=[] 631 | for layers,details in dict(model.named_modules()).items(): 632 | _temp={} 633 | if layers!="": 634 | _temp["layer_name"]=layers.replace(".","_") 635 | _temp["layer"]=str(details) 636 | _temp["layer_type"]=type(details).__name__ 637 | _temp["layer_class"]=str(type(details)).strip("<").strip(">").split(" ")[1] 638 | _temp["params"]={} 639 | for params in details.__dict__: 640 | if not params.startswith("_"): 641 | _temp["params"][params]=details.__dict__[params] 642 | if len(_temp)>0: 643 | arch.append(_temp) 644 | return arch 645 | 646 | -------------------------------------------------------------------------------- /templates/check_deployment.html: -------------------------------------------------------------------------------- 1 | {% extends "template.html" %} 2 | 3 | {% block content %} 4 | 13 | 20 |
21 |
22 |

/POST/

23 |
24 | 25 |
26 | 27 |
28 | 32 |
33 | 34 | 35 |
36 | {% endblock %} -------------------------------------------------------------------------------- /templates/deployments.html: -------------------------------------------------------------------------------- 1 | {% extends "template.html" %} 2 | 3 | {% block content %} 4 | 31 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | {%for details in ALL_DEPLOYED_MODELS%} 52 | 53 | 54 | 55 | 56 | 57 | 62 | 63 | 71 | 72 | 73 | 74 | {%endfor%} 75 | 76 |
Experiment NoRun IDDeployment NoDeployment URLStatusActions
{{details["experiment_id"]}}{{details["run_id"]}}{{details["model_deployment_number"]}} 58 | {%if details["status"]=='running'%} 59 | {{details["model_url"]}} 60 | {%endif%} 61 | {{details["status"]}} 64 | {%if details["status"]=='running'%} 65 | 66 | {%elif details["status"]=='stopped'%} 67 | 68 | {%endif%} 69 | 70 |
77 | 78 | {% endblock %} -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | PyMLPipe 7 | 8 | 9 | 10 | 11 | 12 | 13 | 39 | 189 | 190 |
191 |
192 | 193 |

PyMLPipe

194 |
195 |
196 | 197 |
198 | {% if error != "" %} 199 |
200 | 201 | {{error}} Please provide a valid expression Ex: >10 202 |
203 | {%else%} 204 | 208 | {%endif%} 209 |
210 | 211 | 212 |
213 | 214 | 252 | 253 |
254 | 255 |
256 |
257 |
258 | 261 | 275 |
276 |
277 |

278 | 279 | 284 | 285 |

286 |

287 | 288 |

289 |

290 | 291 | Filter 292 | 293 |

294 |
295 | 296 |
297 |
298 |
299 |
300 | 314 |
315 |
316 | 317 | 318 |
319 | 320 |
321 | 322 | 323 |
324 |
325 | 326 |
327 |
328 | 329 |
330 | {%for experiment in runs%} 331 | 332 | {%if loop.index==1%} 333 | {%set display='table'%} 334 | {%else%} 335 | {%set display='none'%} 336 | {%endif%} 337 | 338 | 339 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | {%for metric in exp_wise_metrics[experiment]%} 366 | 375 | 376 | {%endfor%} 377 | 378 | 379 | 380 | 381 | {%for run in runs[experiment]['runs']%} 382 | {%if run in run_details%} 383 | 384 | 385 | 386 | 387 | 388 | 389 | 394 | {%for metric in exp_wise_metrics[experiment]%} 395 | 396 | {%endfor%} 397 | 398 | {%endif%} 399 | {%endfor%} 400 | 401 | 402 | 403 | 404 | 405 |
Run IDDateModelversiontags 367 |
368 | 369 | 370 |
371 | {{metric}} 372 |
373 | 374 |
{{run[:13]}}...{{run_details[run].execution_time}}{{run_details[run].model.model_name}}{{run_details[run].version}} 390 | {%for tag in run_details[run].tags%} 391 | {{tag}} 392 | {%endfor%} 393 | {{run_details[run]["metrics"][metric]|string}}
406 | {%endfor%} 407 | 408 |
409 | 410 |
411 |
412 | 413 |
414 |
415 |
416 | 417 |
418 |
419 | 420 |
421 |
422 | 423 |
424 | 425 | 446 | -------------------------------------------------------------------------------- /templates/job_view.html: -------------------------------------------------------------------------------- 1 | {% extends "template.html" %} 2 | 3 | {% block content %} 4 | 11 | 19 | 20 | 27 |
28 | 29 |
30 |
31 |
32 |
33 | 38 |
39 |
40 | 41 |
42 |
43 |
44 | 85 | {% endblock %} -------------------------------------------------------------------------------- /templates/jobs.html: -------------------------------------------------------------------------------- 1 | {% extends "template.html" %} 2 | 3 | {% block content %} 4 | 5 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | {%for pipe in pipeline%} 25 | 26 | 29 | 32 | 35 | 38 | {%if pipe["status"]=="Queued" or pipe["status"]=="Started"%} 39 | 43 | {%else%} 44 | 48 | 49 | {%endif%} 50 | 51 | {%endfor%} 52 | 53 |
Pipeline NameCreated OnStart TimeStatusActions
27 | {{pipe["pipelinename"]}} 28 | 30 | {{pipe["created_at"]}} 31 | 33 | {{pipe["jobtime"]}} 34 | 36 | {{pipe["status"]}} 37 | 40 | Stop 41 | view 42 | 45 | Start 46 | view 47 |
54 | 55 | {% endblock %} -------------------------------------------------------------------------------- /templates/template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | PyMLPipe 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 30 | 31 |
32 |
33 |

PyMLPipe

34 |
35 |
36 |
37 |
38 | {% block content %} 39 | {% endblock %} 40 |
41 |
42 | 43 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0' 2 | __author__= "indresh bhattacharya" -------------------------------------------------------------------------------- /utils/_sklearn_prediction.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | class Deployment: 3 | def __init__(self,model_path): 4 | self.model_path = model_path 5 | self.model=pickle.load(open(self.model_path,'rb')) 6 | 7 | 8 | def predict(self,data,dtype): 9 | status=0 10 | try: 11 | return self.model.predict(data),status 12 | except Exception as e: 13 | status=1 14 | return str(e),status 15 | 16 | -------------------------------------------------------------------------------- /utils/_torch_prediction.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import torch 3 | class Deployment: 4 | def __init__(self,model_path,typeof="non_runtime"): 5 | 6 | self.model_path = model_path 7 | if typeof=="non_runtime": 8 | self.model=self._load_model(self.model_path) 9 | elif typeof=="runtime": 10 | self.model=self._load_model_with_runtime(self.model_path) 11 | 12 | 13 | def _load_model(self,model_name): 14 | model = torch.jit.load(model_name) 15 | return model 16 | 17 | def _load_model_with_runtime(self,model_name): 18 | loaded_trace = torch.jit.load(model_name) 19 | return loaded_trace 20 | 21 | def predict(self,data,dtype): 22 | status=0 23 | try: 24 | if dtype=="float": 25 | data=torch.from_numpy(data).type(torch.FloatTensor) 26 | return self.model(data).detach().numpy(),status 27 | elif dtype=="double": 28 | data=torch.from_numpy(data).type(torch.DoubleTensor) 29 | return self.model(data).detach().numpy(),status 30 | elif dtype=="int": 31 | data=torch.from_numpy(data).type(torch.IntTensor) 32 | return self.model(data).detach().numpy(),status 33 | except Exception as e: 34 | status=1 35 | return str(e),status 36 | 37 | -------------------------------------------------------------------------------- /utils/_xai.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as pl 2 | from sklearn import inspection 3 | import shap 4 | import pandas as pd 5 | import os 6 | import numpy as np 7 | 8 | 9 | 10 | XAI_MAP={ 11 | "TreeBasedModels": ['BaseDecisionTree', 12 | 'DecisionTreeClassifier', 13 | 'DecisionTreeRegressor', 14 | 'ExtraTreeClassifier', 15 | 'ExtraTreeRegressor', 16 | 'BaseEnsemble', 17 | 'RandomForestClassifier', 18 | 'RandomForestRegressor', 19 | 'RandomTreesEmbedding', 20 | 'ExtraTreesClassifier', 21 | 'ExtraTreesRegressor', 22 | 'BaggingClassifier', 23 | 'BaggingRegressor', 24 | 'IsolationForest', 25 | 'GradientBoostingClassifier', 26 | 'GradientBoostingRegressor', 27 | 'AdaBoostClassifier', 28 | 'AdaBoostRegressor', 29 | 'VotingClassifier', 30 | 'VotingRegressor', 31 | 'StackingClassifier', 32 | 'StackingRegressor', 33 | "XGBClassifier", 34 | "XGBRegressor", 35 | "CatBoostClassifier", 36 | "CatBoostRegressor", 37 | "LGBMClassifier", 38 | "LGBMRegressor" 39 | ], 40 | "LinearModels": ['ARDRegression', 41 | 'BayesianRidge', 42 | 'ElasticNet', 43 | 'ElasticNetCV', 44 | 'Hinge', 45 | 'Huber', 46 | 'HuberRegressor', 47 | 'Lars', 48 | 'LarsCV', 49 | 'Lasso', 50 | 'LassoCV', 51 | 'LassoLars', 52 | 'LassoLarsCV', 53 | 'LassoLarsIC', 54 | 'LinearRegression', 55 | 'LogisticRegression', 56 | 'LogisticRegressionCV', 57 | 'ModifiedHuber', 58 | 'MultiTaskElasticNet', 59 | 'MultiTaskElasticNetCV', 60 | 'MultiTaskLasso', 61 | 'MultiTaskLassoCV', 62 | 'OrthogonalMatchingPursuit', 63 | 'OrthogonalMatchingPursuitCV', 64 | 'PassiveAggressiveClassifier', 65 | 'PassiveAggressiveRegressor', 66 | 'Perceptron', 67 | 'Ridge', 68 | 'RidgeCV', 69 | 'RidgeClassifier', 70 | 'RidgeClassifierCV', 71 | 'SGDClassifier', 72 | 'SGDRegressor', 73 | 'SquaredLoss', 74 | 'TheilSenRegressor', 75 | 'RANSACRegressor', 76 | 'PoissonRegressor', 77 | 'GammaRegressor', 78 | 'TweedieRegressor'], 79 | } 80 | 81 | class Explainer(): 82 | def __init__(self,model,data,artifact_path): 83 | self.model=model 84 | self.data=data 85 | self.artifact_path=artifact_path 86 | self.feature_map=self.data.columns 87 | def explain(self): 88 | model_class=type(self.model) 89 | model_name=type(self.model).__name__ 90 | flag=False 91 | if model_name in XAI_MAP["LinearModels"]: 92 | self.coef_based_feature_importance(self.model,np.std(self.data,0),self.feature_map,os.path.join(self.artifact_path,"explainer")) 93 | try: 94 | self.tree_linear_summary_plot(self.model,self.data,self.feature_map,os.path.join(self.artifact_path,"explainer")) 95 | except Exception as e: 96 | flag=True 97 | print("Warning:Instance of model {model} not supported".format(model=model_name)) 98 | 99 | elif model_name in XAI_MAP["TreeBasedModels"]: 100 | self.tree_based_feature_importance(self.model,self.feature_map,os.path.join(self.artifact_path,"explainer")) 101 | try: 102 | self.tree_expainer_summary_plot(self.model,self.data,self.feature_map,os.path.join(self.artifact_path,"explainer")) 103 | except Exception as e: 104 | flag=True 105 | print("Warning: Instance of model {model} not supported".format(model=model_name)) 106 | 107 | else: 108 | #implement XAI for NeuralNetworks 109 | pass 110 | if not flag: 111 | return { 112 | "feature_explainer":os.path.join(self.artifact_path,"explainer.csv"), 113 | "shap":os.path.join(self.artifact_path,"explainer.svg") 114 | } 115 | else: 116 | return { 117 | "feature_explainer":os.path.join(self.artifact_path,"explainer.csv"), 118 | "shap":"" 119 | } 120 | 121 | def tree_expainer_summary_plot(self,model,xtrain,feature_map,fig_name): 122 | shap_xgb_explainer = shap.TreeExplainer(model) 123 | shap_xgb_values_train = shap_xgb_explainer.shap_values(xtrain) 124 | shap.summary_plot(shap_xgb_values_train, xtrain,feature_names=feature_map,show=False) 125 | pl.savefig("{fig_name}.svg".format(fig_name=fig_name),dpi=700,bbox_inches='tight') 126 | pl.close('all') 127 | 128 | 129 | def tree_linear_summary_plot(self,model,xtrain,feature_map,fig_name): 130 | shap_xgb_explainer = shap.LinearExplainer(model,xtrain) 131 | shap_xgb_values_train = shap_xgb_explainer.shap_values(xtrain) 132 | shap.summary_plot(shap_xgb_values_train, xtrain,feature_names=feature_map,show=False) 133 | pl.savefig("{fig_name}.svg".format(fig_name=fig_name),dpi=700,bbox_inches='tight') 134 | pl.close('all') 135 | 136 | def permutation_feature_importance(self,model,trainx,trainy): 137 | permutation_imp=inspection.permutation_importance(model, trainx, trainy, n_jobs=-1,scoring='accuracy', n_repeats=8,) 138 | return permutation_imp.importances_mean 139 | 140 | 141 | def tree_based_feature_importance(self,model,feature_map,path): 142 | model_ranks=pd.DataFrame([{"feature":f,"importance":fi} for f,fi in zip(feature_map,model.feature_importances_)]) 143 | dt_rank_df = pd.DataFrame({"feature":model_ranks["feature"],"importance":model_ranks["importance"],'rank': model_ranks["importance"].rank(method='first', ascending=False).astype(int)}) 144 | dt_rank_df.to_csv('{path}.csv'.format(path=path),index=False) 145 | 146 | def coef_based_feature_importance(self,model,std,feature_map,path): 147 | maps={"feature":feature_map,} 148 | n_coff=0 149 | for idx,i in enumerate(model.coef_): 150 | maps["coef_norm_"+str(idx)]=model.coef_[idx] *std 151 | n_coff+=1 152 | df=pd.DataFrame(maps) 153 | df=df.round(3) 154 | df["avg_coef_norm"]=df.sum(axis=1)/n_coff 155 | 156 | ndf=df.sort_values(by="avg_coef_norm",ascending=False) 157 | ndf.to_csv('{path}.csv'.format(path=path),index=False) 158 | 159 | def permuatation_feature_importance(self,model,test_x,test_y,feature_map): 160 | importancef=inspection.permutation_importance(model,test_x,test_y,n_jobs=-1, n_repeats=8) 161 | return pd.DataFrame([{"feature":f,"importance":fi} for f,fi in zip(feature_map,importancef.importances_mean)]) 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /utils/change2graph.py: -------------------------------------------------------------------------------- 1 | 2 | def search_arch(architecture, node): 3 | for arch in architecture: 4 | if arch["layer_name"] == node: 5 | return arch 6 | return None 7 | 8 | def makegraph(ops,architecture): 9 | #print(ops) 10 | #print(architecture) 11 | graph_dict={"nodes":[],"edges":[]} 12 | for op in ops: 13 | #prev=ops[op]['name'] if ops[op]['prev']=="" else ops[op]['prev'] 14 | #next_pt=ops[op]['name'] if ops[op]['next']=="" else ops[op]['next'] 15 | arch_details=search_arch(architecture, ops[op]['name']) 16 | if arch_details!=None: 17 | graph_dict["nodes"].append({'data':{ 'id': ops[op]['name'] , 18 | "label":op+"_"+ops[op]['name'] , 19 | "type":arch_details["layer_type"] , 20 | "details":[k+"="+str(v) for k,v in arch_details["params"].items()] 21 | } }) 22 | else: 23 | graph_dict["nodes"].append({'data':{ 'id': ops[op]['name'] , 24 | "label":op+"_"+ops[op]['name'], 25 | "type": ops[op]['name'], 26 | "details":[ops[op]['op']] 27 | } }) 28 | if ops[op]['next']!="": 29 | graph_dict["edges"].append({ 'data': { 'id': op, 'source': ops[op]['name'], 'target': ops[op]['next']} }) 30 | return graph_dict 31 | 32 | 33 | ''' 34 | def makegraph_pipeline(edges,sequence,node_details): 35 | #print(node_details) 36 | graph_dict={"nodes":[],"edges":[]} 37 | color={"Queued":"#828282","Completed":"#80ff80","Failed":"#fc3d03","Started":"#ffff33"} 38 | for op in sequence: 39 | #print(op) 40 | graph_dict["nodes"].append({'data':{ 'id': op , 41 | "label":op , 42 | "color":color[node_details[op]["status"]] 43 | } }) 44 | 45 | for edge in edges: 46 | if edge["src"]==op: 47 | graph_dict["edges"].append({'data':{ 'id': edge["src"]+ edge["target"], 'source': edge["src"], 'target': edge["target"]} }) 48 | 49 | return graph_dict 50 | ''' 51 | def makegraph_pipeline(graph:dict,node_details:dict): 52 | """_summary_: Make graph format for Web Visualization 53 | 54 | Args: 55 | graph (dict): Contains the data structure for node -edge connection 56 | node_details (dict): Contains status and log history of nodes 57 | Returns: 58 | dict: Returns a dictionary with web format 59 | """ 60 | color={"Queued":"#828282","Completed":"#80ff80","Failed":"#fc3d03","Started":"#ffff33"} 61 | entry_node="root" 62 | graph_dict={"nodes":[],"edges":[]} 63 | _args_tag="args@" 64 | for op in graph: 65 | if op.startswith(_args_tag): 66 | op=op.strip(_args_tag) 67 | if entry_node==op: 68 | graph_dict["nodes"].append({'data':{ 'id': op , 69 | "label":op , 70 | "color":color["Completed"] 71 | } }) 72 | else: 73 | graph_dict["nodes"].append({'data':{ 'id': op , 74 | "label":op , 75 | "color":color[node_details[op]["status"]] 76 | } }) 77 | for edge in graph[op]: 78 | graph_dict["edges"].append({'data':{ 'id': op+ edge, 'source': op, 'target': edge} }) 79 | return graph_dict -------------------------------------------------------------------------------- /utils/database.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | MODEL_FOLDER_NAME="modelrun" 5 | 6 | PIPELINE_FOLDER_NAME="ML_pipelines" 7 | 8 | def create_folder(folder_path,name=None): 9 | """_summary_:create a folder for storing model information 10 | 11 | Returns: 12 | str: path for storing model details 13 | """ 14 | folder=MODEL_FOLDER_NAME 15 | if name!=None: folder=name 16 | path=os.path.join(folder_path,folder) 17 | 18 | if not os.path.exists(path): 19 | os.mkdir(path) 20 | return path 21 | 22 | 23 | def getfolders(path): 24 | return os.listdir(path) 25 | 26 | 27 | -------------------------------------------------------------------------------- /utils/factory.py: -------------------------------------------------------------------------------- 1 | """_summary_Contains all files and Folder names 2 | """ 3 | 4 | DEFAULT={ 5 | "ModelRunSave":"modelrun", 6 | "ModelRunInfo": "experiment.yaml", 7 | "RunInfo":"info.yaml", 8 | } -------------------------------------------------------------------------------- /utils/getschema.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | def schema_(data): 3 | """_summary_: Generate schema object for a dataframe 4 | 5 | Args: 6 | data (Pandas DataFrame): Pandas Artifact 7 | 8 | Returns: 9 | dict: with column schema 10 | """ 11 | schema={} 12 | details=[] 13 | for col in data: 14 | schema[col]={ 15 | 'min':float("{0:.4f}".format(data[col].min())), 16 | 'max':float("{0:.4f}".format(data[col].max())), 17 | 'std':float("{0:.4f}".format(data[col].std())), 18 | "variance":float("{0:.4f}".format(data[col].var())), 19 | "mean":float("{0:.4f}".format(data[col].mean())), 20 | "median":float("{0:.4f}".format(data[col].median())), 21 | "data type":str(data[col].dtype), 22 | "unique_values":int(len(data[col].unique())), 23 | "25th percentile":float("{0:.4f}".format(data[col].quantile(0.25))), 24 | "50% percentile":float("{0:.4f}".format(data[col].quantile(0.5))), 25 | "75% percentile":float("{0:.4f}".format(data[col].quantile(0.75))), 26 | } 27 | if len(details)==0: 28 | details=list(schema[col].keys()) 29 | #print("-------->",schema) 30 | return schema,details -------------------------------------------------------------------------------- /utils/uiutils.py: -------------------------------------------------------------------------------- 1 | 2 | from pymlpipe.utils import _sklearn_prediction,_torch_prediction 3 | 4 | 5 | def deployment_handler(model_path,model_type,runtime): 6 | if model_type=="scikit-learn": 7 | 8 | deployed=_sklearn_prediction.Deployment(model_path) 9 | elif model_type=="torch": 10 | deployed=_torch_prediction.Deployment(model_path,typeof=runtime) 11 | return deployed -------------------------------------------------------------------------------- /utils/yamlio.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os 3 | def read_yaml(path): 4 | if not os.path.exists(path): 5 | return [] 6 | with open(path) as file: 7 | fulllist = yaml.load(file, Loader=yaml.FullLoader) 8 | return fulllist 9 | 10 | 11 | def write_to_yaml(path,info): 12 | with open(os.path.join(path), 'w') as file: 13 | documents = yaml.dump(info, file) 14 | --------------------------------------------------------------------------------