├── .gitignore
├── README.md
├── __init__.py
├── api.py
├── automl.py
├── legacy
    └── pipeline.py
├── pipeline.py
├── pymlpipeUI.py
├── queue.py
├── requirements.txt
├── samples
    ├── runner_pipeline_server.py
    ├── runner_pymlpipeUI.py
    ├── test_MLpipeline.py
    ├── test_api.py
    ├── test_automl_run.py
    ├── test_cases_pipeline.py
    ├── test_create_pipeline.py
    ├── test_dl_torch_train.py
    └── test_mltrain.py
├── static
    ├── Screenshot 2022-07-04 at 1.42.35 PM.png
    ├── Screenshot 2022-07-04 at 1.42.52 PM.png
    ├── Screenshot 2022-07-04 at 1.43.03 PM.png
    ├── Screenshot 2022-07-04 at 1.43.52 PM.png
    ├── Screenshot 2022-07-04 at 1.44.05 PM.png
    ├── Screenshot 2022-07-16 at 8.03.29 PM.png
    ├── Screenshot 2022-07-16 at 8.03.50 PM.png
    ├── Screenshot 2022-07-16 at 8.04.00 PM.png
    ├── Screenshot 2022-07-16 at 8.04.08 PM.png
    ├── Screenshot 2022-07-16 at 8.04.21 PM.png
    ├── XAI.png
    ├── download.png
    ├── favicon.ico
    ├── filter.svg
    ├── logo.svg
    ├── pipelineUI 2.png
    ├── pipelineUI 3.png
    ├── pipelineUI.png
    ├── pipelineUI_1.png
    ├── pipelineUI_2.png
    ├── start.png
    ├── start.svg
    └── start1.svg
├── tabular.py
├── templates
    ├── check_deployment.html
    ├── deployments.html
    ├── index.html
    ├── job_view.html
    ├── jobs.html
    ├── run.html
    └── template.html
└── utils
    ├── __init__.py
    ├── _sklearn_prediction.py
    ├── _torch_prediction.py
    ├── _xai.py
    ├── change2graph.py
    ├── database.py
    ├── factory.py
    ├── getschema.py
    ├── uiutils.py
    └── yamlio.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | */__pycache__
  2 | *.pkl
  3 | modelrun
  4 | *.csv
  5 | *.pyc
  6 | 
  7 | # Created by https://www.toptal.com/developers/gitignore/api/python
  8 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
  9 | 
 10 | ### Python ###
 11 | # Byte-compiled / optimized / DLL files
 12 | __pycache__/
 13 | *.py[cod]
 14 | *$py.class
 15 | 
 16 | # C extensions
 17 | *.so
 18 | 
 19 | # Distribution / packaging
 20 | .Python
 21 | build/
 22 | develop-eggs/
 23 | dist/
 24 | downloads/
 25 | eggs/
 26 | .eggs/
 27 | lib/
 28 | lib64/
 29 | parts/
 30 | sdist/
 31 | var/
 32 | wheels/
 33 | share/python-wheels/
 34 | *.egg-info/
 35 | .installed.cfg
 36 | *.egg
 37 | MANIFEST
 38 | 
 39 | # PyInstaller
 40 | #  Usually these files are written by a python script from a template
 41 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 42 | *.manifest
 43 | *.spec
 44 | 
 45 | # Installer logs
 46 | pip-log.txt
 47 | pip-delete-this-directory.txt
 48 | 
 49 | # Unit test / coverage reports
 50 | htmlcov/
 51 | .tox/
 52 | .nox/
 53 | .coverage
 54 | .coverage.*
 55 | .cache
 56 | nosetests.xml
 57 | coverage.xml
 58 | *.cover
 59 | *.py,cover
 60 | .hypothesis/
 61 | .pytest_cache/
 62 | cover/
 63 | 
 64 | # Translations
 65 | *.mo
 66 | *.pot
 67 | 
 68 | # Django stuff:
 69 | *.log
 70 | local_settings.py
 71 | db.sqlite3
 72 | db.sqlite3-journal
 73 | 
 74 | # Flask stuff:
 75 | instance/
 76 | .webassets-cache
 77 | 
 78 | # Scrapy stuff:
 79 | .scrapy
 80 | 
 81 | # Sphinx documentation
 82 | docs/_build/
 83 | 
 84 | # PyBuilder
 85 | .pybuilder/
 86 | target/
 87 | 
 88 | # Jupyter Notebook
 89 | .ipynb_checkpoints
 90 | 
 91 | # IPython
 92 | profile_default/
 93 | ipython_config.py
 94 | 
 95 | # pyenv
 96 | #   For a library or package, you might want to ignore these files since the code is
 97 | #   intended to run in multiple environments; otherwise, check them in:
 98 | # .python-version
 99 | 
100 | # pipenv
101 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
102 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
103 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
104 | #   install all needed dependencies.
105 | #Pipfile.lock
106 | 
107 | # poetry
108 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
109 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
110 | #   commonly ignored for libraries.
111 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
112 | #poetry.lock
113 | 
114 | # pdm
115 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
116 | #pdm.lock
117 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
118 | #   in version control.
119 | #   https://pdm.fming.dev/#use-with-ide
120 | .pdm.toml
121 | 
122 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
123 | __pypackages__/
124 | 
125 | # Celery stuff
126 | celerybeat-schedule
127 | celerybeat.pid
128 | 
129 | # SageMath parsed files
130 | *.sage.py
131 | 
132 | # Environments
133 | .env
134 | .venv
135 | env/
136 | venv/
137 | ENV/
138 | env.bak/
139 | venv.bak/
140 | 
141 | # Spyder project settings
142 | .spyderproject
143 | .spyproject
144 | 
145 | # Rope project settings
146 | .ropeproject
147 | 
148 | # mkdocs documentation
149 | /site
150 | 
151 | # mypy
152 | .mypy_cache/
153 | .dmypy.json
154 | dmypy.json
155 | 
156 | # Pyre type checker
157 | .pyre/
158 | 
159 | # pytype static type analyzer
160 | .pytype/
161 | 
162 | # Cython debug symbols
163 | cython_debug/
164 | 
165 | # PyCharm
166 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
167 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
168 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
169 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
170 | #.idea/
171 | 
172 | # End of https://www.toptal.com/developers/gitignore/api/python


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
   1 |   
   2 | 
   3 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/main/static/logo.svg?raw=true)
   4 | 
   5 |   
   6 | 
   7 | [![Downloads](https://static.pepy.tech/personalized-badge/pymlpipe?period=total&units=international_system&left_color=black&right_color=green&left_text=Downloads)](https://pepy.tech/project/pymlpipe)
   8 | 
   9 | [![Downloads](https://pepy.tech/badge/pymlpipe/month)](https://pepy.tech/project/pymlpipe)
  10 | 
  11 | ![alt text](https://badgen.net/badge/version/0.2.7/red?icon=github)
  12 | 
  13 | ![](https://badgen.net/pypi/python/black)
  14 | 
  15 | ![](https://badgen.net/badge/pypi/0.2.6/orange?icon=pypi)
  16 | 
  17 | ![](https://badgen.net/pypi/license/pip)
  18 | 
  19 | # PyMLpipe
  20 | 
  21 |   
  22 | 
  23 | PyMLpipe is a Python library for easy Machine Learning model monitoring and deployment.
  24 | 
  25 | * Simple
  26 | * Intuitive
  27 | * Easy to use
  28 | 
  29 | **What's New in 0.2.7**
  30 | 1. Explainable AI
  31 | 2. Data Pipeline
  32 | 3. AutoML support
  33 | 
  34 | Please Find the Full [documentation](https://neelindresh.github.io/pymlpipe.documentation.io/) here!
  35 | 
  36 | ## Installation
  37 | 
  38 | Use the package manager [pip](https://pypi.org/project/pymlpipe/) to install PyMLpipe.
  39 | 
  40 |   
  41 | 
  42 | ```bash
  43 | 
  44 | pip install pymlpipe
  45 | 
  46 | ```
  47 | 
  48 | or
  49 | 
  50 | ```bash
  51 | 
  52 | pip3 install pymlpipe
  53 | 
  54 | ```
  55 | 
  56 | ## Supported Frameworks
  57 | 
  58 | - [X] Scikit-Learn
  59 | 
  60 | - [X] XGBoost
  61 | 
  62 | - [X] LightGBM
  63 | 
  64 | - [X] Pytorch
  65 | 
  66 | - [ ] Tensorflow
  67 | 
  68 | - [ ] Keras
  69 | 
  70 |   
  71 |   
  72 |   
  73 | 
  74 | ## Tutorial (Scikit-Learn|XGBoost|LightGBM)
  75 | 
  76 |   
  77 | 
  78 | * Load the python package
  79 | 
  80 |   
  81 | 
  82 | ```python
  83 | 
  84 | from pymlpipe.tabular import PyMLPipe
  85 | 
  86 | ```
  87 | 
  88 |   
  89 | 
  90 | * Initiate the `PyMLPipe` class
  91 | 
  92 |   
  93 | 
  94 | ```python
  95 | 
  96 | mlp=PyMLPipe()
  97 | 
  98 | ```
  99 | 
 100 |   
 101 | 
 102 | * Set an Experiment Name `[Optional]`-Default experiment name is `'0'`
 103 | 
 104 |   
 105 | 
 106 | ```python
 107 | 
 108 | mlp.set_experiment("IrisDataV2")
 109 | 
 110 | ```
 111 | 
 112 |   
 113 | 
 114 | * Set a version `[Optional]`-Default there is no version
 115 | 
 116 |   
 117 | 
 118 | ```python
 119 | 
 120 | mlp.set_version(0.1)
 121 | 
 122 | ```
 123 | 
 124 |   
 125 | 
 126 | * Initiate the context manager - This creates a unique ID for each model run.
 127 | 
 128 | - when `.run()` is used - a unique ID is generated automatically
 129 | 
 130 | - you can also provide a `runid` argument to `.run()`; the given `runid` will then be used for storing the run.
 131 | 
 132 |   
 133 | 
 134 | ```python
 135 | 
 136 | with mlp.run():
 137 | 
 138 | ```
 139 | 
 140 | Or
 141 | 
 142 |   
 143 | 
 144 | ```python
 145 | 
 146 | with mlp.run(runid='mlopstest'):
 147 | 
 148 | ```
 149 | 
 150 |   
 151 | 
 152 | * Set a Tag `[Optional]` by using `set_tag()`-Default there is no tags
 153 | 
 154 |   
 155 | 
 156 | ```python
 157 | 
 158 | mlp.set_tag('tag')
 159 | 
 160 | ```
 161 | 
 162 | Or
 163 | 
 164 |   
 165 | 
 166 | * Set multiple Tags `[Optional]` by using `set_tags()`-Default there is no tags
 167 | 
 168 | ```python
 169 | 
 170 | mlp.set_tags(["Classification","test run","logisticRegression"])
 171 | 
 172 |   
 173 | 
 174 | ```
 175 | 
 176 |   
 177 | 
 178 | * Set Metrics values `[Optional]` by using `log_metric(metric_name,metric_value)`-Default there is no metrics
 179 | 
 180 | This will help in comparing performance of different models and model versions
 181 | 
 182 | ```python
 183 | 
 184 | mlp.log_metric("Accuracy", accuracy_score(testy,predictions))
 185 | 
 186 |   
 187 |   
 188 | 
 189 | mlp.log_metric("Accuracy", .92)
 190 | 
 191 |   
 192 | 
 193 | ```
 194 | 
 195 |   
 196 | 
 197 | * Set multiple Metrics values `[Optional]` by using `log_metrics({metric_name:metric_value})`-Default there is no metrics
 198 | 
 199 | ```python
 200 | 
 201 | mlp.log_metrics(
 202 | 	{
 203 | 	"Accuracy": accuracy_score(testy,predictions),
 204 | 	"Precision": precision_score(testy,predictions,average='macro'),
 205 | 	"Recall": recall_score(testy,predictions,average='macro'),
 206 | 	}
 207 | )
 208 | 
 209 | mlp.log_metrics({
 210 | 	"Accuracy": .92,
 211 | 	"Precision": .87,
 212 | 	"Recall": .98,
 213 | 	}
 214 | )
 215 | ```
 216 | 
 217 | * Save an artifact `[Optional]` - You can save training/testing/validation/dev/prod data for monitoring and comparison
 218 | 
 219 | - This will also help in generating `DATA SCHEMA`
 220 | 
 221 | - `register_artifact()` takes 3 arguments:
 222 | 
 223 |   - name of artifact
 224 | 
 225 |   - Pandas Dataframe
 226 | 
 227 |   - type of artifact - `[training, testing, validation, dev, prod]`
 228 | 
 229 | - You can also use `register_artifact_with_path()` - This will save the artifact from the disk.
 230 | 
 231 |   - Path for the file
 232 | 
 233 |   - type of artifact - `[training, testing, validation, dev, prod]`
 234 | 
 235 |   
 236 | 
 237 | ```python
 238 | mlp.register_artifact("train.csv", trainx)
 239 | mlp.register_artifact("test.csv", testx, artifact_type="testing")
 240 | ```
 241 | 
 242 | * Register Model `[Optional]` - You can register the model. This will help in Quick deployment
 243 | 
 244 | 
 245 | ```python
 246 | mlp.scikit_learn.register_model("logistic regression", model)
 247 | ```
 248 | 
 249 | #### XAI
 250 | 
 251 | To get model explanations and feature importance, we can use `explainer()`.
 252 | `explainer()` takes two objects:
 253 | - model - the model used for training
 254 | - trainx - the training data
 255 | 
 256 | ```python
 257 | mlp.explainer(model,trainx)
 258 | ```
 259 | 
 260 | ## Quick Start (Scikit-Learn|XGBoost|LightGBM)
 261 | 
 262 |   
 263 | 
 264 | ```python
 265 | 
 266 | from sklearn.datasets import load_iris
 267 | import pandas as pd
 268 | from sklearn.model_selection import train_test_split
 269 | from sklearn.linear_model import LogisticRegression
 270 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
 271 | #import PyMLPipe from tabular
 272 | from pymlpipe.tabular import PyMLPipe
 273 | 
 274 | # Initiate the class
 275 | mlp=PyMLPipe()
 276 | # Set experiment name
 277 | mlp.set_experiment("IrisDataV2")
 278 | # Set Version name
 279 | mlp.set_version(0.2)
 280 | 
 281 | iris_data=load_iris()
 282 | 
 283 | data=iris_data["data"]
 284 | 
 285 | target=iris_data["target"]
 286 | 
 287 | df=pd.DataFrame(data,columns=iris_data["feature_names"])
 288 | 
 289 | trainx,testx,trainy,testy=train_test_split(df,target)
 290 | 
 291 | # to start monitoring use mlp.run()
 292 | 
 293 | with mlp.run():
 294 | 	# set tags
 295 | 	mlp.set_tags(["Classification","test run","logisticRegression"])
 296 | 	model=LogisticRegression()
 297 | 	model.fit(trainx, trainy)
 298 | 	predictions=model.predict(testx)
 299 | 	# log performance metrics
 300 | 	mlp.log_metric("Accuracy", accuracy_score(testy,predictions))
 301 | 	mlp.log_metric("Precision", precision_score(testy,predictions,average='macro'))
 302 | 	mlp.log_metric("Recall", recall_score(testy,predictions,average='macro'))
 303 | 	mlp.log_metric("F1", f1_score(testy,predictions,average='macro'))
 304 | 	# Save train data and test data
 305 | 	mlp.register_artifact("train", trainx)
 306 | 	mlp.register_artifact("test", testx,artifact_type="testing")
 307 | 	# Save the model
 308 | 	mlp.scikit_learn.register_model("logistic regression", model)
 309 | 	# Model explainer 
 310 | 	mlp.explainer(model,trainx)
 311 | ```
 312 | 
 313 |   
 314 | 
 315 | ## Launch UI
 316 | To start the UI
 317 | 
 318 | ```bash
 319 | 
 320 | pymlpipeui
 321 | 
 322 | ```
 323 | 
 324 | or
 325 | 
 326 | ```python
 327 | 
 328 | from pymlpipe.pymlpipeUI import start_ui
 329 | 
 330 | start_ui(host='0.0.0.0', port=8085)
 331 | 
 332 | ```
 333 | 
 334 | #### Sample UI
 335 | 
 336 |   
 337 |   
 338 | 
 339 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.42.35%20PM.png?raw=true)
 340 | 
 341 |   
 342 | 
 343 | ---
 344 | 
 345 |   
 346 | 
 347 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.42.52%20PM.png?raw=true)
 348 | 
 349 |   ---
 350 | XAI 
 351 | 
 352 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/XAI.png)
 353 | 
 354 | ---
 355 | 
 356 | #### One Click Deployment - click the deploy button to deploy the model and get an endpoint
 357 | 
 358 |   
 359 |   
 360 | 
 361 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.43.03%20PM.png?raw=true)
 362 | 
 363 |   
 364 | 
 365 | ---
 366 | 
 367 |   
 368 |   
 369 | 
 370 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.43.52%20PM.png?raw=true)
 371 | 
 372 |   
 373 | 
 374 | ---
 375 | 
 376 |   
 377 | 
 378 | ## Send the data to the Prediction end point in the format
 379 | 
 380 |   
 381 | 
 382 | - Each list is a row of data
 383 | 
 384 | ```python
 385 | 
 386 | {
 387 | 	"data":[
 388 | 		[5.6,3.0,4.5,1.5],
 389 | 		[5.6,3.0,4.5,1.5]	
 390 | 	]
 391 | }
 392 | 
 393 | ```
 394 | 
 395 |   
 396 | 
 397 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/development/static/Screenshot%202022-07-04%20at%201.44.05%20PM.png?raw=true)
 398 | 
 399 |   
 400 | 
 401 | ---
 402 | 
 403 | ## Tutorial (Pytorch)
 404 | 
 405 | #### The previous methods can be used as it is. New methods are shown below
 406 | 
 407 | * Log continuous metrics `.log_metrics_continious(dict)--> dict of metrics`
 408 | 
 409 | - logs the metrics in a continuous manner for each epoch
 410 | 
 411 |   
 412 | 
 413 | ```python
 414 | 
 415 | mlp.log_metrics_continious({
 416 | 
 417 | 	"accuracy": .9,
 418 | 	
 419 | 	"precision": .8,
 420 | 	
 421 | 	"recall": .7
 422 | 
 423 | })
 424 | 
 425 | ```
 426 | 
 427 |   
 428 | 
 429 | * To register a pytorch model use `.pytorch.register_model(modelname, modelobject)`
 430 | 
 431 | - this will save the model in a .pt file in `torch.jit` format for serving and prediction
 432 | 
 433 |   
 434 | 
 435 | ```python
 436 | mlp.pytorch.register_model("pytorch_example1", model)
 437 | ```
 438 | 
 439 | * To register a pytorch model use `.pytorch.register_model_with_runtime(modelname, modelobject, train_data_sample)`
 440 | 
 441 |   
 442 | 
 443 | - `train_data_sample` - a sample of the input data. It can be random numbers, but it needs the correct tensor dimensions
 444 | 
 445 | - This method is `preferred`, as in `future releases` these models can then be converted to other formats as well, e.g. "onnx", "hd5"
 446 | 
 447 |   
 448 | 
 449 | ```python
 450 | mlp.pytorch.register_model_with_runtime("pytorch_example1", model, train_x)
 451 | ```
 452 | 
 453 |   
 454 | 
 455 | ## Quick Start (Pytorch)
 456 | 
 457 | ```python
 458 | 
 459 | import torch
 460 | import pandas as pd
 461 | from sklearn.preprocessing import LabelEncoder
 462 | from sklearn.model_selection import train_test_split
 463 | from sklearn.metrics import accuracy_score,f1_score
 464 | from pymlpipe.tabular import PyMLPipe
 465 | 
 466 | df=pd.read_csv("train.csv")
 467 | 
 468 | encoders=["area_code","state","international_plan","voice_mail_plan","churn"]
 469 | 
 470 | for i in encoders:
 471 | 
 472 | 	le=LabelEncoder()
 473 | 	
 474 | 	df[i]=le.fit_transform(df[i])
 475 | 
 476 | trainy=df["churn"]
 477 | 
 478 | trainx=df[['state', 'account_length', 'area_code', 'international_plan',
 479 | 'voice_mail_plan', 'number_vmail_messages', 'total_day_minutes',
 480 | 'total_day_calls', 'total_day_charge', 'total_eve_minutes',
 481 | 'total_eve_calls', 'total_eve_charge', 'total_night_minutes',
 482 | 'total_night_calls', 'total_night_charge', 'total_intl_minutes',
 483 | 'total_intl_calls', 'total_intl_charge',
 484 | 'number_customer_service_calls']]
 485 | 
 486 | 
 487 | class Model(torch.nn.Module):
 488 | 	
 489 | 	def __init__(self,col_size):
 490 | 	
 491 | 		super().__init__()
 492 | 		
 493 | 		# using sequencial
 494 | 		
 495 | 		self.seq=torch.nn.Sequential(
 496 | 			torch.nn.Linear(col_size,15),
 497 | 			torch.nn.ReLU(),
 498 | 			torch.nn.Linear(15,10),
 499 | 			torch.nn.ReLU(),
 500 | 			torch.nn.Linear(10,1)
 501 | 		)
 502 | 
 503 | 		#using torch layers
 504 | 
 505 | 	def forward(self,x):
 506 | 		out=self.seq(x)
 507 | 	
 508 | 		return torch.sigmoid(out)
 509 | 
 510 | model=Model(len(trainx.columns))
 511 | train_x,test_x,train_y,test_y=train_test_split(trainx,trainy)
 512 | train_x=torch.from_numpy(train_x.values)
 513 | train_x=train_x.type(torch.FloatTensor)
 514 | train_y=torch.from_numpy(train_y.values)
 515 | train_y=train_y.type(torch.FloatTensor)
 516 | test_x=torch.from_numpy(test_x.values)
 517 | test_x=test_x.type(torch.FloatTensor)
 518 | test_y=torch.from_numpy(test_y.values)
 519 | test_y=test_y.type(torch.FloatTensor)
 520 | 
 521 | optimizer=torch.optim.SGD(model.parameters(),lr=0.001)
 522 | criterion=torch.nn.BCELoss()
 523 | 
 524 | def validate(model,testx,testy):
 525 | 
 526 | 	prediction=model(testx)
 527 | 	
 528 | 	prediction=torch.where(prediction>.5,1,0)
 529 | 	accu=accuracy_score(
 530 | 	prediction.detach().numpy(),test_y.unsqueeze(1).detach().numpy()
 531 | 	)
 532 | 	
 533 | 	f1=f1_score(prediction.detach().numpy(),test_y.unsqueeze(1).detach().numpy())
 534 | 	
 535 | 	return {"accuracy":accu,"f1":f1}
 536 | 
 537 |   
 538 |   
 539 | 
 540 | epochs=100
 541 | 
 542 | batch_size=1000
 543 | 
 544 |   
 545 | 
 546 | mlp=PyMLPipe()
 547 | 
 548 | mlp.set_experiment("Pytorch")
 549 | 
 550 | mlp.set_version(0.2)
 551 | 
 552 |   
 553 | 
 554 | with mlp.run():
 555 | 
 556 | 	mlp.register_artifact("churndata.csv",df)
 557 | 	
 558 | 	mlp.log_params({
 559 | 	
 560 | 	"lr":0.01,
 561 | 	
 562 | 	"optimizer":"SGD",
 563 | 	
 564 | 	"loss_fuction":"BCEloss"
 565 | 	
 566 | 	})
 567 | 	
 568 | 	for epoch in range(epochs):
 569 | 	
 570 | 		loss_batch=0
 571 | 	
 572 | 		for batch in range(1000,5000,1000):
 573 | 			optimizer.zero_grad()
 574 | 			train_data=train_x[batch-1000:batch]
 575 | 			output=model(train_data)
 576 | 			loss=criterion(output,train_y[batch-1000:batch].unsqueeze(1))
 577 | 			loss.backward()
 578 | 			optimizer.step()
 579 | 			loss_batch+=loss.item()
 580 | 			
 581 | 		metrics=validate(model,test_x,test_y)
 582 | 		metrics["loss"]=loss_batch
 583 | 		metrics["epoch"]=epoch
 584 | 		mlp.log_metrics_continious(metrics)
 585 | 		mlp.pytorch.register_model("pytorch_example1", model)
 586 | 
 587 | ```
 588 | 
 589 |   
 590 | 
 591 | ## UI for Pytorch Models
 592 | 
 593 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.03.29%20PM.png?raw=true)
 594 | 
 595 |   
 596 | 
 597 | ###### Visualize the Model details
 598 | 
 599 |   
 600 | 
 601 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.03.50%20PM.png?raw=true)
 602 | 
 603 |   
 604 | 
 605 | ###### Visualize the Model Architecture
 606 | 
 607 |   
 608 |   
 609 | 
 610 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.04.00%20PM.png?raw=true)
 611 | 
 612 |   
 613 | 
 614 | ###### View Training Logs
 615 | 
 616 |   
 617 |   
 618 | 
 619 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.04.08%20PM.png?raw=true)
 620 | 
 621 |   
 622 | 
 623 | ###### Visualize Training Logs
 624 | 
 625 |   
 626 | 
 627 | ![alt text](https://github.com/neelindresh/pymlpipe/blob/dev/static/Screenshot%202022-07-16%20at%208.04.21%20PM.png?raw=true)
 628 | 
 629 |   
 630 |   
 631 | 
 632 | ### Sample input for prediction
 633 | 
 634 | `GET REQUEST` - to get info for the model
 635 | 
 636 | - `info` : Contains model information
 637 | 
 638 | - `request_body`: Sample post Request
 639 | 
 640 | ```python
 641 | 
 642 | {
 643 | 
 644 | "info": {
 645 | 
 646 | 			"experiment_id": "Pytorch",
 647 | 			
 648 | 			"model_deployment_number": "51c186ddd125386c",
 649 | 			
 650 | 			"model_mode": "non_runtime",
 651 | 			
 652 | 			"model_type": "torch",
 653 | 			
 654 | 			"model_url": "/predict/51c186ddd125386c",
 655 | 			
 656 | 			"run_id": "3fffe458-9676-4bc7-a6c0-a3b4cf38e277",
 657 | 			
 658 | 			"status": "running"
 659 | 
 660 | 	},
 661 | 
 662 | 	"request_body": {
 663 | 		"data": [
 664 | 		[
 665 | 			42.0,120.0,1.0,0.0,0.0,0.0,185.7,133.0,31.57,235.1,149.0,19.98,
 666 | 			256.4,78.0,11.54,16.9,6.0,4.56,0.0
 667 | 		]
 668 | 	],
 669 | 	"dtype": "float"	
 670 | 	}
 671 | }
 672 | 
 673 | ```
 674 | 
 675 |   
 676 | 
 677 | For `POST REQUEST`
 678 | 
 679 | - `data` --> list: contains data rows for prediction; supports both batch prediction and single instance, e.g. data --> [ [ 0,1,2,3],[3,4,56 ] ]
 680 | 
 681 | - `dtype` --> str: for type conversion; converts the data into a tensor of the required data type
 682 | 
 683 |   
 684 |   
 685 | 
 686 | ```
 687 | 
 688 | {
 689 | "data": [
 690 | 			[
 691 | 			42.0,120.0,1.0,0.0,0.0,0.0,185.7,133.0,31.57,235.1,149.0,19.98,
 692 | 			256.4,78.0,11.54,16.9,6.0,4.56,0.0
 693 | 			]
 694 | 		],
 695 | 		"dtype": "float"
 696 | }
 697 | 
 698 | ```
 699 | 
 700 | ## Quick Start (AutoML)
 701 | 
 702 |   
 703 | 
 704 | ```python
 705 | 
 706 | from automl import AutoMLPipe
 707 | from sklearn.datasets import load_iris,load_diabetes
 708 | import pandas as pd
 709 | import numpy as np
 710 | 
 711 | def main():
 712 | 
 713 | 	load_data=load_diabetes()
 714 | 	data=load_data["data"]
 715 | 	target=load_data["target"]
 716 | 	
 717 | 	df=pd.DataFrame(data,columns=load_data["feature_names"])
 718 | 	automl_obj=AutoMLPipe(
 719 | 		exp_name="DiabAutoMLV1",
 720 | 		task="regression",
 721 | 		metric="RMSE",
 722 | 		data=df,
 723 | 		label=target,
 724 | 		tags=["new_data","reg"],
 725 | 		test_size=0.2,
 726 | 		version=1.0,
 727 | 		transform=True,
 728 | 		scale='normalize',
 729 | 		cols_to_scale=[],
 730 | 		categorical_cols=[],
 731 | 		register_model=True,
 732 | 		explain=True,exclude=[]
 733 | 	)
 734 | 	preds,result=automl_obj.run_automl(tune=True,tune_best=False)
 735 | 	#DataFrame with comparative metrics of all the models
 736 | 	print(result)
 737 | 	#Dictionary with model names and the predictions
 738 | 	print(preds)
 739 | if __name__ == '__main__':
 740 | 
 741 | 	main()
 742 | 
 743 | ```
 744 | 
 745 | The AutoML class is simple to run, and with the help of a few lines of code you'll be able to run several models on your data. You can even choose to hyperparameter tune every model, or you can just tune the best model based on the metric that you provide. Below are the simple steps to start your AutoML experiment.
 746 | 
 747 | - Load the data
 748 | 
 749 | - Transform it into X & y datasets.
 750 | 
 751 | - Instantiate the AutoMLPipe class:
 752 | 
 753 | - `exp_name`: name of experiment
 754 | 
 755 | - `task`: regression/classification
 756 | 
 757 | - `metric`: for classification -> accuracy,recall,precision,f1/ for regression -> MAE,MSE,RMSE,R2 Score
 758 | 
 759 | - `data`: data on which the model to be fit
 760 | 
 761 | - `label`: target variable
 762 | 
 763 | - `tags`: list of custom-tags for the run
 764 | 
 765 | - `test_size`: size of test dataset
 766 | 
 767 | - `version`: experiment version
 768 | 
 769 | - `transform`: If transformation is to be applied on the dataset.
 770 | 
 771 | - `scale`: 'standard'/'minmax'/'normalize'
 772 | 
 773 | - `cols_to_scale`: list of columns to scale. Should be numeric or float
 774 | 
 775 | - `categorical_cols`: columns to one-hot encode
 776 | 
 777 | - `register_model`: register experiment model
 778 | 
 779 | - `register_artifacts`: register experiment artifacts
 780 | 
 781 | - `explain`: xai implementation
 782 | 
 783 | - `exclude`: models to be excluded during autoML runs
 784 | 
 785 | - run the experiment by calling the `run_automl` function.
 786 | 
 787 | - `tune=True`: Every AutoML model will be hyperparameter tuned.
 788 | 
 789 | - `tune_best=True`: Only the best model will be hyperparameter tuned.
 790 | 
 791 | - Now you can see the experiment running in the ui page and also in the console.
 792 | 
 793 | - Once it is completed you will get results and predictions of the runs.
 794 | 
 795 | - If `tune_best=False`: The `result` will have the dataframe with metrics of each model. The `preds` will contain the dictionary of all the prediction values of all the models.
 796 | 
 797 | - If `tune_best=True`: The `result` will have the dataframe with metrics of each model. The `preds` will contain a list of prediction values of the hyperparameter tuned best model.
 798 | 
 799 | 
 800 |   
 801 |   
 802 |   ## Quick Start (Data Pipeline)
 803 | 
 804 | This is a sample code for data pipeline. 
 805 | **Please don't take the code too seriously**
 806 | 
 807 |   
 808 | ```python 
 809 | #filename : sample.py
 810 | from pymlpipe import pipeline
 811 | 
 812 |   
 813 | 
 814 | pl=pipeline.PipeLine("TestCase")
 815 | 
 816 |   
 817 | # Just some random functions
 818 | def fetch_data():
 819 | 
 820 | 	dict_data={
 821 | 
 822 | 	"var":"this is a random string:",
 823 | 	
 824 | 	"path":"this is some random path"
 825 | 	
 826 | 	}
 827 | 
 828 | 	return dict_data
 829 | 
 830 | def get_dict_values(data_dict):
 831 | 
 832 | 	new_var=[v for k,v in data_dict.items()]
 833 | 
 834 | 	return new_var
 835 | 
 836 |   
 837 | 
 838 | def get_dict_keys(data_dict):
 839 | 
 840 | 	new_var=[k for k,v in data_dict.items()]
 841 | 
 842 | 	return new_var
 843 | 
 844 | def a_edge_node(values):
 845 | 
 846 | 	print(values)
 847 | 
 848 | def dump_data(keys,values):
 849 | 
 850 | 	dict_data_rev={k:v for k,v in zip(keys,values)}
 851 | 
 852 | 	print(dict_data_rev)
 853 |   
 854 |   
 855 | 
 856 | pl.add_node("fetch_data",fetch_data,entry_node=True)
 857 | 
 858 | pl.add_node("get_dict_values",get_dict_values,input_nodes=["fetch_data"])
 859 | 
 860 | pl.add_node("get_dict_keys",get_dict_keys,input_nodes=["fetch_data"])
 861 | 
 862 | pl.add_node("a_edge_node",a_edge_node,input_nodes=["get_dict_values"])
 863 | 
 864 |   
 865 | 
 866 | pl.add_node("dump_data",dump_data,input_nodes=["get_dict_keys","get_dict_values"])
 867 | 
 868 | 
 869 | 
 870 | pl.register_dag()
 871 | 
 872 | 
 873 | 
 874 | ```
 875 | 
 876 | To define a pipeline Object we can use:
 877 | We are naming the pipeline `TestCase`
 878 | 
 879 | ```python
 880 | from pymlpipe import pipeline
 881 | pl=pipeline.PipeLine("TestCase")
 882 | ```
 883 | 
 884 | The `add_node` function takes
 885 | 
 886 | ```
 887 | 
 888 | node_name (str): Name of the node
 889 | 
 890 | function (_type_): Python function you want to execute
 891 | 
 892 | input_nodes (list, optional): List of nodes that are connected to this node. The connected nodes should return a value which will act as an input to the node . Defaults to None.
 893 | 
 894 | entry_node (bool, optional): boolean flag indicating if this is the starting node(first node). Defaults to False.
 895 | 
 896 | args (list, optional): Run time arguments . Defaults to None.
 897 | ```
 898 | 
 899 | 
 900 | ```python
 901 | 
 902 | pl.add_node("fetch_data",fetch_data,entry_node=True)
 903 | ```
 904 | 
 905 | The `register_dag` function creates a Dag 
 906 | 
 907 | ```python
 908 | pl.register_dag()
 909 | ```
 910 | 
 911 | StepRun : Once done you can run the file using `python3 sample.py`
 912 | 
 913 | To test the Code you can run 
 914 | ```python
 915 | from pymlpipe import pipeline
 916 | 
 917 |   
 918 | 
 919 | ppl=pipeline.PipeLine("TestCase")
 920 | 
 921 | ppl.load_pipeline()
 922 | 
 923 | ppl.run()
 924 | ```
 925 | 
 926 | 
 927 | The `load_pipeline` will load the pipeline dag saved after *StepRun* 
 928 | The `run` function will run the given pipeline.
 929 | ```python
 930 | ppl.run()
 931 | ```
 932 | 
 933 | 
 934 | or you can go to web browser by running the command
 935 | 
 936 | ```bash
 937 | ~ pymlpipeui
 938 | ```
 939 | 
 940 | Or starting the UI with
 941 | 
 942 | ```python
 943 | from pymlpipe.pymlpipeUI import start_ui
 944 | 
 945 | start_ui(host='0.0.0.0', port=8085,debug=True)
 946 | ```
 947 | 
 948 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI.png)
 949 | 
 950 | This is a sample control page for the pipeline
 951 | 
 952 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI%202.png)
 953 | 
 954 | Sample Dag  
 955 | 
 956 | Node in <span style="color:green">GREEN</span> --> Completed Node<br>
 957 | Node in <span style="color:red">RED</span> --> Failed Node
 958 | 
 959 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI_1.png)
 960 | 
 961 | 
 962 | # Integrate with Model Monitoring
 963 | 
 964 | ```python
 965 | from pymlpipe import pipeline
 966 | import pandas as pd
 967 | from sklearn.datasets import load_iris
 968 | import pandas as pd
 969 | from sklearn.model_selection import train_test_split
 970 | from pymlpipe.tabular import PyMLPipe
 971 | from sklearn.linear_model import LogisticRegression
 972 | from sklearn.ensemble import RandomForestClassifier
 973 | from sklearn.tree import DecisionTreeClassifier
 974 | from xgboost import XGBClassifier
 975 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
 976 | import time
 977 | 
 978 | ppl=pipeline.PipeLine("IrisData")
 979 | mlp=PyMLPipe()
 980 | mlp.set_experiment("pipelinecheck")
 981 | mlp.set_version(0.1)
 982 | 
 983 | def get_data():
 984 | 	iris_data=load_iris()
 985 | 	data=iris_data["data"]
 986 | 	target=iris_data["target"]
 987 | 	df=pd.DataFrame(data,columns=iris_data["feature_names"])
 988 | 	trainx,testx,trainy,testy=train_test_split(df,target)
 989 | 	return {"trainx":trainx,"trainy":trainy,"testx":testx,"testy":testy}
 990 | 
 991 | def get_model(model):
 992 | 	if model==0:
 993 | 		return LogisticRegression()
 994 | 	elif model==1:
 995 | 		return RandomForestClassifier()
 996 | 
 997 | def train_model(data,model_name):
 998 | 	with mlp.run():
 999 | 		trainx,trainy=data["trainx"],data["trainy"]
1000 | 		mlp.set_tags(["Classification","test run","logisticRegression"])
1001 | 		model=get_model(model_name)
1002 | 		model.fit(trainx, trainy)
1003 | 		mlp.scikit_learn.register_model(str(model_name), model)
1004 | 	return model
1005 | 
1006 | def evaluate(data,model):
1007 | 
1008 | 	testx,testy=data["testx"],data["testy"]
1009 | 	
1010 | 	print(model.predict(testx))
1011 | 
1012 | 
1013 | ppl.add_node("data", get_data,entry_node=True)
1014 | 
1015 | for idx,model in enumerate([0,1]):
1016 | 
1017 | 	ppl.add_node(
1018 | 		f"model_train{str(idx)}",
1019 | 		train_model,
1020 | 		input_nodes=["data"],
1021 | 		args={"model_name":model},
1022 | 	)
1023 | 	ppl.add_node(
1024 | 		f"eval_train{str(idx)}",
1025 | 		evaluate,
1026 | 		input_nodes=["data", f"model_train{str(idx)}"],
1027 | 	)
1028 | 
1029 | ppl.register_dag()
1030 | 
1031 | ```
1032 | 
1033 | 
1034 | You can integrate the pipeline with model monitoring using the same format as we did for `pymlpipe.tabular`:
1035 | 
1036 | ```python
1037 | mlp=PyMLPipe()
1038 | mlp.set_experiment("pipelinecheck")
1039 | mlp.set_version(0.1)
1040 | .
1041 | .
1042 | .
1043 | with mlp.run():
1044 | 	trainx,trainy=data["trainx"],data["trainy"]
1045 | 	mlp.set_tags(["Classification","test run","logisticRegression"])
1046 | 	model=get_model(model_name)
1047 | 	model.fit(trainx, trainy)
1048 | 	mlp.scikit_learn.register_model(str(model_name), model)
1049 | ```
1050 | 
1051 | 
1052 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI_2.png)
1053 | 
1054 | ![alt text](https://raw.githubusercontent.com/neelindresh/pymlpipe/dev/static/pipelineUI%203.png)
1055 | 
1056 | ---
1057 | 
1058 |   
1059 | 
1060 | ## Contributing
1061 | 
1062 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
1063 | 
1064 |   
1065 | 
1066 | Please make sure to update tests as appropriate.
1067 | 
1068 |   
1069 | 
1070 | ## License
1071 | 
1072 | [MIT](https://choosealicense.com/licenses/mit/)


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | #load balancer :https://gist.github.com/zhouchangxun/5750b4636cc070ac01385d89946e0a7b


--------------------------------------------------------------------------------
/api.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pymlpipe.utils import yamlio
 3 | from pymlpipe.utils import factory
 4 | import pandas as pd
 5 | class Client:
 6 |     def __init__(self,path:str=None):
 7 |         if path:
 8 |             self.path=path
 9 |         elif "modelrun" in os.listdir():
10 |             self.path=os.path.join(os.getcwd(),"modelrun")
11 |             
12 |             print(f"No Path specified, defaulting to current path {self.path}")
13 |             
14 |     def get_all_experiments(self):
15 |         all_experiments=yamlio.read_yaml(
16 |             os.path.join(self.path, factory.DEFAULT["ModelRunInfo"])
17 |         )
18 |         return list(all_experiments.keys())
19 |     
20 |     def get_all_run_ids(self,experiment_name):
21 |         all_tunids=yamlio.read_yaml(
22 |             os.path.join(self.path, factory.DEFAULT["ModelRunInfo"])
23 |         )
24 |         return all_tunids[experiment_name]["runs"]
25 |     
26 |     def get_run_details(self,experiment_name,runid):
27 |         return yamlio.read_yaml(
28 |             os.path.join(
29 |                 self.path, experiment_name, runid, factory.DEFAULT["RunInfo"]
30 |             )
31 |         )
32 |     def get_all_run_details(self,experiment_name):
33 |         all_runids=yamlio.read_yaml(
34 |             os.path.join(self.path, factory.DEFAULT["ModelRunInfo"])
35 |         )
36 |         all_paths={id : os.path.join(
37 |                 self.path, experiment_name, id, factory.DEFAULT["RunInfo"]
38 |             ) for id in all_runids[experiment_name]["runs"]}
39 |         return {
40 |             
41 |             id: yamlio.read_yaml(path) for id,path in all_paths.items()
42 |         }
43 |     def get_metrics_comparison(self,experiment_name:str,format:str=None,sort_by:str=None,with_version=False):
44 |         all_runids=yamlio.read_yaml(
45 |             os.path.join(self.path, factory.DEFAULT["ModelRunInfo"])
46 |         )
47 |         all_paths={id : os.path.join(
48 |                 self.path, experiment_name, id, factory.DEFAULT["RunInfo"]
49 |             ) for id in all_runids[experiment_name]["runs"]}
50 |         data={
51 | 
52 |             id: yamlio.read_yaml(path) for id,path in all_paths.items()
53 |         }
54 |         comparison={}
55 |         for id,d in data.items():
56 |             comparison[id]={
57 |                 "model": d["model"]["model_class"],
58 |                 
59 |                 }
60 |             if with_version: comparison[id]["version"]=d["version"] 
61 |             comparison[id].update(d["metrics"])
62 |         if format:
63 |             if sort_by:
64 |                 return pd.DataFrame(comparison).T.sort_values(by=sort_by,ascending=False)
65 |             return pd.DataFrame(comparison).T
66 |         else:
67 |             comparison
68 |             
69 |     def get_model_details(self,experiment_name,runid,format:str=None):
70 |         data=yamlio.read_yaml(
71 |             os.path.join(
72 |                 self.path, experiment_name, runid, factory.DEFAULT["RunInfo"]
73 |             )
74 |         )
75 |         model_details=data["model"]
76 |         _exceptions_=["model_params","model_tags"]
77 |         print(model_details.keys())
78 |         model_info=[]
79 |         model_info.extend(
80 |             {"name": model_detail, "value": model_details[model_detail]}
81 |             for model_detail in model_details
82 |             if model_detail not in _exceptions_
83 |         )
84 |         model_params = [
85 |             {"name": params, "value": model_details["model_params"][params]}
86 |             for params in model_details["model_params"]
87 |         ]
88 |         model_tags = [
89 |             {"name": params, "value": model_details["model_tags"][params]}
90 |             for params in model_details["model_tags"]
91 |         ]
92 |         if format:
93 |             return pd.DataFrame(model_info),pd.DataFrame(model_params),pd.DataFrame(model_tags)
94 |         else:
95 |             return model_info,model_params,model_tags
96 |             
97 |         


--------------------------------------------------------------------------------
/automl.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | from tqdm import tqdm
  4 | from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier, RandomForestClassifier,BaggingRegressor,AdaBoostRegressor, ExtraTreesRegressor, RandomForestRegressor,GradientBoostingRegressor
  5 | from sklearn.model_selection import train_test_split
  6 | from sklearn.linear_model import LogisticRegression,PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier,LinearRegression, Lasso, Ridge, ElasticNet, BayesianRidge, HuberRegressor, PoissonRegressor,PassiveAggressiveRegressor
  7 | from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
  8 | from sklearn.svm import LinearSVC,SVR
  9 | from sklearn.neural_network import MLPClassifier, MLPRegressor
 10 | from xgboost import XGBClassifier, XGBRegressor
 11 | from catboost import CatBoostClassifier, CatBoostRegressor
 12 | from lightgbm import LGBMClassifier, LGBMRegressor
 13 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,r2_score,mean_absolute_error,mean_squared_error,make_scorer
 14 | from pymlpipe.tabular import PyMLPipe 
 15 | #from tabular import PyMLPipe
 16 | from sklearn.model_selection import GridSearchCV
 17 | from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer, RobustScaler
 18 | from itertools import chain
 19 | 
 20 | 
 21 | class AutoMLPipe():
 22 |     def __init__(self,exp_name,task,metric,data,label,tags=[],test_size=0.20,version=1.0,transform=False,scale='standard',cols_to_scale=[],categorical_cols=[],register_model=False,register_artifacts=False,explain=False,exclude=[]):
 23 |         '''
 24 |         exp_name: name of experiment
 25 |         task: regression/classification
 26 |         metric: for classification -> accuracy,recall,precision,f1/ for regression -> MAE,MSE,RMSE,R2 Score
 27 |         data: data on which the model to be fit
 28 |         label: target variable
 29 |         tags: list of custom-tags for the run
 30 |         test_size: size of test dataset
 31 |         version: experiment version
 32 |         transform:bool
 33 |         scale: 'standard'/'minmax'/'normalize'
 34 |         cols_to_scale: list of columns to scale. Should be numeric or float
 35 |         categorical_cols: columns to one-hot encode
 36 |         register_model: register experiement model
 37 |         register_artifacts: register experiment artifacts
 38 |         explain= xai implementation 
 39 |         exclude: models to be excluded during autoML runs
 40 |         '''
 41 |         self.exp_name=exp_name
 42 |         self.task=task
 43 |         self.metric=metric
 44 |         self.data=data
 45 |         self.label=label
 46 |         self.test_size=test_size
 47 |         self.version=version
 48 |         self.exclude=exclude
 49 |         self.transform=transform
 50 |         self.scale=scale
 51 |         self.cols_to_scale=cols_to_scale
 52 |         self.categorical_cols=categorical_cols
 53 |         self.mlp=PyMLPipe()
 54 |         self.register_model=register_model
 55 |         self.register=register_artifacts
 56 |         self.explain=explain
 57 |         self.tags=tags
 58 |         self.classification_models={
 59 |                 'LogisticRegression': LogisticRegression(),
 60 |                 'AdaBoostClassifier':AdaBoostClassifier(),
 61 |                 'BaggingClassifier': BaggingClassifier(),
 62 |                 'ExtraTreesClassifier' : ExtraTreesClassifier(),
 63 |                 'GradientBoostingClassifier' : GradientBoostingClassifier(),
 64 |                 'RandomForestClassifier': RandomForestClassifier(),
 65 |                 'DecisionTreeClassifier': DecisionTreeClassifier(),
 66 |                 'RidgeClassifier': RidgeClassifier(),
 67 |                 'SGDClassifier':SGDClassifier(),
 68 |                 'PassiveAggressiveClassifier':PassiveAggressiveClassifier(),
 69 |                 'LinearSVC': LinearSVC(),
 70 |                 'MLPClassifier': MLPClassifier(),
 71 |                 'XGBClassifier': XGBClassifier(n_jobs=-1),
 72 |                 'LGBMClassifier': LGBMClassifier(n_jobs=-1),
 73 |                 'CatBoostClassifier': CatBoostClassifier()}
 74 |         self.regression_models={
 75 |             'LinearRegression': LinearRegression(),
 76 |             'SVR' : SVR(),
 77 |             'AdaBoostRegressor' : AdaBoostRegressor(),
 78 |             'DecisionTreeRegressor' : DecisionTreeRegressor(),
 79 |             'Lasso' : Lasso(),
 80 |             'Ridge' : Ridge(),
 81 |             'MLPRegressor' : MLPRegressor(),
 82 |             'RandomForestRegressor' : RandomForestRegressor(),
 83 |             'ExtraTreesRegressor' : ExtraTreesRegressor(),
 84 |             'GradientBoostingRegressor' : GradientBoostingRegressor(),
 85 |             'BaggingRegressor' : BaggingRegressor(),
 86 |             'ElasticNet' : ElasticNet(),
 87 |             'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),
 88 |             'BayesianRidge' : BayesianRidge(),
 89 |             'HuberRegressor' : HuberRegressor(),
 90 |             'PoissonRegressor' : PoissonRegressor(),
 91 |             'XGBRegressor': XGBRegressor(n_jobs=-1),
 92 |             'LGBMRegressor': LGBMRegressor(n_jobs=-1),
 93 |             'CatBoostRegressor': CatBoostRegressor()
 94 | 
 95 |         }
 96 |         self.explain_exclude=['AdaBoostClassifier','BaggingClassifier','GradientBoostingClassifier','MLPClassifier','LinearSVC','AdaBoostRegressor','BaggingRegressor','SVR','MLPRegressor']
 97 |         self.param_grid=dict()
 98 |         self.param_grid['LogisticRegression']={
 99 |             'penalty': ['l1','l2'],
100 |             'C': [0.1,1],
101 |             'solver': ['liblinear', 'newton-cg'],
102 |         }
103 |         self.param_grid['PassiveAggressiveClassifier']={
104 |             'C': [0.01,0.1,1],
105 | 
106 |         }
107 |         self.param_grid['PassiveAggressiveRegressor']={
108 |             'C': [0.1,0.5,1],
109 | 
110 |         }
111 |         self.param_grid['RidgeClassifier']={
112 |             'alpha':[0.01,0.1,1],
113 |             'solver': ['auto','sag','cholesky']
114 |         }
115 |         self.param_grid['SGDClassifier']={
116 |             'loss': ['hinge','log_loss', 'modified_huber','squared_error'],
117 |             'penalty': ['l1','l2']
118 |         }
119 |         self.param_grid['DecisionTreeClassifier']={
120 |             'criterion': ['gini','entropy'],
121 |             'max_depth': [None,2,3],
122 |             'min_samples_split': [2,3,4]
123 |         }
124 |         self.param_grid['AdaBoostClassifier']={
125 |             'n_estimators': [10,100,500],
126 |             'learning_rate': [0.01,0.1,1],
127 |         }
128 |         self.param_grid['BaggingClassifier']={
129 |             'n_estimators': [10,100,500],
130 |         }
131 |         self.param_grid['BaggingRegressor']={
132 |             'n_estimators': [10,100,500],
133 |         }
134 |         self.param_grid['ExtraTreesClassifier']={
135 |             'n_estimators': [10,100,500],
136 |             'max_depth': [None,2,3],
137 |             'min_samples_split': [2,3,4]
138 |         }
139 |         self.param_grid['ExtraTreesRegressor']={
140 |             'n_estimators': [10,100,500],
141 |             'max_depth': [None,2,3],
142 |             'min_samples_split': [2,3,4]
143 |         }
144 |         self.param_grid['GradientBoostingClassifier']={
145 |             'n_estimators': [10,100,500],
146 |             'learning_rate': [0.01,0.1],
147 |             'criterion': ['friedman_mse','squared_error']
148 |         }
149 |         self.param_grid['GradientBoostingRegressor']={
150 |             'n_estimators': [10,100,500],
151 |             'learning_rate': [0.01,0.1],
152 |             'criterion': ['friedman_mse','squared_error']
153 |         }
154 |         self.param_grid['RandomForestClassifier']={
155 |             'n_estimators': [10,100,500],
156 |             'max_depth': [None,2,3],
157 |             'min_samples_split': [2,3,4]
158 |         }
159 |         self.param_grid['RandomForestRegressor']={
160 |             'n_estimators': [10,100,500],
161 |             'max_depth': [None,2,3],
162 |             'min_samples_split': [2,3,4]
163 |         }
164 |         self.param_grid['LinearSVC']={
165 |             'loss': ['hinge','log_loss', 'modified_huber','squared_error'],
166 |             'C': [0.1,0.5,1]
167 |         }
168 |         self.param_grid['MLPClassifier']={
169 |             'activation': ['tanh','relu'],
170 |             'solver': ['sgd','adam']
171 |         }
172 |         self.param_grid['MLPRegressor']={
173 |             'activation': ['tanh','relu'],
174 |             'solver': ['sgd','adam']
175 |         }
176 |         self.param_grid['LinearRegression']={
177 |                 'n_jobs' : [-1]
178 |             }
179 |         self.param_grid['SVR']={
180 |             'kernel' : ['linear', 'poly', 'rbf', 'sigmoid']
181 |             ,'gamma' : ['scale','auto']
182 |             , 'C': [0.1,0.5,1]
183 |         }
184 |         self.param_grid['AdaBoostRegressor']={
185 |             'n_estimators': [10,100,500],
186 |             'learning_rate': [0.01,0.1,1],
187 |             'loss' : ['linear','square','exponential']
188 |         }
189 |         self.param_grid['DecisionTreeRegressor']={
190 |             #'criterion': ['gini','entropy'],
191 |             'splitter' : ['best'],
192 |             'max_depth': [None,2,3],
193 |             'min_samples_split': [2,3,4]
194 |         }
195 |         self.param_grid['Lasso']={
196 |             'selection' : ['cyclic', 'random']
197 |         }
198 |         self.param_grid['Ridge']={
199 |             'solver' : ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
200 |         }
201 |         self.param_grid['PoissonRegressor']={
202 |             'alpha': [0.5,1,1.5]
203 |         }
204 |         self.param_grid['HuberRegressor']={
205 |             'epsilon': [1.35,1.5,1.75,2]
206 |         }
207 |         self.param_grid['ElasticNet']={
208 |             'l1_ratio': [0.3,0.5,0.6,0.7]
209 |         }
210 |         self.param_grid['BayesianRidge']={
211 |             'n_iter': [100,300,500]
212 |         }
213 |         self.param_grid['XGBClassifier']={
214 |             'n_estimators': [10,100,500],
215 |             'max_depth': [None,2,3],
216 |             'learning_rate': [0.01,0.1]
217 |         }
218 |         self.param_grid['LGBMClassifier']={
219 |             'n_estimators': [10,100,500],
220 |             'max_depth': [None,2,3],
221 |             'num_leaves': [20,30,40]
222 |         }
223 |         self.param_grid['CatBoostClassifier']={
224 |             'n_estimators': [10,100,500],
225 |             'max_depth': [2,3],
226 |            'learning_rate': [0.01,0.1]
227 |         }
228 |         self.param_grid['XGBRegressor']={
229 |             'n_estimators': [10,100,500],
230 |             'max_depth': [None,2,3],
231 |             'learning_rate': [0.01,0.1]
232 |         }
233 |         self.param_grid['LGBMRegressor']={
234 |             'n_estimators': [10,100,500],
235 |             'max_depth': [None,2,3],
236 |             'num_leaves': [20,30,40]
237 |         }
238 |         self.param_grid['CatBoostRegressor']={
239 |             'n_estimators': [10,100,500],
240 |             'max_depth': [2,3],
241 |            'learning_rate': [0.01,0.1]
242 |         }   
243 | 
244 |     def run_automl(self,tune=False,tune_best=False):
245 |         '''
246 |         tune: param tune all the models
247 |         tune_best: param tune the best model
248 |         '''
249 | 
250 |         # Set Experiment name
251 |         self.mlp.set_experiment(self.exp_name)
252 |         # Set Version name
253 |         self.mlp.set_version(self.version)
254 | 
255 |         if self.transform==True:
256 |             numeric_cols=self.data.select_dtypes(include=[np.number]).columns
257 |             numeric_cols=[item for item in numeric_cols if item not in self.categorical_cols]
258 |             cols_to_scale=self.cols_to_scale
259 |             if cols_to_scale==[]:
260 |                 cols_to_scale=numeric_cols
261 |             check =  all(item in numeric_cols for item in cols_to_scale)
262 |             if check==True:
263 |                 if self.scale=='standard':
264 |                     scaler = StandardScaler()
265 |                 elif self.scale=='minmax':
266 |                     scaler = MinMaxScaler()
267 |                 elif self.scale=='normalize':
268 |                     scaler = Normalizer()
269 |                 elif self.scale=='robust':
270 |                     scaler = RobustScaler()                       
271 |                 scaler.fit(self.data[cols_to_scale])
272 |                 self.data[cols_to_scale] = scaler.transform(self.data[cols_to_scale])     
273 |             else:
274 |                 print('Scaling operation cannot be completed as column type is not int/float')
275 | 
276 |         if self.categorical_cols!=[]:
277 |             self.data = pd.get_dummies(self.data, columns = self.categorical_cols)
278 |         self.exclude=[x.lower() for x in self.exclude]
279 |         trainx,testx,trainy,testy=train_test_split(self.data,self.label,test_size=self.test_size)      
280 |         result=pd.DataFrame()
281 |         prediction_set={}
282 |         if self.task=='classification':
283 |             for model_name,model in tqdm(self.classification_models.items()):
284 |                 model_ex=model_name.lower()
285 |                 if model_ex not in self.exclude:        
286 |                     if tune==True:
287 |                         try:   
288 |                             predictions,result_set=self.param_tune_model(model_name,trainx,testx,trainy,testy)
289 |                             predictions=predictions.tolist()
290 |                             if model_name=='CatBoostClassifier':
291 |                                 predictions=list(chain(*predictions))
292 |                             fin=dict()
293 |                             fin['name']=model_name
294 |                             fin['accuracy']=result_set['accuracy']
295 |                             fin['precision']=result_set['precision']
296 |                             fin['recall']=result_set['recall']
297 |                             fin['f1']=result_set['f1_score']
298 |                         except Exception as e:
299 |                             print (e)
300 |                             continue
301 |                     
302 |                     else:
303 |                         try:
304 |                             with self.mlp.run():
305 |                                 print(model_name)
306 |                                 default_tags=[model_name,"Classification"]
307 |                                 tag_list=default_tags+self.tags
308 |                                 self.mlp.set_tags(tag_list)
309 |                             
310 |                                 model=model
311 |                                 model.fit(trainx, trainy)
312 |                                 predictions=model.predict(testx)
313 |                                 predictions=predictions.tolist()
314 |                                 if model_name=='CatBoostClassifier':
315 |                                     predictions=list(chain(*predictions))
316 | 
317 |                                 self.mlp.log_metric("accuracy", accuracy_score(testy,predictions))
318 |                                 self.mlp.log_metric("precision", precision_score(testy,predictions,average='macro'))
319 |                                 self.mlp.log_metric("recall", recall_score(testy,predictions,average='macro'))
320 |                                 self.mlp.log_metric("f1_score", f1_score(testy,predictions,average='macro'))
321 |                                 if self.explain==True:
322 |                                     if model_name not in self.explain_exclude:
323 |                                         self.mlp.explainer(model,trainx) 
324 |                                     else: print('XAI is not available for ',model_name) 
325 |                                 if self.register==True:
326 |                                     self.mlp.register_artifact("train", trainx)
327 |                                     self.mlp.register_artifact("test", testx,artifact_type="testing")
328 |                                 if self.register_model==True:
329 |                                     self.mlp.scikit_learn.register_model(model_name, model)
330 |                             
331 |                                 result1=self.mlp.get_info()
332 |                                 fin=dict()
333 |                                 fin['name']=model_name
334 |                                 fin['accuracy']=result1['metrics']['accuracy']
335 |                                 fin['precision']=result1['metrics']['precision']
336 |                                 fin['recall']=result1['metrics']['recall']
337 |                                 fin['f1']=result1['metrics']['f1_score']
338 |                         except Exception as e:
339 |                             print(e)                        
340 |                             continue
341 |                     
342 |                     prediction_set[model_name]=predictions
343 |                     result=result.append(fin,ignore_index=True)
344 |         elif self.task=='regression':
345 |             for model_name,model in tqdm(self.regression_models.items()):
346 |                 model_ex=model_name.lower()
347 |                 if model_ex not in self.exclude: 
348 |                     if tune==True:
349 |                         try:
350 |                             predictions,result_set=self.param_tune_model(model_name,trainx,testx,trainy,testy)
351 |                             predictions=predictions.tolist()
352 |                             fin=dict()
353 |                             fin['name']=model_name
354 |                             fin['MAE']=result_set['MAE']
355 |                             fin['MSE']=result_set['MSE']
356 |                             fin['R2 Score']=result_set['R2 Score']
357 |                             fin['RMSE']=result_set['RMSE']
358 |                         except Exception as e:
359 |                             print(e)
360 |                             continue
361 |                     else:
362 |                         try:
363 |                             with self.mlp.run():
364 |                                 default_tags=[model_name,"Regression"]
365 |                                 tag_list=default_tags+self.tags
366 |                                 self.mlp.set_tags(tag_list)
367 |                                 model=model
368 |                                 model.fit(trainx, trainy)
369 |                                 predictions=model.predict(testx)
370 |                                 predictions=predictions.tolist()
371 |                             
372 |                                 # log performace metrics
373 |                                 self.mlp.log_metric("R2 Score", r2_score(testy,predictions))
374 |                                 self.mlp.log_metric("MAE", mean_absolute_error(testy,predictions))
375 |                                 self.mlp.log_metric("MSE", mean_squared_error(testy,predictions))
376 |                                 self.mlp.log_metric("RMSE", mean_squared_error(testy,predictions,squared=False))
377 |                                 if self.explain==True:
378 |                                     if model_name not in self.explain_exclude:
379 |                                         self.mlp.explainer(model,trainx) 
380 |                                     else: print('XAI is not available for ',model_name)
381 |                                 if self.register==True:
382 |                                     # Save train data and test data
383 |                                     self.mlp.register_artifact("train", trainx)
384 |                                     self.mlp.register_artifact("test", testx,artifact_type="testing")
385 |                                 # Save the model
386 |                                 if self.register_model==True:
387 |                                         self.mlp.scikit_learn.register_model(model_name, model)
388 |                                 result1=self.mlp.get_info()
389 |                             
390 |                                 fin=dict()
391 |                                 fin['name']=model_name
392 |                                 fin['MAE']=result1['metrics']['MAE']
393 |                                 fin['MSE']=result1['metrics']['MSE']
394 |                                 fin['RMSE']=result1['metrics']['RMSE']
395 |                                 fin['R2 Score']=result1['metrics']['R2 Score']
396 |                         except Exception as e:
397 |                             print (e)
398 |                             continue    
399 |                     
400 |                     prediction_set[model_name]=predictions  
401 |                     result=result.append(fin, ignore_index=True)    
402 |                
403 |         if self.task=='classification' or self.metric=='R2 Score': 
404 |             result.sort_values(by=self.metric,ascending=False,inplace=True)
405 |         else:
406 |             result.sort_values(by=self.metric,ascending=True,inplace=True)
407 |             
408 |         if tune_best==False:
409 |             return prediction_set,result
410 |         else:
411 |             result=result.head(1)
412 |             best_model_name=str(result.name.values[0])
413 |             
414 |             prediction_set,result=self.param_tune_model(trainx=trainx,testx=testx,trainy=trainy,testy=testy,model_tune=best_model_name)
415 |             return prediction_set,result
416 | 
417 |     def param_tune_model(self,model_tune,trainx,testx,trainy,testy):
418 |        
419 |         self.mlp.set_experiment(self.exp_name)    
420 |         best_model_name=model_tune
421 |         
422 |         with self.mlp.run():
423 |             if self.task=="classification":
424 |                 default_tags=["Hyper-param-tuning-clf",best_model_name]
425 |                 tag_list=default_tags+self.tags
426 |                 self.mlp.set_tags(tag_list)
427 |                 final_model=self.classification_models[best_model_name].fit(trainx, trainy)
428 |                 if self.metric=='accuracy': score= make_scorer(accuracy_score,average='weighted')
429 |                 elif self.metric=='recall': score=make_scorer(recall_score,average='weighted')
430 |                 elif self.metric=='precision': score=make_scorer(precision_score,average='weighted')
431 |                 else: score=make_scorer(f1_score,average='weighted')
432 |                 CV_cfl = GridSearchCV(estimator = final_model, param_grid = self.param_grid[best_model_name], scoring= score, cv=3, verbose = 2)
433 |                 CV_cfl.fit(trainx, trainy)
434 |                 self.mlp.log_params(CV_cfl.best_params_)
435 |                 predictions=CV_cfl.best_estimator_.predict(testx)
436 | 
437 |                 if self.explain==True:
438 |                     if best_model_name not in self.explain_exclude:
439 |                         self.mlp.explainer(CV_cfl.best_estimator_,trainx) 
440 |                     else: print('XAI is not available for ',best_model_name)      
441 | 
442 |                 result_set={
443 |                     "accuracy": accuracy_score(testy,predictions),
444 |                     "precision": precision_score(testy,predictions,average='macro'),
445 |                     "recall": recall_score(testy,predictions,average='macro'),
446 |                     "f1_score": f1_score(testy,predictions,average='macro')}
447 |                 self.mlp.log_metrics(result_set)
448 |             elif self.task=="regression":
449 |                 default_tags=["Hyper-param-tuning-reg",best_model_name]
450 |                 tag_list=default_tags+self.tags
451 |                 self.mlp.set_tags(tag_list)
452 |                 final_model=self.regression_models[best_model_name].fit(trainx, trainy)
453 |                 if self.metric=='MSE': score= 'neg_mean_squared_error'
454 |                 elif self.metric=='MAE': score='neg_mean_absolute_error'
455 |                 elif self.metric=='R2 Score': score='r2'
456 |                 else: score='neg_root_mean_squared_error'
457 |                 CV_cfl = GridSearchCV(estimator = final_model, param_grid = self.param_grid[best_model_name], scoring=score,cv=3,verbose = 2)
458 |                 CV_cfl.fit(trainx, trainy)
459 |                 self.mlp.log_params(CV_cfl.best_params_)
460 |                 predictions=CV_cfl.best_estimator_.predict(testx)
461 | 
462 |                 result_set={
463 |                     "MSE": mean_squared_error(testy,predictions),
464 |                     "MAE": mean_absolute_error(testy,predictions),
465 |                     "R2 Score": r2_score(testy,predictions),
466 |                     "RMSE": mean_squared_error(testy,predictions,squared=False)}
467 |                 self.mlp.log_metrics(result_set)
468 |                 if self.explain==True:
469 |                     if best_model_name not in self.explain_exclude:
470 |                         self.mlp.explainer(CV_cfl.best_estimator_,trainx) 
471 |                     else: print('XAI is not available for ',best_model_name)
472 |             if self.register_model==True:
473 |                 self.mlp.scikit_learn.register_model(best_model_name, CV_cfl.best_estimator_)
474 |             if self.register==True:
475 |                 # Save train data and test data
476 |                 self.mlp.register_artifact("train", trainx)
477 |                 self.mlp.register_artifact("test", testx,artifact_type="testing")
478 | 
479 |             return predictions,result_set
480 |         
481 |        


--------------------------------------------------------------------------------
/legacy/pipeline.py:
--------------------------------------------------------------------------------
  1 | import dill
  2 | import cloudpickle
  3 | from pymlpipe.utils import database,yamlio
  4 | import os
  5 | import datetime
  6 | import traceback,sys
  7 | #checking
  8 | 
  9 | class Node:
 10 |     def __init__(self,name, func,path):
 11 |         self.name=name
 12 |         self.func=func
 13 |         self.path=path
 14 |         self.save(name, func)
 15 |         
 16 |     def save(self,name,func):
 17 |         #mainify(func)
 18 |         #dill.dump(func, open(name+".pkl", "wb"))
 19 |         
 20 |         self.filename=os.path.join(self.path,name+".mld")
 21 |         cloudpickle.dump(func, open(self.filename, "wb"))
 22 |         
 23 |     
 24 | class Pipeline:
 25 |     def __init__(self,name):
 26 |         path=os.getcwd()
 27 |         self.PIPELINE_FOLDER="ML_pipelines"
 28 |         database.create_folder(path)
 29 |         self.name = name
 30 |         self.base_path=database.create_folder(path,self.PIPELINE_FOLDER)
 31 |         self.path_pipe=database.create_folder(self.base_path,self.name)
 32 |         
 33 |         self.dag={"sequence":[],"nodes":{},"edges":[],"node_order":{},"node_details":{}}
 34 |         self.sequence=[]
 35 |         self.node_order={}
 36 |         self.is_entry_node=False
 37 |         
 38 |     def _make_edges(self,node,edges):
 39 |         """Create the DAG edges for the nodes, in a src--> trg format
 40 | 
 41 |         Args:
 42 |             node (_type_): _description_
 43 |             edges (_type_): _description_
 44 | 
 45 |         Returns:
 46 |             _type_: _description_
 47 |         """
 48 |         edge_list=[]
 49 |         for edge in edges:
 50 |             edge_list.append({"src":edge,"target":node})
 51 |         return edge_list
 52 |     
 53 |     
 54 |     def add_node(self,name,func,node_input=None,entry_node=False,args=None):
 55 |         if name in self.sequence:
 56 |             raise ValueError(f"Node Name {name} already exists! Please provide different Name")
 57 |         self.sequence.append(name)
 58 |         node=Node(name,func,self.path_pipe)
 59 |         self.dag["nodes"][name]={"path":node.filename,"mould_file":node.name+".mld","entry":entry_node,"args":args,"folder":self.PIPELINE_FOLDER,"subfolder":self.name,}
 60 |         self.dag["node_details"][name]={"status":"Queued","start_time":"-","end_time":"-","log":""}
 61 |         if entry_node:
 62 |             self.is_entry_node=True
 63 |         if node_input!=None:
 64 |             self.node_order[name]=node_input
 65 |         return node
 66 |         
 67 |     def load(self,name):  
 68 |         #return dill.load(open(name+".pkl", "rb"))
 69 |         #return cloudpickle.load(open(os.path.join(self.path_pipe,self.name+".yaml"), "rb"))
 70 |         return cloudpickle.load(open(name,'rb'))
 71 |     
 72 |     
 73 |     def add_edge(self,node_1,node_2):
 74 |         if not isinstance(node_1,Node) and isinstance(node_2,Node):
 75 |             raise TypeError("node_1 or node_2 is not type Node")
 76 |         self.dag["edges"].append({"src":node_1.name,"target":node_2.name})
 77 |         
 78 |     
 79 |     def register(self):
 80 |         already_exist=False
 81 |         exists_idx=None
 82 |         if not self.is_entry_node:
 83 |             raise ValueError("Entry Node is not defined!!! Please 'entry_node'=True for the starting node")
 84 |         self.dag["sequence"]=self.sequence
 85 |         self.dag["node_order"]=self.node_order
 86 |         #self.dag["graph"]=
 87 |         graph={}
 88 |         '''
 89 |         for seq in self.sequence:
 90 |             if seq in graph:
 91 |                 graph[seq].append([{"edges":i,"status":None} for i in self.dag["edges"] if i["src"]==seq])
 92 |             else:
 93 |                 graph[seq]=[{"edges":i,"status":None} for i in self.dag["edges"] if i["src"]==seq]
 94 |         print(graph)
 95 |         '''
 96 |         data=yamlio.read_yaml(os.path.join(self.base_path,"info.yaml"))
 97 |         
 98 |         for idx,d in enumerate(data):
 99 |             if d["pipelinename"]==self.name:
100 |                 already_exist=True
101 |                 exists_idx=idx
102 |         if not already_exist:
103 |             data.append({
104 |                 "pipelinename":self.name,
105 |                 "path":self.path_pipe,
106 |                 "folder":self.PIPELINE_FOLDER,
107 |                 "subfolder":self.name,
108 |                 "created_at":  datetime.datetime.now(),
109 |                 "status":"-",
110 |                 "jobtime":"",
111 |                 "jobtime":"-"
112 |             })
113 |         else:
114 |             data[idx].update({
115 |                 "pipelinename":self.name,
116 |                 "path":self.path_pipe,
117 |                 "folder":self.PIPELINE_FOLDER,
118 |                 "subfolder":self.name,
119 |                 "created_at":  datetime.datetime.now(),
120 |                 "status":"-",
121 |                 "jobtime":"",
122 |                 "jobtime":"-"
123 |             })
124 |             
125 |         yamlio.write_to_yaml(os.path.join(self.base_path,"info.yaml"), data)
126 |         yamlio.write_to_yaml(os.path.join(self.path_pipe,self.name+".yaml"), self.dag)
127 |     
128 |     def load_pipeline(self):
129 |         self.dag=yamlio.read_yaml(os.path.join(self.path_pipe,self.name+".yaml"))
130 |     
131 |     def _find_next_node(self,node_name):
132 |         return self.dag["graph"][node_name]
133 |     
134 |     def _create_graph(self,edges):
135 |         graph={}
136 |         for edge in edges:
137 |             if edge["src"] in graph:
138 |                 graph[edge["src"]].append(edge["target"])
139 |             else:
140 |                 graph[edge["src"]]=[edge["target"]]
141 |         return graph
142 |     def _make_previous_output(self,_prev_outputs,neighbor,functions_args):
143 |         inp=[]
144 |         #print(functions_args)
145 |         for n in neighbor:
146 |             #when output --> tuple,list
147 |             if isinstance(_prev_outputs[n], tuple) or isinstance(_prev_outputs[n], list):
148 |                 inp.extend(list(_prev_outputs[n]))
149 |                
150 |                 if functions_args!=None:
151 |                     inp.extend(functions_args)
152 |             #make output --> dict
153 |             else: # or isinstance(_prev_outputs[n], str) or isinstance(_prev_outputs[n], int) or isinstance(_prev_outputs[n], float):
154 |                 inp.append(_prev_outputs[n])
155 |                 
156 |                 if functions_args!=None:
157 |                     inp.extend(functions_args)
158 |             #make output --> str,float,int
159 |             
160 |         print("input-->",inp)
161 |         return inp
162 |         
163 |     def _change_status(self,node,status,info=None):
164 |         dag=yamlio.read_yaml(os.path.join(self.path_pipe,self.name+".yaml"))
165 |         if status=="Started":
166 |             
167 |             dag["node_details"][node]["status"]=status
168 |             
169 |             dag["node_details"][node]["start_time"]=str(datetime.datetime.now())
170 |             dag["node_details"][node]["log"]="======"+status.upper()+"======"+str(datetime.datetime.now())+"\n"
171 |         elif status=="Completed" or status=="Failed":
172 |             dag["node_details"][node]["status"]=status
173 |             dag["node_details"][node]["end_time"]=str(datetime.datetime.now())
174 |             dag["node_details"][node]["log"]+="======"+status.upper()+"======"+str(datetime.datetime.now())+"\n"
175 |             if info!=None:
176 |                 dag["node_details"][node]["log"]+="\n"+str(info)+"======"
177 |                 
178 |                 
179 |         dag=yamlio.write_to_yaml(os.path.join(self.path_pipe,self.name+".yaml"),dag)
180 |         
181 |         
182 |         
183 |     def bfs(self,graph,entry_node,_prev_outputs,_functions,_node_order,functions_args,job_name,flag_variable_path):
184 |         visited = [entry_node] # List to keep track of visited nodes.
185 |         queue = [entry_node]     #Initialize a queue
186 |         while queue:
187 |             s = queue.pop(0) 
188 |             
189 |             if s in graph:
190 |                 for neighbour in graph[s]:
191 |                     if neighbour not in visited:
192 |                         func=_functions[neighbour]
193 |                         self._change_status(neighbour,"Started")
194 |                         if not self._check_for_job_status(job_name,flag_variable_path): sys.exit() 
195 |                         #print(self._make_previous_output(_prev_outputs,_node_order[neighbour]))
196 |                         print(_prev_outputs)
197 |                         try:
198 |                             _prev_outputs[neighbour]=func(*self._make_previous_output(_prev_outputs,_node_order[neighbour],functions_args[neighbour]))
199 |                             
200 |                             self._change_status(neighbour,"Completed")
201 |                             
202 |                         #func()
203 |                         except Exception as e:
204 |                             #print(neighbour)
205 |                             print(traceback.format_exc())
206 |                             #raceback.print_exception(*sys.exc_info())
207 |                             self._change_status(neighbour,"Failed",info=traceback.format_exc())
208 |                         
209 |                         visited.append(neighbour)
210 |                         queue.append(neighbour)
211 |         return _prev_outputs
212 |     
213 |     def _get_path(self,base_path,folder):
214 |         return os.path.join(base_path,folder)
215 |     def _check_for_job_status(self,jobname,queue_name):
216 |         all_jobs=yamlio.read_yaml(queue_name)
217 |         status=[j["status"] for j in all_jobs if j["pipelinename"]==jobname]
218 |         return False if status[0]=="Stopped" else True
219 |     
220 |     def run(self,*args,**kwargs):
221 |         if len(self.dag["sequence"])==0:
222 |             raise ValueError("Error!!! No Dag Provided!!!!")
223 |         #if not self.is_entry_node:
224 |         #    raise ValueError("Error!!! Entry Node Not defined please provide and entry node with entry_node=True!!!!")
225 |         entrynode=[]
226 |         functions={}
227 |         output_nodes={}
228 |         functions_args={}
229 |         for node in self.dag["nodes"]:
230 |             #print(node,self.dag["nodes"][node])
231 |             if self.dag["nodes"][node]["entry"]:
232 |                 entrynode.append(node)
233 |             functions[node]=self.load(self._get_path(self.path_pipe, self.dag["nodes"][node]["mould_file"]))#self.dag["nodes"][node]["path"])
234 |             functions_args[node]=self.dag["nodes"][node]["args"]
235 |         graph=self._create_graph(self.dag["edges"])
236 |         self.dag["node_details"]={node:{"status":"Queued","start_time":"-","end_time":"-","log":""} for node in self.dag["node_details"]}
237 |         yamlio.write_to_yaml(os.path.join(self.path_pipe,self.name+".yaml"), self.dag)
238 |         for node in entrynode:
239 |             func=functions[node]
240 |             self._change_status(node,"Started")
241 |             try:
242 |                 print("node", node)
243 |                 output_nodes[node]=func(*args,**kwargs)
244 |                 self._change_status(node,"Completed")
245 |             except Exception as e:
246 |                 print(node)
247 |                 
248 |                 traceback.print_exception(*sys.exc_info())
249 |                 self._change_status(node,"Failed")
250 |             output_nodes=self.bfs(graph,node,output_nodes,functions,self.dag["node_order"],functions_args)
251 |         return output_nodes
252 |     
253 |     
254 |     
255 |     
256 |     def run_serialized(self,flag_variable_path,job_name,*args,**kwargs):
257 |         if len(self.dag["sequence"])==0:
258 |             raise ValueError("Error!!! No Dag Provided!!!!")
259 |         #if not self.is_entry_node:
260 |         #    raise ValueError("Error!!! Entry Node Not defined please provide and entry node with entry_node=True!!!!")
261 |         entrynode=[]
262 |         functions={}
263 |         output_nodes={}
264 |         functions_args={}
265 |         for node in self.dag["nodes"]:
266 |             #print(node,self.dag["nodes"][node])
267 |             if self.dag["nodes"][node]["entry"]:
268 |                 entrynode.append(node)
269 |             functions[node]=self.load(self._get_path(self.path_pipe, self.dag["nodes"][node]["mould_file"]))#self.dag["nodes"][node]["path"])
270 |             functions_args[node]=self.dag["nodes"][node]["args"]
271 |         graph=self._create_graph(self.dag["edges"])
272 |         self.dag["node_details"]={node:{"status":"Queued","start_time":"-","end_time":"-","log":""} for node in self.dag["node_details"]}
273 |         yamlio.write_to_yaml(os.path.join(self.path_pipe,self.name+".yaml"), self.dag)
274 |         for node in entrynode:
275 |             func=functions[node]
276 |             self._change_status(node,"Started")
277 |             try:
278 |                 print("node", node,self._check_for_job_status(job_name,flag_variable_path))
279 |                 if not self._check_for_job_status(job_name,flag_variable_path):
280 |                     sys.exit()
281 |                 output_nodes[node]=func(*args,**kwargs)
282 |                 self._change_status(node,"Completed")
283 |             except Exception as e:
284 |        
285 |                 
286 |                 traceback.print_exception(*sys.exc_info())
287 |                 self._change_status(node,"Failed")
288 |             output_nodes=self.bfs(graph,node,output_nodes,functions,self.dag["node_order"],functions_args,job_name,flag_variable_path)
289 |         return output_nodes
290 |     
291 |    
292 |             
293 |         
294 |             
295 |             
296 |             
297 |         
298 |         
299 |                 
300 |         
301 |             
302 |         
303 |             
304 |         
305 |         
306 | 
307 | 


--------------------------------------------------------------------------------
/pipeline.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import cloudpickle
  3 | from pymlpipe.utils import database,yamlio
  4 | import os
  5 | import datetime
  6 | import traceback,sys
  7 | import inspect
  8 | ##---To Be moved into separate file--##
  9 | 
# Default pipeline folder name; PipeLine uses it when it is constructed
# without an explicit ``pipeline_path``.
__FOLDER__="ML_pipelines"
 11 | 
 12 | 
 13 | class Node:
 14 |     def __init__(self,name, func,path):
 15 |         self.name=name
 16 |         self.func=func
 17 |         self.path=path
 18 |         self.save(name, func)
 19 | 
 20 |     def save(self,name: str,func) -> None:
 21 |         """_summary_: Saves a python Function into Mould file
 22 | 
 23 |         Args:
 24 |             name (str): Name of the function
 25 |             func (Object): Actual python function
 26 |         """
 27 | 
 28 |         self.name_of_file = f"{self.name}.mld"
 29 |         self.filename=os.path.join(self.path,self.name_of_file)
 30 |         cloudpickle.dump(func, open(self.filename, "wb"))
 31 | 
 32 | class PipeLine:
 33 |     def __init__(self,pipeline_name,pipeline_path=None):
 34 |         self.path=os.getcwd()
 35 |         self.pipeline_name=pipeline_name
 36 |         self.pipeline_path = __FOLDER__ if pipeline_path is None else pipeline_path
 37 |         database.create_folder(self.path)
 38 |         self.base_path=database.create_folder(self.path,self.pipeline_path)
 39 |         self.path_pipe=database.create_folder(self.base_path,self.pipeline_name)
 40 | 
 41 |         self.dag={"nodes":{},"graph":{},"args_map":{},"node_details":{}}
 42 |         self.status_code={0:"Started",1:"Completed",2:"Queued",3:"Failed"}
 43 |         self._args_tag="args@"
 44 | 
 45 |     def add_node(self,node_name:str,function,input_nodes:list =None,args:dict=None,entry_node:bool=False) -> None:
 46 |         """_summary_
 47 | 
 48 |         Args:
 49 |             node_name (str): Name of the node
 50 |             function (_type_): Python function you want to execute
 51 |             input_nodes (list, optional): List of nodes that are connected to this node. The connected nodes should return a value which will act as an input to the node . Defaults to None.
 52 |             entry_node (bool, optional): boolean flag indicating if this is the starting node(first node). Defaults to False.
 53 |             args (list, optional): Run time arguments . Defaults to None.
 54 | 
 55 |         Raises:
 56 |             ValueError: _description_
 57 |             TypeError: _description_
 58 |             TypeError: _description_
 59 |             TypeError: _description_
 60 |         """
 61 |         '''Exception Start'''
 62 |         if entry_node and "root" in self.dag["graph"]:
 63 |             raise ValueError("Error!!! entry_node is already set. Two nodes cannot be Entry Node in DAG.")
 64 |         if input_nodes != None and not isinstance(input_nodes,list):
 65 |             raise TypeError(
 66 |                 f"Error!!! 'input_node' expected to be type:list got {type(input_nodes)}"
 67 |             )
 68 |         if not isinstance(entry_node,bool):
 69 |             raise TypeError(
 70 |                 f"Error!!! 'entry_node' expected to be type:bool got {type(input_nodes)}"
 71 |             )
 72 |         if not isinstance(node_name,str):
 73 |             raise TypeError(
 74 |                 f"Error!!! 'node_name' expected to be type:str got {type(input_nodes)}"
 75 |             )
 76 | 
 77 |         '''Exception End'''
 78 | 
 79 | 
 80 |         node=Node(node_name,function,self.path_pipe)
 81 |         self.dag["nodes"][node_name]={
 82 |             'filename':node.name_of_file,
 83 |             'root_path':self.pipeline_path,
 84 |             'sub_path':self.pipeline_name,
 85 |             "edge_nodes":input_nodes,
 86 |             "args":args
 87 |         }
 88 | 
 89 |         _arg_names=inspect.getfullargspec(function).args
 90 |         ## Mapping args to the input node
 91 |         if args !=None:
 92 |             for arg_name in _arg_names:
 93 |                 if arg_name in args:
 94 |                     input_nodes.append(f"{self._args_tag}{arg_name}")
 95 |         _mapper = dict(zip(input_nodes,_arg_names)) if len(_arg_names)!=0 else {}
 96 |         #print("_mapper:",_mapper)
 97 |         self.dag["args_map"][node_name]=_mapper
 98 |         if entry_node:
 99 |             self.dag["graph"]["root"]=[node_name]
100 |         else:
101 |             for ipnode in input_nodes:
102 |                 if ipnode.startswith(self._args_tag):
103 |                     continue
104 |                 if ipnode in self.dag["graph"]:
105 |                     self.dag["graph"][ipnode].append(node_name)
106 |                 else:
107 |                     self.dag["graph"][ipnode]=[node_name]
108 |         if node_name not in self.dag["graph"]:
109 |             self.dag["graph"][node_name]=[]
110 |         self.dag["node_details"][node_name]={"status":self.status_code[2],"start_time":"-","end_time":"-","log":""}
111 | 
112 | 
113 |     def register_dag(self):
114 |         """_summary_: Registers the pipeline as an Dag Object
115 |         """
116 |         path_to_yaml=self.path_pipe
117 |         file_name = f"{self.pipeline_name}.yaml"
118 |         
119 |         data=yamlio.read_yaml(os.path.join(self.base_path,"info.yaml"))
120 |         info={
121 |                 "pipelinename":self.pipeline_name,
122 |                 "folder":self.pipeline_path,
123 |                 "subfolder":self.pipeline_name,
124 |                 "created_at":  datetime.datetime.now(),
125 |                 "status":"-",
126 |                 "jobtime":"",
127 |                 "jobtime":"-"
128 |             }
129 |         already_exist=False
130 |         
131 |         for idx,d in enumerate(data):
132 |             if d["pipelinename"]==self.pipeline_name:
133 |                 already_exist=True
134 |                 exists_idx=idx
135 |         
136 |         if not already_exist:
137 |             data.append(info)
138 |         else:
139 |             data[idx].update(info)
140 |         
141 |             
142 |         yamlio.write_to_yaml(os.path.join(self.base_path,"info.yaml"), data)
143 |         yamlio.write_to_yaml(os.path.join(path_to_yaml,file_name),self.dag)
144 | 
145 |     def __load__mld_file(self,info:dict)->object:
146 |         """_summary_: Load Mould File with all the injected dependencies 
147 | 
148 |         Args:
149 |             info (dict): dictinary containing the location of the mould file
150 | 
151 |         Returns:
152 |             object: returns a python object
153 |         """
154 |         loader_path=os.path.join(self.path,info["root_path"],info["sub_path"],info['filename'])
155 |         return cloudpickle.load(open(loader_path,'rb'))
156 |     
157 |     def load_pipeline(self):
158 |         """_summary_: Load pipeline from specific location
159 |         """
160 |         dag=yamlio.read_yaml(os.path.join(self.pipeline_path,self.pipeline_name,f'{self.pipeline_name}.yaml'))
161 |         self.dag=dag
162 |         
163 |         
164 |         
165 |         
166 |     def _get_input_for_func(self,dag_states:dict,node_dict:dict,out_put_nodes:dict)-> dict:
167 |         """_summary_: get the inputs for each node
168 | 
169 |         Args:
170 |             dag_states (dict): contains mapped variable  <function_nam>: <arg_name> [the <arg_name> is the argument name as defined in the function] 
171 |             out_put_nodes (dict): contains the previous outputs for the functions that have completed running
172 |             node_info (dict): dictinary containing the location of the mould file and input nodes connected to the given node
173 | 
174 |         Returns:
175 |             dict: returns a dictionary for  <arg_name>: <prev_output> mapping that can be used in the next node
176 |         """
177 |         # sourcery skip: assign-if-exp, reintroduce-else, swap-if-expression
178 |         
179 |         ##if no args are there
180 |         
181 |         if not dag_states: return dag_states
182 |         input_dict={}
183 |         for func_name_,map_name_ in dag_states.items():
184 |             ##if there are any external arguments  
185 |             if not func_name_.startswith(self._args_tag):
186 |                 input_dict[map_name_]=out_put_nodes[func_name_]
187 |             else:
188 |                 input_dict[map_name_]=node_dict["args"][map_name_]
189 |         return input_dict
190 |         
191 |     
192 |     def __change_status__(self,status:str,node_name:str,log:str=None):
193 |         """_summary_: Change Node status
194 | 
195 |         Args:
196 |             status (str): What is the status for the Node
197 |             node_name (str): Name of the Node
198 |             log (str, optional): Any Log files to be added. Defaults to None.
199 |         """
200 |         if status==0:
201 |             self.dag["node_details"][node_name] = {
202 |                 "start_time": str(datetime.datetime.now()),
203 |                 "log": f"======{self.status_code[status].upper()}======{str(datetime.datetime.now())}\n",
204 |                 "status":self.status_code[status]
205 |             }
206 |         elif status in {1, 3}:
207 |             self.dag["node_details"][node_name] = {
208 |                 "end_time": str(datetime.datetime.now()),
209 |                 "log": f"======{self.status_code[status].upper()}======{str(datetime.datetime.now())}\n",
210 |                 "status":self.status_code[status]
211 | 
212 |             }
213 |             if log!=None:
214 |                 self.dag["node_details"][node_name]["log"] += "\n" + log + "======"
215 |             
216 |     def _check_for_job_status(self,jobname,queue_name):
217 |         all_jobs=yamlio.read_yaml(queue_name)
218 |         status=[j["status"] for j in all_jobs if j["pipelinename"]==jobname]
219 |         return status[0] != "Stopped"
220 |     
221 |     
222 |     def bfs(self, graph:dict, entry_node:str,node_info:dict,dag_states:dict): #function for BFS
223 |         """_summary_: Breadth-first search 
224 | 
225 |         Args:
226 |             graph (dict): contains DAG structure of the nodes "root" is the starting node {root : [nodeA],nodeA :[nodeB, nodeC]}
227 |             entery_node (str): the entry node is the "root" node
228 |             node_info (dict): dictinary containing the location of the mould file and input nodes connected to the given node
229 |             dag_states (dict): ontains mapped variable  <function_nam>: <arg_name> [the <arg_name> is the argument name as defined in the function] 
230 |         """
231 |         
232 |         visited = [entry_node] # List for visited nodes.
233 |         queue = [entry_node]
234 |         
235 |         output_list={}
236 |         while queue:          # Creating loop to visit each node
237 |             m = queue.pop(0)
238 |             for neighbour in graph[m]:
239 |                 if neighbour not in visited:
240 |                     try:
241 |                         print("Node-->",neighbour)
242 |                         function_=self.__load__mld_file(node_info[neighbour])
243 |                     
244 |                         output_list[neighbour]=function_(**self._get_input_for_func(dag_states[neighbour],node_info[neighbour],output_list))
245 |                         self.__change_status__(1,neighbour)
246 |                     except Exception as e:
247 |                         print(traceback.format_exc())
248 |                         self.__change_status__(3,neighbour,traceback.format_exc())
249 |                     
250 |                     visited.append(neighbour)
251 |                     queue.append(neighbour)
252 |                     yamlio.write_to_yaml(os.path.join(self.path_pipe,f"{self.pipeline_name}.yaml"),self.dag)
253 |                     
254 |                     
255 |     
256 |     def run(self):
257 |          #Initialize a queue
258 |         dag=self.dag
259 |         self.bfs(dag["graph"],"root",node_info=dag["nodes"],dag_states=dag["args_map"])
260 |         yamlio.write_to_yaml(os.path.join(self.path_pipe,f"{self.pipeline_name}.yaml"),self.dag)
261 | 
262 |     def run_serialized(self,flag_variable_path,job_name):
263 |          #Initialize a queue
264 |         dag=self.dag
265 |         print("node", self._check_for_job_status(job_name,flag_variable_path))
266 |         if not self._check_for_job_status(job_name,flag_variable_path):
267 |             sys.exit()
268 |             
269 |         #RESET status    
270 |         for node_name in dag["node_details"]:
271 |             self.dag["node_details"][node_name]={"status":self.status_code[2],"start_time":"-","end_time":"-","log":""}
272 |         yamlio.write_to_yaml(os.path.join(self.path_pipe,f"{self.pipeline_name}.yaml"),self.dag)
273 | 
274 | 
275 |         # After RUn complete write status code
276 |         self.bfs(dag["graph"],"root",node_info=dag["nodes"],dag_states=dag["args_map"])
277 |         
278 | 
279 |     def __get_dag__(self):
280 |         return self.dag


--------------------------------------------------------------------------------
/pymlpipeUI.py:
--------------------------------------------------------------------------------
  1 | import flask
  2 | import os
  3 | from pymlpipe.utils import yamlio
  4 | from pymlpipe.utils import uiutils
  5 | from pymlpipe.utils import change2graph
  6 | from pymlpipe.utils import database
  7 | 
  8 | from flask_api import FlaskAPI
  9 | import numpy as np
 10 | import json
 11 | import uuid
 12 | from datetime import datetime
 13 | 
 14 | import pandas as pd
 15 | 
# Application object on which the UI routes below are registered.
app = FlaskAPI(__name__)


# All data folders are resolved against the directory the server is started from.
BASE_DIR=os.getcwd()
MODEL_FOLDER_NAME="modelrun"
PIPELINE_FOLDER_NAME="ML_pipelines"
MODEL_DIR=os.path.join(BASE_DIR,MODEL_FOLDER_NAME)
PIPELINE_DIR=os.path.join(BASE_DIR,PIPELINE_FOLDER_NAME)

# File names of the YAML registries; EXPERIMENT_FILE is read from MODEL_DIR by
# the index view. (Presumably the other two live under MODEL_DIR /
# PIPELINE_DIR as well - confirm against their readers.)
EXPERIMENT_FILE="experiment.yaml"
DEPLOYMENT_FILE="deployment.yaml"
QUEUE_NAME="queue.yaml"

#ALL_DEPLOYED_MODELS=[]
# Module-level registry, empty at import time.
PREDICTORS={}
# NOTE(review): the secret key is hard-coded in source; consider loading it
# from the environment or configuration instead.
app.secret_key="PYMLPIPE_SEC_KEY"
 32 | 
 33 | 
 34 | @app.route("/")
 35 | def index():
 36 |     '''
 37 |     if "status" in flask.request.args:
 38 |         
 39 |         if flask.request.args["status"]=="501":
 40 |             deploy_status=False
 41 |             
 42 |     '''       
 43 |     metric_filters={}
 44 |     tag_filters=[]
 45 |     if len(flask.request.args):
 46 |         if "metrics" in flask.request.args:
 47 |             metric_filters[flask.request.args['metrics']]=flask.request.args["metricsfilter"]
 48 |         elif "tags" in flask.request.args:
 49 |             tag_filters=flask.request.args["tags"].split(",")
 50 |     
 51 |     experiment_lists=yamlio.read_yaml(os.path.join(MODEL_DIR,EXPERIMENT_FILE))
 52 |     if len(experiment_lists)==0:
 53 |         return flask.render_template("index.html",
 54 |                                     runs=[],
 55 |                                     run_details={},
 56 |                                     metrics=[],
 57 |                                     current_experiment=None
 58 |                                      )
 59 |     info={}
 60 |     metrics=[]
 61 |     exp_wise_metrics={}
 62 |     tags=[]
 63 |     error=""
 64 |     for experiment,run_data in experiment_lists.items():
 65 |         exp_wise_metrics[experiment]=[]
 66 |         for run_id in run_data["runs"]:
 67 |             print(run_data['experiment_path'],run_id,"info.yaml")
 68 |             run_folder=os.path.join(run_data['experiment_path'],run_id,"info.yaml")
 69 |             run_details=yamlio.read_yaml(run_folder)
 70 |             info[run_id]=run_details
 71 |             if 'tags' in run_details:
 72 |                 tags.extend(run_details["tags"])
 73 |             if "metrics" in run_details:
 74 |                 metrics.extend(list(run_details["metrics"].keys()))
 75 |                 mm=[i for i in list(run_details["metrics"].keys()) if i not in exp_wise_metrics[experiment]]
 76 |                 exp_wise_metrics[experiment].extend(mm)
 77 |                 
 78 |     #filter emmpty runs:            
 79 |     info={run:info[run] for run in info if len(info[run])>0}
 80 |     
 81 |     
 82 |     if len(metric_filters)>0:
 83 |         newinfo={}
 84 |         for run_id,details in info.items():
 85 |             
 86 |             for mfilter in metric_filters:
 87 |                 if mfilter in details["metrics"]:
 88 |                     fv=details["metrics"][mfilter]
 89 |                     try:
 90 |                         if eval(str(fv)+metric_filters[mfilter]):
 91 |                             newinfo[run_id]=details
 92 |                     except Exception as e:
 93 |                         error=e
 94 |                 else:
 95 |                     newinfo[run_id]=details
 96 |         info=newinfo
 97 |     elif len(tag_filters)>0:
 98 |         newinfo={}
 99 |         for run_id,details in info.items():
100 |             if len(set(tag_filters).intersection(set(details["tags"])))>0:
101 |                 newinfo[run_id]=details
102 |         info=newinfo
103 |     
104 |     exp_names=list(experiment_lists.keys())
105 |     
106 |     
107 |     return flask.render_template("index.html",
108 |                                  runs=experiment_lists,
109 |                                  run_details=info,
110 |                                  metrics=list(set(metrics)),
111 |                                  current_experiment=exp_names,
112 |                                  tags=list(set(tags)),
113 |                                  exp_wise_metrics=exp_wise_metrics,
114 |                                  error=error
115 |                                  )
@app.route("/run/<run_id>/")
def runpage(run_id):
    """Render the detail page of a single run.

    Args:
        run_id: URL segment of the form ``<experiment>@<run id>``.

    The optional query parameter ``status=501`` marks a deployment attempt
    as unsuccessful (``deploy_status`` is passed to the template as False).

    Returns:
        The rendered ``run.html`` template.
    """
    deploy_status = flask.request.args.get("status") != "501"

    experiments, run_id = run_id.split("@")
    run_details = yamlio.read_yaml(os.path.join(MODEL_DIR, experiments, run_id, 'info.yaml'))
    # "model" is treated as optional below, so guard every access to it.
    model = run_details["model"] if "model" in run_details else {}

    expertiment_details = {
        "RUN_ID": run_id,
        "EXPERIMENT NAME": experiments,
        "EXECUTION DATE TIME": run_details["execution_time"],
        "TAGS": run_details['tags'] if 'tags' in run_details else "-",
        "VERSION": run_details['version'] if 'version' in run_details else "-",
    }

    metrics_log = {}
    metrics_log_plot = {}
    if "metrics_log" in run_details and len(run_details["metrics_log"]) > 0:
        entries = run_details["metrics_log"]
        metrics_log["data"] = entries
        metrics_log["cols"] = list(entries[0].keys())
        last_key = None
        # Pivot the list of per-step dicts into one series per metric name.
        for entry in entries:
            for key, value in entry.items():
                metrics_log_plot.setdefault(key, []).append(value)
                last_key = key
        if last_key is not None:
            # x-axis for the plot: one index per logged step.
            metrics_log_plot["range"] = list(range(len(metrics_log_plot[last_key])))

    model_type = model["model_type"] if "model_type" in model else ""

    graph_dict = {}
    if "model_ops" in model:
        graph_dict = change2graph.makegraph(model["model_ops"], model["model_architecture"])

    XAI = ""
    if "XAI" in run_details:
        xai_info = run_details["XAI"]
        feature_map = pd.read_csv(xai_info["feature_explainer"]).round(3)
        shap_markup = ""
        if xai_info["shap"] != "":
            # Close the file deterministically instead of leaking the handle.
            with open(xai_info["shap"]) as shap_file:
                shap_markup = flask.Markup(shap_file.read())
        XAI = {
            "table": {
                "columns": feature_map.columns,
                "rows": feature_map.values
            },
            "image": shap_markup
        }

    return flask.render_template('run.html',
                                 run_id=run_id,
                                 experiments=experiments,
                                 expertiment_details=expertiment_details,
                                 artifact_details=run_details["artifact"],
                                 metrics_details=run_details["metrics"],
                                 model_details=model,
                                 param_details=run_details["params"],
                                 schema_details=run_details["artifact_schema"],
                                 is_deployed="model_path" in model,
                                 deploy_status=deploy_status,
                                 metrics_log=metrics_log,
                                 metrics_log_plot=metrics_log_plot,
                                 model_type=model_type,
                                 graph_dict=graph_dict,
                                 XAI=XAI
                                 )
@app.route("/download_artifact/<uid>")
def download_artifact(uid):
    """Send a registered artifact file as a download.

    Args:
        uid: ``<experiment>@<run id>@<file name>``.
    """
    experiment_name, run_name, artifact_name = uid.split("@")
    artifact_dir = os.path.join(MODEL_DIR, experiment_name, run_name, "artifacts")
    return flask.send_from_directory(artifact_dir, artifact_name, as_attachment=True)
203 | 
@app.route("/download_model/<uid>")
def download_model(uid):
    """Send a registered model file as a download.

    Args:
        uid: ``<experiment>@<run id>@<model name>@<model type>``. The model
            type selects the file extension (``.pkl`` for ``scikit-learn``,
            ``.pt`` for ``torch``); any other type leaves the name as is.
    """
    experiment_name, run_name, stem, model_type = uid.split("@")
    extension_by_type = {"scikit-learn": ".pkl", "torch": ".pt"}
    model_file = stem + extension_by_type.get(model_type, "")
    model_dir = os.path.join(MODEL_DIR, experiment_name, run_name, "models")
    return flask.send_from_directory(model_dir, model_file, as_attachment=True)
213 |     
@app.route("/deployments/<run_id>/")
def deployments(run_id):
    """Deploy the model of a run and record it in the deployment file.

    Args:
        run_id: URL segment of the form ``<experiment>@<run id>``.

    Returns:
        A redirect to the deployment list on success, or back to the run
        page with ``status=501`` when a predictor for this run is already
        loaded.
    """
    experiments, runid = run_id.split("@")
    # Deterministic 16-character deployment number derived from the run id.
    run_hash = str(uuid.uuid3(uuid.NAMESPACE_DNS, run_id)).replace("-", "")[:16]
    if run_hash in PREDICTORS:
        # Already deployed: skip loading the model a second time.
        return flask.redirect("/run/" + run_id + "?status=501")

    run_details = yamlio.read_yaml(os.path.join(MODEL_DIR, experiments, runid, 'info.yaml'))
    model = run_details["model"]
    PREDICTORS[run_hash] = uiutils.deployment_handler(model["model_path"],
                                                      model["model_type"],
                                                      model["model_mode"])

    deployment_file = os.path.join(MODEL_DIR, DEPLOYMENT_FILE)
    ALL_DEPLOYED_MODELS = yamlio.read_yaml(deployment_file)
    for record in ALL_DEPLOYED_MODELS:
        if record["model_deployment_number"] == run_hash:
            # A stopped deployment is no longer in PREDICTORS but still has a
            # record; reactivate it instead of appending a duplicate entry.
            record["status"] = 'running'
            break
    else:
        ALL_DEPLOYED_MODELS.append(
            {
                "run_id": runid,
                "experiment_id": experiments,
                "model_path": model["model_path"],
                "model_type": model["model_type"],
                "model_deployment_number": run_hash,
                "model_url": "/predict/" + run_hash,
                "status": 'running',
                "model_mode": model["model_mode"]
            }
        )
    yamlio.write_to_yaml(deployment_file, ALL_DEPLOYED_MODELS)
    return flask.redirect(flask.url_for("show_deployments"))
241 | 
@app.route("/show_deployments/")
def show_deployments():
    """Render the list of deployment records stored in the deployment file."""
    deployment_file = os.path.join(MODEL_DIR, DEPLOYMENT_FILE)
    records = yamlio.read_yaml(deployment_file)
    return flask.render_template('deployments.html', ALL_DEPLOYED_MODELS=records)
248 |     
249 |     
@app.route("/predict/<hashno>",methods=["GET","POST"])
def predict(hashno):
    """Prediction endpoint of a deployed model.

    GET returns the deployment record (without ``model_path``) together with
    an example request body. POST reads ``data`` (and optionally ``dtype``)
    from the request payload, runs the loaded predictor and returns either
    the predictions or the predictor's error.

    Args:
        hashno: deployment number from the URL.

    Returns:
        A dict that Flask-API serialises as the response body. Unknown or
        stopped deployments yield an ``info`` dict carrying error 404.
    """
    records = yamlio.read_yaml(os.path.join(MODEL_DIR, DEPLOYMENT_FILE))
    running = next(
        (
            record
            for record in records
            if record["model_deployment_number"] == hashno and record["status"] == "running"
        ),
        None,
    )

    info_dict = {}
    model_type = None
    if running is not None:
        # The filesystem path is not exposed to API clients.
        del running['model_path']
        info_dict = running
        model_type = running["model_type"]

    if len(info_dict) == 0 or hashno not in PREDICTORS:
        return {
            "info": {
                "error": 404,
                "msg": "No such endpoint present"
            }
        }

    if flask.request.method == "POST":
        payload = flask.request.data
        dtype = payload["dtype"] if "dtype" in payload else None
        predictions, status = PREDICTORS[hashno].predict(np.array(payload['data']), dtype)
        response = {"deployment no": hashno}
        if status == 1:
            # Status 1 means `predictions` carries the error description.
            response["error"] = predictions
        else:
            response["predictions"] = [float(p) for p in predictions]
        return response

    # GET: describe the endpoint with a sample request body.
    if model_type == "scikit-learn":
        request_body = {
            "data": [
                [5.6, 3.0, 4.5, 1.5],
                [5.6, 3.0, 4.5, 1.5],
            ]
        }
    else:
        request_body = {
            "data": [
                [
                    42.0, 120.0, 1.0, 0.0, 0.0, 0.0, 185.7, 133.0, 31.57,
                    235.1, 149.0, 19.98, 256.4, 78.0, 11.54, 16.9, 6.0,
                    4.56, 0.0,
                ]
            ],
            "dtype": "float",
        }
    return {"info": info_dict, "request_body": request_body}
@app.route("/deployment/stop/<deployment_no>",methods=["GET"])
def stop_deployment(deployment_no):
    """Mark a deployment as stopped and drop its predictor from memory.

    Args:
        deployment_no: deployment number from the URL.

    Returns:
        ``{"status": 200}`` in every case.
    """
    global PREDICTORS
    deployment_file = os.path.join(MODEL_DIR, DEPLOYMENT_FILE)
    records = yamlio.read_yaml(deployment_file)
    for record in records:
        if record['model_deployment_number'] != deployment_no:
            continue
        print("here here")
        record['status'] = "stopped"
    yamlio.write_to_yaml(deployment_file, records)
    # Rebind the module-level map without the stopped deployment.
    PREDICTORS = {
        number: predictor
        for number, predictor in PREDICTORS.items()
        if number != deployment_no
    }
    return {"status": 200}
346 | 
347 | 
@app.route("/deployment/start/<deployment_no>",methods=["GET"])
def start_deployment(deployment_no):
    """Mark a deployment as running and load its predictor into memory.

    Args:
        deployment_no: deployment number from the URL.

    Returns:
        ``{"status": 200}`` in every case.
    """
    deployment_file = os.path.join(MODEL_DIR, DEPLOYMENT_FILE)
    records = yamlio.read_yaml(deployment_file)
    for record in records:
        if record['model_deployment_number'] != deployment_no:
            continue
        record['status'] = "running"
        kind = record["model_type"]
        PREDICTORS[deployment_no] = uiutils.deployment_handler(
            record["model_path"], kind, record["model_mode"]
        )
    yamlio.write_to_yaml(deployment_file, records)
    return {"status": 200}
363 |                 
364 |                                  
@app.route("/jobs/")
def jobs():
    """Render the pipeline job list read from the pipeline ``info.yaml``."""
    info_path = os.path.join(PIPELINE_DIR, "info.yaml")
    return flask.render_template("jobs.html", pipeline=yamlio.read_yaml(info_path))
372 |     
@app.route("/jobs/run/<runid>")
def runjobs(runid):
    """Toggle a pipeline between running and queued, then show the job list.

    A pipeline whose status is ``Started`` becomes ``Stopped``; any other
    status becomes ``Queued``. ``jobtime`` is set to the current time.

    Args:
        runid: pipeline name from the URL.
    """
    info_path = os.path.join(PIPELINE_DIR, "info.yaml")
    pipelines = yamlio.read_yaml(info_path)
    for entry in pipelines:
        if entry["pipelinename"] != runid:
            continue
        entry["status"] = "Stopped" if entry["status"] == "Started" else "Queued"
        entry["jobtime"] = datetime.now()

    yamlio.write_to_yaml(info_path, pipelines)
    return flask.redirect(flask.url_for("jobs"))
398 | 
@app.route("/jobs/view/<runid>")
def viewjobs(runid):
    """Render the graph and per-node logs of one pipeline.

    Args:
        runid: pipeline name; its definition is read from
            ``<PIPELINE_DIR>/<runid>/<runid>.yaml``.
    """
    spec = yamlio.read_yaml(os.path.join(PIPELINE_DIR, runid, runid + ".yaml"))
    graph = spec["graph"]
    node_details = spec["node_details"]

    grapg_dict = change2graph.makegraph_pipeline(graph, node_details)
    nodes_logs = {name: node_details[name]["log"] for name in node_details}
    # The log of the first node is what the page shows initially.
    first_log = list(nodes_logs.values())[0]
    return flask.render_template("job_view.html",
                                 pipelinename=runid,
                                 grapg_dict=grapg_dict,
                                 nodes=nodes_logs,
                                 initital_node=first_log
                                 )
413 |     
414 |     
415 |     
def start_ui(host=None,port=None,debug=False):
    """Load every recorded deployment into memory and start the web UI.

    Args:
        host: interface to bind to; Flask's default is used when ``None``.
        port: port to listen on; Flask's default is used when ``None``.
        debug: passed through to ``app.run``.

    TODO: wrap model loading in error handling so one unloadable model does
    not prevent the UI from starting.
    """
    records = yamlio.read_yaml(os.path.join(MODEL_DIR, DEPLOYMENT_FILE))
    for record in records:
        PREDICTORS[record['model_deployment_number']] = uiutils.deployment_handler(
            record["model_path"], record["model_type"], record["model_mode"]
        )

    # Forward only the options the caller actually supplied.
    run_options = {"debug": debug}
    if host is not None:
        run_options["host"] = host
    if port is not None:
        run_options["port"] = port
    app.run(**run_options)
433 |         
434 |         
435 | 
# Running the module directly starts the app with Flask's defaults; unlike
# start_ui(), this path does not load recorded deployments into PREDICTORS.
if __name__ == '__main__':
    app.run()


--------------------------------------------------------------------------------
/queue.py:
--------------------------------------------------------------------------------
 1 | from pymlpipe.utils import yamlio
 2 | import os
 3 | import time
 4 | import pymlpipe.pipeline as pipeline
 5 | 
# The queue file is located at <BASE_DIR>/<queue_store>/<queue_name>, relative
# to the directory the process was started from.
BASE_DIR=os.getcwd()
queue_store="ML_pipelines"
queue_name="info.yaml"
 9 | def execute_from_queue(name,path):
10 |     print(f"Start execution :{name}")
11 |     ppl=pipeline.PipeLine(name)
12 |     ppl.load_pipeline()
13 |     ppl.run_serialized(flag_variable_path=path,job_name=name)
14 |     print("End execution :")
15 |     
16 |     
17 | 
def change_status(queue,status,job_id=None):
    """Update the status of one job in ``queue`` (in place).

    Without ``job_id`` the queue is scanned in order: the first ``Queued``
    job receives ``status``; if a ``Started`` job is met first, nothing is
    changed. With ``job_id`` the ``Started`` job of that name receives
    ``status``.

    Returns:
        ``(pipeline name, queue)`` when a job was updated, otherwise
        ``(None, queue)``.
    """
    if job_id is None:
        for entry in queue:
            state = entry["status"]
            if state == "Queued":
                entry["status"] = status
                return entry["pipelinename"], queue
            if state == "Started":
                # Another job is already running; do not start a new one.
                return None, queue
        return None, queue

    for entry in queue:
        if entry["status"] == "Started" and entry["pipelinename"] == job_id:
            entry["status"] = status
            return entry["pipelinename"], queue
    return None, queue
31 |             
32 | 
def start_server(check_in:int=5):
    """Poll the queue file forever and execute queued pipelines one by one.

    Args:
        check_in: seconds to wait between two polls of the queue file.

    Raises:
        ValueError: if ``check_in`` is not an ``int``.
    """
    if not isinstance(check_in,int):
        raise ValueError(
            f"ERROR!!! 'check_in' should be in sec [int] found {type(check_in)}"
        )

    queue_path = os.path.join(BASE_DIR, queue_store, queue_name)
    while True:
        print('-- START--')
        queue = yamlio.read_yaml(queue_path)
        job_name, queue = change_status(queue, "Started")
        yamlio.write_to_yaml(queue_path, queue)
        if job_name is not None:
            execute_from_queue(job_name, path=queue_path)
            # Re-read the file: it may have been edited while the job ran
            # (e.g. a job stopped or another one queued). Writing back the
            # pre-run snapshot would silently discard those changes.
            queue = yamlio.read_yaml(queue_path)
            job_name, queue = change_status(queue, "Completed", job_name)
            yamlio.write_to_yaml(queue_path, queue)
        # Honour the configured interval (was hard-coded to 5 seconds).
        time.sleep(check_in)
        print('-- END--')


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | atomicwrites==1.4.0
 2 | attrs==21.4.0
 3 | catboost==1.1.1
 4 | click==8.1.3
 5 | cloudpickle==2.2.1
 6 | contourpy==1.0.7
 7 | cycler==0.11.0
 8 | Flask==2.1.2
 9 | Flask-API==3.0.post1
10 | fonttools==4.38.0
11 | graphviz==0.20.1
12 | importlib-metadata==4.12.0
13 | itsdangerous==2.1.2
14 | Jinja2==3.1.2
15 | joblib==1.1.0
16 | kiwisolver==1.4.4
17 | lightgbm==3.3.5
18 | llvmlite==0.39.1
19 | MarkupSafe==2.1.1
20 | matplotlib==3.6.3
21 | more-itertools==8.13.0
22 | numba==0.56.4
23 | numpy==1.23.0
24 | packaging==21.3
25 | pandas==1.4.3
26 | Pillow==9.4.0
27 | plotly==5.13.0
28 | pluggy==0.13.1
29 | py==1.11.0
30 | pyparsing==3.0.9
31 | pytest==5.2.0
32 | python-dateutil==2.8.2
33 | pytz==2022.1
34 | PyYAML==6.0
35 | scikit-learn==1.1.1
36 | scipy==1.8.1
37 | shap==0.41.0
38 | six==1.16.0
39 | sklearn==0.0
40 | slicer==0.0.7
41 | tenacity==8.2.1
42 | threadpoolctl==3.1.0
43 | torch==1.12.0
44 | tqdm==4.64.1
45 | typing-extensions==4.3.0
46 | wcwidth==0.2.5
47 | Werkzeug==2.1.2
48 | xgboost==1.7.3
49 | zipp==3.8.0
50 | 


--------------------------------------------------------------------------------
/samples/runner_pipeline_server.py:
--------------------------------------------------------------------------------
# Sample runner: starts the pipeline queue worker with its default polling
# interval. start_server() loops forever, so this script does not return.
from pymlpipe import queue

queue.start_server()


--------------------------------------------------------------------------------
/samples/runner_pymlpipeUI.py:
--------------------------------------------------------------------------------
# Sample runner: starts the PyMLPipe web UI on all interfaces, port 8085.
from pymlpipe.pymlpipeUI import start_ui


# NOTE(review): debug=True together with host 0.0.0.0 exposes Flask's debug
# mode to the network; use only on a trusted machine.
start_ui(host='0.0.0.0', port=8085,debug=True)
5 | 


--------------------------------------------------------------------------------
/samples/test_MLpipeline.py:
--------------------------------------------------------------------------------
 1 | from pymlpipe import pipeline
 2 | import pandas as pd
 3 | from sklearn.datasets import  load_iris
 4 | import pandas as pd
 5 | from sklearn.model_selection import train_test_split
 6 | from pymlpipe.tabular import PyMLPipe
 7 | from sklearn.linear_model import LogisticRegression
 8 | from sklearn.ensemble import  RandomForestClassifier
 9 | from sklearn.tree import DecisionTreeClassifier
10 | from xgboost import XGBClassifier
11 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
12 | import time
13 | 
# Pipeline that the nodes below are attached to, and the experiment tracker
# used inside train_model().
ppl=pipeline.PipeLine("IrisData")
mlp=PyMLPipe()
mlp.set_experiment("pipelinecheck")
mlp.set_version(0.1)
18 | 
def get_data():
    """Load the iris dataset and split it into train and test parts.

    Returns:
        dict with the keys ``trainx``, ``trainy``, ``testx`` and ``testy``.
    """
    iris = load_iris()
    features = pd.DataFrame(iris["data"], columns=iris["feature_names"])
    labels = iris["target"]
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels)
    return {
        "trainx": train_features,
        "trainy": train_labels,
        "testx": test_features,
        "testy": test_labels,
    }
28 | 
def get_model(model):
    """Return a fresh estimator for a model index.

    ``0`` gives a ``LogisticRegression``, ``1`` a ``RandomForestClassifier``;
    any other value gives ``None``.
    """
    if model == 0:
        return LogisticRegression()
    if model == 1:
        return RandomForestClassifier()
    return None
34 |     
def train_model(data,model_name):
    """Fit the selected model inside a tracked run and register it.

    Args:
        data: dict produced by ``get_data``; ``trainx`` and ``trainy`` are used.
        model_name: model index understood by ``get_model``.

    Returns:
        The fitted estimator. The function sleeps 60 seconds before
        returning (presumably to imitate a long-running node).
    """
    with mlp.run():
        features, labels = data["trainx"], data["trainy"]
        mlp.set_tags(["Classification","test run","logisticRegression"])
        estimator = get_model(model_name)
        estimator.fit(features, labels)
        mlp.scikit_learn.register_model(str(model_name), estimator)

    time.sleep(60)
    return estimator
48 | 
def evaluate(data,model):
    """Print the model's predictions for the test features in ``data``."""
    features, _labels = data["testx"], data["testy"]
    predictions = model.predict(features)
    print(predictions)
52 |     
53 | 
# Entry node that produces the train/test split.
n1=ppl.add_node("data", get_data,entry_node=True)
# For each model index add a training node fed by "data" and an evaluation
# node fed by "data" plus the matching training node.
for idx,model in enumerate([0,1]):
    ppl.add_node(
        f"model_train{str(idx)}",
        train_model,
        input_nodes=["data"],
        args={"model_name":model},
    ) 
    ppl.add_node(
        f"eval_train{str(idx)}",
        evaluate,
        input_nodes=["data", f"model_train{str(idx)}"],
    )

    #ppl.add_edge(n1, n2)
    #ppl.add_edge(n2, n3)

#n1>>[n2,n3]
# Only register the DAG here; the run itself is commented out.
ppl.register_dag()
#ppl.run()
73 | #ppl.run()
74 |     
75 | 


--------------------------------------------------------------------------------
/samples/test_api.py:
--------------------------------------------------------------------------------
 1 | from pymlpipe.api import Client
 2 | 
# Sample use of the read-only client API.
ml_connect=Client()
print(ml_connect.get_all_experiments())
print(ml_connect.get_all_run_ids("IrisAutoML"))
#print(ml_connect.get_run_details("Pytorch","01d9d974-284c-4775-95bc-792491267d05"))
#print(ml_connect.get_all_run_details("IrisAutoML"))
#print(ml_connect.get_metrics_comparison("Pytorch",format="pandas",sort_by="f1"))
# NOTE(review): the run id below is hard-coded; presumably it must be
# replaced with an id printed by get_all_run_ids() on the local machine.
print(ml_connect.get_model_details("IrisAutoML","680f5dcf-e207-4cb5-adb9-cc6d7fbb8b16",format="pandas"))
10 | 


--------------------------------------------------------------------------------
/samples/test_automl_run.py:
--------------------------------------------------------------------------------
 1 | from pymlpipe.automl import AutoMLPipe
 2 | from sklearn.datasets import  load_iris
 3 | import pandas as pd
 4 | import numpy as np
 5 | 
 6 | def main():
 7 |     
 8 |     iris_data=load_iris()
 9 |     data=iris_data["data"]
10 |     target=iris_data["target"]
11 | 
12 |     df=pd.DataFrame(data,columns=iris_data["feature_names"])
13 |     automl_obj=AutoMLPipe("IrisAutoML","classification",
14 |                             "precision",
15 |                             df,
16 |                             target,
17 |                             tags=["new_data","clf"],
18 |                             transform=True,
19 |                             scale='normalize',
20 |                             register_model=True,
21 |                             version=1.0,
22 |                             exclude=['log_reg',"lgbmc"])
23 |     preds,result=automl_obj.run_automl(tune=False,tune_best=False)
24 |     
25 |     #DataFrame with comparative metrics of all the models
26 |     print(result)
27 |     #Dictionary with model names and the predictions 
28 |     print(preds)
29 | 
30 | if __name__ == '__main__':
31 |     main()
32 | 
33 | 


--------------------------------------------------------------------------------
/samples/test_cases_pipeline.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/samples/test_cases_pipeline.py


--------------------------------------------------------------------------------
/samples/test_create_pipeline.py:
--------------------------------------------------------------------------------
 1 | #from pymlpipe import pipeline
 2 | from pymlpipe import pipeline
 3 | import pandas as pd
 4 | 
 5 | #ppl=pipeline.Pipeline("PIPELINEV2")
 6 | 
 7 | ppl=pipeline.PipeLine("PIPELINEV2")
 8 | def node1():
 9 |     path="train.csv"
10 |     df=pd.read_csv(path)
11 |     return df
def node2(df):
    """Return ``df.describe()`` with ``_node2`` appended to every column name."""
    summary = df.describe()
    renamed = [name + "_node2" for name in summary.columns]
    summary.columns = renamed
    return summary
def node3(df):
    """Return ``df.describe()`` with ``_node3`` appended to every column name."""
    summary = df.describe()
    renamed = [name + "_node3" for name in summary.columns]
    summary.columns = renamed
    return summary
20 |     
def node4(node1_df,node2_df):
    """Print the rows of ``node2_df`` stacked below those of ``node1_df``.

    Uses ``pd.concat`` because ``DataFrame.append`` is deprecated since
    pandas 1.4 and removed in pandas 2.0.
    """
    print(pd.concat([node1_df, node2_df]))
def node5(node1_df,node2_df,node3df):
    """Print the rows of ``node2_df`` stacked below those of ``node1_df``.

    ``node3df`` is accepted but not used. Uses ``pd.concat`` because
    ``DataFrame.append`` is deprecated since pandas 1.4 and removed in
    pandas 2.0.
    """
    print(pd.concat([node1_df, node2_df]))
25 |     
# Wire the DAG: "node1" is the entry node and feeds four describe() nodes,
# whose outputs are combined pairwise by "node4" and "node7".
ppl.add_node("node1", node1,entry_node=True)
# NOTE(review): the node named "node2" runs function node3 and the node
# named "node3" runs function node2 -- confirm this swap is intended.
ppl.add_node("node2", node3,input_nodes=["node1"])
ppl.add_node("node3", node2,input_nodes=["node1"])
ppl.add_node("node5", node2,input_nodes=["node1"])
ppl.add_node("node6", node2,input_nodes=["node1"])
ppl.add_node("node4", node4,input_nodes=["node2","node3"])
# NOTE(review): function node5 declares three parameters but only two input
# nodes are listed here -- verify how the pipeline supplies the third.
ppl.add_node("node7", node5,input_nodes=["node5","node6"])




#n1>>[n2,n3]
ppl.register_dag()
ppl.run()


--------------------------------------------------------------------------------
/samples/test_dl_torch_train.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import pandas as pd
  3 | from sklearn.preprocessing import LabelEncoder
  4 | from sklearn.model_selection import train_test_split
  5 | from sklearn.metrics import accuracy_score,f1_score
  6 | from pymlpipe.tabular import PyMLPipe
# Load the churn dataset from the working directory.
df=pd.read_csv("train.csv")
# Columns that are label-encoded below.
encoders=["area_code","state","international_plan","voice_mail_plan","churn"]

# Replace each listed column with integer codes; a fresh encoder is fitted
# per column and not kept afterwards.
for i in encoders:
    le=LabelEncoder()
    df[i]=le.fit_transform(df[i])
    
    
# Target column and the 19 feature columns used for training.
trainy=df["churn"]
trainx=df[['state', 'account_length', 'area_code', 'international_plan',
       'voice_mail_plan', 'number_vmail_messages', 'total_day_minutes',
       'total_day_calls', 'total_day_charge', 'total_eve_minutes',
       'total_eve_calls', 'total_eve_charge', 'total_night_minutes',
       'total_night_calls', 'total_night_charge', 'total_intl_minutes',
       'total_intl_calls', 'total_intl_charge',
       'number_customer_service_calls']]
 23 | 
 24 | 
 25 | class Model(torch.nn.Module):
 26 |     def __init__(self,col_size):
 27 |         super().__init__()
 28 |         self.seq=torch.nn.Sequential(
 29 |             torch.nn.Linear(col_size,15),
 30 |             torch.nn.ReLU(),
 31 |             torch.nn.Linear(15,10),
 32 |             torch.nn.ReLU(),
 33 |             torch.nn.Linear(10,1)
 34 |         )
 35 |         '''
 36 |         self.linear_layer_1=
 37 |         self.relu_1=torch.nn.ReLU()
 38 |         self.linear_layer_2=torch.nn.Linear(15,10)
 39 |         self.relu_2=torch.nn.ReLU()
 40 |         self.linear_layer_3=torch.nn.Linear(10,1)
 41 |         self.linear_layer_4=torch.nn.Linear(10,1)
 42 |         '''
 43 |         
 44 |         
 45 |     def forward(self,x):
 46 |         out=self.seq(x)
 47 |         
 48 |         return torch.sigmoid(out)
 49 |         
# One input unit per feature column.
model=Model(len(trainx.columns))

train_x,test_x,train_y,test_y=train_test_split(trainx,trainy)

# Convert the pandas objects to float32 tensors for both splits.
train_x=torch.from_numpy(train_x.values)
train_x=train_x.type(torch.FloatTensor)
train_y=torch.from_numpy(train_y.values)
train_y=train_y.type(torch.FloatTensor)

test_x=torch.from_numpy(test_x.values)
test_x=test_x.type(torch.FloatTensor)
test_y=torch.from_numpy(test_y.values)
test_y=test_y.type(torch.FloatTensor)


# Plain SGD with learning rate 0.001.
optimizer=torch.optim.SGD(model.parameters(),lr=0.001)

# Binary cross-entropy on the model's sigmoid outputs.
criterion=torch.nn.BCELoss()
 68 | 
 69 | 
 70 | def validate(model,testx,testy):
 71 |     prediction=model(testx)
 72 |     prediction=torch.where(prediction>.5,1,0)
 73 |     accu=accuracy_score(prediction.detach().numpy(),test_y.unsqueeze(1).detach().numpy())
 74 |     f1=f1_score(prediction.detach().numpy(),test_y.unsqueeze(1).detach().numpy())
 75 |     return {"accuracy":accu,"f1":f1}
 76 | 
 77 | 
 78 | epochs=100
 79 | batch_size=1000
 80 | 
 81 | mlp=PyMLPipe()
 82 | mlp.set_experiment("Pytorch")
 83 | mlp.set_version(0.2)
 84 | 
 85 | with mlp.run():
 86 |     mlp.register_artifact("churndata.csv",df)
 87 |     mlp.log_params({
 88 |         "lr":0.01,
 89 |         "optimizer":"SGD",
 90 |         "loss_fuction":"BCEloss"
 91 |     })
 92 |     for epoch in range(epochs):
 93 |         loss_batch=0
 94 |         for batch in range(1000,5000,1000):
 95 |             optimizer.zero_grad()
 96 |             train_data=train_x[batch-1000:batch]
 97 |             output=model(train_data)
 98 |             loss=criterion(output,train_y[batch-1000:batch].unsqueeze(1))
 99 |             loss.backward()
100 |             optimizer.step()
101 |             loss_batch+=loss.item()
102 | 
103 |         metrics=validate(model,test_x,test_y)
104 |         metrics["loss"]=loss_batch
105 |         metrics["epoch"]=epoch
106 |         mlp.log_metrics_continious(metrics)
107 |     mlp.pytorch.register_model("pytorch_example1", model)
108 |         


--------------------------------------------------------------------------------
/samples/test_mltrain.py:
--------------------------------------------------------------------------------
  1 | from sklearn.datasets import  load_iris
  2 | import pandas as pd
  3 | from sklearn.model_selection import train_test_split
  4 | from pymlpipe.tabular import PyMLPipe
  5 | from sklearn.linear_model import LogisticRegression
  6 | from sklearn.ensemble import  RandomForestClassifier,AdaBoostClassifier
  7 | from sklearn.tree import DecisionTreeClassifier
  8 | from xgboost import XGBClassifier
  9 | from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
 10 | 
 11 | 
 12 | 
# Tracker shared by all runs of this script.
mlp=PyMLPipe()
mlp.set_experiment("IrisDataV2")
mlp.set_version(0.1)

# Iris features as a DataFrame; labels stay a separate array.
iris_data=load_iris()
data=iris_data["data"]
target=iris_data["target"]
df=pd.DataFrame(data,columns=iris_data["feature_names"])
#df["target"]=target
trainx,testx,trainy,testy=train_test_split(df,target)

# Run 1: logistic regression, all four metrics logged in one call.
with mlp.run():
    mlp.set_tags(["Classification","test run","logisticRegression"])
    model=LogisticRegression()
    model.fit(trainx, trainy)
    predictions=model.predict(testx)
    mlp.log_metrics({"Accuracy":accuracy_score(testy,predictions),
                     "Precision": precision_score(testy,predictions,average='macro'),
                     "Recall": recall_score(testy,predictions,average='macro'),
                     "F1": f1_score(testy,predictions,average='macro')
                     })
    mlp.register_artifact("train.csv", trainx)
    mlp.register_artifact("test.csv", testx,artifact_type="testing")
    mlp.scikit_learn.register_model("logistic regression", model)
    mlp.explainer(model,trainx)
 38 |     
 39 |     
 40 |     
 41 | 
 42 | 
 43 | 
 44 | with mlp.run():
 45 |     mlp.set_tags(["Classification","test run","dtree"])
 46 |     model=DecisionTreeClassifier()
 47 |     model.fit(trainx, trainy)
 48 |     predictions=model.predict(testx)
 49 |     
 50 |     mlp.log_metrics({"Accuracy":accuracy_score(testy,predictions),"Precision": precision_score(testy,predictions,average='macro')})
 51 |     
 52 |     mlp.log_metric("Recall", recall_score(testy,predictions,average='macro'))
 53 |     mlp.log_metric("F1", f1_score(testy,predictions,average='macro'))
 54 |     
 55 |     #mlp.log_metrics({"r2":0.1,"mse":1.1})
 56 |     mlp.register_artifact("train.csv", trainx)
 57 |     mlp.register_artifact("test.csv", testx,artifact_type="testing")
 58 |     mlp.scikit_learn.register_model("dtree", model)
 59 |     mlp.explainer(model,trainx)
 60 | 
 61 | with mlp.run():
 62 |     mlp.set_tags(["Classification","test run","rf"])
 63 |     model=RandomForestClassifier()
 64 |     model.fit(trainx, trainy)
 65 |     predictions=model.predict(testx)
 66 |     
 67 |     mlp.log_metric("Accuracy", accuracy_score(testy,predictions))
 68 |     mlp.log_metric("Precision", precision_score(testy,predictions,average='macro'))
 69 |     mlp.log_metric("Recall", recall_score(testy,predictions,average='macro'))
 70 |     mlp.log_metric("F1", f1_score(testy,predictions,average='macro'))
 71 |     mlp.register_artifact("train.csv", trainx,)
 72 |     mlp.register_artifact("test.csv", testx,artifact_type="testing")
 73 |     mlp.scikit_learn.register_model("randomForest", model)
 74 |     mlp.explainer(model,trainx)
 75 | 
 76 | with mlp.run():
 77 |     mlp.set_tags(["Classification","test run","xgb"])
 78 |     model=XGBClassifier()
 79 |     model.fit(trainx, trainy)
 80 |     predictions=model.predict(testx)
 81 |     
 82 |     mlp.log_metric("Accuracy", accuracy_score(testy,predictions))
 83 |     mlp.log_metric("Precision", precision_score(testy,predictions,average='macro'))
 84 |     mlp.log_metric("Recall", recall_score(testy,predictions,average='macro'))
 85 |     mlp.log_metric("F1", f1_score(testy,predictions,average='macro'))
 86 |     mlp.register_artifact("train.csv", trainx)
 87 |     mlp.register_artifact("test.csv", testx,artifact_type="testing")
 88 |     mlp.scikit_learn.register_model("xgboost", model)
 89 |     mlp.explainer(model,trainx)   
 90 | 
 91 | with mlp.run():
 92 |     mlp.set_tags(["Classification","test run","xgb"])
 93 |     model=AdaBoostClassifier()
 94 |     model.fit(trainx, trainy)
 95 |     predictions=model.predict(testx)
 96 |     
 97 |     mlp.log_metric("Accuracy", accuracy_score(testy,predictions))
 98 |     mlp.log_metric("Precision", precision_score(testy,predictions,average='macro'))
 99 |     mlp.log_metric("Recall", recall_score(testy,predictions,average='macro'))
100 |     mlp.log_metric("F1", f1_score(testy,predictions,average='macro'))
101 |     mlp.register_artifact("train.csv", trainx)
102 |     mlp.register_artifact("test.csv", testx,artifact_type="testing")
103 |     mlp.scikit_learn.register_model("adaboost", model)
104 |     mlp.explainer(model,trainx)   
105 |     
106 | 


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-04 at 1.42.35 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.42.35 PM.png


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-04 at 1.42.52 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.42.52 PM.png


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-04 at 1.43.03 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.43.03 PM.png


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-04 at 1.43.52 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.43.52 PM.png


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-04 at 1.44.05 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-04 at 1.44.05 PM.png


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-16 at 8.03.29 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.03.29 PM.png


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-16 at 8.03.50 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.03.50 PM.png


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-16 at 8.04.00 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.04.00 PM.png


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-16 at 8.04.08 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.04.08 PM.png


--------------------------------------------------------------------------------
/static/Screenshot 2022-07-16 at 8.04.21 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/Screenshot 2022-07-16 at 8.04.21 PM.png


--------------------------------------------------------------------------------
/static/XAI.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/XAI.png


--------------------------------------------------------------------------------
/static/download.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/download.png


--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/favicon.ico


--------------------------------------------------------------------------------
/static/filter.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32"><defs><style>.cls-1{fill:#231f20;}</style></defs><g id="filter"><path class="cls-1" d="M27.49,2h-22A1.54,1.54,0,0,0,4,3.53v2a1.53,1.53,0,0,0,.31.93l7.83,10.36A4.32,4.32,0,0,1,13,19.41v4.92a3.57,3.57,0,0,0,1.36,2.8l3.22,2.5a1.52,1.52,0,0,0,.93.32,1.44,1.44,0,0,0,.67-.16A1.5,1.5,0,0,0,20,28.42l0-9a4.32,4.32,0,0,1,.86-2.6L28.69,6.47A1.53,1.53,0,0,0,29,5.54v-2A1.52,1.52,0,0,0,27.49,2ZM27,5.39,19.27,15.61A6.37,6.37,0,0,0,18,19.43l0,8-2.45-1.89A1.57,1.57,0,0,1,15,24.33V19.41a6.29,6.29,0,0,0-1.27-3.79L6,5.39V4H27Z"/></g></svg>


--------------------------------------------------------------------------------
/static/logo.svg:
--------------------------------------------------------------------------------
1 | <svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 359.52 297.48"><defs><style>.cls-1{fill:#fff;stroke:#1d1d1b;}.cls-1,.cls-2{stroke-miterlimit:10;}.cls-2{fill:#060707;stroke:url(#radial-gradient);}</style><radialGradient id="radial-gradient" cx="309.22" cy="394.8" r="164.98" gradientUnits="userSpaceOnUse"><stop offset="0.26" stop-color="#334194"/><stop offset="1" stop-color="#267cc0" stop-opacity="0.03"/><stop offset="1" stop-color="#2581c4" stop-opacity="0"/></radialGradient></defs><path class="cls-1" d="M500.74,232.82" transform="translate(-129.46 -246.06)"/><path class="cls-2" d="M448.56,464.07c-20.55-7-38.57-2-73.08,8.28-1,.31-2.23.69-3.57,1.15q6.43-4.59,13.12-10a272.34,272.34,0,0,0,31.12-29.57c2.3-2.38,3.42-3.51,3.69-3.75.08-.06.32-.35.7-.73l.07-.09.85-1.23c7.86-13.8-18.33-44.4-38.49-60.54-9.77-7.82-28.14-20.26-56.61-27a34.42,34.42,0,0,0-8.13-61.91,17.7,17.7,0,1,0-20.6,0,34.42,34.42,0,0,0-7.8,62.11c-28,6.81-46.1,19.09-55.78,26.83-20.16,16.14-46.34,46.74-38.48,60.54,2.17,3.82,6.41,5.11,14.88,7.7,9.19,2.81,20,4.12,45.15.51,21.55-3.09,32.33-4.64,38.48-10.26,11.7-10.69,15.79-30.11,10.26-34.89-3.88-3.36-14-.76-15.21,3.27-.5,1.64.72,2.71,1.36,4.94,1.86,6.45-3,14.47-8.28,18.61-3.53,2.74-7.36,3.58-15,5.28-2.09.46-5,.78-11,1.64-17,2.46-24.27,1.55-30.47-.34-5.71-1.75-8.57-2.62-10-5.2-5.43-9.44,14.3-30.75,26-40.86,10.53-9.12,31.48-23.81,66.82-28.9,35.35,5.09,56.3,19.78,66.83,28.9,11.67,10.11,31.39,31.42,26,40.86-1.48,2.58-4.33,3.45-10,5.2-6.2,1.89-13.53,2.8-30.48.34-5.93-.86-8.87-1.18-11-1.64-7.66-1.7-11.48-2.54-15-5.28-5.32-4.14-10.14-12.16-8.28-18.61.64-2.23,1.86-3.3,1.35-4.94-1.23-4-11.33-6.63-15.21-3.27-5.52,4.78-1.44,24.2,10.26,34.89,6.16,5.62,16.93,7.17,38.49,10.26,16.83,2.42,27.24,2.63,35.05,1.65A222.07,222.07,0,0,1,364,465.67c-23.64,16.48-45.76,25-59.68,30.31l-5-2-.07,0-2.38-.94c-30.71-12.17-46.26-18.31-53.86-20.67-30.79-9.56-50.95-15.85-73.08-8.28-17.24,5.9-40.19,21.67-39.91,41.28.
2,13.86,11.95,24.09,16.22,27.41,21.62,16.74,52.64,10.08,94.53.56a410.92,410.92,0,0,0,61-19l7.53-2.91,7.54,2.91a410.56,410.56,0,0,0,61,19c42.3,9.62,73,16.15,94.53-.56,4.27-3.32,16-13.55,16.22-27.41C488.75,485.74,465.8,470,448.56,464.07Zm-140-208c9.67,0,9.65,15,0,15S298.86,256,308.51,256Zm-.31,74.68c-11.21,0-19.51-9.17-20-20s9.54-20,20-20c11.22,0,19.52,9.16,20,20S318.67,330.72,308.2,330.72ZM234.85,520.48c-52.79,12.78-63.51,9.6-70.77,5-.68-.44-15.93-10.36-15.1-22.94,1-14.73,23.43-24.28,37.48-26.85,11.1-2,20.21-.29,33.56,2.24a155.43,155.43,0,0,1,29.64,8.95c18.72,7.34,29.75,11.67,29.65,16.78C279.26,506.12,276.61,510.38,234.85,520.48Zm219.5,5c-7.26,4.59-18,7.77-70.77-5-41.76-10.1-44.41-14.36-44.46-16.81-.11-5.11,10.93-9.44,29.65-16.78a155.43,155.43,0,0,1,29.64-8.95c13.35-2.53,22.46-4.27,33.56-2.24,14,2.57,36.51,12.12,37.48,26.85C470.28,515.13,455,525.05,454.35,525.49Z" transform="translate(-129.46 -246.06)"/></svg>


--------------------------------------------------------------------------------
/static/pipelineUI 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI 2.png


--------------------------------------------------------------------------------
/static/pipelineUI 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI 3.png


--------------------------------------------------------------------------------
/static/pipelineUI.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI.png


--------------------------------------------------------------------------------
/static/pipelineUI_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI_1.png


--------------------------------------------------------------------------------
/static/pipelineUI_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/pipelineUI_2.png


--------------------------------------------------------------------------------
/static/start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neelindresh/pymlpipe/51a3f0ad651ef3b2f25b7808a788803209afc384/static/start.png


--------------------------------------------------------------------------------
/static/start.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 3 | <!-- Creator: CorelDRAW X5 -->
 4 | <svg xmlns="http://www.w3.org/2000/svg" xml:space="preserve" width="2048px" height="2048px" style="shape-rendering:geometricPrecision; text-rendering:geometricPrecision; image-rendering:optimizeQuality; fill-rule:evenodd; clip-rule:evenodd"
 5 | viewBox="0 0 2048 2048"
 6 |  xmlns:xlink="http://www.w3.org/1999/xlink">
 7 |  <defs>
 8 |   <style type="text/css">
 9 |    <![CDATA[
10 |     .fil1 {fill:none}
11 |     .fil0 {fill:#D50000}
12 |     .fil2 {fill:#FFFDE7}
13 |     .fil3 {fill:#FFFDE7;fill-rule:nonzero}
14 |    ]]>
15 |   </style>
16 |  </defs>
17 |  <g id="Layer_x0020_1">
18 |   <metadata id="CorelCorpID_0Corel-Layer"/>
19 |   <circle class="fil0" cx="1024" cy="1024" r="768"/>
20 |   <path class="fil1" d="M1208 546c198,76 328,266 328,478 0,283 -229,512 -512,512 -283,0 -512,-229 -512,-512 0,-212 130,-402 328,-478"/>
21 |   <g id="_360258376">
22 |    <path class="fil2" d="M1077 1037c0,29 -24,53 -53,53 -29,0 -53,-24 -53,-53l0 -547c0,-29 24,-53 53,-53 29,0 53,24 53,53l0 547z"/>
23 |    <path class="fil3" d="M1158 666c-27,-10 -41,-41 -31,-69 10,-27 41,-41 69,-31 93,36 170,98 223,176 54,78 85,173 85,272 0,133 -54,253 -141,339 -87,87 -207,141 -339,141 -133,0 -253,-54 -339,-141 -87,-87 -141,-207 -141,-339 0,-99 31,-193 84,-272 54,-78 131,-141 224,-176 27,-10 58,3 69,31 10,27 -3,58 -31,69 -72,28 -132,76 -174,137 -42,61 -66,134 -66,212 0,103 42,196 109,264 68,68 161,109 264,109 103,0 196,-42 264,-109 68,-68 109,-161 109,-264 0,-78 -24,-151 -65,-212 -42,-61 -102,-109 -174,-137z"/>
24 |   </g>
25 |   <rect class="fil1" width="2048" height="2048"/>
26 |  </g>
27 | </svg>
28 | 


--------------------------------------------------------------------------------
/static/start1.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><title>17.Power</title><g id="_17.Power" data-name="17.Power"><path d="M12,24A12,12,0,1,1,24,12,12.013,12.013,0,0,1,12,24ZM12,2A10,10,0,1,0,22,12,10.011,10.011,0,0,0,12,2Z"/><path d="M12,19A7,7,0,0,1,7.333,6.783l1.334,1.49a5,5,0,1,0,6.666,0l1.334-1.49A7,7,0,0,1,12,19Z"/><rect x="11" y="5" width="2" height="7"/></g></svg>


--------------------------------------------------------------------------------
/tabular.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from pymlpipe.utils.database import create_folder
  3 | from pymlpipe.utils.getschema import schema_
  4 | from pymlpipe.utils import _xai as xai
  5 | import uuid
  6 | import yaml
  7 | from contextlib import contextmanager
  8 | import pandas as pd
  9 | import shutil
 10 | import pickle
 11 | import sklearn
 12 | import datetime
 13 | import torch
 14 | import torch.fx
 15 | 
 16 | 
# Maps a node-kind string to a color name.
# NOTE(review): the keys look like torch.fx node op names and the values like
# graph-drawing color names; presumably used when rendering a model graph --
# confirm against the code that reads this table.
_COLOR_MAP = {
    "placeholder": "AliceBlue",
    "call_module": "LemonChiffon1",
    "get_param": "Yellow2",
    "get_attr": "LightGrey",
    "output": "PowderBlue",
}
 24 | 
 25 | 
 26 | class Context_Manager:
 27 |         """_summary_: Context Manager for with statement
 28 |         1. creates folders and subfolders
 29 |         2. creates runid for a run instance
 30 |         """
 31 |         def __init__(self,name,feature_store,run_id=None):
 32 |             super(PyMLPipe)
 33 |             if run_id==None:
 34 |                 self.runid = str(uuid.uuid4())
 35 |             else:
 36 |                 self.runid=run_id
 37 |             self.name = name
 38 |             self.feature_store=feature_store
 39 |             self.exp_path=os.path.join(self.feature_store,self.name,self.runid)
 40 |             self.folders={"artifacts":os.path.join(self.exp_path,"artifacts"),
 41 |                           "metrics":os.path.join(self.exp_path,"metrics"),
 42 |                           "models":os.path.join(self.exp_path,"models"),
 43 |                           "params":os.path.join(self.exp_path,"params")}
 44 |             self.info_dict=[]
 45 |         def get_path(self):
 46 |             """_summary_
 47 | 
 48 |             Returns:
 49 |                 _type_: _description_
 50 |             """
 51 |             return self.exp_path
 52 |             
 53 |         def structure(self):
 54 |             """_summary_
 55 | 
 56 |             Returns:
 57 |                 _type_: _description_
 58 |             """
 59 |             self.exp_path=create_folder(self.feature_store,self.name)
 60 |             self.exp_path=create_folder(self.exp_path,self.runid)
 61 |             self._create_all_folders(self.exp_path)
 62 |             return self.exp_path
 63 |         
 64 |         def _create_all_folders(self,exp_path):
 65 |             """_summary_
 66 | 
 67 |             Args:
 68 |                 exp_path (_type_): _description_
 69 |             """
 70 |             for i in self.folders:
 71 |                 create_folder(exp_path,i)
 72 |         def write_to_yaml(self,info):
 73 |             with open(os.path.join(self.exp_path,"info.yaml"), 'w') as file:
 74 |                 documents = yaml.dump(info, file)
 75 |             
 76 |         
 77 |         
 78 | class PyMLPipe:
 79 |     def __init__(self):
 80 |         
 81 |         self.feature_store=create_folder(os.getcwd())
 82 |         self.experiment_name='0'
 83 |         self.folders=None
 84 |         self.experiment_path=None
 85 |         self.info={}
 86 |         self.info["tags"]=[]
 87 |         self.info["metrics"]={}
 88 |         self.info["params"]={}
 89 |         self.info["artifact"]=[]
 90 |         self.info["model"]={}
 91 |         self.info["artifact_schema"]=[]
 92 |         self.info["metrics_log"]=[]
 93 |         self._is_continious_logging=False
 94 |         
 95 |         
 96 |     def __reset__(self):
 97 |         self.feature_store=create_folder(os.getcwd())
 98 |         self.folders=None
 99 |         self.experiment_path=None
100 |         
101 |         self.info["tags"]=[]
102 |         self.info["metrics"]={}
103 |         self.info["params"]={}
104 |         self.info["artifact"]=[]
105 |         self.info["model"]={}
106 |         self.info["artifact_schema"]=[]
107 |         self.info["metrics_log"]=[]
108 |         
109 |     @contextmanager
110 |     def run(self,experiment_name=None,runid=None):
111 |         """_summary_: start a context manager for with statement
112 |         1. When run is started it will create
113 |             a. RUN ID
114 |             b. EXPERIMENT ID
115 |             c. FOLDERS for storing the details
116 | 
117 |         Args:
118 |             experiment_name (str, optional): gives a experiment name. Defaults to None.
119 |             runid (str, optional): gives a runid. Defaults to None.
120 | 
121 |         Returns:
122 |             class context_run(object): object for the context manager
123 |         """
124 |         if experiment_name!=None:
125 |             self.experiment_name=experiment_name
126 |         r=Context_Manager(self.experiment_name,
127 |                            self.feature_store,runid)
128 |         
129 |         self._write_info_run(self.experiment_name, r.runid)
130 |         r.structure()
131 |         self.context_manager=r
132 |         #initialize models 
133 |         self.scikit_learn=ScikitLearn(self.context_manager.folders)
134 |         self.pytorch=Pytorch(self.context_manager.folders)
135 |         yield r
136 |         self.info["execution_time"]=str(datetime.datetime.now()).split(".")[0]
137 |         if self.scikit_learn.registered:
138 |             self.info["model"]={"model_name":self.scikit_learn.model_name,
139 |                                 "model_path":self.scikit_learn.model_path,
140 |                                 "model_params": self.scikit_learn.model_params,
141 |                                 "model_class":self.scikit_learn.model_class,
142 |                                 "model_type":self.scikit_learn.model_type,
143 |                                 "model_tags":self.scikit_learn.model_tags,
144 |                                 "registered":self.scikit_learn.registered,
145 |                                 "model_mode":self.scikit_learn.model_mode
146 |                                 }
147 |             
148 |         elif self.pytorch.registered:
149 |             self.info["model"]={"model_name":self.pytorch.model_name,
150 |                                 "model_path":self.pytorch.model_path,
151 |                                 "model_architecture":self.pytorch.model_architecture,
152 |                                 "model_class":self.pytorch.model_class,
153 |                                 "model_type":self.pytorch.model_type,
154 |                                 "model_ops":self.pytorch.model_ops,
155 |                                 "registered":self.pytorch.registered,
156 |                                 "model_mode":self.pytorch.model_mode
157 |                                 }
158 |         #print(self.info)
159 |         if len(self.info["metrics"])==0 and self._is_continious_logging:
160 |             self.info["metrics"]=self.info["metrics_log"][-1]
161 |             
162 |         self.context_manager.write_to_yaml(self.info)
163 |         self.__reset__()
164 |     def explainer(self,model,trainx):
165 |         """_summary_: This is an explainer API that do global explainibilty. 
166 | 
167 |         Args:
168 |             model (scikit-learn): Model Object
169 |             trainx (Pandas DataFrame): Data Frame for Global Explainability
170 | 
171 |         Raises:
172 |             TypeError: _description_
173 |         """
174 |         if not isinstance(trainx, pd.DataFrame):
175 |             raise TypeError("Error: Please provide a valid data pd.Dataframe or correct artifact Name")
176 |         model_type=str(type(model))
177 |         if ('sklearn' not in model_type) and ("catboost" not in model_type):
178 |             raise TypeError("Error: Scikit-learn or Catboost or Xgboost Expected got {model_type}".format(model_type=model_type) )
179 |         explainer_instance=xai.Explainer(model,trainx,self.context_manager.folders["artifacts"])
180 |         artifacts=explainer_instance.explain()
181 |         self.info["XAI"]=artifacts
182 |     
183 |     def set_experiment(self,name):
184 |         """_summary_: sets the experiment name
185 | 
186 |         Args:
187 |             name (str): name of the experiment
188 |         """
189 |         self.experiment_name=name
190 |         exp_path=create_folder(self.feature_store,self.experiment_name)
191 |         self._write_info_experiment(name,exp_path)
192 |     
193 |     
194 |     def set_tag(self,tag_value):
195 |         """_summary_: sets a tag for a perticular run
196 |         Args:
197 |             name (str or int or float): tag name 
198 |         Raises:
199 |             TypeError: Supported type 'str','int','float'
200 |         """
201 |         
202 |             
203 |         if isinstance(tag_value,dict) or isinstance(tag_value,list) or isinstance(tag_value,set): 
204 |            raise TypeError("unsupported type, Expected 'str','int','float' got "+str(type(tag_value)))
205 |         self.info["tags"].append(tag_value)
206 |         
207 |     
208 |     
209 |     def set_tags(self,tag_dict:list):
210 |         """_summary_:sets N no of tags for a perticular run
211 | 
212 |         Args:
213 |             tag_dict (list): tag names in list format
214 | 
215 |         Raises:
216 |             TypeError: Expected 'list'
217 |         """
218 |        
219 |         if isinstance(tag_dict,list): 
220 |             self.info["tags"].extend(tag_dict)
221 |         else:
222 |             raise TypeError("unsupported type, Expected 'list' got "+str(type(tag_dict)))
223 |         
224 |     def get_tags(self):
225 |         """_summary_: get all the tags that are associated with the run
226 | 
227 |         Returns:
228 |             list: tags that are associated with the run
229 |         """
230 |         return self.info["tags"]
231 |         
232 |     def set_version(self,version):
233 |         """_summary_:sets version number for the perticular run
234 | 
235 |         Args:
236 |             version (str or int or float): version number
237 | 
238 |         Raises:
239 |             TypeError: Expected 'str','int','float'
240 |         """
241 |         if isinstance(version,dict) or isinstance(version,list) or isinstance(version,set): 
242 |            raise TypeError("unsupported type, Expected 'str','int','float' got "+str(type(tag_dict)))
243 |         self.info["version"]=version
244 |         
245 |         
246 |     def get_version(self):
247 |         """_summary_:get the version number associated with the run
248 | 
249 | 
250 |         Returns:
251 |             _type_: version number
252 |         """
253 |         return self.info["version"]
254 |     
255 |     def log_metrics(self,metric_dict:dict):
256 |         """_summary_: log metrics for the model run
257 | 
258 |         Args:
259 |             metric_dict (dict): key value pair with metric name and metric value
260 | 
261 |         Raises:
262 |             TypeError: Expected 'dict'
263 |         """
264 |            
265 |         if isinstance(metric_dict,dict): 
266 |             self.info["metrics"].update({i:float("{0:.2f}".format(j)) for i,j in metric_dict.items()})
267 |         else:
268 |             raise TypeError("unsupported type, Expected 'dict' got "+str(type(metric_dict)))
269 |     
270 |     def log_metrics_continious(self,metric_dict:dict):
271 |         """_summary_
272 | 
273 |         Args:
274 |             metric_dict (dict): key value pair with metric name and metric value
275 | 
276 |         Raises:
277 |             TypeError: Expected 'dict'
278 |         """
279 |         if isinstance(metric_dict,dict):
280 |             self.info["metrics_log"].append({i:float("{0:.2f}".format(j)) for i,j in metric_dict.items()})
281 |         else:
282 |             raise TypeError("Expected Type dict got " +type(metric_dict))
283 |         self._is_continious_logging=True
284 |         
285 |     def log_metric(self,metric_name,metric_value):
286 |         """_summary_: log single metric for the model run
287 | 
288 |         Args:
289 |             metric_name (str): name of the metric
290 |             metric_value (int or float): value of the metric
291 | 
292 |         Raises:
293 |             TypeError: metric_name expected to be str
294 |             TypeError: metric_value expected to be int or float
295 |         """
296 |         
297 |         mv=None
298 |         if not isinstance(metric_value,int) and not isinstance(metric_value,float): 
299 |             raise TypeError("unsupported type, 'metric_value' Expected 'int','float' got "+str(type(metric_value)))
300 |         if not isinstance(metric_name,str): 
301 |             raise TypeError("unsupported type, 'metric_value' Expected 'str' got "+str(type(metric_name)))
302 |         
303 |         
304 |         self.info["metrics"][metric_name]=float("{0:.2f}".format(metric_value))
305 |        
306 |         
307 |     def log_params(self,param_dict:dict):
308 |         """_summary_: log parameters for the model run
309 | 
310 |         Args:
311 |             param_dict (dict): key value pair with parameter name and parameter value
312 | 
313 |         Raises:
314 |             TypeError: Expected 'dict'
315 |         """
316 |         
317 |             
318 |         if isinstance(param_dict,dict): 
319 |             self.info["params"].update(param_dict)
320 |         else:
321 |             raise TypeError("unsupported type, Expected 'dict' got "+str(type(metric_dict)))
322 |         
323 |         
324 |     def log_param(self,param_name,param_value):
325 |         """_summary_:log single parameter for the model run
326 | 
327 |         Args:
328 |             param_name (str): _description_
329 |             param_value (int or float or str): _description_
330 | 
331 |         Raises:
332 |             TypeError: param_name Expected 'str' 
333 |             TypeError: param_value Expected 'int','float','str' 
334 |         """
335 |        
336 |         mv=None
337 |         if not isinstance(param_value,int) and not isinstance(param_value,float) and not isinstance(param_value,str): 
338 |             raise TypeError("unsupported type, 'param_value' Expected 'int','float','str' got "+str(type(metric_value)))
339 |         if not isinstance(param_name,str): 
340 |             raise TypeError("unsupported type, 'param_name' Expected 'str' got "+str(type(metric_name)))
341 |         self.info["params"][param_name]=param_value
342 |     
343 |     def register_artifact(self,artifact_name,artifact,artifact_type="training"):
344 |         """_summary_: Save Artifact as part of data verion control
345 | 
346 |         Args:
347 |             artifact_name (str): name of the artifact
348 |             artifact (pandas DataFrame): pandas DataFrame object with the data
349 |             artifact_type (str, optional): Defaults to "training". artifact_type can be [training,testing,validation,dev,prod]
350 | 
351 |         Raises:
352 |             TypeError: Expected DataFrame object
353 |             ValueError: artifact_name should have a string value
354 |         """
355 |         if not isinstance(artifact, pd.DataFrame):
356 |             raise TypeError("Please provide DataFrame in 'artifact'")
357 |         if artifact_name=="" or artifact_name==None:
358 |             raise ValueError("Please provide a name in 'artifact_name' which is not '' or None")
359 |         path=os.path.join(self.context_manager.folders["artifacts"],artifact_name)
360 |         dataschema=artifact.describe(include='all')
361 |         
362 |         artifact.to_csv(path,index=False)
363 |         
364 |         
365 |             
366 |         
367 |         self.info["artifact"].append({
368 |             "name":artifact_name,
369 |             "path":path,
370 |             "tag":artifact_type
371 |         })
372 |         schema_data,schema_details=schema_(artifact)
373 |         self.info["artifact_schema"].append({
374 |                 "name":artifact_name,
375 |                 "schema":schema_data,
376 |                 "details":schema_details
377 |             }
378 |         )
379 |         
380 |         
381 |     def register_artifact_with_path(self,artifact,artifact_type="training"):
382 |         """_summary_
383 | 
384 |         Args:
385 |             artifact (str): path of the artifact
386 |             artifact_type (str, optional): _description_. Defaults to "training".artifact_type can be [training,testing,validation,dev,prod]
387 | 
388 |         Raises:
389 |             TypeError: artifact path should be str
390 |             ValueError: artifact path should be correct
391 |         """
392 |         if not isinstance(artifact, str):
393 |             raise TypeError("Please provide full path of artifact")
394 |         if not os.path.exists(artifact):
395 |             raise ValueError("Please provide correct path of artifact")
396 |         
397 |         shutil.copy(artifact, self.context_manager.folders["artifacts"])
398 |         
399 |         path=os.path.join(self.context_manager.folders["artifacts"],os.path.basename(artifact))
400 |         self.info["artifact"].append({
401 |             "name":os.path.basename(path),
402 |             "path":path,
403 |             "tag":artifact_type
404 |         })
405 |         filename=os.path.basename(artifact)
406 |         if filename.endswith('.csv'):
407 |             artifact=pd.read_csv(path)
408 |         elif filename.endswith('.xlxs'):
409 |             artifact=pd.read_excel(path)
410 |         elif filename.endswith('.parquet'):
411 |             artifact=pd.read_parquet(path)
412 |         else:
413 |             print("Error: Unknown file type cannot generate Schema!!!!")
414 |             return
415 |         
416 |         schema_data,schema_details=schema_(artifact)
417 |         self.info["artifact_schema"].append({
418 |                 "name":filename,
419 |                 "schema":schema_data,
420 |                 "details":schema_details
421 |             }
422 |         )
423 |         
424 |     def get_info(self):
425 |         """_summary_: get the whole run details
426 | 
427 |         Returns:
428 |             dict: information about the whole run
429 |         """
430 |         return self.info 
431 |     
432 |     
433 |     def get_artifact(self):
434 |         """_summary_: get the artifact details
435 | 
436 |         Returns:
437 |             dict: returns the artifact detail
438 |         """
439 |         return self.info["artifact"]
440 |     
441 |     def _write_info_experiment(self,experiment_name,path):
442 |         """_summary_: writes to the experiment schema
443 | 
444 |         Args:
445 |             experiment_name (str): name of the experiment
446 |             path (str): path to save the run details
447 |         """
448 |         fulllist={}
449 |         if os.path.exists(os.path.join(self.feature_store,"experiment.yaml")):
450 |             with open(os.path.join(self.feature_store,"experiment.yaml")) as file:
451 |                 fulllist = yaml.load(file, Loader=yaml.FullLoader)
452 |             if experiment_name not in fulllist:
453 |                 fulllist[experiment_name]={"experiment_path":path,
454 |                                            "runs":[],
455 |                                            "execution_time":str(datetime.datetime.now()).split(".")[0]
456 |                                            }
457 |             else:
458 |                 fulllist[experiment_name]["execution_time"]=str(datetime.datetime.now()).split(".")[0]
459 |         else:
460 |             fulllist[experiment_name]={"experiment_path":path,
461 |                                        "runs":[],
462 |                                        "execution_time":str(datetime.datetime.now()).split(".")[0]
463 |                                        }
464 |         
465 |             
466 |         with open(os.path.join(self.feature_store,"experiment.yaml"), 'w') as file:
467 |                 documents = yaml.dump(fulllist, file)
468 |                 
469 |     
470 |     def _write_info_run(self,experiment_name,run_id):
471 |         """_summary_:writes to the run schema
472 | 
473 |         Args:
474 |             experiment_name (str): name of the experiment
475 |             run_id (str): ID for the running instance
476 |         """
477 |         fulllist={}
478 |         
479 |         with open(os.path.join(self.feature_store,"experiment.yaml")) as file:
480 |             fulllist = yaml.load(file, Loader=yaml.FullLoader)
481 |         fulllist[experiment_name]["runs"].append(run_id)
482 |             
483 |         
484 |             
485 |         with open(os.path.join(self.feature_store,"experiment.yaml"), 'w') as file:
486 |                 documents = yaml.dump(fulllist, file)
487 |         
488 |     
489 |     def set_uri(self):
490 |         pass
491 |     
492 | 
493 |                 
494 | #explainer*
495 | #https://github.com/SauceCat/PDPbox             
496 |  #https://github.com/AustinRochford/PyCEbox               
497 | 
498 | 
class ScikitLearn:
    """Pickles a scikit-learn (or CatBoost) model and keeps its metadata.

    ``folders`` is a mapping whose "models" entry is the directory the
    pickle file is written to.
    """
    def __init__(self,folders):
        self.folders=folders
        self.model=None
        self.model_name=""
        self.model_path=""
        self.model_class=""
        self.model_type=""
        self.model_params={}
        self.model_tags={}
        self.registered=False
        self.model_mode=""


    def register_model(self,model_name,model):
        """_summary_: Save the model as a pickle file and record its details

        Args:
            model_name (str): file name (without extension) for the saved model
            model: estimator whose type name contains "sklearn" or "catboost"

        Raises:
            TypeError: the model is neither a scikit-learn nor a CatBoost object
        """
        type_name=str(type(model))
        if "sklearn" not in type_name and "catboost" not in type_name:
            raise TypeError("Error:Expected ScikitLearn Module!!!!")

        model_path=os.path.join(self.folders["models"],model_name+'.pkl')
        # Context manager so the file handle is closed (and flushed) right away.
        with open(model_path,'wb') as model_file:
            pickle.dump(model, model_file)

        self.model_type="scikit-learn"
        self.model=model
        self.model_name=model_name
        self.model_path=model_path
        self.model_class=type(model).__name__
        self.model_params=model.get_params()
        # _get_tags is a private estimator API; an accepted model without it
        # registers with empty tags instead of failing after the pickle has
        # already been written.
        tag_getter=getattr(model,"_get_tags",None)
        if callable(tag_getter):
            self.model_tags={tag:str(value) for tag,value in tag_getter().items()}
        else:
            self.model_tags={}
        self.registered=True
526 |     
527 |     
class Pytorch:
    """Saves a PyTorch model as a TorchScript ``.pt`` file and keeps its metadata.

    ``folders`` is a mapping whose "models" entry is the directory the
    file is written to.
    """
    def __init__(self,folders):
        self.folders=folders
        self.model=None
        self.model_name=""
        self.model_path=""
        self.model_class=""
        self.model_type=""
        self.model_architecture=[]
        self.model_ops=[]
        self.registered=False
        self.model_mode=""

    def register_model(self,model_name,model):
        """_summary_: Save the model as an artifact object (scripted with torch.jit.script)

        Args:
            model_name (str): name of file to be saved
            model (Pytorch Model): the model

        Raises:
            Exception: scripting or saving the model failed
        """
        try:
            model_scripted = torch.jit.script(model)
            model_scripted.save(os.path.join(self.folders["models"],model_name+'.pt'))
        except Exception as e:
            raise Exception(e) from e
        self._record_model(model_name,model,"non_runtime")

    def register_model_with_runtime(self,model_name,model,data):
        """_summary_: Save the model as an artifact object with runtime details.
        This helps in Saving the model for model conversion

        The model is traced with ``torch.jit.trace(model, data)``.

        Args:
            model_name (str): name of file to be saved
            model (Pytorch Model): the model
            data (TorchTensor): Data used for training. 1 row of data is enough

        Raises:
            Exception: tracing or saving the model failed
        """
        try:
            traced_cell = torch.jit.trace(model, data)
            torch.jit.save(traced_cell, os.path.join(self.folders["models"],model_name+".pt"))
        except Exception as e:
            raise Exception(e) from e
        self._record_model(model_name,model,"runtime")

    def _record_model(self,model_name,model,model_mode):
        """Store the bookkeeping attributes shared by both registration paths."""
        self.model=model
        self.model_name=model_name
        self.model_path=os.path.join(self.folders["models"],model_name+'.pt')
        self.model_class=type(model).__name__
        # Both paths write a TorchScript file, so both report the same type.
        self.model_type="torch"
        self.registered=True
        self.model_architecture=self._get_model_arch(model)
        self.model_ops=self._get_model_ops(model)
        self.model_mode=model_mode

    def _load_model(self,model_name):
        """Load a saved TorchScript file and return the loaded module.

        Args:
            model_name (str): path of the ``.pt`` file, passed to torch.jit.load
        """
        model = torch.jit.load(model_name)
        return model

    def _load_model_with_runtime(self,model_name):
        """Load a traced TorchScript file and return the loaded module.

        Args:
            model_name (str): path of the ``.pt`` file, passed to torch.jit.load
        """
        loaded_trace = torch.jit.load(model_name)
        return loaded_trace

    def _get_model_ops(self,model):
        """_summary_: get forward operations in for pytorch model

        The model is traced with ``torch.fx.symbolic_trace`` and every graph
        node is described with stringified fields.

        Args:
            model (Pytorch Model): Pytorch model

        Returns:
            dict: one entry per graph node, keyed "op_<index>"
        """
        gm = torch.fx.symbolic_trace(model)
        ops_data={}
        for idx, n in enumerate(gm.graph.nodes):
            node_state=n.__dict__
            ops_data[f"op_{idx}"]={
                "name":str(n),
                "op":node_state["op"],
                "input_node":{str(k): str(v) for k,v in node_state['_input_nodes'].items()},
                "args":[str(i) for i in node_state["_args"]],
                "prev":str(node_state["_prev"]),
                "next":str(node_state["_next"]),
                "users":{str(k): str(v) for k,v in node_state['users'].items()},
            }
        return ops_data

    def _get_model_arch(self,model):
        """_summary_: get the layers of a pytorch model

        Args:
            model (Pytorch Model): Pytorch model

        Returns:
            list: one dict per named sub-module (the root module is skipped)
        """
        arch=[]
        for layers,details in dict(model.named_modules()).items():
            if layers=="":
                # named_modules() yields the model itself under the empty name.
                continue
            arch.append({
                "layer_name":layers.replace(".","_"),
                "layer":str(details),
                "layer_type":type(details).__name__,
                "layer_class":str(type(details)).strip("<").strip(">").split(" ")[1],
                # Only the public instance attributes of the layer.
                "params":{name:value for name,value in details.__dict__.items() if not name.startswith("_")},
            })
        return arch
645 |     
646 | 


--------------------------------------------------------------------------------
/templates/check_deployment.html:
--------------------------------------------------------------------------------
 1 | {% extends "template.html" %}
 2 | 
 3 | {% block content %}
 4 | <script>
 5 |     function prettyPrint() {
 6 |         var ugly = document.getElementById('myTextArea').value;
 7 |         console.log(ugly)
 8 |         var obj = JSON.parse(ugly);
 9 |         var pretty = JSON.stringify(obj, undefined, 4);
10 |         document.getElementById('myTextArea').value = pretty;
11 |     }
12 | </script>
13 | <nav class="breadcrumb" aria-label="breadcrumbs">
14 |     <ul>
        <li><a href="/">home</a></li>
16 |         <li><a href="/show_deployments/">Deployments</a></li>
17 |         <li class="is-active"><a href="#" aria-current="page">{{deployment}}</a></li>
18 |       </ul>
19 | </nav>
20 | <div>
21 |     <div class="box">
22 |         <h1 class="subtitle" style="color:darkslategray">/POST/</h1>
23 |     </div>
24 |     <!--<form action="/predict/{{deployment}}" method="POST">-->
25 |         <div class="box">
26 |             <textarea class="textarea" id="myTextArea" placeholder="e.g. Hello world" onclick="prettyPrint()" name="random_data"></textarea>
27 |             <hr>
28 |             <button class="button is-primary" >
29 | 
30 |                 Predict
31 |             </button>
32 |         </div>
33 |     <!--</form>-->
34 |     
35 | </div>
36 | {% endblock %}


--------------------------------------------------------------------------------
/templates/deployments.html:
--------------------------------------------------------------------------------
 1 | {% extends "template.html" %}
 2 | 
 3 | {% block content %}
 4 | <script>
 5 |     function stop_deployment(deployment_no){
 6 |         console.log(deployment_no)
 7 |         var xhttp = new XMLHttpRequest();
 8 |         xhttp.onreadystatechange = function() {
 9 |             if (this.readyState == 4 && this.status == 200) {
10 |                 location.reload();
11 |             }
12 |         };
13 |         xhttp.open("GET", "/deployment/stop/"+deployment_no[0], true);
14 |         //xhttp.setRequestHeader("Content-type", "application/json");
15 |         xhttp.send();
16 |     }
17 |     function start_deployment(deployment_no){
18 |         console.log(deployment_no)
19 |         var xhttp = new XMLHttpRequest();
20 |         xhttp.onreadystatechange = function() {
21 |             if (this.readyState == 4 && this.status == 200) {
22 |                 location.reload();
23 | 
24 |             }
25 |         };
26 |         xhttp.open("GET", "/deployment/start/"+deployment_no[0], true);
27 |         //xhttp.setRequestHeader("Content-type", "application/json");
28 |         xhttp.send();
29 |     }
30 | </script>
31 |     <nav class="breadcrumb" aria-label="breadcrumbs">
32 |         <ul>
33 |             <li><a href="/">home</a></li>
            <li class="is-active"><a href="#" aria-current="page">deployments</a></li>
35 |         </ul>
36 |     </nav>
37 |     
38 |     <table class="table is-striped" style="width:100%;">
39 |         <thead>
40 |           <tr>
41 |             <th>Experiment No</th>
42 |             <th>Run ID</th>
43 |             <th>Deployment No</th>
44 |             <th>Deployment URL</th>
45 |             <th>Status</th>
46 |             <th>Actions</th>
47 |           </tr>
48 |           
49 |         </thead>
50 |         <tbody>
51 |           {%for details in ALL_DEPLOYED_MODELS%}
52 |           <tr>
53 |             <td>{{details["experiment_id"]}}</td>
54 |             <td><a href="/run/{{details['experiment_id']}}@{{details['run_id']}}/">{{details["run_id"]}}</a></td>
55 |             <td>{{details["model_deployment_number"]}}</td>
56 |             
57 |             <td>
58 |                 {%if details["status"]=='running'%}
59 |                 <a href="{{details['model_url']}}">{{details["model_url"]}}</a>
60 |                 {%endif%}
61 |             </td>
62 |             <td>{{details["status"]}}</td>
63 |             <td>
64 |                 {%if details["status"]=='running'%}
65 |                 <button class="button is-danger" onclick="stop_deployment({{[details['model_deployment_number']]}})">Stop</button>
66 |                 {%elif details["status"]=='stopped'%}
67 |                 <button class="button is-success" onclick="start_deployment({{[details['model_deployment_number']]}})">Start</button>
68 |                 {%endif%}
69 |                 
70 |             </td>
71 |             
72 |            
73 |           </tr>
74 |           {%endfor%}
75 |         </tbody>
76 |       </table>
77 |   
78 | {% endblock %}


--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |   <head>
  4 |     <meta charset="utf-8">
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1">
  6 |     <title>PyMLPipe</title>
  7 |     <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
  8 |     <link rel="shortcut icon" href="{{ url_for('static', filename='favicon.ico') }}">
  9 | 
 10 |     <script src="https://cdn.plot.ly/plotly-2.12.1.min.js"></script>
 11 | 
 12 |   </head>
 13 |   <style>
 14 |     .center-text{
 15 |         text-align: center
 16 |     }
 17 |     .dropdown-inactive{
 18 |         display: none;
 19 |     }
 20 |     .dropdown-active{
 21 |         display: block;
 22 |     }
 23 |     hr{
 24 |         margin:2px;
 25 |     }
 26 |     .sidebar-active{
 27 |         background: aquamarine;
 28 |         border: 1px solid;
 29 |     }
 30 |     td{
 31 |         
 32 |         /*height: 30%;*/
 33 |         overflow: hidden;
 34 |         text-overflow: ellipsis;
 35 |         word-wrap: break-word;
 36 |         max-width: 100px;
 37 |     }
 38 |   </style>
 39 |   <script>
    // Page state filled in by the template when the page is rendered.
    // run_data: run details serialised with the tojson filter; indexed by run id in compare_metrics.
    const run_data = {{ run_details | tojson }} //can be improved
    // current_table: rendered from current_experiment; its first element is the id of the table shown now.
    var current_table={{current_experiment | safe}} //can be improved
    current_table=current_table[0]
    // toggle: false when no dropdown is open, otherwise the id of the open one (see show_dropdown).
    toggle=false;
 44 | 
 45 | 
 46 | 
 47 |     function show_dropdown(id){
 48 |         console.log(id)
 49 |         console.log(toggle)
 50 |         if(toggle==false){
 51 |             console.log("show")
 52 |             document.getElementById(id).style.display="block"
 53 |             toggle=id;
 54 |         }
 55 |         else if(toggle==id){
 56 |             console.log("hide")
 57 |             document.getElementById(id).style.display="none"
 58 |             toggle=false;
 59 |         }
 60 |         else if(toggle!=id){
 61 |             document.getElementById(toggle).style.display="none"
 62 |             document.getElementById(id).style.display="block"
 63 |             toggle=id
 64 |         }
 65 |         
 66 |     }
    // Current selection for the metric comparison:
    // metric names ticked in the table header, and run ids ticked in the rows.
    var metrics=[]
    var run_ids=[]
 69 |     function checkmetric(id)
 70 |     {   
 71 |         //console.log(id)
 72 |         console.log(document.getElementById("checkbox_@_"+id).checked,id)
 73 |         //document.getElementById(id).checked) 
 74 |         if(document.getElementById("checkbox_@_"+id).checked){
 75 |             id=id.split("@")[1]
 76 |             console.log(id)
 77 |             metrics.push(id)
 78 |             //document.getElementById("checkbox_"+id).setAttribute('checked', 'checked')
 79 |         }
 80 |         else{
 81 |             index = metrics.indexOf(id.split("@")[1]);
 82 |             if (index > -1) {
 83 |                 metrics.splice(index, 1); // 2nd parameter means remove one item only
 84 |               }
 85 |         }
 86 |         if(metrics.length>0 & run_ids.length>0){
 87 |             document.getElementById("metric-compare-button").disabled = false
 88 |         }
 89 |         else{
 90 |             document.getElementById("metric-compare-button").disabled = true
 91 |         }
 92 |        
 93 |         console.log(metrics,run_ids)
 94 |     }
 95 |     function checkrow(id){
 96 |         
 97 |         if(document.getElementById("checkbox_"+id).checked){
 98 |             run_ids.push(id)
 99 |         }
100 |         else{
101 |             index = run_ids.indexOf(id);
102 |             if (index > -1) {
103 |                 run_ids.splice(index, 1); // 2nd parameter means remove one item only
104 |               }
105 |         }
106 |         if(metrics.length>0 & run_ids.length>0){
107 |             document.getElementById("metric-compare-button").disabled = false
108 |         }
109 |         else{
110 |             document.getElementById("metric-compare-button").disabled = true
111 |         }
112 |         console.log(metrics,run_ids)
113 |     }
    // Build one trace per selected run from the selected metrics and plot them together.
    // create_traces and plot_bar_group are defined outside this section.
    // NOTE(review): every name assigned without var/let below becomes a global;
    // confirm nothing else relies on them before making them local.
    function compare_metrics(){
        traces=[]
        for(var i=0;i<run_ids.length;i++){
            // Label is the first 13 characters of the run id followed by "...".
            label_for_plot=run_ids[i].slice(0,13)+"..."
            run_details=run_ids[i]
            metrics_details=run_data[run_details]["metrics"]
            // Values in the same order as the selected metric names.
            metrics_values=[]
            for(var j=0;j<metrics.length;j++){
                metrics_values.push(metrics_details[metrics[j]])
            }
            trace=create_traces(metrics,metrics_values,label_for_plot)
            traces.push(trace)
        }
        plot_bar_group(traces)
    }
129 |     function show_table(id){
130 |         if(id!=current_table){
131 |             document.getElementById(id).style.display='table'
132 |             document.getElementById(current_table).style.display='none'
133 |             document.getElementById("sidebar_"+id).classList.add("sidebar-active")
134 |             
135 |             document.getElementById("sidebar_"+current_table).classList.remove("sidebar-active")
136 |             current_table=id
137 |             metrics=[]
138 |             run_ids=[]
139 |             remove_plot_bar_group()
140 |             uncheck_all()
141 |         }
142 |         
143 |     }
    // Untick every ticked checkbox on the page.
    // click() is used so that any onclick handler attached to the checkbox
    // (checkmetric / checkrow in this template) also runs.
    function uncheck_all(){
        // Assigned without var/let, so this is a global.
        input_elements=document.getElementsByTagName("input")
        for(var i=0;i<input_elements.length;i++){
            if(input_elements[i].type=="checkbox"){
                if(input_elements[i].checked){
                    // click() returns undefined; the log only prints that.
                    console.log(input_elements[i].click())
                }
            }
        }
    }
154 |     function metricsfilter(){
155 |         value=document.getElementById('metricsfilter').value;
156 |         condition=document.getElementById("metricsfilter_condition").value
157 |         flag=true
158 |         if(condition==""){
159 |             document.getElementById("error").innerHTML="Please enter some value for filtering the metrics EX: >10"
160 |             document.getElementById("error").style.display="block"
161 |             flag=false
162 |         }
163 |         if(flag){
164 |             window.open("/?metrics="+value+"&metricsfilter="+condition,"_self")
165 |         }
166 |         
167 |     }
168 |     function filtertags(){
169 |         tags=document.getElementsByName('tagCheckboxs')
170 |         all_tags=[]
171 |         
172 |         for(var i=0;i<tags.length;i++){
173 |             if (tags[i].checked){
174 |                 all_tags.push(tags[i].value)
175 |                 
176 |             }
177 |         }
178 |         if (all_tags.length>0){
179 |             window.open("/?tags="+all_tags,"_self")
180 |         }
181 | 
182 |         
183 |     }
184 |     function hideerror(){
185 |         document.getElementById("error").style.display="none"
186 |         
187 |     }
188 |   </script>
189 |   <body>
190 |   <section class="section">
191 |     <div class="container">
192 |         
193 |         <h1 class="title center-text"><span><img src="{{ url_for('static', filename='logo.svg') }}" height="30" width="30"></span> PyMLPipe</h1>
194 |     </div>
195 |   </section>
196 |   
197 |   <section class='section'>
198 |     {% if error != "" %}
199 |     <div class="notification is-danger is-light" id="error">
200 |         <button class="delete" onclick="hideerror()"></button>
201 |         {{error}} Please provide a valid expression Ex: >10
202 |     </div>
203 |     {%else%}
204 |     <div class="notification is-danger is-light" id="error" style="display:none">
205 |         <button class="delete" onclick="hideerror()"></button>
206 |         {{error}}
207 |     </div>
208 |     {%endif%}
209 |     <div class="columns">
210 | 
211 | 
212 |         <div class="column is-one-fifth">
213 | 
214 |         <aside class="menu">
215 |             <p class="menu-label">
216 |             Experiments
217 |             </p>
218 |             <ul class="menu-list">
219 |             {%for experiment in runs%}
220 |             
221 |             <li onclick="show_table('{{experiment}}')" id="sidebar_{{experiment}}"><a href="#{{experiment}}" >{{experiment}}</a></li>
222 |             
223 |             <hr>
224 |             <!--
225 |             <li style="margin:5px;">
226 |                 <a class="is-active" href="#{{experiment}}" onclick=show_dropdown("{{experiment}}")>{{loop.index}}. {{experiment}}</a>
227 |                 <ul id="{{experiment}}" class="dropdown-inactive">
228 |                 {%for run in runs[experiment]['runs']%}
229 |                 <li><a>{{run[:15]}}...</a></li>
230 |                 
231 |                 {%endfor%}
232 |                 </ul>
233 |             </li>
234 |             -->
235 |             {%endfor%}
236 |                 
237 |             </ul>
238 |             <p class="menu-label">
239 |                 Deployments
240 |             </p>
241 |             <ul class="menu-list">
242 |                 <li> <a href="/show_deployments/">Show Deployments</a></li>
243 |             </ul>
244 |             <p class="menu-label">
245 |                 Jobs
246 |             </p>
247 |             <ul class="menu-list">
248 |                 <li> <a href="/jobs/">Show Pipeline</a></li>
249 |             </ul>
250 |             
251 |         </aside>
252 | 
253 |         </div>
254 | 
255 |         <div class="column" >
256 |             <div class="action-bar">
257 |                 <div class="columns box" style="margin:5px">
258 |                     <!--<div class="column">
259 |                 
260 |                     </div>-->
261 |                     <!--
262 |                     <div class="column">
263 |                         <div class="field has-addons">
264 |                             <div class="control">
265 |                               <input class="input" type="text" placeholder="Filter metrics>condition">
266 |                             </div>
267 |                             <div class="control">
268 |                               <a class="button is-info" onclick="filter()">
269 |                                 Search
270 |                               </a>
271 |                             </div>
272 |                           </div>
273 |                     </div>
274 |                     -->
275 |                     <div class="column">
276 |                         <div class="field has-addons has-addons-centered">
277 |                             <p class="control">
278 |                               <span class="select">
279 |                                 <select id="metricsfilter">
280 |                                     {%for metric in metrics%}
281 |                                     <option value="{{metric}}">{{metric}}</option>
282 |                                     {%endfor%}
283 |                                 </select>
284 |                               </span>
285 |                             </p>
286 |                             <p class="control">
287 |                               <input class="input" type="text" placeholder="Condition eg:>10" id="metricsfilter_condition" required>
288 |                             </p>
289 |                             <p class="control">
290 |                               <a class="button is-primary" onclick="metricsfilter()">
291 |                                 Filter
292 |                               </a>
293 |                             </p>
294 |                           </div>
295 | 
296 |                     </div>
297 |                     <div class="column">
298 |                         <div class="columns">
299 |                             <div class="column">
300 |                                 <div class="navbar-item has-dropdown is-hoverable">
301 |                                     <a class="navbar-link">
302 |                                       Tags
303 |                                     </a>
304 |                                 
305 |                                     <div class="navbar-dropdown">
306 |                                         {%for tag in tags%}
307 |                                         <a class="navbar-item">
308 |                                             <input type="checkbox" name="tagCheckboxs" value="{{tag}}"> {{tag}}
309 |                                         </a>
310 |                                         {%endfor%}
311 |                                       
312 |                                     </div>
313 |                                 </div>
314 |                             </div>
315 |                             <div class="column">
316 |                                 <button class="button is-primary" onclick="filtertags()"><img src="{{ url_for('static', filename='filter.svg') }}" height="20" width="20"></button>
317 | 
318 |                             </div>
319 |                             
320 |                         </div>
321 |                         
322 |                         
323 |                     </div>
324 |                     <div class="column">
325 |                         <button class="button is-primary" onclick="compare_metrics()" id="metric-compare-button" disabled>Compare Metrics</button>
326 |                     </div>
327 |                 </div>
328 | 
329 |             </div>
330 |             {%for experiment in runs%}
331 |             
332 |             {%if loop.index==1%}
333 |                 {%set display='table'%}
334 |             {%else%}
335 |                 {%set display='none'%}
336 |             {%endif%}
337 |             <table class="table is-striped" style="width:100%;display:{{display}}" id="{{experiment}}">
338 |                 
339 |                 <!--<thead>
340 |                     <tr>
341 |                         <th></th>
342 |                         <th></th>
343 |                         <th></th>
344 |                         <th></th>
345 |                         <th></th>
346 |                         <th></th>
347 |                         
348 |                         
349 |                         <th>
350 |                             <input type="checkbox" onclick=checkmetric("{{metric}}")>
351 |                             Select all
352 |                         </th>
353 |                         
354 |                     </tr>
355 |                 </thead>-->
356 |                 <thead>
357 |                     <tr>
358 |                         <th></th>
359 |                         <th>Run ID</th>
360 |                         <th>Date</th>
361 |                         <th>Model</th>
362 |                         <th>version</th>
363 |                         <th>tags</th>
364 |                         
365 |                         {%for metric in exp_wise_metrics[experiment]%}
366 |                             <th>
367 |                                 <div>
368 |                                     <input type="checkbox" onclick="checkmetric('{{experiment}}@{{metric}}')" id="checkbox_@_{{experiment}}@{{metric}}">
369 |                                 
370 |                                 <div>
371 |                                 {{metric}}
372 |                                 </div>
373 |                                 
374 |                             </th>
375 |                             
376 |                         {%endfor%}
377 |                     </tr>
378 |                 </thead>
379 |                 <tbody>
380 |                     
381 |                     {%for run in runs[experiment]['runs']%}
382 |                     {%if run in run_details%}
383 |                     <tr>
384 |                         <td><input type="checkbox" onclick="checkrow('{{run}}')" id="checkbox_{{run}}"></td>
385 |                         <td><a href="/run/{{experiment}}@{{run}}">{{run[:13]}}...</a></td>
386 |                         <td>{{run_details[run].execution_time}}</td>
387 |                         <td>{{run_details[run].model.model_name}}</td>
388 |                         <td>{{run_details[run].version}}</td>
389 |                         <td>
390 |                             {%for tag in run_details[run].tags%}
391 |                             <span class="tag is-warning ">{{tag}}</span>
392 |                             {%endfor%}
393 |                         </td>
394 |                         {%for metric in exp_wise_metrics[experiment]%}
395 |                         <td>{{run_details[run]["metrics"][metric]|string}}</td>
396 |                         {%endfor%}
397 |                     </tr>
398 |                     {%endif%}
399 |                     {%endfor%}
400 |                     
401 |    
402 |  
403 |                 </tbody>
404 |                
405 |             </table>
406 |             {%endfor%}
407 |             <!--Display tables--->
408 |             <div id="plot-data">
409 | 
410 |             </div>
411 |         </div>
412 |         
413 |     </div>
414 |     <div class="columns">
415 |         <div class="column is-one-fifth">
416 | 
417 |         </div>
418 |         <div class="column">
419 |             
420 |         </div>
421 |     </div>
422 |     
423 |     </section>
424 |   </body>
425 |   <script>
426 |     function create_traces(labels,points,name){
427 |         return {
428 |             x: labels,
429 |             y: points,
430 |             name: name,
431 |             type: 'bar'
432 |           };
433 |         
434 |     }
435 |     function plot_bar_group(data){
436 |         
437 |       var layout = {barmode: 'group'};
438 |       Plotly.newPlot('plot-data', data,layout);
439 |     }
440 |     function remove_plot_bar_group(){
441 |         Plotly.purge('plot-data');
442 |     }
443 |     document.getElementById("sidebar_"+current_table).classList.add("sidebar-active")
444 | 
445 |   </script>
446 | </html>


--------------------------------------------------------------------------------
/templates/job_view.html:
--------------------------------------------------------------------------------
 1 | {% extends "template.html" %}
 2 | 
 3 | {% block content %}
 4 | <script>
 5 |   nodes={{nodes|safe}}
 6 |   function changelog(){
 7 |     nodename=document.getElementById("lognode").value
 8 |     document.getElementById("logviewer").value=nodes[nodename]
 9 |   }
10 | </script>
11 | <style>
12 |     #cy {
13 |         width: 100%;
14 |         height: 400px;
15 |         display: block;
16 |         border: 1px solid gainsboro
17 |       }
18 | </style>
19 | 
20 | <nav class="breadcrumb" aria-label="breadcrumbs">
21 |     <ul>
22 |         <li><a href="/">home</a></li>
23 |         <li><a href="/jobs/">Pipeline</a></li>
24 |         <li class="is-active"><a href="/jobs/view/{{pipelinename}}" aria-current="page">{{pipelinename}}</a></li>
25 |       </ul>
26 | </nav>
27 | <div id="cy">
28 | 
29 | </div>
30 | <div id="logs">
31 |   <div class="box">
32 |     <div class="select">
33 |       <select onchange="changelog()" id="lognode">
34 |        {%for i in nodes%}
35 |        <option>{{i}}</option>
36 |        {%endfor%}
37 |       </select>
38 |     </div>
39 |     <div>
40 |       <textarea class="textarea" readonly id="logviewer">{{initital_node}}</textarea>
41 |     </div>
42 |   </div>
43 | </div>
44 | <script>
45 | graph_dict={{grapg_dict|safe}}
46 | var cy = cytoscape({
47 | 
48 |     container: document.getElementById('cy'),
49 |               style: [
50 |                   {
51 |                       selector: 'node',
52 |                       css: {
53 |                           label: "data(label)",
54 |             
55 |                             width: 45,
56 |                             height: 35,
57 |                             shape: "round-rectangle",
58 |                             color: "#828282",
59 |                             "background-color":"data(color)",
60 |                             'font-size':15,
61 |                             'line-color': 'red'
62 |                       }
63 |                       
64 |                   },
65 |                   {
66 |                     selector: 'edge',
67 |                     css: {
68 |                       'width': 2,
69 |                       'line-color': '#ccc',
70 |                       'target-arrow-color': '#ccc',
71 |                       'target-arrow-shape': 'triangle' 
72 |                   }
73 |                 }
74 |               ],
75 |               elements: graph_dict,
76 |               
77 |      layout: {
78 |       name: 'dagre',
79 |       rankDir: 'LR'
80 |     },
81 |       //pan: { x: 600, y: 100 },
82 |   });
83 | 
84 | </script>
85 | {% endblock %}


--------------------------------------------------------------------------------
/templates/jobs.html:
--------------------------------------------------------------------------------
 1 | {% extends "template.html" %}
 2 | 
 3 | {% block content %}
 4 | 
 5 | <nav class="breadcrumb" aria-label="breadcrumbs">
 6 |     <ul>
 7 |         <li><a href="/">home</a></li>
 8 |         <li><a href="/jobs/">Pipeline</a></li>
 9 |         
10 |       </ul>
11 | </nav>
12 | 
13 | <table class="table is-striped" style="width:100%;">
14 |   <thead>
15 |     <tr>
16 |       <td>Pipeline Name</td>
17 |       <td>Created On</td>
18 |       <td>Start Time</td>
19 |       <td>Status</td>
20 |       <td>Actions</td>
21 |     </tr>
22 |   </thead>
23 |   <tbody>
24 |     {%for pipe in pipeline%}
25 |     <tr>
26 |       <td>
27 |         {{pipe["pipelinename"]}}
28 |       </td>
29 |       <td>
30 |         {{pipe["created_at"]}}
31 |       </td>
32 |       <td>
33 |         {{pipe["jobtime"]}}
34 |       </td>
35 |       <td>
36 |         {{pipe["status"]}}
37 |       </td>
38 |       {%if pipe["status"]=="Queued" or pipe["status"]=="Started"%}
39 |       <td>
40 |         <a class="button is-danger" href="/jobs/run/{{pipe['pipelinename']}}">Stop</a>
41 |         <a class="button is-warning" href="/jobs/view/{{pipe['pipelinename']}}">view</a>
42 |       </td>
43 |       {%else%}
44 |       <td>
45 |         <a class="button is-primary" href="/jobs/run/{{pipe['pipelinename']}}">Start</a>
46 |         <a class="button is-warning" href="/jobs/view/{{pipe['pipelinename']}}">view</a>
47 |       </td>
48 |       
49 |       {%endif%}
50 |     </tr>
51 |     {%endfor%}
52 |   </tbody>
53 | </table>
54 | 
55 | {% endblock %}


--------------------------------------------------------------------------------
/templates/template.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |   <head>
 4 |     <meta charset="utf-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 6 |     <title>PyMLPipe</title>
 7 |     <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
 8 |     <link rel="shortcut icon" href="{{ url_for('static', filename='favicon.ico') }}">
 9 | 
10 |     <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
11 |     <script src="https://cdnjs.cloudflare.com/ajax/libs/cytoscape/3.22.0/cytoscape.min.js"></script>
12 |     
13 |     <script src="https://unpkg.com/dagre@0.7.4/dist/dagre.js"></script>
14 |   <script src="https://cdn.jsdelivr.net/npm/cytoscape-dagre@2.1.0/cytoscape-dagre.min.js"></script>
15 |   </head>
16 |   <style>
17 |     .center-text{
18 |         text-align: center
19 |     }
20 |     .dropdown-inactive{
21 |         display: none;
22 |     }
23 |     .dropdown-active{
24 |         display: block;
25 |     }
26 |     hr{
27 |         margin:2px;
28 |     }
29 |   </style>
30 |   <body>
31 |     <section class="section">
32 |         <div class="container">
33 |           <h1 class="title center-text"><span><img src="{{ url_for('static', filename='logo.svg') }}" height="30" width="30"></span> PyMLPipe</h1>
34 |         </div>
35 |     </section>
36 |     <section>
37 |         <div class="container">
38 |             {% block content %}
39 |             {% endblock %}
40 |         </div>
41 |     </section>
42 |   </body>
43 | </html>


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.1.0'
2 | __author__= "indresh bhattacharya"   


--------------------------------------------------------------------------------
/utils/_sklearn_prediction.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | class Deployment:
 3 |     def __init__(self,model_path):
 4 |         self.model_path = model_path
 5 |         self.model=pickle.load(open(self.model_path,'rb'))
 6 |         
 7 |     
 8 |     def predict(self,data,dtype):
 9 |         status=0
10 |         try:
11 |             return self.model.predict(data),status
12 |         except Exception as e:
13 |             status=1
14 |             return str(e),status
15 |         
16 |         


--------------------------------------------------------------------------------
/utils/_torch_prediction.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import torch
 3 | class Deployment:
 4 |     def __init__(self,model_path,typeof="non_runtime"):
 5 |         
 6 |         self.model_path = model_path
 7 |         if typeof=="non_runtime":
 8 |             self.model=self._load_model(self.model_path)
 9 |         elif typeof=="runtime":
10 |             self.model=self._load_model_with_runtime(self.model_path)
11 |             
12 |         
13 |     def _load_model(self,model_name):
14 |         model = torch.jit.load(model_name)
15 |         return model
16 | 
17 |     def _load_model_with_runtime(self,model_name):
18 |         loaded_trace = torch.jit.load(model_name)
19 |         return loaded_trace
20 |     
21 |     def predict(self,data,dtype):
22 |         status=0
23 |         try:
24 |             if dtype=="float":
25 |                 data=torch.from_numpy(data).type(torch.FloatTensor)
26 |                 return self.model(data).detach().numpy(),status
27 |             elif dtype=="double":
28 |                 data=torch.from_numpy(data).type(torch.DoubleTensor)
29 |                 return self.model(data).detach().numpy(),status
30 |             elif dtype=="int":
31 |                 data=torch.from_numpy(data).type(torch.IntTensor)
32 |                 return self.model(data).detach().numpy(),status
33 |         except Exception as e:
34 |             status=1
35 |             return str(e),status
36 |         
37 |         


--------------------------------------------------------------------------------
/utils/_xai.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as pl
  2 | from sklearn import inspection
  3 | import shap
  4 | import pandas as pd
  5 | import os
  6 | import numpy as np
  7 | 
  8 | 
  9 | 
 10 | XAI_MAP={
 11 |     "TreeBasedModels": ['BaseDecisionTree',
 12 |  'DecisionTreeClassifier',
 13 |  'DecisionTreeRegressor',
 14 |  'ExtraTreeClassifier',
 15 |  'ExtraTreeRegressor',
 16 |  'BaseEnsemble',
 17 |  'RandomForestClassifier',
 18 |  'RandomForestRegressor',
 19 |  'RandomTreesEmbedding',
 20 |  'ExtraTreesClassifier',
 21 |  'ExtraTreesRegressor',
 22 |  'BaggingClassifier',
 23 |  'BaggingRegressor',
 24 |  'IsolationForest',
 25 |  'GradientBoostingClassifier',
 26 |  'GradientBoostingRegressor',
 27 |  'AdaBoostClassifier',
 28 |  'AdaBoostRegressor',
 29 |  'VotingClassifier',
 30 |  'VotingRegressor',
 31 |  'StackingClassifier',
 32 |  'StackingRegressor',
 33 |  "XGBClassifier",
 34 |  "XGBRegressor",
 35 |  "CatBoostClassifier",
 36 |  "CatBoostRegressor",
 37 |  "LGBMClassifier",
 38 |  "LGBMRegressor"
 39 |  ],
 40 | "LinearModels": ['ARDRegression',
 41 |  'BayesianRidge',
 42 |  'ElasticNet',
 43 |  'ElasticNetCV',
 44 |  'Hinge',
 45 |  'Huber',
 46 |  'HuberRegressor',
 47 |  'Lars',
 48 |  'LarsCV',
 49 |  'Lasso',
 50 |  'LassoCV',
 51 |  'LassoLars',
 52 |  'LassoLarsCV',
 53 |  'LassoLarsIC',
 54 |  'LinearRegression',
 55 |  'LogisticRegression',
 56 |  'LogisticRegressionCV',
 57 |  'ModifiedHuber',
 58 |  'MultiTaskElasticNet',
 59 |  'MultiTaskElasticNetCV',
 60 |  'MultiTaskLasso',
 61 |  'MultiTaskLassoCV',
 62 |  'OrthogonalMatchingPursuit',
 63 |  'OrthogonalMatchingPursuitCV',
 64 |  'PassiveAggressiveClassifier',
 65 |  'PassiveAggressiveRegressor',
 66 |  'Perceptron',
 67 |  'Ridge',
 68 |  'RidgeCV',
 69 |  'RidgeClassifier',
 70 |  'RidgeClassifierCV',
 71 |  'SGDClassifier',
 72 |  'SGDRegressor',
 73 |  'SquaredLoss',
 74 |  'TheilSenRegressor',
 75 |  'RANSACRegressor',
 76 |  'PoissonRegressor',
 77 |  'GammaRegressor',
 78 |  'TweedieRegressor'],
 79 | }
 80 | 
 81 | class Explainer():
 82 |     def __init__(self,model,data,artifact_path):
 83 |         self.model=model
 84 |         self.data=data
 85 |         self.artifact_path=artifact_path
 86 |         self.feature_map=self.data.columns
 87 |     def explain(self):
 88 |         model_class=type(self.model)
 89 |         model_name=type(self.model).__name__
 90 |         flag=False
 91 |         if model_name in XAI_MAP["LinearModels"]:
 92 |             self.coef_based_feature_importance(self.model,np.std(self.data,0),self.feature_map,os.path.join(self.artifact_path,"explainer"))
 93 |             try:
 94 |                 self.tree_linear_summary_plot(self.model,self.data,self.feature_map,os.path.join(self.artifact_path,"explainer"))
 95 |             except Exception as e:
 96 |                 flag=True
 97 |                 print("Warning:Instance of model {model} not supported".format(model=model_name))
 98 |             
 99 |         elif model_name in XAI_MAP["TreeBasedModels"]:
100 |             self.tree_based_feature_importance(self.model,self.feature_map,os.path.join(self.artifact_path,"explainer"))
101 |             try:
102 |                 self.tree_expainer_summary_plot(self.model,self.data,self.feature_map,os.path.join(self.artifact_path,"explainer"))
103 |             except Exception as e:
104 |                 flag=True
105 |                 print("Warning: Instance of model {model} not supported".format(model=model_name))
106 |             
107 |         else:
108 |             #implement XAI for NeuralNetworks
109 |             pass
110 |         if not flag:
111 |             return {
112 |                 "feature_explainer":os.path.join(self.artifact_path,"explainer.csv"),
113 |                 "shap":os.path.join(self.artifact_path,"explainer.svg")
114 |                 }
115 |         else:
116 |             return {
117 |                 "feature_explainer":os.path.join(self.artifact_path,"explainer.csv"),
118 |                 "shap":""
119 |             }
120 | 
121 |     def tree_expainer_summary_plot(self,model,xtrain,feature_map,fig_name):
122 |         shap_xgb_explainer = shap.TreeExplainer(model)
123 |         shap_xgb_values_train = shap_xgb_explainer.shap_values(xtrain)
124 |         shap.summary_plot(shap_xgb_values_train, xtrain,feature_names=feature_map,show=False)
125 |         pl.savefig("{fig_name}.svg".format(fig_name=fig_name),dpi=700,bbox_inches='tight')
126 |         pl.close('all')
127 |         
128 |         
129 |     def tree_linear_summary_plot(self,model,xtrain,feature_map,fig_name):
130 |         shap_xgb_explainer = shap.LinearExplainer(model,xtrain)
131 |         shap_xgb_values_train = shap_xgb_explainer.shap_values(xtrain)
132 |         shap.summary_plot(shap_xgb_values_train, xtrain,feature_names=feature_map,show=False)
133 |         pl.savefig("{fig_name}.svg".format(fig_name=fig_name),dpi=700,bbox_inches='tight')
134 |         pl.close('all')
135 | 
136 |     def permutation_feature_importance(self,model,trainx,trainy):
137 |         permutation_imp=inspection.permutation_importance(model, trainx, trainy, n_jobs=-1,scoring='accuracy', n_repeats=8,)
138 |         return permutation_imp.importances_mean
139 | 
140 | 
141 |     def tree_based_feature_importance(self,model,feature_map,path):
142 |         model_ranks=pd.DataFrame([{"feature":f,"importance":fi} for f,fi in zip(feature_map,model.feature_importances_)])
143 |         dt_rank_df = pd.DataFrame({"feature":model_ranks["feature"],"importance":model_ranks["importance"],'rank': model_ranks["importance"].rank(method='first', ascending=False).astype(int)})
144 |         dt_rank_df.to_csv('{path}.csv'.format(path=path),index=False)
145 | 
146 |     def coef_based_feature_importance(self,model,std,feature_map,path):
147 |         maps={"feature":feature_map,}
148 |         n_coff=0
149 |         for idx,i in enumerate(model.coef_):
150 |             maps["coef_norm_"+str(idx)]=model.coef_[idx] *std
151 |             n_coff+=1
152 |         df=pd.DataFrame(maps)
153 |         df=df.round(3)
154 |         df["avg_coef_norm"]=df.sum(axis=1)/n_coff
155 |         
156 |         ndf=df.sort_values(by="avg_coef_norm",ascending=False)
157 |         ndf.to_csv('{path}.csv'.format(path=path),index=False)
158 | 
159 |     def permuatation_feature_importance(self,model,test_x,test_y,feature_map):
160 |         importancef=inspection.permutation_importance(model,test_x,test_y,n_jobs=-1, n_repeats=8)
161 |         return pd.DataFrame([{"feature":f,"importance":fi} for f,fi in zip(feature_map,importancef.importances_mean)])
162 | 
163 | 
164 | 
165 | 
166 | 


--------------------------------------------------------------------------------
/utils/change2graph.py:
--------------------------------------------------------------------------------
 1 | 
 2 | def search_arch(architecture, node):
 3 |     for arch in architecture:
 4 |         if arch["layer_name"] == node:
 5 |             return arch
 6 |     return None
 7 | 
 8 | def makegraph(ops,architecture):
 9 |     #print(ops)
10 |     #print(architecture)
11 |     graph_dict={"nodes":[],"edges":[]}
12 |     for op in ops:
13 |         #prev=ops[op]['name'] if ops[op]['prev']=="" else ops[op]['prev']
14 |         #next_pt=ops[op]['name'] if ops[op]['next']=="" else ops[op]['next']
15 |         arch_details=search_arch(architecture, ops[op]['name'])
16 |         if arch_details!=None:
17 |             graph_dict["nodes"].append({'data':{ 'id': ops[op]['name'] ,
18 |                                             "label":op+"_"+ops[op]['name']  ,
19 |                                             "type":arch_details["layer_type"] ,
20 |                                             "details":[k+"="+str(v) for k,v in arch_details["params"].items()]
21 |                                             } })
22 |         else:
23 |             graph_dict["nodes"].append({'data':{ 'id': ops[op]['name'] ,
24 |                                             "label":op+"_"+ops[op]['name'],
25 |                                             "type": ops[op]['name'],
26 |                                             "details":[ops[op]['op']]   
27 |                                             } })
28 |         if ops[op]['next']!="":
29 |             graph_dict["edges"].append({ 'data': { 'id': op, 'source': ops[op]['name'], 'target': ops[op]['next']} })
30 |     return graph_dict
31 |     
32 |     
33 | '''
34 | def makegraph_pipeline(edges,sequence,node_details):
35 |     #print(node_details)
36 |     graph_dict={"nodes":[],"edges":[]}
37 |     color={"Queued":"#828282","Completed":"#80ff80","Failed":"#fc3d03","Started":"#ffff33"}
38 |     for op in sequence:
39 |         #print(op)
40 |         graph_dict["nodes"].append({'data':{ 'id': op ,
41 |                                             "label":op ,
42 |                                             "color":color[node_details[op]["status"]]
43 |                                             } })
44 |         
45 |         for edge in edges:
46 |             if edge["src"]==op:
47 |                 graph_dict["edges"].append({'data':{ 'id': edge["src"]+ edge["target"], 'source': edge["src"], 'target': edge["target"]} })
48 |                 
49 |     return graph_dict
50 | '''
def makegraph_pipeline(graph:dict,node_details:dict):
    """Build the node/edge structure used by the web visualization.

    Args:
        graph (dict): maps each node name to the list of node names it points
            to. Names may carry an ``"args@"`` prefix, which is removed.
        node_details (dict): maps node name to a dict whose ``"status"`` is one
            of ``Queued``, ``Completed``, ``Failed`` or ``Started``.

    Returns:
        dict: ``{"nodes": [...], "edges": [...]}`` where every node has an
        ``id``, a ``label`` and a status ``color``, and every edge has an
        ``id``, a ``source`` and a ``target``.
    """
    color={"Queued":"#828282","Completed":"#80ff80","Failed":"#fc3d03","Started":"#ffff33"}
    entry_node="root"
    graph_dict={"nodes":[],"edges":[]}
    _args_tag="args@"
    for op in graph:
        if op.startswith(_args_tag):
            # Drop only the leading tag. str.strip() treated the tag as a set
            # of characters and also ate matching letters at both ends of the
            # node name (e.g. "args@stats" became "tat").
            op=op[len(_args_tag):]
        # The entry node has no status record and is always drawn as completed.
        if entry_node==op:
            node_color=color["Completed"]
        else:
            node_color=color[node_details[op]["status"]]
        graph_dict["nodes"].append({'data':{ 'id': op ,
                                            "label":op ,
                                            "color":node_color
                                            } })
        # NOTE(review): edges are looked up under the un-prefixed name, as
        # before; confirm the graph always has that key for "args@" entries.
        for edge in graph[op]:
            graph_dict["edges"].append({'data':{ 'id': op+ edge, 'source': op, 'target': edge} })
    return graph_dict


--------------------------------------------------------------------------------
/utils/database.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | MODEL_FOLDER_NAME="modelrun"
 5 | 
 6 | PIPELINE_FOLDER_NAME="ML_pipelines"
 7 | 
 8 | def create_folder(folder_path,name=None):
 9 |     """_summary_:create a folder for storing model information
10 | 
11 |     Returns:
12 |         str: path for storing model details
13 |     """
14 |     folder=MODEL_FOLDER_NAME
15 |     if name!=None: folder=name
16 |     path=os.path.join(folder_path,folder)
17 |     
18 |     if not os.path.exists(path):
19 |         os.mkdir(path)
20 |     return path
21 | 
22 | 
def getfolders(path):
    """Return the names of the entries directly inside ``path`` (via ``os.listdir``)."""
    entries=os.listdir(path)
    return entries
25 | 
26 |     
27 | 


--------------------------------------------------------------------------------
/utils/factory.py:
--------------------------------------------------------------------------------
1 | """_summary_Contains all files and Folder names
2 | """
3 | 
# Default folder and file names, keyed by the role each one plays.
DEFAULT = dict(
    ModelRunSave="modelrun",
    ModelRunInfo="experiment.yaml",
    RunInfo="info.yaml",
)


--------------------------------------------------------------------------------
/utils/getschema.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | def schema_(data):
 3 |     """_summary_: Generate schema object for a dataframe
 4 | 
 5 |     Args:
 6 |         data (Pandas DataFrame): Pandas Artifact
 7 | 
 8 |     Returns:
 9 |         dict: with column schema
10 |     """
11 |     schema={}
12 |     details=[]
13 |     for col in data:
14 |         schema[col]={
15 |             'min':float("{0:.4f}".format(data[col].min())),
16 |             'max':float("{0:.4f}".format(data[col].max())),
17 |             'std':float("{0:.4f}".format(data[col].std())),
18 |             "variance":float("{0:.4f}".format(data[col].var())),
19 |             "mean":float("{0:.4f}".format(data[col].mean())),
20 |             "median":float("{0:.4f}".format(data[col].median())),
21 |             "data type":str(data[col].dtype),
22 |             "unique_values":int(len(data[col].unique())),
23 |             "25th percentile":float("{0:.4f}".format(data[col].quantile(0.25))),
24 |             "50% percentile":float("{0:.4f}".format(data[col].quantile(0.5))),
25 |             "75% percentile":float("{0:.4f}".format(data[col].quantile(0.75))),
26 |         }
27 |         if len(details)==0:
28 |             details=list(schema[col].keys())
29 |     #print("-------->",schema)
30 |     return schema,details


--------------------------------------------------------------------------------
/utils/uiutils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from pymlpipe.utils import _sklearn_prediction,_torch_prediction
 3 | 
 4 | 
 5 | def deployment_handler(model_path,model_type,runtime):
 6 |     if model_type=="scikit-learn":
 7 |         
 8 |         deployed=_sklearn_prediction.Deployment(model_path)
 9 |     elif model_type=="torch":
10 |         deployed=_torch_prediction.Deployment(model_path,typeof=runtime)
11 |     return deployed


--------------------------------------------------------------------------------
/utils/yamlio.py:
--------------------------------------------------------------------------------
 1 | import yaml
 2 | import  os
 3 | def read_yaml(path):
 4 |     if not os.path.exists(path):
 5 |         return []
 6 |     with open(path) as file:
 7 |         fulllist = yaml.load(file, Loader=yaml.FullLoader)
 8 |     return fulllist
 9 | 
10 | 
def write_to_yaml(path,info):
    """Serialise ``info`` as YAML into the file at ``path``, overwriting it.

    Args:
        path: destination file path.
        info: object handed to ``yaml.dump``.
    """
    # The single-argument os.path.join() and the unused result variable were
    # no-ops and have been dropped; yaml.dump writes straight to the stream.
    with open(path, 'w') as file:
        yaml.dump(info, file)
14 |             


--------------------------------------------------------------------------------