├── Notebooks ├── Readme.md └── Out-of-core-ML-incremental-learning.ipynb ├── README.md ├── LICENSE └── .gitignore /Notebooks/Readme.md: -------------------------------------------------------------------------------- 1 | ## Notebooks 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dask-analytics-ML 2 | Data science and ML with Dask 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Tirthajyoti Sarkar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Notebooks/Out-of-core-ML-incremental-learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "bc834e1c-e7d4-49a4-9ac2-5c3592d73a53", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from dask.distributed import Client\n", 11 | "import numpy as np\n", 12 | "import dask.array as da\n", 13 | "from dask_ml.datasets import make_classification\n", 14 | "from dask_ml.model_selection import train_test_split\n", 15 | "from time import time" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "id": "eaf8ce35-3e64-4f08-b609-a3f08beb13d2", 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/html": [ 27 | "\n", 28 | "
\n", 29 | "
\n", 36 | "
\n", 37 | "

Client

\n", 38 | "

Client-87f6a835-f23a-11eb-ad7c-8cae4ce539e5

\n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | "
Connection method: Cluster object; Cluster type: LocalCluster
\n", 48 | " Dashboard: \n", 49 | " http://10.0.0.199:8787/status\n", 50 | "
\n", 55 | " \n", 56 | "
\n", 57 | "

Cluster Info

\n", 58 | " \n", 59 | "
\n", 60 | "
\n", 67 | "
\n", 68 | "

LocalCluster

\n", 69 | "

91c35c39

\n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 88 | " \n", 92 | " \n", 93 | " \n", 94 | "
Status: running; Using processes: False
\n", 79 | " Dashboard: http://10.0.0.199:8787/status\n", 80 | " Workers: 4
\n", 85 | " Total threads:\n", 86 | " 8\n", 87 | " \n", 89 | " Total memory:\n", 90 | " 14.90 GiB\n", 91 | "
\n", 95 | "
\n", 96 | "

Scheduler Info

\n", 97 | " \n", 98 | "
\n", 99 | " \n", 100 | "
\n", 101 | "
\n", 108 | "
\n", 109 | "

Scheduler

\n", 110 | "

Scheduler-8bc28c9d-fc53-4318-81a1-5b3e0605c936

\n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 120 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 130 | " \n", 134 | " \n", 135 | "
Comm: inproc://10.0.0.199/11644/1; Workers: 4
\n", 118 | " Dashboard: http://10.0.0.199:8787/status\n", 119 | " \n", 121 | " Total threads:\n", 122 | " 8\n", 123 | "
\n", 127 | " Started:\n", 128 | " Just now\n", 129 | " \n", 131 | " Total memory:\n", 132 | " 14.90 GiB\n", 133 | "
\n", 136 | "
\n", 137 | "
\n", 138 | " \n", 139 | "
\n", 140 | "

Workers

\n", 141 | " \n", 142 | "
\n", 143 | "
\n", 149 | "
\n", 150 | "
\n", 151 | " \n", 152 | "

Worker: 0

\n", 153 | "
\n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 164 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | "
Comm: inproc://10.0.0.199/11644/4; Total threads: 2
\n", 161 | " Dashboard: \n", 162 | " http://10.0.0.199:61781/status\n", 163 | " \n", 165 | " Memory: \n", 166 | " 3.73 GiB\n", 167 | "
Nanny: None
\n", 175 | " Local directory: \n", 176 | " C:\\Users\\tirth\\Documents\\Personal\\Data Science related\\Python and other Notebooks\\Dask\\dask-worker-space\\worker-36pp8i9t\n", 177 | "
\n", 182 | "
\n", 183 | "
\n", 184 | "
\n", 185 | " \n", 186 | "
\n", 187 | "
\n", 193 | "
\n", 194 | "
\n", 195 | " \n", 196 | "

Worker: 1

\n", 197 | "
\n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 208 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | "
Comm: inproc://10.0.0.199/11644/3; Total threads: 2
\n", 205 | " Dashboard: \n", 206 | " http://10.0.0.199:61780/status\n", 207 | " \n", 209 | " Memory: \n", 210 | " 3.73 GiB\n", 211 | "
Nanny: None
\n", 219 | " Local directory: \n", 220 | " C:\\Users\\tirth\\Documents\\Personal\\Data Science related\\Python and other Notebooks\\Dask\\dask-worker-space\\worker-swj95wxm\n", 221 | "
\n", 226 | "
\n", 227 | "
\n", 228 | "
\n", 229 | " \n", 230 | "
\n", 231 | "
\n", 237 | "
\n", 238 | "
\n", 239 | " \n", 240 | "

Worker: 2

\n", 241 | "
\n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 252 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | "
Comm: inproc://10.0.0.199/11644/6; Total threads: 2
\n", 249 | " Dashboard: \n", 250 | " http://10.0.0.199:61783/status\n", 251 | " \n", 253 | " Memory: \n", 254 | " 3.73 GiB\n", 255 | "
Nanny: None
\n", 263 | " Local directory: \n", 264 | " C:\\Users\\tirth\\Documents\\Personal\\Data Science related\\Python and other Notebooks\\Dask\\dask-worker-space\\worker-qhs3q5vf\n", 265 | "
\n", 270 | "
\n", 271 | "
\n", 272 | "
\n", 273 | " \n", 274 | "
\n", 275 | "
\n", 281 | "
\n", 282 | "
\n", 283 | " \n", 284 | "

Worker: 3

\n", 285 | "
\n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 296 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | "
Comm: inproc://10.0.0.199/11644/5; Total threads: 2
\n", 293 | " Dashboard: \n", 294 | " http://10.0.0.199:61782/status\n", 295 | " \n", 297 | " Memory: \n", 298 | " 3.73 GiB\n", 299 | "
Nanny: None
\n", 307 | " Local directory: \n", 308 | " C:\\Users\\tirth\\Documents\\Personal\\Data Science related\\Python and other Notebooks\\Dask\\dask-worker-space\\worker-7uxga12x\n", 309 | "
\n", 314 | "
\n", 315 | "
\n", 316 | "
\n", 317 | " \n", 318 | "
\n", 319 | "
\n", 320 | " \n", 321 | "
\n", 322 | "
\n", 323 | "
\n", 324 | " \n", 325 | "
\n", 326 | " \n", 327 | "
\n", 328 | "
\n", 329 | " " 330 | ], 331 | "text/plain": [ 332 | "" 333 | ] 334 | }, 335 | "execution_count": 2, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "client = Client(n_workers=4, threads_per_worker=2, \n", 342 | " processes=False, \n", 343 | " memory_limit='4 GB',\n", 344 | " silence_logs='error')\n", 345 | "client" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 3, 351 | "id": "cf3d9158-c0d3-4cea-b91c-22ca7300dfe0", 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "N_samples = 12000000\n", 356 | "N_features = 500" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 4, 362 | "id": "56df318b-6a7d-484f-aaee-83c6a6a75f99", 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "from dask_ml.datasets import make_classification" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 5, 372 | "id": "a5241354-76ee-4d9f-b10b-dc88ee935f55", 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "name": "stderr", 377 | "output_type": "stream", 378 | "text": [ 379 | "c:\\program files\\python39\\lib\\site-packages\\dask_ml\\datasets.py:377: PerformanceWarning: Slicing is producing a large chunk. To accept the large\n", 380 | "chunk and silence this warning, set the option\n", 381 | " >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):\n", 382 | " ... array[indexer]\n", 383 | "\n", 384 | "To avoid creating the large chunks, set the option\n", 385 | " >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n", 386 | " ... 
array[indexer]\n", 387 | " z0 = X[:, informative_idx].dot(beta[informative_idx])\n" 388 | ] 389 | } 390 | ], 391 | "source": [ 392 | "X, y = make_classification(\n", 393 | " n_samples=N_samples,n_features=N_features, \n", 394 | " n_redundant=0, n_informative=N_features-5,n_classes=2,\n", 395 | " flip_y=0.05,class_sep=0.8,\n", 396 | " chunks=10000)" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 6, 402 | "id": "1539eda6-89eb-4ce6-a265-abde41fda4f9", 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [ 406 | "X = X.astype(np.float16)\n", 407 | "y = y.astype(np.float16)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 7, 413 | "id": "a432820e-a519-49b4-8c1f-f027c0527b03", 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "from dask_ml.model_selection import train_test_split\n", 418 | "\n", 419 | "X_train, X_test, y_train, y_test = train_test_split(X,y,\n", 420 | " test_size=0.2,\n", 421 | " random_state=101)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 8, 427 | "id": "abb176f0-5422-47e6-aa0c-ff41e571a0bd", 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "data": { 432 | "text/plain": [ 433 | "(9600000, 500)" 434 | ] 435 | }, 436 | "execution_count": 8, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "X_train.shape" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 9, 448 | "id": "f8798c64-e645-4ed7-99d4-c4728eeef318", 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "dask.array.core.Array" 455 | ] 456 | }, 457 | "execution_count": 9, 458 | "metadata": {}, 459 | "output_type": "execute_result" 460 | } 461 | ], 462 | "source": [ 463 | "type(X_train)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 10, 469 | "id": "048fc51e-8f6b-4df5-98fb-691d36128a62", 470 | "metadata": {}, 471 | "outputs": [ 472 | 
{ 473 | "data": { 474 | "text/html": [ 475 | "\n", 476 | "\n", 477 | "\n", 490 | "\n", 527 | "\n", 528 | "
\n", 478 | "\n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | "
Array Chunk
Bytes 8.94 GiB 7.63 MiB
Shape (9600000, 500) (8000, 500)
Count 9600 Tasks 1200 Chunks
Type float16 numpy.ndarray
\n", 489 | "
\n", 491 | "\n", 492 | "\n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | "\n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | "\n", 519 | " \n", 520 | " \n", 521 | "\n", 522 | " \n", 523 | " 500\n", 524 | " 9600000\n", 525 | "\n", 526 | "
" 529 | ], 530 | "text/plain": [ 531 | "dask.array" 532 | ] 533 | }, 534 | "execution_count": 10, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | } 538 | ], 539 | "source": [ 540 | "X_train" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 11, 546 | "id": "21791e5d-8b8e-4e34-8311-fff1372878d9", 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "data": { 551 | "text/html": [ 552 | "\n", 553 | "\n", 554 | "\n", 567 | "\n", 604 | "\n", 605 | "
\n", 555 | "\n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | "
Array Chunk
Bytes 2.24 GiB 1.91 MiB
Shape (2400000, 500) (2000, 500)
Count 9600 Tasks 1200 Chunks
Type float16 numpy.ndarray
\n", 566 | "
\n", 568 | "\n", 569 | "\n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | "\n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | "\n", 596 | " \n", 597 | " \n", 598 | "\n", 599 | " \n", 600 | " 500\n", 601 | " 2400000\n", 602 | "\n", 603 | "
" 606 | ], 607 | "text/plain": [ 608 | "dask.array" 609 | ] 610 | }, 611 | "execution_count": 11, 612 | "metadata": {}, 613 | "output_type": "execute_result" 614 | } 615 | ], 616 | "source": [ 617 | "X_test" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": 12, 623 | "id": "8a2ccb88-a90d-4376-8f88-3f551cde9cb2", 624 | "metadata": {}, 625 | "outputs": [ 626 | { 627 | "data": { 628 | "text/html": [ 629 | "\n", 630 | "\n", 631 | "\n", 644 | "\n", 663 | "\n", 664 | "
\n", 632 | "\n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | "
Array Chunk
Bytes 50 B 50 B
Shape (5, 5) (5, 5)
Count 9601 Tasks 1 Chunks
Type float16 numpy.ndarray
\n", 643 | "
\n", 645 | "\n", 646 | "\n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | "\n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | "\n", 655 | " \n", 656 | " \n", 657 | "\n", 658 | " \n", 659 | " 5\n", 660 | " 5\n", 661 | "\n", 662 | "
" 665 | ], 666 | "text/plain": [ 667 | "dask.array" 668 | ] 669 | }, 670 | "execution_count": 12, 671 | "metadata": {}, 672 | "output_type": "execute_result" 673 | } 674 | ], 675 | "source": [ 676 | "X_train[:5,:5]" 677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": 13, 682 | "id": "4a7dc9da-5f97-406a-b727-b295a494d9d9", 683 | "metadata": {}, 684 | "outputs": [ 685 | { 686 | "data": { 687 | "text/plain": [ 688 | "array([[ 1.077 , -0.10785, 1.416 , 1.115 , -1.278 ],\n", 689 | " [-1.136 , -1.6045 , -0.534 , -0.54 , -0.4863 ],\n", 690 | " [ 0.667 , -1.814 , -0.9385 , -0.1387 , -0.316 ],\n", 691 | " [-0.7485 , -1.677 , -1.216 , 0.07336, -1.039 ],\n", 692 | " [ 0.5503 , -1.395 , 0.4563 , 0.4243 , 0.9536 ]], dtype=float16)" 693 | ] 694 | }, 695 | "execution_count": 13, 696 | "metadata": {}, 697 | "output_type": "execute_result" 698 | } 699 | ], 700 | "source": [ 701 | "X_train[:5,:5].compute()" 702 | ] 703 | }, 704 | { 705 | "cell_type": "markdown", 706 | "id": "3bcddbf3-b2ca-445f-863c-b7a7b0e9cef2", 707 | "metadata": {}, 708 | "source": [ 709 | "## Incremental fit" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": 14, 715 | "id": "0430dfb9-6384-4763-8fb5-213ac539ffda", 716 | "metadata": {}, 717 | "outputs": [], 718 | "source": [ 719 | "from dask_ml.wrappers import Incremental\n", 720 | "from sklearn.linear_model import SGDClassifier" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": 15, 726 | "id": "381c0a5c-ff55-46cc-90dd-115a38d17d04", 727 | "metadata": {}, 728 | "outputs": [], 729 | "source": [ 730 | "est = SGDClassifier(loss='log', penalty='l2', tol=1e-4)\n", 731 | "inc = Incremental(est, scoring='accuracy')\n", 732 | "classes = da.array([0,1])" 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": 16, 738 | "id": "7f14f61c-0652-4515-87f9-55e1dad69635", 739 | "metadata": {}, 740 | "outputs": [ 741 | { 742 | "name": "stdout", 743 | "output_type": "stream", 744 | 
"text": [ 745 | "Fitting time:228.506 seconds\n" 746 | ] 747 | } 748 | ], 749 | "source": [ 750 | "t1 = time()\n", 751 | "inc.fit(X_train, y_train, classes=classes)\n", 752 | "t2 = time()\n", 753 | "delt = round(t2-t1,3)\n", 754 | "print(f\"Fitting time:{delt} seconds\")" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": null, 760 | "id": "0b69b584-4551-45c2-b396-ca83be90f1be", 761 | "metadata": {}, 762 | "outputs": [], 763 | "source": [ 764 | "t1 = time()\n", 765 | "score=inc.score(X_test, y_test)\n", 766 | "t2 = time()\n", 767 | "delt = round(t2-t1,3)\n", 768 | "print(f\"Score evaluation time:{delt} seconds\")\n", 769 | "print(f\"Accuracy score on the test set: {score}\")" 770 | ] 771 | } 772 | ], 773 | "metadata": { 774 | "kernelspec": { 775 | "display_name": "Python 3", 776 | "language": "python", 777 | "name": "python3" 778 | }, 779 | "language_info": { 780 | "codemirror_mode": { 781 | "name": "ipython", 782 | "version": 3 783 | }, 784 | "file_extension": ".py", 785 | "mimetype": "text/x-python", 786 | "name": "python", 787 | "nbconvert_exporter": "python", 788 | "pygments_lexer": "ipython3", 789 | "version": "3.9.5" 790 | } 791 | }, 792 | "nbformat": 4, 793 | "nbformat_minor": 5 794 | } 795 | --------------------------------------------------------------------------------