├── .gitignore ├── Compare_Workbooks.ipynb ├── Different_Shape ├── Arrival_Dates.xlsx ├── Arrival_Dates_Final.xlsx └── Difference_Highlighted.xlsx ├── Pandas_Merge.png ├── README.md ├── Same_Shape ├── Arrival_Dates.xlsx ├── Arrival_Dates_Final.xlsx ├── Difference.xlsx └── Difference_Highlighted.xlsx └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python ### 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # mkdocs documentation 132 | /site 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | 142 | # pytype static type analyzer 143 | .pytype/ 144 | 145 | # Cython debug symbols 146 | cython_debug/ 147 | 148 | -------------------------------------------------------------------------------- /Compare_Workbooks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Compare 2 (Excel) Datasets & Show The Difference 🧐" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Load Dependencies" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "**Requirements:**
\n", 22 | "`openpyxl==3.0.9`
\n", 23 | "`pandas==1.3.5`
\n", 24 | "`xlwings==0.25.3`
" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "

\n", 32 | "C:\\Users\\YOUR_USERNAME> pip install pandas openpyxl xlwings\n", 33 | "\n", 34 | "

" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "from pathlib import Path # Core Python Module\n", 44 | "\n", 45 | "import pandas as pd # pip install pandas openpyxl\n", 46 | "import xlwings as xw # pip install xlwings" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Data with the same shape 🏄‍♂️" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### Define Filepath" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 2, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "initial_version = Path.cwd() / \"Same_Shape\" / \"Arrival_Dates.xlsx\"\n", 70 | "updated_version = Path.cwd() / \"Same_Shape\" / \"Arrival_Dates_Final.xlsx\"" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "### Load the DataFrames" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/html": [ 88 | "
\n", 89 | "\n", 102 | "\n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | "
First NameLast NameArrival Date
0CarlosSullivan2022-10-02
1JeffreyGiles2022-04-25
2MckenziePerkins2022-04-13
\n", 132 | "
" 133 | ], 134 | "text/plain": [ 135 | " First Name Last Name Arrival Date\n", 136 | "0 Carlos Sullivan 2022-10-02\n", 137 | "1 Jeffrey Giles 2022-04-25\n", 138 | "2 Mckenzie Perkins 2022-04-13" 139 | ] 140 | }, 141 | "execution_count": 3, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "df_initial = pd.read_excel(initial_version)\n", 148 | "df_initial.head(3)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 4, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/html": [ 159 | "
\n", 160 | "\n", 173 | "\n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | "
First NameLast NameArrival Date
0CarlosSullivan2022-10-02
1JeffreyGiles2022-04-25
2MckenziePerkins2022-04-13
\n", 203 | "
" 204 | ], 205 | "text/plain": [ 206 | " First Name Last Name Arrival Date\n", 207 | "0 Carlos Sullivan 2022-10-02\n", 208 | "1 Jeffrey Giles 2022-04-25\n", 209 | "2 Mckenzie Perkins 2022-04-13" 210 | ] 211 | }, 212 | "execution_count": 4, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "df_update = pd.read_excel(updated_version)\n", 219 | "df_update.head(3)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "---" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "**Pandas `compare` method**
\n", 234 | "Find the docs here: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.compare.html" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "
\n", 242 | "Note: the method can only compare identically-labeled DataFrame objects; this means DataFrames with identical row and column labels.\n", 243 | "
" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 5, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/plain": [ 254 | "(100, 3)" 255 | ] 256 | }, 257 | "execution_count": 5, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "df_initial.shape" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 6, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "data": { 273 | "text/plain": [ 274 | "(100, 3)" 275 | ] 276 | }, 277 | "execution_count": 6, 278 | "metadata": {}, 279 | "output_type": "execute_result" 280 | } 281 | ], 282 | "source": [ 283 | "df_update.shape" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 7, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "True" 295 | ] 296 | }, 297 | "execution_count": 7, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "df_initial.shape == df_update.shape" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "### Align the differences on columns" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 8, 316 | "metadata": {}, 317 | "outputs": [ 318 | { 319 | "data": { 320 | "text/html": [ 321 | "
\n", 322 | "\n", 335 | "\n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | "
First NameArrival Date
selfotherselfother
10TroiTroyNaTNaT
21NaNNaN2022-12-102022-03-23
56NaNNaN2022-08-182022-02-01
85NaNNaN2022-07-262022-10-24
99NaNNaN2022-12-032022-07-09
\n", 388 | "
" 389 | ], 390 | "text/plain": [ 391 | " First Name Arrival Date \n", 392 | " self other self other\n", 393 | "10 Troi Troy NaT NaT\n", 394 | "21 NaN NaN 2022-12-10 2022-03-23\n", 395 | "56 NaN NaN 2022-08-18 2022-02-01\n", 396 | "85 NaN NaN 2022-07-26 2022-10-24\n", 397 | "99 NaN NaN 2022-12-03 2022-07-09" 398 | ] 399 | }, 400 | "execution_count": 8, 401 | "metadata": {}, 402 | "output_type": "execute_result" 403 | } 404 | ], 405 | "source": [ 406 | "diff = df_update.compare(df_initial, align_axis=1)\n", 407 | "diff\n", 408 | "# self = updated_version\n", 409 | "# other = initial_version" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "### Stack the differences on rows" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 9, 422 | "metadata": {}, 423 | "outputs": [ 424 | { 425 | "data": { 426 | "text/html": [ 427 | "
\n", 428 | "\n", 441 | "\n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | "
First NameArrival Date
10selfTroiNaT
otherTroyNaT
21selfNaN2022-12-10
otherNaN2022-03-23
56selfNaN2022-08-18
otherNaN2022-02-01
85selfNaN2022-07-26
otherNaN2022-10-24
99selfNaN2022-12-03
otherNaN2022-07-09
\n", 508 | "
" 509 | ], 510 | "text/plain": [ 511 | " First Name Arrival Date\n", 512 | "10 self Troi NaT\n", 513 | " other Troy NaT\n", 514 | "21 self NaN 2022-12-10\n", 515 | " other NaN 2022-03-23\n", 516 | "56 self NaN 2022-08-18\n", 517 | " other NaN 2022-02-01\n", 518 | "85 self NaN 2022-07-26\n", 519 | " other NaN 2022-10-24\n", 520 | "99 self NaN 2022-12-03\n", 521 | " other NaN 2022-07-09" 522 | ] 523 | }, 524 | "execution_count": 9, 525 | "metadata": {}, 526 | "output_type": "execute_result" 527 | } 528 | ], 529 | "source": [ 530 | "diff = df_update.compare(df_initial, align_axis=0)\n", 531 | "diff" 532 | ] 533 | }, 534 | { 535 | "cell_type": "markdown", 536 | "metadata": {}, 537 | "source": [ 538 | "### Keep all original rows and columns" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 10, 544 | "metadata": {}, 545 | "outputs": [ 546 | { 547 | "data": { 548 | "text/html": [ 549 | "
\n", 550 | "\n", 563 | "\n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | "
First NameLast NameArrival Date
selfotherselfotherselfother
0NaNNaNNaNNaNNaTNaT
1NaNNaNNaNNaNNaTNaT
2NaNNaNNaNNaNNaTNaT
3NaNNaNNaNNaNNaTNaT
4NaNNaNNaNNaNNaTNaT
.....................
95NaNNaNNaNNaNNaTNaT
96NaNNaNNaNNaNNaTNaT
97NaNNaNNaNNaNNaTNaT
98NaNNaNNaNNaNNaTNaT
99NaNNaNNaNNaN2022-12-032022-07-09
\n", 683 | "

100 rows × 6 columns

\n", 684 | "
" 685 | ], 686 | "text/plain": [ 687 | " First Name Last Name Arrival Date \n", 688 | " self other self other self other\n", 689 | "0 NaN NaN NaN NaN NaT NaT\n", 690 | "1 NaN NaN NaN NaN NaT NaT\n", 691 | "2 NaN NaN NaN NaN NaT NaT\n", 692 | "3 NaN NaN NaN NaN NaT NaT\n", 693 | "4 NaN NaN NaN NaN NaT NaT\n", 694 | ".. ... ... ... ... ... ...\n", 695 | "95 NaN NaN NaN NaN NaT NaT\n", 696 | "96 NaN NaN NaN NaN NaT NaT\n", 697 | "97 NaN NaN NaN NaN NaT NaT\n", 698 | "98 NaN NaN NaN NaN NaT NaT\n", 699 | "99 NaN NaN NaN NaN 2022-12-03 2022-07-09\n", 700 | "\n", 701 | "[100 rows x 6 columns]" 702 | ] 703 | }, 704 | "execution_count": 10, 705 | "metadata": {}, 706 | "output_type": "execute_result" 707 | } 708 | ], 709 | "source": [ 710 | "diff = df_update.compare(df_initial, keep_shape=True, keep_equal=False)\n", 711 | "diff" 712 | ] 713 | }, 714 | { 715 | "cell_type": "markdown", 716 | "metadata": {}, 717 | "source": [ 718 | "### Keep all original rows and columns and also all original values" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": 11, 724 | "metadata": {}, 725 | "outputs": [ 726 | { 727 | "data": { 728 | "text/html": [ 729 | "
\n", 730 | "\n", 743 | "\n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | "
First NameLast NameArrival Date
selfotherselfotherselfother
0CarlosCarlosSullivanSullivan2022-10-022022-10-02
1JeffreyJeffreyGilesGiles2022-04-252022-04-25
2MckenzieMckenziePerkinsPerkins2022-04-132022-04-13
3MaryMaryMarshallMarshall2022-09-022022-09-02
4KathrynKathrynBurchBurch2022-02-112022-02-11
.....................
95DanielDanielWallWall2022-12-032022-12-03
96PamelaPamelaMclaughlinMclaughlin2022-04-142022-04-14
97CarmenCarmenWilliamsWilliams2022-12-152022-12-15
98LauraLauraRogersRogers2022-09-062022-09-06
99DavidDavidStoutStout2022-12-032022-07-09
\n", 863 | "

100 rows × 6 columns

\n", 864 | "
" 865 | ], 866 | "text/plain": [ 867 | " First Name Last Name Arrival Date \n", 868 | " self other self other self other\n", 869 | "0 Carlos Carlos Sullivan Sullivan 2022-10-02 2022-10-02\n", 870 | "1 Jeffrey Jeffrey Giles Giles 2022-04-25 2022-04-25\n", 871 | "2 Mckenzie Mckenzie Perkins Perkins 2022-04-13 2022-04-13\n", 872 | "3 Mary Mary Marshall Marshall 2022-09-02 2022-09-02\n", 873 | "4 Kathryn Kathryn Burch Burch 2022-02-11 2022-02-11\n", 874 | ".. ... ... ... ... ... ...\n", 875 | "95 Daniel Daniel Wall Wall 2022-12-03 2022-12-03\n", 876 | "96 Pamela Pamela Mclaughlin Mclaughlin 2022-04-14 2022-04-14\n", 877 | "97 Carmen Carmen Williams Williams 2022-12-15 2022-12-15\n", 878 | "98 Laura Laura Rogers Rogers 2022-09-06 2022-09-06\n", 879 | "99 David David Stout Stout 2022-12-03 2022-07-09\n", 880 | "\n", 881 | "[100 rows x 6 columns]" 882 | ] 883 | }, 884 | "execution_count": 11, 885 | "metadata": {}, 886 | "output_type": "execute_result" 887 | } 888 | ], 889 | "source": [ 890 | "diff = df_update.compare(df_initial, keep_shape=True, keep_equal=True)\n", 891 | "diff" 892 | ] 893 | }, 894 | { 895 | "cell_type": "markdown", 896 | "metadata": {}, 897 | "source": [ 898 | "### Export difference to Excel 📥" 899 | ] 900 | }, 901 | { 902 | "cell_type": "code", 903 | "execution_count": 12, 904 | "metadata": {}, 905 | "outputs": [], 906 | "source": [ 907 | "diff = df_update.compare(df_initial, align_axis=1)\n", 908 | "diff.to_excel(Path.cwd() / \"Same_Shape\" /\"Difference.xlsx\")" 909 | ] 910 | }, 911 | { 912 | "cell_type": "markdown", 913 | "metadata": {}, 914 | "source": [ 915 | "### [BONUS] Highlight the difference 🔥" 916 | ] 917 | }, 918 | { 919 | "cell_type": "code", 920 | "execution_count": 13, 921 | "metadata": {}, 922 | "outputs": [], 923 | "source": [ 924 | "with xw.App(visible=False) as app:\n", 925 | " initial_wb = app.books.open(initial_version)\n", 926 | " initial_ws = initial_wb.sheets(1)\n", 927 | "\n", 928 | " updated_wb = 
app.books.open(updated_version)\n", 929 | " updated_ws = updated_wb.sheets(1)\n", 930 | "\n", 931 | " for cell in updated_ws.used_range:\n", 932 | " old_value = initial_ws.range((cell.row, cell.column)).value\n", 933 | " if cell.value != old_value:\n", 934 | " cell.api.AddComment(f\"Value from {initial_wb.name}: {old_value}\") # WARNING: Platform specific (!)\n", 935 | " cell.color = (255, 71, 76) # light red\n", 936 | "\n", 937 | " updated_wb.save(Path.cwd() / \"Same_Shape\" / \"Difference_Highlighted.xlsx\")" 938 | ] 939 | }, 940 | { 941 | "cell_type": "markdown", 942 | "metadata": {}, 943 | "source": [ 944 | "## Data with different shape 🚨" 945 | ] 946 | }, 947 | { 948 | "cell_type": "markdown", 949 | "metadata": {}, 950 | "source": [ 951 | "### Load the DataFrames" 952 | ] 953 | }, 954 | { 955 | "cell_type": "code", 956 | "execution_count": 14, 957 | "metadata": {}, 958 | "outputs": [], 959 | "source": [ 960 | "initial_version = Path.cwd() / \"Different_Shape\" / \"Arrival_Dates.xlsx\"\n", 961 | "updated_version = Path.cwd() / \"Different_Shape\" / \"Arrival_Dates_Final.xlsx\"" 962 | ] 963 | }, 964 | { 965 | "cell_type": "markdown", 966 | "metadata": {}, 967 | "source": [ 968 | "### Check shape & data" 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "execution_count": 15, 974 | "metadata": {}, 975 | "outputs": [ 976 | { 977 | "data": { 978 | "text/plain": [ 979 | "(100, 3)" 980 | ] 981 | }, 982 | "execution_count": 15, 983 | "metadata": {}, 984 | "output_type": "execute_result" 985 | } 986 | ], 987 | "source": [ 988 | "df_initial = pd.read_excel(initial_version)\n", 989 | "df_initial.shape" 990 | ] 991 | }, 992 | { 993 | "cell_type": "code", 994 | "execution_count": 16, 995 | "metadata": {}, 996 | "outputs": [ 997 | { 998 | "data": { 999 | "text/plain": [ 1000 | "(102, 3)" 1001 | ] 1002 | }, 1003 | "execution_count": 16, 1004 | "metadata": {}, 1005 | "output_type": "execute_result" 1006 | } 1007 | ], 1008 | "source": [ 1009 | "df_update = 
pd.read_excel(updated_version)\n", 1010 | "df_update.shape" 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "code", 1015 | "execution_count": 17, 1016 | "metadata": {}, 1017 | "outputs": [ 1018 | { 1019 | "data": { 1020 | "text/plain": [ 1021 | "False" 1022 | ] 1023 | }, 1024 | "execution_count": 17, 1025 | "metadata": {}, 1026 | "output_type": "execute_result" 1027 | } 1028 | ], 1029 | "source": [ 1030 | "df_initial.shape == df_update.shape" 1031 | ] 1032 | }, 1033 | { 1034 | "cell_type": "markdown", 1035 | "metadata": {}, 1036 | "source": [ 1037 | "### Show the difference by merging both DataFrames" 1038 | ] 1039 | }, 1040 | { 1041 | "cell_type": "code", 1042 | "execution_count": 18, 1043 | "metadata": {}, 1044 | "outputs": [ 1045 | { 1046 | "data": { 1047 | "text/html": [ 1048 | "
\n", 1049 | "\n", 1062 | "\n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | "
indexFirst NameLast NameArrival Date
00CarlosSullivan2022-10-02
11JeffreyGiles2022-04-25
22MckenziePerkins2022-04-13
\n", 1096 | "
" 1097 | ], 1098 | "text/plain": [ 1099 | " index First Name Last Name Arrival Date\n", 1100 | "0 0 Carlos Sullivan 2022-10-02\n", 1101 | "1 1 Jeffrey Giles 2022-04-25\n", 1102 | "2 2 Mckenzie Perkins 2022-04-13" 1103 | ] 1104 | }, 1105 | "execution_count": 18, 1106 | "metadata": {}, 1107 | "output_type": "execute_result" 1108 | } 1109 | ], 1110 | "source": [ 1111 | "# We need the index information to highlight the rows in Excel\n", 1112 | "df_update = df_update.reset_index()\n", 1113 | "df_update.head(3)" 1114 | ] 1115 | }, 1116 | { 1117 | "cell_type": "markdown", 1118 | "metadata": {}, 1119 | "source": [ 1120 | "![PandasMerge](Pandas_Merge.png)" 1121 | ] 1122 | }, 1123 | { 1124 | "cell_type": "code", 1125 | "execution_count": 19, 1126 | "metadata": {}, 1127 | "outputs": [ 1128 | { 1129 | "data": { 1130 | "text/html": [ 1131 | "
\n", 1132 | "\n", 1145 | "\n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | "
First NameLast NameArrival DateindexExist
0CarlosSullivan2022-10-020.0both
1JeffreyGiles2022-04-251.0both
2MckenziePerkins2022-04-132.0both
3MaryMarshall2022-09-023.0both
4KathrynBurch2022-02-114.0both
..................
102NaNNaNNaT18.0right_only
103KevinElliott2022-12-1023.0right_only
104KathyOrtiz2022-08-1858.0right_only
105MichaelDavis2022-07-2687.0right_only
106DavidStout2022-12-03101.0right_only
\n", 1247 | "

107 rows × 5 columns

\n", 1248 | "
" 1249 | ], 1250 | "text/plain": [ 1251 | " First Name Last Name Arrival Date index Exist\n", 1252 | "0 Carlos Sullivan 2022-10-02 0.0 both\n", 1253 | "1 Jeffrey Giles 2022-04-25 1.0 both\n", 1254 | "2 Mckenzie Perkins 2022-04-13 2.0 both\n", 1255 | "3 Mary Marshall 2022-09-02 3.0 both\n", 1256 | "4 Kathryn Burch 2022-02-11 4.0 both\n", 1257 | ".. ... ... ... ... ...\n", 1258 | "102 NaN NaN NaT 18.0 right_only\n", 1259 | "103 Kevin Elliott 2022-12-10 23.0 right_only\n", 1260 | "104 Kathy Ortiz 2022-08-18 58.0 right_only\n", 1261 | "105 Michael Davis 2022-07-26 87.0 right_only\n", 1262 | "106 David Stout 2022-12-03 101.0 right_only\n", 1263 | "\n", 1264 | "[107 rows x 5 columns]" 1265 | ] 1266 | }, 1267 | "execution_count": 19, 1268 | "metadata": {}, 1269 | "output_type": "execute_result" 1270 | } 1271 | ], 1272 | "source": [ 1273 | "# Merge dataframes and add indicator column\n", 1274 | "df_diff = pd.merge(df_initial, df_update, how=\"outer\", indicator=\"Exist\")\n", 1275 | "df_diff" 1276 | ] 1277 | }, 1278 | { 1279 | "cell_type": "code", 1280 | "execution_count": 20, 1281 | "metadata": {}, 1282 | "outputs": [ 1283 | { 1284 | "data": { 1285 | "text/html": [ 1286 | "
\n", 1287 | "\n", 1300 | "\n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | "
First NameLast NameArrival DateindexExist
10TroyClay2022-05-12NaNleft_only
21KevinElliott2022-03-23NaNleft_only
56KathyOrtiz2022-02-01NaNleft_only
85MichaelDavis2022-10-24NaNleft_only
99DavidStout2022-07-09NaNleft_only
100PeterParker2022-02-118.0right_only
101TroiClay2022-05-1211.0right_only
102NaNNaNNaT18.0right_only
103KevinElliott2022-12-1023.0right_only
104KathyOrtiz2022-08-1858.0right_only
105MichaelDavis2022-07-2687.0right_only
106DavidStout2022-12-03101.0right_only
\n", 1410 | "
" 1411 | ], 1412 | "text/plain": [ 1413 | " First Name Last Name Arrival Date index Exist\n", 1414 | "10 Troy Clay 2022-05-12 NaN left_only\n", 1415 | "21 Kevin Elliott 2022-03-23 NaN left_only\n", 1416 | "56 Kathy Ortiz 2022-02-01 NaN left_only\n", 1417 | "85 Michael Davis 2022-10-24 NaN left_only\n", 1418 | "99 David Stout 2022-07-09 NaN left_only\n", 1419 | "100 Peter Parker 2022-02-11 8.0 right_only\n", 1420 | "101 Troi Clay 2022-05-12 11.0 right_only\n", 1421 | "102 NaN NaN NaT 18.0 right_only\n", 1422 | "103 Kevin Elliott 2022-12-10 23.0 right_only\n", 1423 | "104 Kathy Ortiz 2022-08-18 58.0 right_only\n", 1424 | "105 Michael Davis 2022-07-26 87.0 right_only\n", 1425 | "106 David Stout 2022-12-03 101.0 right_only" 1426 | ] 1427 | }, 1428 | "execution_count": 20, 1429 | "metadata": {}, 1430 | "output_type": "execute_result" 1431 | } 1432 | ], 1433 | "source": [ 1434 | "# Show only the difference\n", 1435 | "df_diff = df_diff.query(\"Exist != 'both'\")\n", 1436 | "df_diff" 1437 | ] 1438 | }, 1439 | { 1440 | "cell_type": "markdown", 1441 | "metadata": {}, 1442 | "source": [ 1443 | "### [BONUS] Highlight the difference 🔥" 1444 | ] 1445 | }, 1446 | { 1447 | "cell_type": "code", 1448 | "execution_count": 21, 1449 | "metadata": {}, 1450 | "outputs": [ 1451 | { 1452 | "data": { 1453 | "text/html": [ 1454 | "
\n", 1455 | "\n", 1468 | "\n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | "
First NameLast NameArrival DateindexExist
100PeterParker2022-02-118.0right_only
101TroiClay2022-05-1211.0right_only
102NaNNaNNaT18.0right_only
103KevinElliott2022-12-1023.0right_only
104KathyOrtiz2022-08-1858.0right_only
105MichaelDavis2022-07-2687.0right_only
106DavidStout2022-12-03101.0right_only
\n", 1538 | "
" 1539 | ], 1540 | "text/plain": [ 1541 | " First Name Last Name Arrival Date index Exist\n", 1542 | "100 Peter Parker 2022-02-11 8.0 right_only\n", 1543 | "101 Troi Clay 2022-05-12 11.0 right_only\n", 1544 | "102 NaN NaN NaT 18.0 right_only\n", 1545 | "103 Kevin Elliott 2022-12-10 23.0 right_only\n", 1546 | "104 Kathy Ortiz 2022-08-18 58.0 right_only\n", 1547 | "105 Michael Davis 2022-07-26 87.0 right_only\n", 1548 | "106 David Stout 2022-12-03 101.0 right_only" 1549 | ] 1550 | }, 1551 | "execution_count": 21, 1552 | "metadata": {}, 1553 | "output_type": "execute_result" 1554 | } 1555 | ], 1556 | "source": [ 1557 | "# Show only the data we want to highlight\n", 1558 | "df_highlight = df_diff.query(\"Exist == 'right_only'\")\n", 1559 | "df_highlight" 1560 | ] 1561 | }, 1562 | { 1563 | "cell_type": "code", 1564 | "execution_count": 22, 1565 | "metadata": {}, 1566 | "outputs": [ 1567 | { 1568 | "data": { 1569 | "text/plain": [ 1570 | "[8.0, 11.0, 18.0, 23.0, 58.0, 87.0, 101.0]" 1571 | ] 1572 | }, 1573 | "execution_count": 22, 1574 | "metadata": {}, 1575 | "output_type": "execute_result" 1576 | } 1577 | ], 1578 | "source": [ 1579 | "# Get the row numbers we want to highlight in Excel\n", 1580 | "highlight_rows = df_highlight['index'].tolist()\n", 1581 | "highlight_rows" 1582 | ] 1583 | }, 1584 | { 1585 | "cell_type": "code", 1586 | "execution_count": 23, 1587 | "metadata": {}, 1588 | "outputs": [ 1589 | { 1590 | "data": { 1591 | "text/plain": [ 1592 | "[8, 11, 18, 23, 58, 87, 101]" 1593 | ] 1594 | }, 1595 | "execution_count": 23, 1596 | "metadata": {}, 1597 | "output_type": "execute_result" 1598 | } 1599 | ], 1600 | "source": [ 1601 | "# Convert floats to integers\n", 1602 | "highlight_rows = [int(row) for row in highlight_rows]\n", 1603 | "highlight_rows" 1604 | ] 1605 | }, 1606 | { 1607 | "cell_type": "code", 1608 | "execution_count": 24, 1609 | "metadata": {}, 1610 | "outputs": [ 1611 | { 1612 | "data": { 1613 | "text/plain": [ 1614 | "[10, 13, 20, 25, 60, 89, 
103]" 1615 | ] 1616 | }, 1617 | "execution_count": 24, 1618 | "metadata": {}, 1619 | "output_type": "execute_result" 1620 | } 1621 | ], 1622 | "source": [ 1623 | "# pandas index starts at 0\n", 1624 | "# Excel data (w/o header) starts from row 2\n", 1625 | "first_row_in_excel = 2\n", 1626 | "\n", 1627 | "highlight_rows = [x + first_row_in_excel for x in highlight_rows]\n", 1628 | "highlight_rows" 1629 | ] 1630 | }, 1631 | { 1632 | "cell_type": "markdown", 1633 | "metadata": {}, 1634 | "source": [ 1635 | "#### Highlight the rows in Excel" 1636 | ] 1637 | }, 1638 | { 1639 | "cell_type": "code", 1640 | "execution_count": 25, 1641 | "metadata": {}, 1642 | "outputs": [ 1643 | { 1644 | "name": "stdout", 1645 | "output_type": "stream", 1646 | "text": [ 1647 | "Used Range: $A$1:$C$103\n" 1648 | ] 1649 | } 1650 | ], 1651 | "source": [ 1652 | "with xw.App(visible=False) as app:\n", 1653 | " updated_wb = app.books.open(updated_version)\n", 1654 | " updated_ws = updated_wb.sheets(1)\n", 1655 | " rng = updated_ws.used_range\n", 1656 | "\n", 1657 | " print(f\"Used Range: {rng.address}\")\n", 1658 | "\n", 1659 | " # Highlight the rows in Excel\n", 1660 | " for row in rng.rows:\n", 1661 | " if row.row in highlight_rows:\n", 1662 | " row.color = (255, 71, 76) # light red\n", 1663 | "\n", 1664 | " updated_wb.save(Path.cwd() / \"Different_Shape\" / \"Difference_Highlighted.xlsx\")" 1665 | ] 1666 | } 1667 | ], 1668 | "metadata": { 1669 | "kernelspec": { 1670 | "display_name": "Python 3", 1671 | "language": "python", 1672 | "name": "python3" 1673 | }, 1674 | "language_info": { 1675 | "codemirror_mode": { 1676 | "name": "ipython", 1677 | "version": 3 1678 | }, 1679 | "file_extension": ".py", 1680 | "mimetype": "text/x-python", 1681 | "name": "python", 1682 | "nbconvert_exporter": "python", 1683 | "pygments_lexer": "ipython3", 1684 | "version": "3.8.5" 1685 | } 1686 | }, 1687 | "nbformat": 4, 1688 | "nbformat_minor": 4 1689 | } 1690 | 
-------------------------------------------------------------------------------- /Different_Shape/Arrival_Dates.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sven-Bo/compare-two-excel-sheets-python/7619e175d835b74739b77e08521413bd1c325d4b/Different_Shape/Arrival_Dates.xlsx -------------------------------------------------------------------------------- /Different_Shape/Arrival_Dates_Final.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sven-Bo/compare-two-excel-sheets-python/7619e175d835b74739b77e08521413bd1c325d4b/Different_Shape/Arrival_Dates_Final.xlsx -------------------------------------------------------------------------------- /Different_Shape/Difference_Highlighted.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sven-Bo/compare-two-excel-sheets-python/7619e175d835b74739b77e08521413bd1c325d4b/Different_Shape/Difference_Highlighted.xlsx -------------------------------------------------------------------------------- /Pandas_Merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sven-Bo/compare-two-excel-sheets-python/7619e175d835b74739b77e08521413bd1c325d4b/Pandas_Merge.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Compare Two Excel Sheets with Different Number of Rows and Find Differences using Python 2 | 3 | In this video, I will show you how to use Python to compare two Excel sheets with different numbers of rows and find the differences between them. 
4 | 5 | ## Video Tutorial 6 | [![YouTube Video](https://img.youtube.com/vi/D7dEQ9LI-8A/0.jpg)](https://youtu.be/D7dEQ9LI-8A) 7 | 8 | 9 | ## Requirements 10 | ``` 11 | openpyxl==3.0.9 12 | pandas==1.3.5 13 | xlwings==0.25.3 14 | ``` 15 | 16 | 17 | ## 🤓 Check Out My Excel Add-ins 18 | I've developed some handy Excel add-ins that you might find useful: 19 | 20 | - 📊 **[Dashboard Add-in](https://pythonandvba.com/grafly)**: Easily create interactive and visually appealing dashboards. 21 | - 🎨 **[Cartoon Charts Add-In](https://pythonandvba.com/cuteplots)**: Create engaging and fun cartoon-style charts. 22 | - 🤪 **[Emoji Add-in](https://pythonandvba.com/emojify)**: Add a touch of fun to your spreadsheets with emojis. 23 | - 🛠️ **[MyToolBelt Add-in](https://pythonandvba.com/mytoolbelt)**: A versatile toolbelt for Excel, featuring: 24 | - Creation of Pandas DataFrames and Jupyter Notebooks from Excel ranges 25 | - ChatGPT integration for advanced data analysis 26 | - And much more! 27 | 28 | 29 | 30 | ## 🤝 Connect with Me 31 | - 📺 **YouTube:** [CodingIsFun](https://youtube.com/c/CodingIsFun) 32 | - 🌐 **Website:** [PythonAndVBA](https://pythonandvba.com) 33 | - 💬 **Discord:** [Join the Community](https://pythonandvba.com/discord) 34 | - 💼 **LinkedIn:** [Sven Bosau](https://www.linkedin.com/in/sven-bosau/) 35 | - 📸 **Instagram:** [sven_bosau](https://www.instagram.com/sven_bosau/) 36 | 37 | ## ☕ Support 38 | If you appreciate the project and wish to encourage its continued development, consider [supporting my work](https://pythonandvba.com/coffee-donation). 39 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://pythonandvba.com/coffee-donation) 40 | 41 | ## Feedback & Collaboration 42 | For feedback, suggestions, or potential collaboration opportunities, reach out at contact@pythonandvba.com. 
43 | ![Logo](https://www.pythonandvba.com/banner-img) 44 | -------------------------------------------------------------------------------- /Same_Shape/Arrival_Dates.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sven-Bo/compare-two-excel-sheets-python/7619e175d835b74739b77e08521413bd1c325d4b/Same_Shape/Arrival_Dates.xlsx -------------------------------------------------------------------------------- /Same_Shape/Arrival_Dates_Final.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sven-Bo/compare-two-excel-sheets-python/7619e175d835b74739b77e08521413bd1c325d4b/Same_Shape/Arrival_Dates_Final.xlsx -------------------------------------------------------------------------------- /Same_Shape/Difference.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sven-Bo/compare-two-excel-sheets-python/7619e175d835b74739b77e08521413bd1c325d4b/Same_Shape/Difference.xlsx -------------------------------------------------------------------------------- /Same_Shape/Difference_Highlighted.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sven-Bo/compare-two-excel-sheets-python/7619e175d835b74739b77e08521413bd1c325d4b/Same_Shape/Difference_Highlighted.xlsx -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openpyxl==3.0.9 2 | pandas==1.3.5 3 | xlwings==0.25.3 --------------------------------------------------------------------------------