├── README.md └── mynotebook.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # my-first-notebook -------------------------------------------------------------------------------- /mynotebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "id": "OOC-egEWD0_J" 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd\n", 26 | "import numpy as np\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "import seaborn as sns" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "source": [ 34 | "from google.colab import drive\n", 35 | "drive.mount('/content/drive')" 36 | ], 37 | "metadata": { 38 | "id": "KEJv3nICISR4" 39 | }, 40 | "execution_count": null, 41 | "outputs": [] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "source": [ 46 | "dikw = pd.read_csv('/content/drive/MyDrive/Classroom/Advanced Data Analytics/Nasratullah Shafiq - DIKW_HW_Activity.csv')" 47 | ], 48 | "metadata": { 49 | "id": "gyy4feKQOlVN" 50 | }, 51 | "execution_count": null, 52 | "outputs": [] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "source": [ 57 | "dikw" 58 | ], 59 | "metadata": { 60 | "colab": { 61 | "base_uri": "https://localhost:8080/", 62 | "height": 243 63 | }, 64 | "id": "SD8aP7qe4QQH", 65 | "outputId": "836da8c0-d7f9-408c-db28-374f2f0c8562" 66 | }, 67 | "execution_count": null, 68 | "outputs": [ 69 | { 70 | "output_type": "execute_result", 71 | "data": { 72 | "text/plain": [ 73 | " EmployeeID EmployeeName Department JoiningDate Salary Gender \\\n", 74 | "0 E001 James Wilson Sales 2022-01-15 50000 Male \n", 75 | "1 E002 Maria Garcia Engineering 2021-03-12 75000 Female \n", 76 | "2 E003 Robert Brown HR 2020-06-23 60000 Male \n", 77 | "3 E004 Emily Davis Engineering 2021-11-05 72000 Female \n", 78 | "4 E005 Michael Johnson Marketing 2022-05-20 65000 Male \n", 79 | "\n", 80 | " Country PerformanceScore JobSatisfaction \n", 81 | "0 USA 85 4 \n", 82 | "1 UK 92 5 \n", 83 | "2 Canada 88 4 \n", 84 | "3 USA 79 3 \n", 85 | "4 Australia 83 4 " 86 | ], 87 | "text/html": [ 88 | "\n", 89 | "
| \n", 108 | " | EmployeeID | \n", 109 | "EmployeeName | \n", 110 | "Department | \n", 111 | "JoiningDate | \n", 112 | "Salary | \n", 113 | "Gender | \n", 114 | "Country | \n", 115 | "PerformanceScore | \n", 116 | "JobSatisfaction | \n", 117 | "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", 122 | "E001 | \n", 123 | "James Wilson | \n", 124 | "Sales | \n", 125 | "2022-01-15 | \n", 126 | "50000 | \n", 127 | "Male | \n", 128 | "USA | \n", 129 | "85 | \n", 130 | "4 | \n", 131 | "
| 1 | \n", 134 | "E002 | \n", 135 | "Maria Garcia | \n", 136 | "Engineering | \n", 137 | "2021-03-12 | \n", 138 | "75000 | \n", 139 | "Female | \n", 140 | "UK | \n", 141 | "92 | \n", 142 | "5 | \n", 143 | "
| 2 | \n", 146 | "E003 | \n", 147 | "Robert Brown | \n", 148 | "HR | \n", 149 | "2020-06-23 | \n", 150 | "60000 | \n", 151 | "Male | \n", 152 | "Canada | \n", 153 | "88 | \n", 154 | "4 | \n", 155 | "
| 3 | \n", 158 | "E004 | \n", 159 | "Emily Davis | \n", 160 | "Engineering | \n", 161 | "2021-11-05 | \n", 162 | "72000 | \n", 163 | "Female | \n", 164 | "USA | \n", 165 | "79 | \n", 166 | "3 | \n", 167 | "
| 4 | \n", 170 | "E005 | \n", 171 | "Michael Johnson | \n", 172 | "Marketing | \n", 173 | "2022-05-20 | \n", 174 | "65000 | \n", 175 | "Male | \n", 176 | "Australia | \n", 177 | "83 | \n", 178 | "4 | \n", 179 | "
pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None) -> None
Print a concise summary of a DataFrame.\n", 478 | "\n", 479 | "This method prints information about a DataFrame including\n", 480 | "the index dtype and columns, non-null values and memory usage.\n", 481 | "\n", 482 | "Parameters\n", 483 | "----------\n", 484 | "verbose : bool, optional\n", 485 | " Whether to print the full summary. By default, the setting in\n", 486 | " ``pandas.options.display.max_info_columns`` is followed.\n", 487 | "buf : writable buffer, defaults to sys.stdout\n", 488 | " Where to send the output. By default, the output is printed to\n", 489 | " sys.stdout. Pass a writable buffer if you need to further process\n", 490 | " the output.\n", 491 | "max_cols : int, optional\n", 492 | " When to switch from the verbose to the truncated output. If the\n", 493 | " DataFrame has more than `max_cols` columns, the truncated output\n", 494 | " is used. By default, the setting in\n", 495 | " ``pandas.options.display.max_info_columns`` is used.\n", 496 | "memory_usage : bool, str, optional\n", 497 | " Specifies whether total memory usage of the DataFrame\n", 498 | " elements (including the index) should be displayed. By default,\n", 499 | " this follows the ``pandas.options.display.memory_usage`` setting.\n", 500 | "\n", 501 | " True always show memory usage. False never shows memory usage.\n", 502 | " A value of 'deep' is equivalent to "True with deep introspection".\n", 503 | " Memory usage is shown in human-readable units (base-2\n", 504 | " representation). Without deep introspection a memory estimation is\n", 505 | " made based in column dtype and number of rows assuming values\n", 506 | " consume the same memory amount for corresponding dtypes. With deep\n", 507 | " memory introspection, a real memory usage calculation is performed\n", 508 | " at the cost of computational resources. See the\n", 509 | " :ref:`Frequently Asked Questions <df-memory-usage>` for more\n", 510 | " details.\n", 511 | "show_counts : bool, optional\n", 512 | " Whether to show the non-null counts. By default, this is shown\n", 513 | " only if the DataFrame is smaller than\n", 514 | " ``pandas.options.display.max_info_rows`` and\n", 515 | " ``pandas.options.display.max_info_columns``. A value of True always\n", 516 | " shows the counts, and False never shows the counts.\n", 517 | "\n", 518 | "Returns\n", 519 | "-------\n", 520 | "None\n", 521 | " This method prints a summary of a DataFrame and returns None.\n", 522 | "\n", 523 | "See Also\n", 524 | "--------\n", 525 | "DataFrame.describe: Generate descriptive statistics of DataFrame\n", 526 | " columns.\n", 527 | "DataFrame.memory_usage: Memory usage of DataFrame columns.\n", 528 | "\n", 529 | "Examples\n", 530 | "--------\n", 531 | ">>> int_values = [1, 2, 3, 4, 5]\n", 532 | ">>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']\n", 533 | ">>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n", 534 | ">>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,\n", 535 | "... "float_col": float_values})\n", 536 | ">>> df\n", 537 | " int_col text_col float_col\n", 538 | "0 1 alpha 0.00\n", 539 | "1 2 beta 0.25\n", 540 | "2 3 gamma 0.50\n", 541 | "3 4 delta 0.75\n", 542 | "4 5 epsilon 1.00\n", 543 | "\n", 544 | "Prints information of all columns:\n", 545 | "\n", 546 | ">>> df.info(verbose=True)\n", 547 | "<class 'pandas.core.frame.DataFrame'>\n", 548 | "RangeIndex: 5 entries, 0 to 4\n", 549 | "Data columns (total 3 columns):\n", 550 | " # Column Non-Null Count Dtype\n", 551 | "--- ------ -------------- -----\n", 552 | " 0 int_col 5 non-null int64\n", 553 | " 1 text_col 5 non-null object\n", 554 | " 2 float_col 5 non-null float64\n", 555 | "dtypes: float64(1), int64(1), object(1)\n", 556 | "memory usage: 248.0+ bytes\n", 557 | "\n", 558 | "Prints a summary of columns count and its dtypes but not per column\n", 559 | "information:\n", 560 | "\n", 561 | ">>> df.info(verbose=False)\n", 562 | "<class 'pandas.core.frame.DataFrame'>\n", 563 | "RangeIndex: 5 entries, 0 to 4\n", 564 | "Columns: 3 entries, int_col to float_col\n", 565 | "dtypes: float64(1), int64(1), object(1)\n", 566 | "memory usage: 248.0+ bytes\n", 567 | "\n", 568 | "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n", 569 | "buffer content and writes to a text file:\n", 570 | "\n", 571 | ">>> import io\n", 572 | ">>> buffer = io.StringIO()\n", 573 | ">>> df.info(buf=buffer)\n", 574 | ">>> s = buffer.getvalue()\n", 575 | ">>> with open("df_info.txt", "w",\n", 576 | "... encoding="utf-8") as f: # doctest: +SKIP\n", 577 | "... f.write(s)\n", 578 | "260\n", 579 | "\n", 580 | "The `memory_usage` parameter allows deep introspection mode, specially\n", 581 | "useful for big DataFrames and fine-tune memory optimization:\n", 582 | "\n", 583 | ">>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", 584 | ">>> df = pd.DataFrame({\n", 585 | "... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", 586 | "... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", 587 | "... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", 588 | "... })\n", 589 | ">>> df.info()\n", 590 | "<class 'pandas.core.frame.DataFrame'>\n", 591 | "RangeIndex: 1000000 entries, 0 to 999999\n", 592 | "Data columns (total 3 columns):\n", 593 | " # Column Non-Null Count Dtype\n", 594 | "--- ------ -------------- -----\n", 595 | " 0 column_1 1000000 non-null object\n", 596 | " 1 column_2 1000000 non-null object\n", 597 | " 2 column_3 1000000 non-null object\n", 598 | "dtypes: object(3)\n", 599 | "memory usage: 22.9+ MB\n", 600 | "\n", 601 | ">>> df.info(memory_usage='deep')\n", 602 | "<class 'pandas.core.frame.DataFrame'>\n", 603 | "RangeIndex: 1000000 entries, 0 to 999999\n", 604 | "Data columns (total 3 columns):\n", 605 | " # Column Non-Null Count Dtype\n", 606 | "--- ------ -------------- -----\n", 607 | " 0 column_1 1000000 non-null object\n", 608 | " 1 column_2 1000000 non-null object\n", 609 | " 2 column_3 1000000 non-null object\n", 610 | "dtypes: object(3)\n", 611 | "memory usage: 165.9 MB\n", 612 | " \n", 631 | "