├── .DS_Store
├── Life Quest.pdf
├── Police Ordinance.pdf
├── Dunn Transportation.pdf
├── Simple Python Dashboard Part 1
    └── Goals.csv
├── LICENSE
├── Pandas_Profiling.ipynb
├── Query_Big_Query_from_Python_Notebook.ipynb
├── LOF.ipynb
├── WordCloud.ipynb
├── Query.ipynb
├── Categorical_Encoding.ipynb
├── Binning.ipynb
├── Univariate_Outlier_Detection.ipynb
├── OpenAI_API_in_Python.ipynb
├── Working_with_APIs_Covid.ipynb
├── Sentiment_Analysis_OpenAI_API.ipynb
├── Web_Scraping_from_TrustPilot_v3.ipynb
├── Data
    ├── heart.csv
    └── euro2016goals.csv
├── Pull_all_Comments_and_Replies_for_YouTube_Playlists.ipynb
├── Dummy_Data.ipynb
├── Lambda.ipynb
├── YouTubeComments.ipynb
├── Apex_Connect.ipynb
└── Web_Scraping_from_TrustPilot.ipynb


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/analyticswithadam/Python/HEAD/.DS_Store


--------------------------------------------------------------------------------
/Life Quest.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/analyticswithadam/Python/HEAD/Life Quest.pdf


--------------------------------------------------------------------------------
/Police Ordinance.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/analyticswithadam/Python/HEAD/Police Ordinance.pdf


--------------------------------------------------------------------------------
/Dunn Transportation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/analyticswithadam/Python/HEAD/Dunn Transportation.pdf


--------------------------------------------------------------------------------
/Simple Python Dashboard Part 1/Goals.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/analyticswithadam/Python/HEAD/Simple Python Dashboard Part 1/Goals.csv


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 SuperDataWorld
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Pandas_Profiling.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Pandas Profiling.ipynb",
  7 |       "private_outputs": true,
  8 |       "provenance": [],
  9 |       "collapsed_sections": [],
 10 |       "include_colab_link": true
 11 |     },
 12 |     "kernelspec": {
 13 |       "name": "python3",
 14 |       "display_name": "Python 3"
 15 |     },
 16 |     "language_info": {
 17 |       "name": "python"
 18 |     },
 19 |     "accelerator": "GPU"
 20 |   },
 21 |   "cells": [
 22 |     {
 23 |       "cell_type": "markdown",
 24 |       "metadata": {
 25 |         "id": "view-in-github",
 26 |         "colab_type": "text"
 27 |       },
 28 |       "source": [
 29 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/Pandas_Profiling.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 30 |       ]
 31 |     },
 32 |     {
 33 |       "cell_type": "markdown",
 34 |       "metadata": {
 35 |         "id": "C5he_jAvzM_Q"
 36 |       },
 37 |       "source": [
 38 |         "## 1. Import Pandas, Pyplot and Read Data\n",
 39 |         "\n"
 40 |       ]
 41 |     },
 42 |     {
 43 |       "cell_type": "code",
 44 |       "metadata": {
 45 |         "id": "olx6o0tuRFVm"
 46 |       },
 47 |       "source": [
 48 |         "!pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip"
 49 |       ],
 50 |       "execution_count": null,
 51 |       "outputs": []
 52 |     },
 53 |     {
 54 |       "cell_type": "code",
 55 |       "metadata": {
 56 |         "id": "MDfXkCh2zD4v"
 57 |       },
 58 |       "source": [
 59 |         "import pandas as pd\n",
 60 |         "import matplotlib.pyplot as plt\n",
 61 |         "from pandas_profiling import ProfileReport\n",
 62 |         "%matplotlib inline\n",
 63 |         "pd.set_option('display.max_colwidth', None)"
 64 |       ],
 65 |       "execution_count": null,
 66 |       "outputs": []
 67 |     },
 68 |     {
 69 |       "cell_type": "code",
 70 |       "metadata": {
 71 |         "id": "0tA2cpynEzLU"
 72 |       },
 73 |       "source": [
 74 |         "df = pd.read_csv('heart.csv')"
 75 |       ],
 76 |       "execution_count": null,
 77 |       "outputs": []
 78 |     },
 79 |     {
 80 |       "cell_type": "markdown",
 81 |       "metadata": {
 82 |         "id": "hG6wXb4KO8Lt"
 83 |       },
 84 |       "source": [
 85 |         "## 2. Exploratory Data Analysis \n",
 86 |         "\n",
 87 |         "---"
 88 |       ]
 89 |     },
 90 |     {
 91 |       "cell_type": "code",
 92 |       "metadata": {
 93 |         "id": "2tz3KKg3Qseu"
 94 |       },
 95 |       "source": [
 96 |         "# Pandas profiling report\n",
 97 |         "profile = ProfileReport(df, title='Heart Data', explorative=True)"
 98 |       ],
 99 |       "execution_count": null,
100 |       "outputs": []
101 |     },
102 |     {
103 |       "cell_type": "code",
104 |       "metadata": {
105 |         "id": "ARiCWwedUzBd"
106 |       },
107 |       "source": [
108 |         "# For notebook exploration\n",
109 |         "profile.to_notebook_iframe()"
110 |       ],
111 |       "execution_count": null,
112 |       "outputs": []
113 |     },
114 |     {
115 |       "cell_type": "code",
116 |       "metadata": {
117 |         "id": "A8xQo0RMWJDy"
118 |       },
119 |       "source": [
120 |         "# Export the analysis results to an HTML file for sharing with a wider audience, including non-Jupyter users.\n",
121 |         "profile.to_file('Heart Data.html')"
122 |       ],
123 |       "execution_count": null,
124 |       "outputs": []
125 |     }
126 |   ]
127 | }


--------------------------------------------------------------------------------
/Query_Big_Query_from_Python_Notebook.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyOtdIzYDE8gUyle84xKEEeT",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/analyticswithadam/Python/blob/main/Query_Big_Query_from_Python_Notebook.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "source": [
 32 |         "!pip install --upgrade google-cloud-bigquery"
 33 |       ],
 34 |       "metadata": {
 35 |         "id": "n4LvC5cZK3x2"
 36 |       },
 37 |       "execution_count": null,
 38 |       "outputs": []
 39 |     },
 40 |     {
 41 |       "cell_type": "code",
 42 |       "source": [
 43 |         "from google.colab import auth\n",
 44 |         "auth.authenticate_user()\n",
 45 |         "print('Authenticated')"
 46 |       ],
 47 |       "metadata": {
 48 |         "id": "mrdyqZ1CKTcD"
 49 |       },
 50 |       "execution_count": null,
 51 |       "outputs": []
 52 |     },
 53 |     {
 54 |       "cell_type": "code",
 55 |       "source": [
 56 |         "# Set your Google Cloud Project ID\n",
 57 |         "project_id = 'sqlcourse-352110'"
 58 |       ],
 59 |       "metadata": {
 60 |         "id": "z3T36Uz5O1gS"
 61 |       },
 62 |       "execution_count": null,
 63 |       "outputs": []
 64 |     },
 65 |     {
 66 |       "cell_type": "code",
 67 |       "source": [
 68 |         "from google.cloud import bigquery\n",
 69 |         "import pandas as pd\n",
 70 |         "\n",
 71 |         "# Create a BigQuery client\n",
 72 |         "client = bigquery.Client(project=project_id)"
 73 |       ],
 74 |       "metadata": {
 75 |         "id": "A842uhabLhK1"
 76 |       },
 77 |       "execution_count": null,
 78 |       "outputs": []
 79 |     },
 80 |     {
 81 |       "cell_type": "code",
 82 |       "source": [
 83 |         "query = \"\"\"\n",
 84 |         "          SELECT name, SUM(number) as count\n",
 85 |         "          FROM `bigquery-public-data.usa_names.usa_1910_current`\n",
 86 |         "          GROUP BY name\n",
 87 |         "          ORDER BY count DESC\n",
 88 |         "          LIMIT 10\n",
 89 |         "        \"\"\"\n",
 90 |         "\n",
 91 |         "query_job = client.query(query)"
 92 |       ],
 93 |       "metadata": {
 94 |         "id": "7wObBCmuOQWW"
 95 |       },
 96 |       "execution_count": null,
 97 |       "outputs": []
 98 |     },
 99 |     {
100 |       "cell_type": "code",
101 |       "source": [
102 |         "df = query_job.to_dataframe()\n",
103 |         "df"
104 |       ],
105 |       "metadata": {
106 |         "id": "8DKwm7WyOTqY"
107 |       },
108 |       "execution_count": null,
109 |       "outputs": []
110 |     },
111 |     {
112 |       "cell_type": "code",
113 |       "source": [
114 |         "%%bigquery df --project sqlcourse-352110\n",
115 |         "SELECT name, SUM(number) as count\n",
116 |         "          FROM `bigquery-public-data.usa_names.usa_1910_current`\n",
117 |         "          GROUP BY name\n",
118 |         "          ORDER BY count DESC\n",
119 |         "          LIMIT 10"
120 |       ],
121 |       "metadata": {
122 |         "id": "B1Lgc64LRW_V"
123 |       },
124 |       "execution_count": null,
125 |       "outputs": []
126 |     },
127 |     {
128 |       "cell_type": "code",
129 |       "source": [
130 |         "df"
131 |       ],
132 |       "metadata": {
133 |         "id": "GMroggFISQsV"
134 |       },
135 |       "execution_count": null,
136 |       "outputs": []
137 |     }
138 |   ]
139 | }


--------------------------------------------------------------------------------
/LOF.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "LOF.ipynb",
  7 |       "provenance": [],
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/LOF.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "source": [
 32 |         "import random\n",
 33 |         "import pandas as pd\n",
 34 |         "\n",
 35 |         "pct = [.01,.08,.12]\n",
 36 |         "amounts = [1000, 2000, 3000]\n",
 37 |         "charges = pd.DataFrame()\n",
 38 |         "\n",
 39 |         "for i in range(0, 1000):\n",
 40 |         "  amount = random.choice(amounts) * (random.uniform(.95, 1.05))\n",
 41 |         "  bank_charge = amount * .04 * (random.uniform(.95, 1.05))\n",
 42 |         "  linedict = {'Amount': [amount], 'Charge':[bank_charge]}\n",
 43 |         "  line = pd.DataFrame(linedict)\n",
 44 |         "  charges = pd.concat([charges, line])\n",
 45 |         "\n",
 46 |         "for i in range(0, 10):\n",
 47 |         "  amount = random.choice(amounts) * (random.uniform(.95, 1.05))\n",
 48 |         "  bank_charge = amount * random.choice(pct) * (random.uniform(.95, 1.05))\n",
 49 |         "  linedict = {'Amount': [amount], 'Charge':[bank_charge]}\n",
 50 |         "  line = pd.DataFrame(linedict)\n",
 51 |         "  charges = pd.concat([charges, line])"
 52 |       ],
 53 |       "metadata": {
 54 |         "id": "xlnPkGURlaC1"
 55 |       },
 56 |       "execution_count": null,
 57 |       "outputs": []
 58 |     },
 59 |     {
 60 |       "cell_type": "code",
 61 |       "source": [
 62 |         "charges.head(10)"
 63 |       ],
 64 |       "metadata": {
 65 |         "id": "pRt8k3PRt5yN"
 66 |       },
 67 |       "execution_count": null,
 68 |       "outputs": []
 69 |     },
 70 |     {
 71 |       "cell_type": "code",
 72 |       "source": [
 73 |         "charges.tail(10)"
 74 |       ],
 75 |       "metadata": {
 76 |         "id": "AOe4Tn6qhUeP"
 77 |       },
 78 |       "execution_count": null,
 79 |       "outputs": []
 80 |     },
 81 |     {
 82 |       "cell_type": "code",
 83 |       "source": [
 84 |         "import numpy as np\n",
 85 |         "from sklearn.neighbors import LocalOutlierFactor\n",
 86 |         "clf = LocalOutlierFactor(n_neighbors=30)\n",
 87 |         "normalized_df=(charges-charges.mean())/charges.std()\n",
 88 |         "\n",
 89 |         "clf.fit_predict(normalized_df)\n",
 90 |         "results = clf.negative_outlier_factor_\n",
 91 |         "charges['LOF'] = results.tolist()\n",
 92 |         "charges['PCT'] = charges['Charge'] / charges['Amount']"
 93 |       ],
 94 |       "metadata": {
 95 |         "id": "PsljfQOGvjfq"
 96 |       },
 97 |       "execution_count": null,
 98 |       "outputs": []
 99 |     },
100 |     {
101 |       "cell_type": "code",
102 |       "source": [
103 |         "import matplotlib.pyplot as plt\n",
104 |         "plt.scatter(charges.Amount, charges.Charge, c='black', s=charges.LOF * -1,label=\"Data Points\")\n",
105 |         "radius = (results.max() - results) / (results.max() - results.min())\n",
106 |         "plt.scatter(charges.Amount, charges.Charge, s=500 * radius,edgecolors=\"r\",facecolors=\"none\",label=\"Outlier scores\",)\n",
107 |         "legend = plt.legend(loc=\"upper left\")\n",
108 |         "legend.legendHandles[0]._sizes = [10]\n",
109 |         "legend.legendHandles[1]._sizes = [20]\n",
110 |         "plt.show()\n"
111 |       ],
112 |       "metadata": {
113 |         "id": "gJoBLkrLlZ61"
114 |       },
115 |       "execution_count": null,
116 |       "outputs": []
117 |     },
118 |     {
119 |       "cell_type": "code",
120 |       "source": [
121 |         "charges.query('LOF < -1.5')"
122 |       ],
123 |       "metadata": {
124 |         "id": "2vD1dpO3lCyC"
125 |       },
126 |       "execution_count": null,
127 |       "outputs": []
128 |     }
129 |   ]
130 | }


--------------------------------------------------------------------------------
/WordCloud.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyMV4dzU5X0HHevhQb+jHILU",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/WordCloud.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "execution_count": null,
 32 |       "metadata": {
 33 |         "id": "E3nWmsQ1jyNE"
 34 |       },
 35 |       "outputs": [],
 36 |       "source": [
 37 |         "# Install wordcloud if needed\n",
 38 |         "!pip install wordcloud\n",
 39 |         "\n",
 40 |         "# Imports \n",
 41 |         "from wordcloud import WordCloud, STOPWORDS\n",
 42 |         "import pandas as pd\n",
 43 |         "import matplotlib.pyplot as plt\n",
 44 |         "\n",
 45 |         "# URL for colormaps \n",
 46 |         "# https://matplotlib.org/stable/tutorials/colors/colormaps.html\n"
 47 |       ]
 48 |     },
 49 |     {
 50 |       "cell_type": "code",
 51 |       "source": [
 52 |         "# Import Data\n",
 53 |         "df = pd.read_csv('https://raw.githubusercontent.com/SuperDataWorld/Python/main/REF%20TWEETS.csv')"
 54 |       ],
 55 |       "metadata": {
 56 |         "id": "dle1j0ICkSqB"
 57 |       },
 58 |       "execution_count": null,
 59 |       "outputs": []
 60 |     },
 61 |     {
 62 |       "cell_type": "code",
 63 |       "source": [
 64 |         "# Combine the text of all tweets into a single string\n",
 65 |         "text = df['Tweet'].str.cat(sep=' ')"
 66 |       ],
 67 |       "metadata": {
 68 |         "id": "yP-gfk3MmUDU"
 69 |       },
 70 |       "execution_count": null,
 71 |       "outputs": []
 72 |     },
 73 |     {
 74 |       "cell_type": "code",
 75 |       "source": [
 76 |         "# Build the stopword set: the WordCloud defaults plus any extra words listed in new_words\n",
 77 |         "stopwords= set(STOPWORDS)\n",
 78 |         "new_words = []\n",
 79 |         "new_stopwords=stopwords.union(new_words)"
 80 |       ],
 81 |       "metadata": {
 82 |         "id": "ORtDDoURkSxT"
 83 |       },
 84 |       "execution_count": null,
 85 |       "outputs": []
 86 |     },
 87 |     {
 88 |       "cell_type": "code",
 89 |       "source": [
 90 |         "# Size of Word Cloud\n",
 91 |         "plt.rcParams[\"figure.figsize\"] = (10,10)\n",
 92 |         "\n",
 93 |         "# Make Wordcloud\n",
 94 |         "wordcloud = WordCloud(max_font_size=50, max_words=50, background_color=\"white\",stopwords=new_stopwords, colormap='flag').generate(text)\n",
 95 |         "\n",
 96 |         "# Plot Wordcloud\n",
 97 |         "plt.plot()\n",
 98 |         "plt.imshow(wordcloud, interpolation=\"bilinear\")\n",
 99 |         "plt.axis(\"off\")\n",
100 |         "plt.show()"
101 |       ],
102 |       "metadata": {
103 |         "id": "OMmb3X0AkSuI"
104 |       },
105 |       "execution_count": null,
106 |       "outputs": []
107 |     },
108 |     {
109 |       "cell_type": "code",
110 |       "source": [
111 |         "# Combine the text of the tweets labelled with Negative sentiment\n",
112 |         "text = df[df['Sentiment']=='Negative']['Tweet'].str.cat(sep=' ')"
113 |       ],
114 |       "metadata": {
115 |         "id": "0czKVYskotsY"
116 |       },
117 |       "execution_count": null,
118 |       "outputs": []
119 |     },
120 |     {
121 |       "cell_type": "code",
122 |       "source": [
123 |         "wordcloud = WordCloud(max_font_size=50, max_words=50, background_color=\"white\",stopwords=new_stopwords, colormap='flag').generate(text)\n",
124 |         "plt.plot()\n",
125 |         "plt.imshow(wordcloud, interpolation=\"bilinear\")\n",
126 |         "plt.axis(\"off\")\n",
127 |         "plt.show()"
128 |       ],
129 |       "metadata": {
130 |         "id": "MjAp8u7Oo9xB"
131 |       },
132 |       "execution_count": null,
133 |       "outputs": []
134 |     },
135 |     {
136 |       "cell_type": "code",
137 |       "source": [],
138 |       "metadata": {
139 |         "id": "uJ0fgtW0pDWU"
140 |       },
141 |       "execution_count": null,
142 |       "outputs": []
143 |     }
144 |   ]
145 | }


--------------------------------------------------------------------------------
/Query.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Query.ipynb",
  7 |       "provenance": [],
  8 |       "authorship_tag": "ABX9TyM9el45rKwsHbx33dJ/wsYf",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     }
 18 |   },
 19 |   "cells": [
 20 |     {
 21 |       "cell_type": "markdown",
 22 |       "metadata": {
 23 |         "id": "view-in-github",
 24 |         "colab_type": "text"
 25 |       },
 26 |       "source": [
 27 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/Query.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 28 |       ]
 29 |     },
 30 |     {
 31 |       "cell_type": "markdown",
 32 |       "source": [
 33 |         "# Data Import"
 34 |       ],
 35 |       "metadata": {
 36 |         "id": "dJC6Cu21Xc1Z"
 37 |       }
 38 |     },
 39 |     {
 40 |       "cell_type": "code",
 41 |       "source": [
 42 |         "import pandas as pd\n",
 43 |         "df = pd.read_csv('https://raw.githubusercontent.com/SuperDataWorld/Python/main/Data/euro2016goals.csv')"
 44 |       ],
 45 |       "metadata": {
 46 |         "id": "kS-UUCZLXgrp"
 47 |       },
 48 |       "execution_count": null,
 49 |       "outputs": []
 50 |     },
 51 |     {
 52 |       "cell_type": "code",
 53 |       "source": [
 54 |         "df.head()"
 55 |       ],
 56 |       "metadata": {
 57 |         "id": "HCFGf99qZiOf"
 58 |       },
 59 |       "execution_count": null,
 60 |       "outputs": []
 61 |     },
 62 |     {
 63 |       "cell_type": "code",
 64 |       "source": [
 65 |         "df = df[['FULL_NAME','AGE','POSITION','GOALS_OVERALL','ASSISTS_OVERALL','YELLOW_CARDS_OVERALL','PENALTY_GOALS','RED_CARDS_OVERALL']]\n",
 66 |         "df.columns = ['Name','AGE','Pos','Goals','Assists','YC','Pens','RC']\n",
 67 |         "df.head()"
 68 |       ],
 69 |       "metadata": {
 70 |         "id": "Jt0Uuh1VXdvv"
 71 |       },
 72 |       "execution_count": null,
 73 |       "outputs": []
 74 |     },
 75 |     {
 76 |       "cell_type": "markdown",
 77 |       "source": [
 78 |         "# Traditional Querying"
 79 |       ],
 80 |       "metadata": {
 81 |         "id": "rp3EfgqlXOBO"
 82 |       }
 83 |     },
 84 |     {
 85 |       "cell_type": "code",
 86 |       "source": [
 87 |         "df.AGE > 30"
 88 |       ],
 89 |       "metadata": {
 90 |         "id": "_yOzgEOYaEiD"
 91 |       },
 92 |       "execution_count": null,
 93 |       "outputs": []
 94 |     },
 95 |     {
 96 |       "cell_type": "code",
 97 |       "source": [
 98 |         "df['AGE'] > 30"
 99 |       ],
100 |       "metadata": {
101 |         "id": "EFUsTHkWcmJt"
102 |       },
103 |       "execution_count": null,
104 |       "outputs": []
105 |     },
106 |     {
107 |       "cell_type": "code",
108 |       "source": [
109 |         "df[df['AGE'] > 20]"
110 |       ],
111 |       "metadata": {
112 |         "id": "1vIHq4T6aEui"
113 |       },
114 |       "execution_count": null,
115 |       "outputs": []
116 |     },
117 |     {
118 |       "cell_type": "code",
119 |       "execution_count": null,
120 |       "metadata": {
121 |         "id": "9cFjIloqW0fE"
122 |       },
123 |       "outputs": [],
124 |       "source": [
125 |         "df[(df['AGE'] > 25) & (df['Goals'] > 1) & (df['Pens'] == 1)]"
126 |       ]
127 |     },
128 |     {
129 |       "cell_type": "code",
130 |       "source": [
131 |         "df[(df['AGE'] > 25) & (df['Goals'] > 1) & (df['Pens'] == 1) & (df['Pos'] == 'Forward')]"
132 |       ],
133 |       "metadata": {
134 |         "id": "8wbpZHAVdnWo"
135 |       },
136 |       "execution_count": null,
137 |       "outputs": []
138 |     },
139 |     {
140 |       "cell_type": "markdown",
141 |       "source": [
142 |         "# DF.Query"
143 |       ],
144 |       "metadata": {
145 |         "id": "j7xLoTIlXO_0"
146 |       }
147 |     },
148 |     {
149 |       "cell_type": "code",
150 |       "source": [
151 |         "df.query('AGE > 20')"
152 |       ],
153 |       "metadata": {
154 |         "id": "cOwday_xaFhT"
155 |       },
156 |       "execution_count": null,
157 |       "outputs": []
158 |     },
159 |     {
160 |       "cell_type": "code",
161 |       "source": [
162 |         "df.query('AGE > 20 & Goals > 1 & Pens == 1')"
163 |       ],
164 |       "metadata": {
165 |         "id": "diOUhFBmaFaT"
166 |       },
167 |       "execution_count": null,
168 |       "outputs": []
169 |     },
170 |     {
171 |       "cell_type": "code",
172 |       "source": [
173 |         "df.query('AGE > 20 & Goals > 1 & Pens == 1 & Pos == \"Forward\"')"
174 |       ],
175 |       "metadata": {
176 |         "id": "dFthlyLJXarx"
177 |       },
178 |       "execution_count": null,
179 |       "outputs": []
180 |     }
181 |   ]
182 | }


--------------------------------------------------------------------------------
/Categorical_Encoding.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Categorical_Encoding.ipynb",
  7 |       "provenance": [],
  8 |       "authorship_tag": "ABX9TyPSJcA8vtQK+Vf9RPk7UAuB",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     }
 18 |   },
 19 |   "cells": [
 20 |     {
 21 |       "cell_type": "markdown",
 22 |       "metadata": {
 23 |         "id": "view-in-github",
 24 |         "colab_type": "text"
 25 |       },
 26 |       "source": [
 27 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/Categorical_Encoding.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 28 |       ]
 29 |     },
 30 |     {
 31 |       "cell_type": "markdown",
 32 |       "metadata": {
 33 |         "id": "eVbc2PHZVp-i"
 34 |       },
 35 |       "source": [
 36 |         "## Imports"
 37 |       ]
 38 |     },
 39 |     {
 40 |       "cell_type": "code",
 41 |       "metadata": {
 42 |         "id": "ipCPKpl3U0q6"
 43 |       },
 44 |       "source": [
 45 |         "import pandas as pd\n",
 46 |         "import numpy as np"
 47 |       ],
 48 |       "execution_count": null,
 49 |       "outputs": []
 50 |     },
 51 |     {
 52 |       "cell_type": "markdown",
 53 |       "metadata": {
 54 |         "id": "Ex5FrY4zVvlo"
 55 |       },
 56 |       "source": [
 57 |         "## Import Data"
 58 |       ]
 59 |     },
 60 |     {
 61 |       "cell_type": "code",
 62 |       "metadata": {
 63 |         "id": "PmD0nWFJvj6E"
 64 |       },
 65 |       "source": [
 66 |         "! wget https://raw.githubusercontent.com/SuperDataWorld/Python/main/Data/bmw.csv"
 67 |       ],
 68 |       "execution_count": null,
 69 |       "outputs": []
 70 |     },
 71 |     {
 72 |       "cell_type": "code",
 73 |       "metadata": {
 74 |         "id": "iaQxndU_uWXz"
 75 |       },
 76 |       "source": [
 77 |         "df = pd.read_csv('bmw.csv')\n",
 78 |         "df.head()"
 79 |       ],
 80 |       "execution_count": null,
 81 |       "outputs": []
 82 |     },
 83 |     {
 84 |       "cell_type": "markdown",
 85 |       "metadata": {
 86 |         "id": "1udURtgOWpCw"
 87 |       },
 88 |       "source": [
 89 |         "## Investigate Data"
 90 |       ]
 91 |     },
 92 |     {
 93 |       "cell_type": "code",
 94 |       "metadata": {
 95 |         "id": "S91wvDtEWons"
 96 |       },
 97 |       "source": [
 98 |         "df.info()"
 99 |       ],
100 |       "execution_count": null,
101 |       "outputs": []
102 |     },
103 |     {
104 |       "cell_type": "code",
105 |       "metadata": {
106 |         "id": "ysxj4Kf9WuCJ"
107 |       },
108 |       "source": [
109 |         "col_list = df.columns\n",
110 |         "\n",
111 |         "for i in col_list:\n",
112 |         "  print(i+' values \\n')\n",
113 |         "  print(df[i].value_counts())"
114 |       ],
115 |       "execution_count": null,
116 |       "outputs": []
117 |     },
118 |     {
119 |       "cell_type": "markdown",
120 |       "metadata": {
121 |         "id": "81kQh9NhX12c"
122 |       },
123 |       "source": [
124 |         "## Convert to Category"
125 |       ]
126 |     },
127 |     {
128 |       "cell_type": "code",
129 |       "metadata": {
130 |         "id": "tQBjOl8XX5kk"
131 |       },
132 |       "source": [
133 |         "conversion_list = ['engineSize','fuelType','year','model']\n",
134 |         "for i in conversion_list:\n",
135 |         "  df[i] = df[i].astype('category')\n",
136 |         "\n",
137 |         "df.info()"
138 |       ],
139 |       "execution_count": null,
140 |       "outputs": []
141 |     },
142 |     {
143 |       "cell_type": "code",
144 |       "metadata": {
145 |         "id": "bV6Mk2h2YiA8"
146 |       },
147 |       "source": [
148 |         "df.head()"
149 |       ],
150 |       "execution_count": null,
151 |       "outputs": []
152 |     },
153 |     {
154 |       "cell_type": "markdown",
155 |       "metadata": {
156 |         "id": "gfwqnoopY4kY"
157 |       },
158 |       "source": [
159 |         "## Method 1 Get Dummies"
160 |       ]
161 |     },
162 |     {
163 |       "cell_type": "code",
164 |       "metadata": {
165 |         "id": "xP9alx3zY0FW"
166 |       },
167 |       "source": [
168 |         "df2 = pd.get_dummies(df)\n",
169 |         "df2.head()"
170 |       ],
171 |       "execution_count": null,
172 |       "outputs": []
173 |     },
174 |     {
175 |       "cell_type": "markdown",
176 |       "metadata": {
177 |         "id": "-iJfsrJCZaYe"
178 |       },
179 |       "source": [
180 |         "## Binning"
181 |       ]
182 |     },
183 |     {
184 |       "cell_type": "code",
185 |       "metadata": {
186 |         "id": "iCn5nJkUZWRS"
187 |       },
188 |       "source": [
189 |         "#df['engine_bin'] = pd.cut(df.engineSize.astype(float), 4, labels = ['bin1','bin2','bin3','bin4'])\n",
190 |         "# engineSize was cast to an unordered category earlier in this notebook; cast back to float so pd.cut can derive equal-width edges (NOTE: confirm against the pandas version in use)\n",
191 |         "df['engine_bin'] = pd.cut(df.engineSize.astype(float), 4)\n",
192 |         "df3 = df.drop(['engineSize'], axis = 1)\n",
193 |         "pd.get_dummies(df3)"
194 |       ],
195 |       "execution_count": null,
196 |       "outputs": []
197 |     },
198 |     {
199 |       "cell_type": "code",
200 |       "metadata": {
201 |         "id": "03VkQYNqbQNO"
202 |       },
203 |       "source": [
204 |         ""
205 |       ],
206 |       "execution_count": null,
207 |       "outputs": []
208 |     }
209 |   ]
210 | }


--------------------------------------------------------------------------------
/Binning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Binning.ipynb",
  7 |       "provenance": [],
  8 |       "authorship_tag": "ABX9TyNkIYmu1JGWfSs1T/1yGaMp",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     }
 18 |   },
 19 |   "cells": [
 20 |     {
 21 |       "cell_type": "markdown",
 22 |       "metadata": {
 23 |         "id": "view-in-github",
 24 |         "colab_type": "text"
 25 |       },
 26 |       "source": [
 27 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/Binning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 28 |       ]
 29 |     },
 30 |     {
 31 |       "cell_type": "code",
 32 |       "execution_count": null,
 33 |       "metadata": {
 34 |         "id": "siFijo0HbiPZ"
 35 |       },
 36 |       "outputs": [],
 37 |       "source": [
 38 |         "import pandas as pd\n",
 39 |         "df = pd.read_csv('https://raw.githubusercontent.com/SuperDataWorld/Python/main/Data/StudentsPerformance.csv')"
 40 |       ]
 41 |     },
 42 |     {
 43 |       "cell_type": "code",
 44 |       "source": [
 45 |         "df.head()"
 46 |       ],
 47 |       "metadata": {
 48 |         "id": "oTYCuxBTbsbX"
 49 |       },
 50 |       "execution_count": null,
 51 |       "outputs": []
 52 |     },
 53 |     {
 54 |       "cell_type": "code",
 55 |       "source": [
 56 |         "df.columns"
 57 |       ],
 58 |       "metadata": {
 59 |         "id": "iZJPRWV6bseN"
 60 |       },
 61 |       "execution_count": null,
 62 |       "outputs": []
 63 |     },
 64 |     {
 65 |       "cell_type": "code",
 66 |       "source": [
 67 |         "df.columns = ['gender', 'ethnicity', 'parentaledu', 'lunch', 'prep', 'math', 'reading','writing']"
 68 |       ],
 69 |       "metadata": {
 70 |         "id": "dI2HH_obbsgQ"
 71 |       },
 72 |       "execution_count": null,
 73 |       "outputs": []
 74 |     },
 75 |     {
 76 |       "cell_type": "code",
 77 |       "source": [
 78 |         "df.head()"
 79 |       ],
 80 |       "metadata": {
 81 |         "id": "CjhvWkYvbsiC"
 82 |       },
 83 |       "execution_count": null,
 84 |       "outputs": []
 85 |     },
 86 |     {
 87 |       "cell_type": "code",
 88 |       "source": [
 89 |         "df.parentaledu.value_counts()"
 90 |       ],
 91 |       "metadata": {
 92 |         "id": "8c_bwK9pbska"
 93 |       },
 94 |       "execution_count": null,
 95 |       "outputs": []
 96 |     },
 97 |     {
 98 |       "cell_type": "code",
 99 |       "source": [
100 |         "#Dummy Bins\n",
101 |         "pd.get_dummies(df.parentaledu)"
102 |       ],
103 |       "metadata": {
104 |         "id": "dmoOGbovkE7U"
105 |       },
106 |       "execution_count": null,
107 |       "outputs": []
108 |     },
109 |     {
110 |       "cell_type": "code",
111 |       "source": [
112 |         "# Numeric Bins\n",
113 |         "\n",
114 |         "df['parentaledu'] = pd.Categorical(df.parentaledu, ordered = True, \n",
115 |         "  categories = [\"some high school\",\"high school\",\"some college\",\"associate's degree\",\"bachelor's degree\",\"master's degree\"])\n",
116 |         "\n",
117 |         "print(df.parentaledu.min())\n",
118 |         "print(df.parentaledu.max())\n",
119 |         "df['parentaledu']\n"
120 |       ],
121 |       "metadata": {
122 |         "id": "9Eze0LmRkDSI"
123 |       },
124 |       "execution_count": null,
125 |       "outputs": []
126 |     },
127 |     {
128 |       "cell_type": "code",
129 |       "source": [
130 |         "df['parentaledu'].cat.codes"
131 |       ],
132 |       "metadata": {
133 |         "id": "5daZ708WE_0r"
134 |       },
135 |       "execution_count": null,
136 |       "outputs": []
137 |     },
138 |     {
139 |       "cell_type": "code",
140 |       "source": [
141 |         "df.reading.hist()"
142 |       ],
143 |       "metadata": {
144 |         "id": "Tu2S-IdNdEt6"
145 |       },
146 |       "execution_count": null,
147 |       "outputs": []
148 |     },
149 |     {
150 |       "cell_type": "code",
151 |       "source": [
152 |         "lables = ['low','average','high']\n",
153 |         "\n",
154 |         "#Equi-width Binning\n",
155 |         "\n",
156 |         "df['readingrank'] = pd.cut(df.reading, bins = 3, labels = lables)\n",
157 |         "df.readingrank.value_counts()"
158 |       ],
159 |       "metadata": {
160 |         "id": "t_9T6IXIdRPj"
161 |       },
162 |       "execution_count": null,
163 |       "outputs": []
164 |     },
165 |     {
166 |       "cell_type": "code",
167 |       "source": [
168 |         "df.groupby('readingrank')['reading'].mean()"
169 |       ],
170 |       "metadata": {
171 |         "id": "jUBewmYdhlcH"
172 |       },
173 |       "execution_count": null,
174 |       "outputs": []
175 |     },
176 |     {
177 |       "cell_type": "code",
178 |       "source": [
179 |         "#Equi-depth Binning\n",
180 |         "df['readingrank2'] = pd.qcut(df.reading, q = 3, labels = lables)\n",
181 |         "df.readingrank2.value_counts()"
182 |       ],
183 |       "metadata": {
184 |         "id": "REOcP8Y4dgdO"
185 |       },
186 |       "execution_count": null,
187 |       "outputs": []
188 |     },
189 |     {
190 |       "cell_type": "code",
191 |       "source": [
192 |         "df.groupby('readingrank2')['reading'].mean()"
193 |       ],
194 |       "metadata": {
195 |         "id": "HURmWIg5dRVE"
196 |       },
197 |       "execution_count": null,
198 |       "outputs": []
199 |     }
200 |   ]
201 | }


--------------------------------------------------------------------------------
/Univariate_Outlier_Detection.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Univariate Outlier Detection.ipynb",
  7 |       "provenance": [],
  8 |       "authorship_tag": "ABX9TyN3D1T7tGc5nw9P9ik0zWCd",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     }
 18 |   },
 19 |   "cells": [
 20 |     {
 21 |       "cell_type": "markdown",
 22 |       "metadata": {
 23 |         "id": "view-in-github",
 24 |         "colab_type": "text"
 25 |       },
 26 |       "source": [
 27 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/Univariate_Outlier_Detection.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 28 |       ]
 29 |     },
 30 |     {
 31 |       "cell_type": "code",
 32 |       "execution_count": null,
 33 |       "metadata": {
 34 |         "id": "e5Hdn6y65LMr"
 35 |       },
 36 |       "outputs": [],
 37 |       "source": [
 38 |         "import pandas as pd\n",
 39 |         "import matplotlib.pyplot as plt\n",
 40 |         "import seaborn as sns\n",
 41 |         "%matplotlib inline"
 42 |       ]
 43 |     },
 44 |     {
 45 |       "cell_type": "code",
 46 |       "source": [
 47 |         "df = pd.read_csv('https://raw.githubusercontent.com/SuperDataWorld/Python/main/Data/bikerental.csv')"
 48 |       ],
 49 |       "metadata": {
 50 |         "id": "_Q0W1pdT5yHP"
 51 |       },
 52 |       "execution_count": null,
 53 |       "outputs": []
 54 |     },
 55 |     {
 56 |       "cell_type": "code",
 57 |       "source": [
 58 |         "df.describe()"
 59 |       ],
 60 |       "metadata": {
 61 |         "id": "kczTodol9mj9"
 62 |       },
 63 |       "execution_count": null,
 64 |       "outputs": []
 65 |     },
 66 |     {
 67 |       "cell_type": "code",
 68 |       "source": [
 69 |         "df.head()"
 70 |       ],
 71 |       "metadata": {
 72 |         "id": "U2Y7onin8XVT"
 73 |       },
 74 |       "execution_count": null,
 75 |       "outputs": []
 76 |     },
 77 |     {
 78 |       "cell_type": "code",
 79 |       "source": [
 80 |         "sns.set_palette(\"bone_r\")\n",
 81 |         "f , axes = plt.subplots(nrows=1, ncols=3, figsize = (20,4))\n",
 82 |         "sns.set_style(\"darkgrid\")\n",
 83 |         "\n",
 84 |         "fig1 = sns.boxplot(y= df.windspeed, ax = axes[0] )\n",
 85 |         "fig2 = sns.boxplot(y= df.hum, ax = axes[1] )\n",
 86 |         "fig3 = sns.boxplot(y=df.casual, ax = axes[2] )\n",
 87 |         "plt.show()"
 88 |       ],
 89 |       "metadata": {
 90 |         "id": "j29b746T5jSj"
 91 |       },
 92 |       "execution_count": null,
 93 |       "outputs": []
 94 |     },
 95 |     {
 96 |       "cell_type": "code",
 97 |       "source": [
 98 |         "df.windspeed.quantile(.25)"
 99 |       ],
100 |       "metadata": {
101 |         "id": "exMkzpmv5jPu"
102 |       },
103 |       "execution_count": null,
104 |       "outputs": []
105 |     },
106 |     {
107 |       "cell_type": "code",
108 |       "source": [
109 |         "IQR = df.windspeed.quantile(.75) - df.windspeed.quantile(.25)\n",
110 |         "IQR"
111 |       ],
112 |       "metadata": {
113 |         "id": "HlJJmRgp5jLs"
114 |       },
115 |       "execution_count": null,
116 |       "outputs": []
117 |     },
118 |     {
119 |       "cell_type": "code",
120 |       "source": [
121 |         "whisker  = (df.windspeed.quantile(.75) - df.windspeed.quantile(.25)) * 1.5\n",
122 |         "whisker\n"
123 |       ],
124 |       "metadata": {
125 |         "id": "Ln9tWtPi8hY5"
126 |       },
127 |       "execution_count": null,
128 |       "outputs": []
129 |     },
130 |     {
131 |       "cell_type": "code",
132 |       "source": [
133 |         "lower_range = df.windspeed.quantile(.25) - whisker \n",
134 |         "upper_range = df.windspeed.quantile(.75) + whisker "
135 |       ],
136 |       "metadata": {
137 |         "id": "iUTmOyng_T_o"
138 |       },
139 |       "execution_count": null,
140 |       "outputs": []
141 |     },
142 |     {
143 |       "cell_type": "code",
144 |       "source": [
145 |         "df.query('windspeed > @upper_range | windspeed < @lower_range' )"
146 |       ],
147 |       "metadata": {
148 |         "id": "FkmocZzB_-KH"
149 |       },
150 |       "execution_count": null,
151 |       "outputs": []
152 |     },
153 |     {
154 |       "cell_type": "code",
155 |       "source": [
156 |         "def findoutliers(col):  # rows of the global df whose col value lies beyond the 1.5 * IQR whiskers\n",
157 |         "  q1, q3 = col.quantile(.25), col.quantile(.75)\n",
158 |         "  reach = 1.5 * (q3 - q1)\n",
159 |         "  beyond = (col > q3 + reach) | (col < q1 - reach)\n",
160 |         "  return df[beyond]"
161 |       ],
162 |       "metadata": {
163 |         "id": "wpCrLGE-Bm38"
164 |       },
165 |       "execution_count": null,
166 |       "outputs": []
167 |     },
168 |     {
169 |       "cell_type": "code",
170 |       "source": [
171 |         "findoutliers(df.hum)"
172 |       ],
173 |       "metadata": {
174 |         "id": "AQ9zNDdpB6SQ"
175 |       },
176 |       "execution_count": null,
177 |       "outputs": []
178 |     },
179 |     {
180 |       "cell_type": "code",
181 |       "source": [
182 |         "cas = findoutliers(df.casual)"
183 |       ],
184 |       "metadata": {
185 |         "id": "yQ46dhD-CdVi"
186 |       },
187 |       "execution_count": null,
188 |       "outputs": []
189 |     },
190 |     {
191 |       "cell_type": "code",
192 |       "source": [
193 |         "cas"
194 |       ],
195 |       "metadata": {
196 |         "id": "m60lEUO9EEc-"
197 |       },
198 |       "execution_count": null,
199 |       "outputs": []
200 |     }
201 |   ]
202 | }


--------------------------------------------------------------------------------
/OpenAI_API_in_Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "include_colab_link": true
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     },
 13 |     "language_info": {
 14 |       "name": "python"
 15 |     }
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "markdown",
 20 |       "metadata": {
 21 |         "id": "view-in-github",
 22 |         "colab_type": "text"
 23 |       },
 24 |       "source": [
 25 |         "<a href=\"https://colab.research.google.com/github/analyticswithadam/Python/blob/main/OpenAI_API_in_Python.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 26 |       ]
 27 |     },
 28 |     {
 29 |       "cell_type": "code",
 30 |       "execution_count": null,
 31 |       "metadata": {
 32 |         "id": "1YP6-sABwTEv"
 33 |       },
 34 |       "outputs": [],
 35 |       "source": [
 36 |         "# get the openai secret key\n",
 37 |         "import getpass\n",
 38 |         "\n",
 39 |         "# https://platform.openai.com/account/api-keys\n",
 40 |         "secret_key = getpass.getpass('Please enter your openai key: ')"
 41 |       ]
 42 |     },
 43 |     {
 44 |       "cell_type": "code",
 45 |       "source": [
 46 |         "!pip install openai"
 47 |       ],
 48 |       "metadata": {
 49 |         "id": "pUl4Xe2FwYJ1"
 50 |       },
 51 |       "execution_count": null,
 52 |       "outputs": []
 53 |     },
 54 |     {
 55 |       "cell_type": "code",
 56 |       "source": [
 57 |         "from openai import OpenAI\n",
 58 |         "client = OpenAI(api_key=secret_key)\n",
 59 |         "\n",
 60 |         "def get_response(role, prompt):\n",
 61 |         "  \"\"\"Ask gpt-3.5-turbo a single question and return the reply text.\n",
 62 |         "\n",
 63 |         "  role is sent as the system message and prompt as the user message;\n",
 64 |         "  only the content of the first returned choice is handed back.\n",
 65 |         "  \"\"\"\n",
 66 |         "  system_msg = {\"role\": \"system\", \"content\": f\"\"\" {role} \"\"\"}\n",
 67 |         "  user_msg = {\"role\": \"user\", \"content\": f\"\"\"{prompt} \"\"\"}\n",
 68 |         "  # Fixed sampling settings used for every call.\n",
 69 |         "  request = dict(\n",
 70 |         "    model=\"gpt-3.5-turbo\",\n",
 71 |         "    messages=[system_msg, user_msg],\n",
 72 |         "    temperature=.25,\n",
 73 |         "    max_tokens=1000,\n",
 74 |         "    top_p=1,\n",
 75 |         "    frequency_penalty=0,\n",
 76 |         "    presence_penalty=0,\n",
 77 |         "  )\n",
 78 |         "  completion = client.chat.completions.create(**request)\n",
 79 |         "  return completion.choices[0].message.content"
 80 |       ],
 81 |       "metadata": {
 82 |         "id": "416nypIKwYEv"
 83 |       },
 84 |       "execution_count": null,
 85 |       "outputs": []
 86 |     },
 87 |     {
 88 |       "cell_type": "code",
 89 |       "source": [
 90 |         "# Collect Info\n",
 91 |         "Role = \"\"\"You are an expert in business. I want you to classify\n",
 92 |         "these companies in to their industries in one word\"\"\"\n",
 93 |         "Companies = \"IBM, Netflix, Chase, Meta\"\n"
 94 |       ],
 95 |       "metadata": {
 96 |         "id": "Alr4sMQUzChH"
 97 |       },
 98 |       "execution_count": null,
 99 |       "outputs": []
100 |     },
101 |     {
102 |       "cell_type": "code",
103 |       "source": [
104 |         "print(get_response(Role, Companies))"
105 |       ],
106 |       "metadata": {
107 |         "id": "yGv6KAyP4p5p"
108 |       },
109 |       "execution_count": null,
110 |       "outputs": []
111 |     },
112 |     {
113 |       "cell_type": "code",
114 |       "source": [
115 |         "\n",
116 |         "def get_response(role, example, examplea, prompt):\n",
117 |         "  \"\"\"One-shot variant: ask gpt-3.5-turbo with a worked example in the history.\n",
118 |         "\n",
119 |         "  role     -> system message\n",
120 |         "  example  -> sample user question shown to the model\n",
121 |         "  examplea -> the assistant answer paired with that sample\n",
122 |         "  prompt   -> the real user question\n",
123 |         "  Returns the content of the first choice in the reply.\n",
124 |         "  \"\"\"\n",
125 |         "  # (speaker, text) pairs in conversation order; the f-string padding is kept as-is.\n",
126 |         "  turns = [\n",
127 |         "    (\"system\", f\"\"\" {role} \"\"\"),\n",
128 |         "    (\"user\", f\"\"\" {example} \"\"\"),\n",
129 |         "    (\"assistant\", f\"\"\" {examplea} \"\"\"),\n",
130 |         "    (\"user\", f\"\"\"{prompt} \"\"\"),\n",
131 |         "  ]\n",
132 |         "  history = [{\"role\": speaker, \"content\": text} for speaker, text in turns]\n",
133 |         "  completion = client.chat.completions.create(\n",
134 |         "    model=\"gpt-3.5-turbo\",\n",
135 |         "    messages=history,\n",
136 |         "    temperature=.25,\n",
137 |         "    max_tokens=1000,\n",
138 |         "    top_p=1,\n",
139 |         "    frequency_penalty=0,\n",
140 |         "    presence_penalty=0,\n",
141 |         "  )\n",
142 |         "  # Hand back just the generated text.\n",
143 |         "  return completion.choices[0].message.content"
144 |       ],
145 |       "metadata": {
146 |         "id": "PoX_L2qnAbHR"
147 |       },
148 |       "execution_count": null,
149 |       "outputs": []
150 |     },
151 |     {
152 |       "cell_type": "code",
153 |       "source": [
154 |         "# Collect Info\n",
155 |         "\n",
156 |         "Role = \"\"\"You are an expert in business. I want you to classify\n",
157 |         "these companies in to their industries in one word\"\"\"\n",
158 |         "ExampleQ = \"Apple, Microsoft, JP Morgan\"\n",
159 |         "ExampleA = \"Tech, Tech, FS\"\n",
160 |         "Companies = \"IBM, Netflix, Meta, Citi\""
161 |       ],
162 |       "metadata": {
163 |         "id": "XKABfKsQAftb"
164 |       },
165 |       "execution_count": null,
166 |       "outputs": []
167 |     },
168 |     {
169 |       "cell_type": "markdown",
170 |       "source": [],
171 |       "metadata": {
172 |         "id": "b3qHoUwBCF_k"
173 |       }
174 |     },
175 |     {
176 |       "cell_type": "code",
177 |       "source": [
178 |         "print(get_response(Role, ExampleQ, ExampleA, Companies))"
179 |       ],
180 |       "metadata": {
181 |         "id": "i7MWraIcAimy"
182 |       },
183 |       "execution_count": null,
184 |       "outputs": []
185 |     }
186 |   ]
187 | }


--------------------------------------------------------------------------------
/Working_with_APIs_Covid.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "include_colab_link": true
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     },
 13 |     "language_info": {
 14 |       "name": "python"
 15 |     }
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "markdown",
 20 |       "metadata": {
 21 |         "id": "view-in-github",
 22 |         "colab_type": "text"
 23 |       },
 24 |       "source": [
 25 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/Working_with_APIs_Covid.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 26 |       ]
 27 |     },
 28 |     {
 29 |       "cell_type": "markdown",
 30 |       "source": [
 31 |         "\n",
 32 |         "## Public API \n",
 33 |         "Data can be retrieved from an API publicly or with a key.<br> A publicly available example is the COVID API (https://api.covid19api.com/summary)\n",
 34 |         "\n"
 35 |       ],
 36 |       "metadata": {
 37 |         "id": "xqkvfYt7gIP_"
 38 |       }
 39 |     },
 40 |     {
 41 |       "cell_type": "code",
 42 |       "source": [
 43 |         "# The requests library can pull a response from an API \n",
 44 |         "import requests\n",
 45 |         "response = requests.get('https://api.covid19api.com/summary').text"
 46 |       ],
 47 |       "metadata": {
 48 |         "id": "GZqLW9Rhex4y"
 49 |       },
 50 |       "execution_count": null,
 51 |       "outputs": []
 52 |     },
 53 |     {
 54 |       "cell_type": "code",
 55 |       "source": [
 56 |         "# This response is in JSON \n",
 57 |         "response"
 58 |       ],
 59 |       "metadata": {
 60 |         "id": "xTPEYNihe02o"
 61 |       },
 62 |       "execution_count": null,
 63 |       "outputs": []
 64 |     },
 65 |     {
 66 |       "cell_type": "code",
 67 |       "source": [
 68 |         "# We can use the JSON library to parse this in to a dictionary\n",
 69 |         "import json \n",
 70 |         "y = json.loads(response)"
 71 |       ],
 72 |       "metadata": {
 73 |         "id": "zugv_VNnf5ap"
 74 |       },
 75 |       "execution_count": null,
 76 |       "outputs": []
 77 |     },
 78 |     {
 79 |       "cell_type": "code",
 80 |       "source": [
 81 |         "# We can confirm this is a dictionary with the type function \n",
 82 |         "type(y)"
 83 |       ],
 84 |       "metadata": {
 85 |         "id": "tykMQLhcgrMo"
 86 |       },
 87 |       "execution_count": null,
 88 |       "outputs": []
 89 |     },
 90 |     {
 91 |       "cell_type": "code",
 92 |       "source": [
 93 |         "# This is a large dictionary but follows a pattern - We are interested in Country level details\n",
 94 |         "y"
 95 |       ],
 96 |       "metadata": {
 97 |         "id": "3TW9R7_8syJN"
 98 |       },
 99 |       "execution_count": null,
100 |       "outputs": []
101 |     },
102 |     {
103 |       "cell_type": "code",
104 |       "source": [
105 |         "# We can count the number of countries by using the len function \n",
106 |         "len(y['Countries'])"
107 |       ],
108 |       "metadata": {
109 |         "id": "RK83MxoXhN3v"
110 |       },
111 |       "execution_count": null,
112 |       "outputs": []
113 |     },
114 |     {
115 |       "cell_type": "code",
116 |       "source": [
117 |         "\n",
118 |         "# The first element of the Countries list has a Country value of Afghanistan\n",
119 |         "y['Countries'][0]['Country']"
120 |       ],
121 |       "metadata": {
122 |         "id": "VGnQcxaWh2zP"
123 |       },
124 |       "execution_count": null,
125 |       "outputs": []
126 |     },
127 |     {
128 |       "cell_type": "code",
129 |       "source": [
130 |         "# We can see how many new deaths were reported \n",
131 |         "y['Countries'][0]['NewDeaths']"
132 |       ],
133 |       "metadata": {
134 |         "id": "pgrL-KDzikqm"
135 |       },
136 |       "execution_count": null,
137 |       "outputs": []
138 |     },
139 |     {
140 |       "cell_type": "code",
141 |       "source": [
142 |         "# We can use f-strings to format a print statement containing elements\n",
143 |         "f\"{y['Countries'][0]['Country']} has had {y['Countries'][0]['TotalDeaths']} total deaths from COVID 19\""
144 |       ],
145 |       "metadata": {
146 |         "id": "v7V8J_-_iy1H"
147 |       },
148 |       "execution_count": null,
149 |       "outputs": []
150 |     },
151 |     {
152 |       "cell_type": "code",
153 |       "source": [
154 |         "# We can do this for a number of countries using range\n",
155 |         "for i in range(10):\n",
156 |         "  print(f\"{y['Countries'][i]['Country']} has had {y['Countries'][i]['TotalDeaths']} total deaths from COVID 19\")"
157 |       ],
158 |       "metadata": {
159 |         "id": "gzVnauQWjV1p"
160 |       },
161 |       "execution_count": null,
162 |       "outputs": []
163 |     },
164 |     {
165 |       "cell_type": "code",
166 |       "source": [
167 |         "# This data can all be added to a dataframe\n",
168 |         "import pandas as pd\n",
169 |         "df = pd.DataFrame(data=y['Countries'])"
170 |       ],
171 |       "metadata": {
172 |         "id": "W-VI3OwHksmO"
173 |       },
174 |       "execution_count": null,
175 |       "outputs": []
176 |     },
177 |     {
178 |       "cell_type": "code",
179 |       "source": [
180 |         "df.head(10)"
181 |       ],
182 |       "metadata": {
183 |         "id": "tH2u0D3llanW"
184 |       },
185 |       "execution_count": null,
186 |       "outputs": []
187 |     },
188 |     {
189 |       "cell_type": "code",
190 |       "source": [
191 |         "# From the dataframe we can get the index of the country with the maximum deaths and cases \n",
192 |         "most_new_cases = df['NewConfirmed'].idxmax()\n",
193 |         "most_new_deaths = df['NewDeaths'].idxmax()"
194 |       ],
195 |       "metadata": {
196 |         "id": "YvdO0pfclbbH"
197 |       },
198 |       "execution_count": null,
199 |       "outputs": []
200 |     },
201 |     {
202 |       "cell_type": "code",
203 |       "source": [
204 |         "# Display the report automatically; the message is split into adjacent f-strings because a quoted literal cannot span several source lines\n",
205 |         "print(f\" Good Morning here is your daily covid report.\\n \"\n",
206 |         "      f\"The most daily confirmed cases took place in {df['Country'].loc[most_new_cases]} \"\n",
207 |         "      f\"with {df['NewConfirmed'].loc[most_new_cases]} new cases.\\n The most deaths occurred \"\n",
208 |         "      f\"in {df['Country'].loc[most_new_deaths]} with {df['NewDeaths'].loc[most_new_deaths]} new deaths.\")"
209 |       ],
210 |       "metadata": {
211 |         "id": "qjar1BwlmFKV"
212 |       },
213 |       "execution_count": null,
214 |       "outputs": []
215 |     }
216 |   ]
217 | }


--------------------------------------------------------------------------------
/Sentiment_Analysis_OpenAI_API.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "include_colab_link": true
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     },
 13 |     "language_info": {
 14 |       "name": "python"
 15 |     }
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "markdown",
 20 |       "metadata": {
 21 |         "id": "view-in-github",
 22 |         "colab_type": "text"
 23 |       },
 24 |       "source": [
 25 |         "<a href=\"https://colab.research.google.com/github/analyticswithadam/Python/blob/main/Sentiment_Analysis_OpenAI_API.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 26 |       ]
 27 |     },
 28 |     {
 29 |       "cell_type": "code",
 30 |       "source": [
 31 |         "!pip install openai"
 32 |       ],
 33 |       "metadata": {
 34 |         "id": "15uaY-Y8QBE9"
 35 |       },
 36 |       "execution_count": null,
 37 |       "outputs": []
 38 |     },
 39 |     {
 40 |       "cell_type": "code",
 41 |       "execution_count": null,
 42 |       "metadata": {
 43 |         "id": "1YP6-sABwTEv"
 44 |       },
 45 |       "outputs": [],
 46 |       "source": [
 47 |         "# get the openai secret key\n",
 48 |         "import getpass\n",
 49 |         "import pandas as pd\n",
 50 |         "\n",
 51 |         "\n",
 52 |         "# https://platform.openai.com/account/api-keys\n",
 53 |         "secret_key = getpass.getpass('Please enter your openai key: ')"
 54 |       ]
 55 |     },
 56 |     {
 57 |       "cell_type": "code",
 58 |       "source": [
 59 |         "from openai import OpenAI\n",
 60 |         "client = OpenAI(api_key=secret_key)"
 61 |       ],
 62 |       "metadata": {
 63 |         "id": "Jl9pTctvATwa"
 64 |       },
 65 |       "execution_count": null,
 66 |       "outputs": []
 67 |     },
 68 |     {
 69 |       "cell_type": "code",
 70 |       "source": [
 71 |         "def get_response(example):\n",
 72 |         "  \"\"\"Label one review as Positive, Negative or Neutral using gpt-3.5-turbo.\n",
 73 |         "\n",
 74 |         "  The request carries a system instruction, six worked review/label pairs,\n",
 75 |         "  and finally example as the last user message; the text of the first\n",
 76 |         "  returned choice is handed back.\n",
 77 |         "  \"\"\"\n",
 78 |         "  instructions = \"You are an expert in restaurant marketing with a specialism in sentiment analysis. \\n\\nI will give you some reviews and I want you to return one word based on the sentiment. Positive, Negative or Neutral. \"\n",
 79 |         "  # Few-shot demonstrations as (review, expected label), in conversation order.\n",
 80 |         "  demos = [\n",
 81 |         "    (\"With all the new upgrades everything looks great and clean great staff\", \"Positive\"),\n",
 82 |         "    (\"It'd McDonalds. It is what it is as far as the food and atmosphere go. The staff here does make a difference. They are all friendly, accommodating and always smiling. Makes for a more pleasant experience than many other fast food places.\", \"Positive\"),\n",
 83 |         "    (\"We stopped by for a quick breakfast.  It was not crowded inside, but there was a pretty long drive thru.\\nOur order was supposed to have 3 food items and 2 drinks. They missed a food item. The lady ignored me when I told her.My husband went to get it, she finished her task then handed it to him without a word or smile or anything.\", \"Negative\"),\n",
 84 |         "    (\"The morning crew seems fast and efficient. Night crew is a whole different experience, lines down the street, hour long waits. If this was a one time occurrence it wouldn't be so bad but this is a nightly problem. Beyond this the staff is so highly rude you leave wanting to crash your car through the building. Don't forget they will mistake your order almost every single time as well and serve it to you cold.\", \"Negative\"),\n",
 85 |         "    (\"Me and my girlfriend came tonight to pick up our food after 11pm for a mobile order, they need a better system if you are walking on foot. The food tasted great, loved the McDonald's deal ordering from the app.\", \"Neutral\"),\n",
 86 |         "    (\"Been frequenting this location for a few years.Morning,mid-day and night,the food is always hot,fresh and served with a smile.\", \"Positive\"),\n",
 87 |         "  ]\n",
 88 |         "  chat = [{\"role\": \"system\", \"content\": instructions}]\n",
 89 |         "  for review, label in demos:\n",
 90 |         "    chat.append({\"role\": \"user\", \"content\": review})\n",
 91 |         "    chat.append({\"role\": \"assistant\", \"content\": label})\n",
 92 |         "  # The review to classify always comes last.\n",
 93 |         "  chat.append({\"role\": \"user\", \"content\": f\"\"\"{example} \"\"\"})\n",
 94 |         "  completion = client.chat.completions.create(\n",
 95 |         "    model=\"gpt-3.5-turbo\",\n",
 96 |         "    messages=chat,\n",
 97 |         "    temperature=1,\n",
 98 |         "    max_tokens=256,\n",
 99 |         "    top_p=1,\n",
100 |         "    frequency_penalty=0,\n",
101 |         "    presence_penalty=0,\n",
102 |         "  )\n",
103 |         "  return completion.choices[0].message.content"
139 |       ],
140 |       "metadata": {
141 |         "id": "ch8VwFL__sOo"
142 |       },
143 |       "execution_count": null,
144 |       "outputs": []
145 |     },
146 |     {
147 |       "cell_type": "code",
148 |       "source": [
149 |         "# Test get_response on a single sample review\n",
150 |         "get_response(\"\"\"I work for door dash and they locked us all out to wait in a long line for no reason at 10. I tried to beg them to open it because I'm door dash and they refused. Covid is over get a new job\"\"\")"
151 |       ],
152 |       "metadata": {
153 |         "id": "ayvYHml5AHNs"
154 |       },
155 |       "execution_count": null,
156 |       "outputs": []
157 |     },
158 |     {
159 |       "cell_type": "code",
160 |       "source": [
161 |         "df = pd.read_csv('https://raw.githubusercontent.com/analyticswithadam/Python/main/Data/McD%20Reviews.csv')\n",
162 |         "df['Review'] = df['Review'].str.strip()"
163 |       ],
164 |       "metadata": {
165 |         "id": "mABizQeYBo3t"
166 |       },
167 |       "execution_count": null,
168 |       "outputs": []
169 |     },
170 |     {
171 |       "cell_type": "code",
172 |       "source": [
173 |         "df.head()"
174 |       ],
175 |       "metadata": {
176 |         "id": "rjBo8-UBEtat"
177 |       },
178 |       "execution_count": null,
179 |       "outputs": []
180 |     },
181 |     {
182 |       "cell_type": "code",
183 |       "source": [
184 |         "df['Review']"
185 |       ],
186 |       "metadata": {
187 |         "id": "YS4_iuUtFtWB"
188 |       },
189 |       "execution_count": null,
190 |       "outputs": []
191 |     },
192 |     {
193 |       "cell_type": "code",
194 |       "source": [
195 |         "df2 = df[100:120].copy()"
196 |       ],
197 |       "metadata": {
198 |         "id": "I3WGC4xlLhDh"
199 |       },
200 |       "execution_count": null,
201 |       "outputs": []
202 |     },
203 |     {
204 |       "cell_type": "code",
205 |       "source": [
206 |         "df2"
207 |       ],
208 |       "metadata": {
209 |         "id": "pSwoWvFPLmGY"
210 |       },
211 |       "execution_count": null,
212 |       "outputs": []
213 |     },
214 |     {
215 |       "cell_type": "code",
216 |       "source": [
217 |         "df2['GPT Sentiment'] = df2['Review'].apply(get_response)"
218 |       ],
219 |       "metadata": {
220 |         "id": "h9dSMyydLsHw"
221 |       },
222 |       "execution_count": null,
223 |       "outputs": []
224 |     },
225 |     {
226 |       "cell_type": "code",
227 |       "source": [
228 |         "df2"
229 |       ],
230 |       "metadata": {
231 |         "id": "2lSEwyH9Nskr"
232 |       },
233 |       "execution_count": null,
234 |       "outputs": []
235 |     }
236 |   ]
237 | }


--------------------------------------------------------------------------------
/Web_Scraping_from_TrustPilot_v3.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyP6RHkoCJa/xj34im0tCo30",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/analyticswithadam/Python/blob/main/Web_Scraping_from_TrustPilot_v3.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "execution_count": 1,
 32 |       "metadata": {
 33 |         "id": "JP6PLQnwDvlC",
 34 |         "outputId": "6e20d237-d6f5-4474-88c3-6cf7c08f4132",
 35 |         "colab": {
 36 |           "base_uri": "https://localhost:8080/"
 37 |         }
 38 |       },
 39 |       "outputs": [
 40 |         {
 41 |           "output_type": "stream",
 42 |           "name": "stdout",
 43 |           "text": [
 44 |             "Scraping: https://www.trustpilot.com/review/pepsi.com?page=1\n",
 45 |             "Scraping: https://www.trustpilot.com/review/pepsi.com?page=2\n",
 46 |             "Scraping: https://www.trustpilot.com/review/pepsi.com?page=3\n",
 47 |             "Scraping: https://www.trustpilot.com/review/pepsi.com?page=4\n",
 48 |             "Scraping: https://www.trustpilot.com/review/pepsi.com?page=5\n",
 49 |             "Scraping: https://www.trustpilot.com/review/pepsi.com?page=6\n",
 50 |             "                                           Review Text   Review Date Rating\n",
 51 |             "0                                                 None     A day ago   None\n",
 52 |             "1                                                 None    3 days ago   None\n",
 53 |             "2                                                 None  Mar 13, 2025   None\n",
 54 |             "3                                                 None  Mar 12, 2025   None\n",
 55 |             "4    Would give 0 if i could, pepsi classic used to...     A day ago      1\n",
 56 |             "..                                                 ...           ...    ...\n",
 57 |             "139  Used to be my favourite drink but now that the...  Sep 16, 2023      1\n",
 58 |             "140  I have enjoyed Pepsi cherry max for years and ...  Sep 11, 2023      1\n",
 59 |             "141  So I noticed for the last half year that Pepsi...  Sep 10, 2023      1\n",
 60 |             "142  I want to write this review which is almost id...   Sep 9, 2023      1\n",
 61 |             "143  In the past, lots of people know Pepsi was my ...   Sep 8, 2023      1\n",
 62 |             "\n",
 63 |             "[144 rows x 3 columns]\n"
 64 |           ]
 65 |         }
 66 |       ],
 67 |       "source": [
 68 |         "import requests\n",
 69 |         "from bs4 import BeautifulSoup\n",
 70 |         "import pandas as pd\n",
 71 |         "from time import sleep\n",
 72 |         "\n",
 73 |         "def extract_reviews(page_url):\n",
 74 |         "    headers = {\"User-Agent\": \"Mozilla/5.0\"}  # Mimic a browser\n",
 75 |         "    response = requests.get(page_url, headers=headers)\n",
 76 |         "    soup = BeautifulSoup(response.content, 'html.parser')\n",
 77 |         "\n",
 78 |         "    # Find all articles that represent individual reviews.\n",
 79 |         "    review_articles = soup.find_all('article', attrs={\"data-service-review-card-paper\": True})\n",
 80 |         "\n",
 81 |         "    reviews_data = []\n",
 82 |         "    for article in review_articles:\n",
 83 |         "        review_text = None\n",
 84 |         "        review_date = None\n",
 85 |         "        rating = None\n",
 86 |         "\n",
 87 |         "        # Extract the full review text from the <p> tag with the review text attribute.\n",
 88 |         "        text_tag = article.find('p', attrs={\"data-service-review-text-typography\": True})\n",
 89 |         "        if text_tag:\n",
 90 |         "            review_text = text_tag.get_text(strip=True)\n",
 91 |         "\n",
 92 |         "        # Extract the review date from the first <time> element in the article.\n",
 93 |         "        time_tag = article.find('time')\n",
 94 |         "        if time_tag:\n",
 95 |         "            review_date = time_tag.get_text(strip=True)\n",
 96 |         "\n",
 97 |         "        # Extract the rating from the <div> that carries the data-service-review-rating attribute.\n",
 98 |         "        header_div = article.find('div', attrs={\"data-service-review-rating\": True})\n",
 99 |         "        if header_div:\n",
100 |         "            rating = header_div.get(\"data-service-review-rating\")\n",
101 |         "\n",
102 |         "        reviews_data.append({\n",
103 |         "            \"Review Text\": review_text,\n",
104 |         "            \"Review Date\": review_date,\n",
105 |         "            \"Rating\": rating\n",
106 |         "        })\n",
107 |         "\n",
108 |         "    return reviews_data\n",
109 |         "\n",
110 |         "def extract_all_reviews(base_url, from_page=1, to_page=6):\n",
111 |         "    all_reviews = []\n",
112 |         "    for page in range(from_page, to_page + 1):\n",
113 |         "        page_url = f\"{base_url}?page={page}\"\n",
114 |         "        print(f\"Scraping: {page_url}\")\n",
115 |         "        all_reviews.extend(extract_reviews(page_url))\n",
116 |         "        sleep(1)  # Pause to avoid throttling\n",
117 |         "    return pd.DataFrame(all_reviews)\n",
118 |         "\n",
119 |         "# Example usage:\n",
120 |         "base_url = \"https://www.trustpilot.com/review/pepsi.com\"\n",
121 |         "df_reviews = extract_all_reviews(base_url, from_page=1, to_page=6)\n",
122 |         "print(df_reviews)\n",
123 |         "\n"
124 |       ]
125 |     },
126 |     {
127 |       "cell_type": "code",
128 |       "source": [
129 |         "from google.colab import files\n",
130 |         "# Convert the DataFrame to a CSV file and download it\n",
131 |         "df_reviews.to_csv('reviews.csv', index=False)\n",
132 |         "files.download('reviews.csv')"
133 |       ],
134 |       "metadata": {
135 |         "colab": {
136 |           "base_uri": "https://localhost:8080/",
137 |           "height": 17
138 |         },
139 |         "id": "XfeWnNaSVd02",
140 |         "outputId": "446ed222-bf21-4d59-f6a5-d4ec3bdfe680"
141 |       },
142 |       "execution_count": 3,
143 |       "outputs": [
144 |         {
145 |           "output_type": "display_data",
146 |           "data": {
147 |             "text/plain": [
148 |               "<IPython.core.display.Javascript object>"
149 |             ],
150 |             "application/javascript": [
151 |               "\n",
152 |               "    async function download(id, filename, size) {\n",
153 |               "      if (!google.colab.kernel.accessAllowed) {\n",
154 |               "        return;\n",
155 |               "      }\n",
156 |               "      const div = document.createElement('div');\n",
157 |               "      const label = document.createElement('label');\n",
158 |               "      label.textContent = `Downloading \"${filename}\": `;\n",
159 |               "      div.appendChild(label);\n",
160 |               "      const progress = document.createElement('progress');\n",
161 |               "      progress.max = size;\n",
162 |               "      div.appendChild(progress);\n",
163 |               "      document.body.appendChild(div);\n",
164 |               "\n",
165 |               "      const buffers = [];\n",
166 |               "      let downloaded = 0;\n",
167 |               "\n",
168 |               "      const channel = await google.colab.kernel.comms.open(id);\n",
169 |               "      // Send a message to notify the kernel that we're ready.\n",
170 |               "      channel.send({})\n",
171 |               "\n",
172 |               "      for await (const message of channel.messages) {\n",
173 |               "        // Send a message to notify the kernel that we're ready.\n",
174 |               "        channel.send({})\n",
175 |               "        if (message.buffers) {\n",
176 |               "          for (const buffer of message.buffers) {\n",
177 |               "            buffers.push(buffer);\n",
178 |               "            downloaded += buffer.byteLength;\n",
179 |               "            progress.value = downloaded;\n",
180 |               "          }\n",
181 |               "        }\n",
182 |               "      }\n",
183 |               "      const blob = new Blob(buffers, {type: 'application/binary'});\n",
184 |               "      const a = document.createElement('a');\n",
185 |               "      a.href = window.URL.createObjectURL(blob);\n",
186 |               "      a.download = filename;\n",
187 |               "      div.appendChild(a);\n",
188 |               "      a.click();\n",
189 |               "      div.remove();\n",
190 |               "    }\n",
191 |               "  "
192 |             ]
193 |           },
194 |           "metadata": {}
195 |         },
196 |         {
197 |           "output_type": "display_data",
198 |           "data": {
199 |             "text/plain": [
200 |               "<IPython.core.display.Javascript object>"
201 |             ],
202 |             "application/javascript": [
203 |               "download(\"download_673e03a6-132b-4c0c-8370-dff56e8244ff\", \"reviews.csv\", 41374)"
204 |             ]
205 |           },
206 |           "metadata": {}
207 |         }
208 |       ]
209 |     }
210 |   ]
211 | }


--------------------------------------------------------------------------------
/Data/heart.csv:
--------------------------------------------------------------------------------
  1 | age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
  2 | 63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
  3 | 37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
  4 | 41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
  5 | 56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
  6 | 57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
  7 | 57,1,0,140,192,0,1,148,0,0.4,1,0,1,1
  8 | 56,0,1,140,294,0,0,153,0,1.3,1,0,2,1
  9 | 44,1,1,120,263,0,1,173,0,0,2,0,3,1
 10 | 52,1,2,172,199,1,1,162,0,0.5,2,0,3,1
 11 | 57,1,2,150,168,0,1,174,0,1.6,2,0,2,1
 12 | 54,1,0,140,239,0,1,160,0,1.2,2,0,2,1
 13 | 48,0,2,130,275,0,1,139,0,0.2,2,0,2,1
 14 | 49,1,1,130,266,0,1,171,0,0.6,2,0,2,1
 15 | 64,1,3,110,211,0,0,144,1,1.8,1,0,2,1
 16 | 58,0,3,150,283,1,0,162,0,1,2,0,2,1
 17 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1
 18 | 58,0,2,120,340,0,1,172,0,0,2,0,2,1
 19 | 66,0,3,150,226,0,1,114,0,2.6,0,0,2,1
 20 | 43,1,0,150,247,0,1,171,0,1.5,2,0,2,1
 21 | 69,0,3,140,239,0,1,151,0,1.8,2,2,2,1
 22 | 59,1,0,135,234,0,1,161,0,0.5,1,0,3,1
 23 | 44,1,2,130,233,0,1,179,1,0.4,2,0,2,1
 24 | 42,1,0,140,226,0,1,178,0,0,2,0,2,1
 25 | 61,1,2,150,243,1,1,137,1,1,1,0,2,1
 26 | 40,1,3,140,199,0,1,178,1,1.4,2,0,3,1
 27 | 71,0,1,160,302,0,1,162,0,0.4,2,2,2,1
 28 | 59,1,2,150,212,1,1,157,0,1.6,2,0,2,1
 29 | 51,1,2,110,175,0,1,123,0,0.6,2,0,2,1
 30 | 65,0,2,140,417,1,0,157,0,0.8,2,1,2,1
 31 | 53,1,2,130,197,1,0,152,0,1.2,0,0,2,1
 32 | 41,0,1,105,198,0,1,168,0,0,2,1,2,1
 33 | 65,1,0,120,177,0,1,140,0,0.4,2,0,3,1
 34 | 44,1,1,130,219,0,0,188,0,0,2,0,2,1
 35 | 54,1,2,125,273,0,0,152,0,0.5,0,1,2,1
 36 | 51,1,3,125,213,0,0,125,1,1.4,2,1,2,1
 37 | 46,0,2,142,177,0,0,160,1,1.4,0,0,2,1
 38 | 54,0,2,135,304,1,1,170,0,0,2,0,2,1
 39 | 54,1,2,150,232,0,0,165,0,1.6,2,0,3,1
 40 | 65,0,2,155,269,0,1,148,0,0.8,2,0,2,1
 41 | 65,0,2,160,360,0,0,151,0,0.8,2,0,2,1
 42 | 51,0,2,140,308,0,0,142,0,1.5,2,1,2,1
 43 | 48,1,1,130,245,0,0,180,0,0.2,1,0,2,1
 44 | 45,1,0,104,208,0,0,148,1,3,1,0,2,1
 45 | 53,0,0,130,264,0,0,143,0,0.4,1,0,2,1
 46 | 39,1,2,140,321,0,0,182,0,0,2,0,2,1
 47 | 52,1,1,120,325,0,1,172,0,0.2,2,0,2,1
 48 | 44,1,2,140,235,0,0,180,0,0,2,0,2,1
 49 | 47,1,2,138,257,0,0,156,0,0,2,0,2,1
 50 | 53,0,2,128,216,0,0,115,0,0,2,0,0,1
 51 | 53,0,0,138,234,0,0,160,0,0,2,0,2,1
 52 | 51,0,2,130,256,0,0,149,0,0.5,2,0,2,1
 53 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1
 54 | 62,1,2,130,231,0,1,146,0,1.8,1,3,3,1
 55 | 44,0,2,108,141,0,1,175,0,0.6,1,0,2,1
 56 | 63,0,2,135,252,0,0,172,0,0,2,0,2,1
 57 | 52,1,1,134,201,0,1,158,0,0.8,2,1,2,1
 58 | 48,1,0,122,222,0,0,186,0,0,2,0,2,1
 59 | 45,1,0,115,260,0,0,185,0,0,2,0,2,1
 60 | 34,1,3,118,182,0,0,174,0,0,2,0,2,1
 61 | 57,0,0,128,303,0,0,159,0,0,2,1,2,1
 62 | 71,0,2,110,265,1,0,130,0,0,2,1,2,1
 63 | 54,1,1,108,309,0,1,156,0,0,2,0,3,1
 64 | 52,1,3,118,186,0,0,190,0,0,1,0,1,1
 65 | 41,1,1,135,203,0,1,132,0,0,1,0,1,1
 66 | 58,1,2,140,211,1,0,165,0,0,2,0,2,1
 67 | 35,0,0,138,183,0,1,182,0,1.4,2,0,2,1
 68 | 51,1,2,100,222,0,1,143,1,1.2,1,0,2,1
 69 | 45,0,1,130,234,0,0,175,0,0.6,1,0,2,1
 70 | 44,1,1,120,220,0,1,170,0,0,2,0,2,1
 71 | 62,0,0,124,209,0,1,163,0,0,2,0,2,1
 72 | 54,1,2,120,258,0,0,147,0,0.4,1,0,3,1
 73 | 51,1,2,94,227,0,1,154,1,0,2,1,3,1
 74 | 29,1,1,130,204,0,0,202,0,0,2,0,2,1
 75 | 51,1,0,140,261,0,0,186,1,0,2,0,2,1
 76 | 43,0,2,122,213,0,1,165,0,0.2,1,0,2,1
 77 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1
 78 | 51,1,2,125,245,1,0,166,0,2.4,1,0,2,1
 79 | 59,1,1,140,221,0,1,164,1,0,2,0,2,1
 80 | 52,1,1,128,205,1,1,184,0,0,2,0,2,1
 81 | 58,1,2,105,240,0,0,154,1,0.6,1,0,3,1
 82 | 41,1,2,112,250,0,1,179,0,0,2,0,2,1
 83 | 45,1,1,128,308,0,0,170,0,0,2,0,2,1
 84 | 60,0,2,102,318,0,1,160,0,0,2,1,2,1
 85 | 52,1,3,152,298,1,1,178,0,1.2,1,0,3,1
 86 | 42,0,0,102,265,0,0,122,0,0.6,1,0,2,1
 87 | 67,0,2,115,564,0,0,160,0,1.6,1,0,3,1
 88 | 68,1,2,118,277,0,1,151,0,1,2,1,3,1
 89 | 46,1,1,101,197,1,1,156,0,0,2,0,3,1
 90 | 54,0,2,110,214,0,1,158,0,1.6,1,0,2,1
 91 | 58,0,0,100,248,0,0,122,0,1,1,0,2,1
 92 | 48,1,2,124,255,1,1,175,0,0,2,2,2,1
 93 | 57,1,0,132,207,0,1,168,1,0,2,0,3,1
 94 | 52,1,2,138,223,0,1,169,0,0,2,4,2,1
 95 | 54,0,1,132,288,1,0,159,1,0,2,1,2,1
 96 | 45,0,1,112,160,0,1,138,0,0,1,0,2,1
 97 | 53,1,0,142,226,0,0,111,1,0,2,0,3,1
 98 | 62,0,0,140,394,0,0,157,0,1.2,1,0,2,1
 99 | 52,1,0,108,233,1,1,147,0,0.1,2,3,3,1
100 | 43,1,2,130,315,0,1,162,0,1.9,2,1,2,1
101 | 53,1,2,130,246,1,0,173,0,0,2,3,2,1
102 | 42,1,3,148,244,0,0,178,0,0.8,2,2,2,1
103 | 59,1,3,178,270,0,0,145,0,4.2,0,0,3,1
104 | 63,0,1,140,195,0,1,179,0,0,2,2,2,1
105 | 42,1,2,120,240,1,1,194,0,0.8,0,0,3,1
106 | 50,1,2,129,196,0,1,163,0,0,2,0,2,1
107 | 68,0,2,120,211,0,0,115,0,1.5,1,0,2,1
108 | 69,1,3,160,234,1,0,131,0,0.1,1,1,2,1
109 | 45,0,0,138,236,0,0,152,1,0.2,1,0,2,1
110 | 50,0,1,120,244,0,1,162,0,1.1,2,0,2,1
111 | 50,0,0,110,254,0,0,159,0,0,2,0,2,1
112 | 64,0,0,180,325,0,1,154,1,0,2,0,2,1
113 | 57,1,2,150,126,1,1,173,0,0.2,2,1,3,1
114 | 64,0,2,140,313,0,1,133,0,0.2,2,0,3,1
115 | 43,1,0,110,211,0,1,161,0,0,2,0,3,1
116 | 55,1,1,130,262,0,1,155,0,0,2,0,2,1
117 | 37,0,2,120,215,0,1,170,0,0,2,0,2,1
118 | 41,1,2,130,214,0,0,168,0,2,1,0,2,1
119 | 56,1,3,120,193,0,0,162,0,1.9,1,0,3,1
120 | 46,0,1,105,204,0,1,172,0,0,2,0,2,1
121 | 46,0,0,138,243,0,0,152,1,0,1,0,2,1
122 | 64,0,0,130,303,0,1,122,0,2,1,2,2,1
123 | 59,1,0,138,271,0,0,182,0,0,2,0,2,1
124 | 41,0,2,112,268,0,0,172,1,0,2,0,2,1
125 | 54,0,2,108,267,0,0,167,0,0,2,0,2,1
126 | 39,0,2,94,199,0,1,179,0,0,2,0,2,1
127 | 34,0,1,118,210,0,1,192,0,0.7,2,0,2,1
128 | 47,1,0,112,204,0,1,143,0,0.1,2,0,2,1
129 | 67,0,2,152,277,0,1,172,0,0,2,1,2,1
130 | 52,0,2,136,196,0,0,169,0,0.1,1,0,2,1
131 | 74,0,1,120,269,0,0,121,1,0.2,2,1,2,1
132 | 54,0,2,160,201,0,1,163,0,0,2,1,2,1
133 | 49,0,1,134,271,0,1,162,0,0,1,0,2,1
134 | 42,1,1,120,295,0,1,162,0,0,2,0,2,1
135 | 41,1,1,110,235,0,1,153,0,0,2,0,2,1
136 | 41,0,1,126,306,0,1,163,0,0,2,0,2,1
137 | 49,0,0,130,269,0,1,163,0,0,2,0,2,1
138 | 60,0,2,120,178,1,1,96,0,0,2,0,2,1
139 | 62,1,1,128,208,1,0,140,0,0,2,0,2,1
140 | 57,1,0,110,201,0,1,126,1,1.5,1,0,1,1
141 | 64,1,0,128,263,0,1,105,1,0.2,1,1,3,1
142 | 51,0,2,120,295,0,0,157,0,0.6,2,0,2,1
143 | 43,1,0,115,303,0,1,181,0,1.2,1,0,2,1
144 | 42,0,2,120,209,0,1,173,0,0,1,0,2,1
145 | 67,0,0,106,223,0,1,142,0,0.3,2,2,2,1
146 | 76,0,2,140,197,0,2,116,0,1.1,1,0,2,1
147 | 70,1,1,156,245,0,0,143,0,0,2,0,2,1
148 | 44,0,2,118,242,0,1,149,0,0.3,1,1,2,1
149 | 60,0,3,150,240,0,1,171,0,0.9,2,0,2,1
150 | 44,1,2,120,226,0,1,169,0,0,2,0,2,1
151 | 42,1,2,130,180,0,1,150,0,0,2,0,2,1
152 | 66,1,0,160,228,0,0,138,0,2.3,2,0,1,1
153 | 71,0,0,112,149,0,1,125,0,1.6,1,0,2,1
154 | 64,1,3,170,227,0,0,155,0,0.6,1,0,3,1
155 | 66,0,2,146,278,0,0,152,0,0,1,1,2,1
156 | 39,0,2,138,220,0,1,152,0,0,1,0,2,1
157 | 58,0,0,130,197,0,1,131,0,0.6,1,0,2,1
158 | 47,1,2,130,253,0,1,179,0,0,2,0,2,1
159 | 35,1,1,122,192,0,1,174,0,0,2,0,2,1
160 | 58,1,1,125,220,0,1,144,0,0.4,1,4,3,1
161 | 56,1,1,130,221,0,0,163,0,0,2,0,3,1
162 | 56,1,1,120,240,0,1,169,0,0,0,0,2,1
163 | 55,0,1,132,342,0,1,166,0,1.2,2,0,2,1
164 | 41,1,1,120,157,0,1,182,0,0,2,0,2,1
165 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1
166 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1
167 | 67,1,0,160,286,0,0,108,1,1.5,1,3,2,0
168 | 67,1,0,120,229,0,0,129,1,2.6,1,2,3,0
169 | 62,0,0,140,268,0,0,160,0,3.6,0,2,2,0
170 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0
171 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
172 | 56,1,2,130,256,1,0,142,1,0.6,1,1,1,0
173 | 48,1,1,110,229,0,1,168,0,1,0,0,3,0
174 | 58,1,1,120,284,0,0,160,0,1.8,1,0,2,0
175 | 58,1,2,132,224,0,0,173,0,3.2,2,2,3,0
176 | 60,1,0,130,206,0,0,132,1,2.4,1,2,3,0
177 | 40,1,0,110,167,0,0,114,1,2,1,0,3,0
178 | 60,1,0,117,230,1,1,160,1,1.4,2,2,3,0
179 | 64,1,2,140,335,0,1,158,0,0,2,0,2,0
180 | 43,1,0,120,177,0,0,120,1,2.5,1,0,3,0
181 | 57,1,0,150,276,0,0,112,1,0.6,1,1,1,0
182 | 55,1,0,132,353,0,1,132,1,1.2,1,1,3,0
183 | 65,0,0,150,225,0,0,114,0,1,1,3,3,0
184 | 61,0,0,130,330,0,0,169,0,0,2,0,2,0
185 | 58,1,2,112,230,0,0,165,0,2.5,1,1,3,0
186 | 50,1,0,150,243,0,0,128,0,2.6,1,0,3,0
187 | 44,1,0,112,290,0,0,153,0,0,2,1,2,0
188 | 60,1,0,130,253,0,1,144,1,1.4,2,1,3,0
189 | 54,1,0,124,266,0,0,109,1,2.2,1,1,3,0
190 | 50,1,2,140,233,0,1,163,0,0.6,1,1,3,0
191 | 41,1,0,110,172,0,0,158,0,0,2,0,3,0
192 | 51,0,0,130,305,0,1,142,1,1.2,1,0,3,0
193 | 58,1,0,128,216,0,0,131,1,2.2,1,3,3,0
194 | 54,1,0,120,188,0,1,113,0,1.4,1,1,3,0
195 | 60,1,0,145,282,0,0,142,1,2.8,1,2,3,0
196 | 60,1,2,140,185,0,0,155,0,3,1,0,2,0
197 | 59,1,0,170,326,0,0,140,1,3.4,0,0,3,0
198 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0
199 | 67,1,0,125,254,1,1,163,0,0.2,1,2,3,0
200 | 62,1,0,120,267,0,1,99,1,1.8,1,2,3,0
201 | 65,1,0,110,248,0,0,158,0,0.6,2,2,1,0
202 | 44,1,0,110,197,0,0,177,0,0,2,1,2,0
203 | 60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
204 | 58,1,0,150,270,0,0,111,1,0.8,2,0,3,0
205 | 68,1,2,180,274,1,0,150,1,1.6,1,0,3,0
206 | 62,0,0,160,164,0,0,145,0,6.2,0,3,3,0
207 | 52,1,0,128,255,0,1,161,1,0,2,1,3,0
208 | 59,1,0,110,239,0,0,142,1,1.2,1,1,3,0
209 | 60,0,0,150,258,0,0,157,0,2.6,1,2,3,0
210 | 49,1,2,120,188,0,1,139,0,2,1,3,3,0
211 | 59,1,0,140,177,0,1,162,1,0,2,1,3,0
212 | 57,1,2,128,229,0,0,150,0,0.4,1,1,3,0
213 | 61,1,0,120,260,0,1,140,1,3.6,1,1,3,0
214 | 39,1,0,118,219,0,1,140,0,1.2,1,0,3,0
215 | 61,0,0,145,307,0,0,146,1,1,1,0,3,0
216 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0
217 | 43,0,0,132,341,1,0,136,1,3,1,0,3,0
218 | 62,0,2,130,263,0,1,97,0,1.2,1,1,3,0
219 | 63,1,0,130,330,1,0,132,1,1.8,2,3,3,0
220 | 65,1,0,135,254,0,0,127,0,2.8,1,1,3,0
221 | 48,1,0,130,256,1,0,150,1,0,2,2,3,0
222 | 63,0,0,150,407,0,0,154,0,4,1,3,3,0
223 | 55,1,0,140,217,0,1,111,1,5.6,0,0,3,0
224 | 65,1,3,138,282,1,0,174,0,1.4,1,1,2,0
225 | 56,0,0,200,288,1,0,133,1,4,0,2,3,0
226 | 54,1,0,110,239,0,1,126,1,2.8,1,1,3,0
227 | 70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
228 | 62,1,1,120,281,0,0,103,0,1.4,1,1,3,0
229 | 35,1,0,120,198,0,1,130,1,1.6,1,0,3,0
230 | 59,1,3,170,288,0,0,159,0,0.2,1,0,3,0
231 | 64,1,2,125,309,0,1,131,1,1.8,1,0,3,0
232 | 47,1,2,108,243,0,1,152,0,0,2,0,2,0
233 | 57,1,0,165,289,1,0,124,0,1,1,3,3,0
234 | 55,1,0,160,289,0,0,145,1,0.8,1,1,3,0
235 | 64,1,0,120,246,0,0,96,1,2.2,0,1,2,0
236 | 70,1,0,130,322,0,0,109,0,2.4,1,3,2,0
237 | 51,1,0,140,299,0,1,173,1,1.6,2,0,3,0
238 | 58,1,0,125,300,0,0,171,0,0,2,2,3,0
239 | 60,1,0,140,293,0,0,170,0,1.2,1,2,3,0
240 | 77,1,0,125,304,0,0,162,1,0,2,3,2,0
241 | 35,1,0,126,282,0,0,156,1,0,2,0,3,0
242 | 70,1,2,160,269,0,1,112,1,2.9,1,1,3,0
243 | 59,0,0,174,249,0,1,143,1,0,1,0,2,0
244 | 64,1,0,145,212,0,0,132,0,2,1,2,1,0
245 | 57,1,0,152,274,0,1,88,1,1.2,1,1,3,0
246 | 56,1,0,132,184,0,0,105,1,2.1,1,1,1,0
247 | 48,1,0,124,274,0,0,166,0,0.5,1,0,3,0
248 | 56,0,0,134,409,0,0,150,1,1.9,1,2,3,0
249 | 66,1,1,160,246,0,1,120,1,0,1,3,1,0
250 | 54,1,1,192,283,0,0,195,0,0,2,1,3,0
251 | 69,1,2,140,254,0,0,146,0,2,1,3,3,0
252 | 51,1,0,140,298,0,1,122,1,4.2,1,3,3,0
253 | 43,1,0,132,247,1,0,143,1,0.1,1,4,3,0
254 | 62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
255 | 67,1,0,100,299,0,0,125,1,0.9,1,2,2,0
256 | 59,1,3,160,273,0,0,125,0,0,2,0,2,0
257 | 45,1,0,142,309,0,0,147,1,0,1,3,3,0
258 | 58,1,0,128,259,0,0,130,1,3,1,2,3,0
259 | 50,1,0,144,200,0,0,126,1,0.9,1,0,3,0
260 | 62,0,0,150,244,0,1,154,1,1.4,1,0,2,0
261 | 38,1,3,120,231,0,1,182,1,3.8,1,0,3,0
262 | 66,0,0,178,228,1,1,165,1,1,1,2,3,0
263 | 52,1,0,112,230,0,1,160,0,0,2,1,2,0
264 | 53,1,0,123,282,0,1,95,1,2,1,2,3,0
265 | 63,0,0,108,269,0,1,169,1,1.8,1,2,2,0
266 | 54,1,0,110,206,0,0,108,1,0,1,1,2,0
267 | 66,1,0,112,212,0,0,132,1,0.1,2,1,2,0
268 | 55,0,0,180,327,0,2,117,1,3.4,1,0,2,0
269 | 49,1,2,118,149,0,0,126,0,0.8,2,3,2,0
270 | 54,1,0,122,286,0,0,116,1,3.2,1,2,2,0
271 | 56,1,0,130,283,1,0,103,1,1.6,0,0,3,0
272 | 46,1,0,120,249,0,0,144,0,0.8,2,0,3,0
273 | 61,1,3,134,234,0,1,145,0,2.6,1,2,2,0
274 | 67,1,0,120,237,0,1,71,0,1,1,0,2,0
275 | 58,1,0,100,234,0,1,156,0,0.1,2,1,3,0
276 | 47,1,0,110,275,0,0,118,1,1,1,1,2,0
277 | 52,1,0,125,212,0,1,168,0,1,2,2,3,0
278 | 58,1,0,146,218,0,1,105,0,2,1,1,3,0
279 | 57,1,1,124,261,0,1,141,0,0.3,2,0,3,0
280 | 58,0,1,136,319,1,0,152,0,0,2,2,2,0
281 | 61,1,0,138,166,0,0,125,1,3.6,1,1,2,0
282 | 42,1,0,136,315,0,1,125,1,1.8,1,0,1,0
283 | 52,1,0,128,204,1,1,156,1,1,1,0,0,0
284 | 59,1,2,126,218,1,1,134,0,2.2,1,1,1,0
285 | 40,1,0,152,223,0,1,181,0,0,2,0,3,0
286 | 61,1,0,140,207,0,0,138,1,1.9,2,1,3,0
287 | 46,1,0,140,311,0,1,120,1,1.8,1,2,3,0
288 | 59,1,3,134,204,0,1,162,0,0.8,2,2,2,0
289 | 57,1,1,154,232,0,0,164,0,0,2,1,2,0
290 | 57,1,0,110,335,0,1,143,1,3,1,1,3,0
291 | 55,0,0,128,205,0,2,130,1,2,1,1,3,0
292 | 61,1,0,148,203,0,1,161,0,0,2,1,3,0
293 | 58,1,0,114,318,0,2,140,0,4.4,0,3,1,0
294 | 58,0,0,170,225,1,0,146,1,2.8,1,2,1,0
295 | 67,1,2,152,212,0,0,150,0,0.8,1,0,3,0
296 | 44,1,0,120,169,0,1,144,1,2.8,0,0,1,0
297 | 63,1,0,140,187,0,0,144,1,4,2,2,3,0
298 | 63,0,0,124,197,0,1,136,1,0,1,0,2,0
299 | 59,1,0,164,176,1,0,90,0,1,1,2,1,0
300 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
301 | 45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
302 | 68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
303 | 57,1,0,130,131,0,1,115,1,1.2,1,1,3,0
304 | 57,0,1,130,236,0,0,174,0,0,1,1,2,0
305 | 


--------------------------------------------------------------------------------
/Data/euro2016goals.csv:
--------------------------------------------------------------------------------
 1 | FULL_NAME,AGE,LEAGUE,SEASON,POSITION,CURRENT_CLUB,MINUTES_PLAYED_OVERALL,MINUTES_PLAYED_HOME,MINUTES_PLAYED_AWAY,NATIONALITY,APPEARANCES_OVERALL,APPEARANCES_HOME,APPEARANCES_AWAY,GOALS_OVERALL,GOALS_HOME,GOALS_AWAY,ASSISTS_OVERALL,ASSISTS_HOME,ASSISTS_AWAY,PENALTY_GOALS,PENALTY_MISSES,CLEAN_SHEETS_OVERALL,CLEAN_SHEETS_HOME,CLEAN_SHEETS_AWAY,CONCEDED_OVERALL,CONCEDED_HOME,CONCEDED_AWAY,YELLOW_CARDS_OVERALL,RED_CARDS_OVERALL,GOALS_INVOLVED_PER_90_OVERALL,ASSISTS_PER_90_OVERALL,GOALS_PER_90_OVERALL,GOALS_PER_90_HOME,GOALS_PER_90_AWAY,MIN_PER_GOAL_OVERALL,CONCEDED_PER_90_OVERALL,MIN_PER_CONCEDED_OVERALL,MIN_PER_MATCH,MIN_PER_CARD_OVERALL,MIN_PER_ASSIST_OVERALL,CARDS_PER_90_OVERALL
 2 | Aaron Ramsey,30,UEFA Euro Championship,2016,Midfielder,Wales,448,268,180,Wales,5,3,2,1,0,1,4,3,1,0,0,2,1,1,3,2,1,2,0,1,0.8,0.2,0,0.5,448,0.6,149,90,224,112,0.4
 3 | Adam Szalai,33,UEFA Euro Championship,2016,Forward,Hungary,235,161,74,Hungary,4,2,2,1,0,1,0,0,0,0,0,1,0,1,6,6,0,1,0,0.38,0,0.38,0,1.22,235,2.3,39,59,235,0,0.38
 4 | Admir Mehmedi,30,UEFA Euro Championship,2016,Forward,Switzerland,308,156,152,Switzerland,4,2,2,1,0,1,0,0,0,0,0,2,1,1,2,1,1,0,0,0.29,0,0.29,0,0.59,308,0.58,154,77,0,0,0
 5 | Alessandro Schoepf,27,UEFA Euro Championship,2016,Midfielder,Austria,81,12,69,Austria,3,1,2,1,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1.11,0,1.11,0,1.3,81,1.11,81,27,81,0,1.11
 6 | Alvaro Morata,28,UEFA Euro Championship,2016,Forward,Spain,289,152,137,Spain,4,2,2,3,2,1,0,0,0,0,0,2,2,0,2,0,2,0,0,0.93,0,0.93,1.18,0.66,96,0.62,145,72,0,0,0
 7 | Antoine Griezmann,30,UEFA Euro Championship,2016,Forward,France,525,268,257,France,7,4,3,6,4,2,2,2,0,1,0,3,1,2,4,4,0,0,0,1.37,0.34,1.03,1.34,0.7,88,0.69,131,75,0,263,0
 8 | Arkadiusz Milik,27,UEFA Euro Championship,2016,Forward,Poland,450,180,270,Poland,5,2,3,1,1,0,1,0,1,0,0,3,1,2,2,1,1,0,0,0.4,0.2,0.2,0.5,0,450,0.4,225,90,0,450,0
 9 | Armando Sadiku,29,UEFA Euro Championship,2016,Forward,Albania,231,82,149,Albania,3,1,2,1,0,1,0,0,0,0,0,1,0,1,1,1,0,0,0,0.39,0,0.39,0,0.6,231,0.39,231,77,0,0,0
10 | Traustason,27,UEFA Euro Championship,2016,Midfielder,Iceland,11,10,1,Iceland,2,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8.18,0,8.18,9,0,11,0,0,6,0,0,0
11 | Ashley Williams,36,UEFA Euro Championship,2016,Defender,Wales,540,270,270,Wales,6,3,3,1,1,0,0,0,0,0,0,2,1,1,5,2,3,0,0,0.17,0,0.17,0.33,0,540,0.83,108,90,0,0,0
12 | Axel Witsel,32,UEFA Euro Championship,2016,Midfielder,Belgium,450,180,270,Belgium,5,2,3,1,1,0,0,0,0,0,0,3,1,2,4,1,3,1,0,0.2,0,0.2,0.5,0,450,0.8,113,90,450,0,0.2
13 | Balazs Dzsudzsak,34,UEFA Euro Championship,2016,Midfielder,Hungary,360,180,180,Hungary,4,2,2,2,2,0,0,0,0,0,0,1,0,1,7,6,1,1,0,0.5,0,0.5,1,0,180,1.75,51,90,360,0,0.25
14 | Bastian Schweinsteiger,36,UEFA Euro Championship,2016,Midfielder,Germany,188,167,21,Germany,5,4,1,1,1,0,0,0,0,0,0,3,2,1,3,3,0,2,0,0.48,0,0.48,0.54,0,188,1.44,63,38,94,0,0.96
15 | Birkir Bjarnason,32,UEFA Euro Championship,2016,Midfielder,Iceland,450,180,270,Iceland,5,2,3,2,0,2,0,0,0,0,0,0,0,0,9,2,7,2,0,0.4,0,0.4,0,0.67,225,1.8,50,90,225,0,0.4
16 | Bogdan Sorin Stancu,33,UEFA Euro Championship,2016,Forward,Romania,264,174,90,Romania,3,2,1,2,1,1,0,0,0,2,0,0,0,0,4,2,2,0,0,0.68,0,0.68,0.52,1,132,1.36,66,88,0,0,0
17 | Burak yilmaz,35,UEFA Euro Championship,2016,Forward,Turkey,205,25,180,Turkey,3,1,2,1,0,1,0,0,0,0,0,1,0,1,3,0,3,1,0,0.44,0,0.44,0,0.5,205,1.32,68,68,205,0,0.44
18 | Cristiano Ronaldo,36,UEFA Euro Championship,2016,Forward,Portugal,565,295,270,Portugal,7,4,3,3,1,2,2,1,1,0,1,4,3,1,5,1,4,1,0,0.8,0.32,0.48,0.31,0.67,188,0.8,113,81,565,283,0.16
19 | Daniel Sturridge,31,UEFA Euro Championship,2016,Forward,England,210,134,76,England,3,2,1,1,1,0,0,0,0,0,0,1,0,1,2,2,0,1,0,0.43,0,0.43,0.67,0,210,0.86,105,70,210,0,0.43
20 | Denis Glushakov,34,UEFA Euro Championship,2016,Midfielder,Russia,144,134,10,Russia,3,2,1,1,1,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0.63,0,0.63,0.67,0,144,1.88,48,48,0,0,0
21 | Dimitri Payet,34,UEFA Euro Championship,2016,Midfielder,France,506,350,156,France,7,4,3,3,3,0,2,2,0,0,0,3,1,2,3,3,0,0,0,0.89,0.36,0.53,0.77,0,169,0.53,169,72,0,253,0
22 | Eden Hazard,30,UEFA Euro Championship,2016,Midfielder,Belgium,441,180,261,Belgium,5,2,3,1,0,1,4,1,3,0,0,3,1,2,4,1,3,0,0,1.02,0.82,0.2,0,0.34,441,0.82,110,88,0,110,0
23 | Eder Citadin Martins,34,UEFA Euro Championship,2016,Forward,Italy,355,172,183,Italy,4,2,2,1,1,0,0,0,0,0,0,3,2,1,1,0,1,1,0,0.25,0,0.25,0.52,0,355,0.25,355,89,355,0,0.25
24 | Yannick Carrasco,33,UEFA Euro Championship,2016,Forward,Portugal,24,24,0,Portugal,3,3,0,1,1,0,0,0,0,0,0,2,2,0,0,0,0,0,0,3.75,0,3.75,3.75,0,24,0,0,8,0,0,0
25 | Emanuele Giaccherini,35,UEFA Euro Championship,2016,Midfielder,Italy,360,180,180,Italy,4,2,2,1,0,1,1,1,0,0,0,3,2,1,1,0,1,1,0,0.5,0.25,0.25,0,0.5,360,0.25,360,90,360,360,0.25
26 | Eric Dier,27,UEFA Euro Championship,2016,Midfielder,England,316,226,90,England,4,3,1,1,1,0,0,0,0,0,0,1,0,1,3,3,0,0,0,0.28,0,0.28,0.4,0,316,0.85,105,79,0,0,0
27 | Fabian Schar,29,UEFA Euro Championship,2016,Defender,Switzerland,360,180,180,Switzerland,4,2,2,1,0,1,0,0,0,0,0,2,1,1,2,1,1,2,0,0.25,0,0.25,0,0.5,360,0.5,180,90,180,0,0.5
28 | Gareth Bale,31,UEFA Euro Championship,2016,Forward,Wales,533,270,263,Wales,6,3,3,3,1,2,0,0,0,0,0,2,1,1,5,2,3,1,0,0.51,0,0.51,0.33,0.68,178,0.84,107,89,533,0,0.17
29 | Gareth McAuley,41,UEFA Euro Championship,2016,Defender,Northern Ireland,354,90,264,Northern Ireland,4,1,3,1,0,1,0,0,0,0,0,1,0,1,3,1,2,0,0,0.25,0,0.25,0,0.34,354,0.76,118,89,0,0,0
30 | Gerard Pique,34,UEFA Euro Championship,2016,Defender,Spain,360,180,180,Spain,4,2,2,1,1,0,0,0,0,0,0,2,2,0,3,0,3,0,0,0.25,0,0.25,0.5,0,360,0.75,120,90,0,0,0
31 | Giorgio Chiellini,36,UEFA Euro Championship,2016,Defender,Italy,390,180,210,Italy,4,2,2,1,1,0,0,0,0,0,0,3,2,1,1,0,1,1,0,0.23,0,0.23,0.5,0,390,0.23,390,98,390,0,0.23
32 | Graziano Pelle,35,UEFA Euro Championship,2016,Forward,Italy,330,150,180,Italy,4,2,2,2,1,1,0,0,0,0,0,3,2,1,1,0,1,2,0,0.55,0,0.55,0.6,0.5,165,0.27,330,83,165,0,0.55
33 | Gylfi Sigurdsson,31,UEFA Euro Championship,2016,Midfielder,Iceland,450,180,270,Iceland,5,2,3,1,1,0,1,0,1,1,0,0,0,0,9,2,7,1,0,0.4,0.2,0.2,0.5,0,450,1.8,50,90,450,450,0.2
34 | Hal Robson-Kanu,31,UEFA Euro Championship,2016,Forward,Wales,269,134,135,Wales,5,3,2,2,2,0,0,0,0,0,0,1,1,0,4,1,3,0,0,0.67,0,0.67,1.34,0,135,1.34,67,54,0,0,0
35 | Ivan Perisic,32,UEFA Euro Championship,2016,Midfielder,Croatia,357,180,177,Croatia,4,2,2,2,1,1,1,1,0,0,0,1,0,1,3,1,2,1,0,0.76,0.25,0.5,0.5,0.51,179,0.76,119,89,357,357,0.25
36 | Ivan Rakitic,33,UEFA Euro Championship,2016,Midfielder,Croatia,379,200,179,Croatia,4,2,2,1,0,1,0,0,0,0,0,1,0,1,3,1,2,0,0,0.24,0,0.24,0,0.5,379,0.71,126,95,0,0,0
37 | Jakub Blaszczykowski,35,UEFA Euro Championship,2016,Midfielder,Poland,384,170,214,Poland,5,2,3,2,0,2,1,1,0,0,0,3,1,2,2,1,1,0,0,0.7,0.23,0.47,0,0.84,192,0.47,192,77,0,384,0
38 | Jamie Vardy,34,UEFA Euro Championship,2016,Forward,England,164,74,90,England,3,2,1,1,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0.55,0,0.55,1.22,0,164,0,0,55,0,0,0
39 | Jerome Boateng,32,UEFA Euro Championship,2016,Defender,Germany,478,402,76,Germany,6,5,1,1,1,0,0,0,0,0,0,4,3,1,2,2,0,1,0,0.19,0,0.19,0.22,0,478,0.38,239,80,478,0,0.19
40 | Jon Davi Bodvarsson,28,UEFA Euro Championship,2016,Forward,Iceland,365,140,225,Iceland,5,2,3,1,1,0,1,0,1,0,0,0,0,0,7,1,6,0,0,0.49,0.25,0.25,0.64,0,365,1.73,52,73,0,365,0
41 | Julian Draxler,27,UEFA Euro Championship,2016,Midfielder,Germany,330,330,0,Germany,5,5,0,1,1,0,1,1,0,0,0,3,3,0,3,3,0,1,0,0.55,0.27,0.27,0.27,0,330,0.82,110,66,330,330,0.27
42 | Kolbeinn Sigarsson,31,UEFA Euro Championship,2016,Forward,Iceland,405,164,241,Iceland,5,2,3,2,0,2,0,0,0,0,0,0,0,0,8,1,7,1,0,0.44,0,0.44,0,0.75,203,1.78,51,81,405,0,0.22
43 | Leonardo Bonucci,33,UEFA Euro Championship,2016,Defender,Italy,450,270,180,Italy,5,3,2,1,0,1,1,0,1,1,0,3,2,1,2,1,1,1,0,0.4,0.2,0.2,0,0.5,450,0.4,225,90,450,450,0.2
44 | Luis Carlos Almeida da Cunha,34,UEFA Euro Championship,2016,Midfielder,Portugal,616,355,261,Portugal,7,4,3,3,2,1,1,0,1,0,0,4,3,1,5,1,4,0,0,0.58,0.15,0.44,0.51,0.34,205,0.73,123,88,0,616,0
45 | Luka Modic,35,UEFA Euro Championship,2016,Midfielder,Croatia,242,90,152,Croatia,3,1,2,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0.37,0,0.37,0,0.59,242,0,0,81,0,0,0
46 | Marek Hamsik,33,UEFA Euro Championship,2016,Midfielder,Slovakia,360,90,270,Slovakia,4,1,3,1,0,1,1,0,1,0,0,1,1,0,6,0,6,0,0,0.5,0.25,0.25,0,0.33,360,1.5,60,90,0,360,0
47 | Mario Gomez,35,UEFA Euro Championship,2016,Forward,Germany,270,180,90,Germany,4,3,1,2,1,1,0,0,0,0,0,3,2,1,0,0,0,0,0,0.67,0,0.67,0.5,1,135,0,0,68,0,0,0
48 | Mesut Ozil,32,UEFA Euro Championship,2016,Midfielder,Germany,540,450,90,Germany,6,5,1,1,1,0,1,1,0,0,1,4,3,1,3,3,0,2,0,0.33,0.17,0.17,0.2,0,540,0.5,180,90,270,540,0.33
49 | Michy Batshuayi,27,UEFA Euro Championship,2016,Forward,Belgium,21,0,21,Belgium,2,0,2,1,0,1,0,0,0,0,0,1,0,1,1,0,1,1,0,4.29,0,4.29,0,4.29,21,4.29,21,11,21,0,4.29
50 | Milan Skoda,35,UEFA Euro Championship,2016,Forward,Czech Republic,57,57,0,Czech Republic,2,2,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1.58,0,1.58,1.58,0,57,1.58,57,29,0,0,0
51 | Neil Taylor,32,UEFA Euro Championship,2016,Defender,Wales,540,270,270,Wales,6,3,3,1,0,1,0,0,0,0,0,2,1,1,5,2,3,1,0,0.17,0,0.17,0,0.33,540,0.83,108,90,540,0,0.17
52 | Niall McGinn,33,UEFA Euro Championship,2016,Midfielder,Northern Ireland,38,6,32,Northern Ireland,3,1,2,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,2.37,0,2.37,0,2.81,38,0,0,13,0,0,0
53 | Nikola Kalinic,33,UEFA Euro Championship,2016,Forward,Croatia,92,92,0,Croatia,2,2,0,1,1,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1.96,0.98,0.98,0.98,0,92,0.98,92,46,0,92,0
54 | Nolito,34,UEFA Euro Championship,2016,Forward,Spain,278,172,106,Spain,4,2,2,1,1,0,1,1,0,0,0,2,2,0,2,0,2,1,0,0.65,0.32,0.32,0.52,0,278,0.65,139,70,278,278,0.32
55 | Olivier Giroud,34,UEFA Euro Championship,2016,Forward,France,456,300,156,France,6,4,2,3,3,0,2,2,0,0,0,2,1,1,3,3,0,1,0,0.99,0.39,0.59,0.9,0,152,0.59,152,76,456,228,0.2
56 | Ondrej Duda,26,UEFA Euro Championship,2016,Midfielder,Slovakia,154,57,97,Slovakia,3,1,2,1,0,1,0,0,0,0,0,1,1,0,1,0,1,0,0,0.58,0,0.58,0,0.93,154,0.58,154,51,0,0,0
57 | Ozan Tufan,26,UEFA Euro Championship,2016,Midfielder,Turkey,270,90,180,Turkey,3,1,2,1,0,1,0,0,0,0,0,1,0,1,4,1,3,1,0,0.33,0,0.33,0,0.5,270,1.33,68,90,270,0,0.33
58 | Paul Pogba,28,UEFA Euro Championship,2016,Midfielder,France,571,301,270,France,7,4,3,1,1,0,0,0,0,0,0,3,1,2,4,4,0,1,0,0.16,0,0.16,0.3,0,571,0.63,143,82,571,0,0.16
59 | Radja Nainggolan,32,UEFA Euro Championship,2016,Midfielder,Belgium,365,95,270,Belgium,5,2,3,2,0,2,1,0,1,0,0,3,1,2,4,1,3,0,0,0.74,0.25,0.49,0,0.67,183,0.99,91,73,0,365,0
60 | Ragnar Sigurson,34,UEFA Euro Championship,2016,Defender,Iceland,450,180,270,Iceland,5,2,3,1,0,1,0,0,0,0,0,0,0,0,9,2,7,0,0,0.2,0,0.2,0,0.33,450,1.8,50,90,0,0,0
61 | Renato Sanches,23,UEFA Euro Championship,2016,Midfielder,Portugal,346,172,174,Portugal,6,3,3,1,0,1,0,0,0,0,0,3,2,1,3,0,3,0,0,0.26,0,0.26,0,0.52,346,0.78,115,58,0,0,0
62 | Ricardo Andrade Quaresma Bernardo,37,UEFA Euro Championship,2016,Midfielder,Portugal,196,154,42,Portugal,7,4,3,1,0,1,1,0,1,0,0,4,3,1,0,0,0,1,0,0.92,0.46,0.46,0,2.14,196,0,0,28,196,196,0.46
63 | Robbie Brady,29,UEFA Euro Championship,2016,Midfielder,Republic of Ireland,360,90,270,Republic of Ireland,4,1,3,2,0,2,0,0,0,1,0,1,0,1,6,1,5,0,0,0.5,0,0.5,0,0.67,180,1.5,60,90,0,0,0
64 | Robert Lewandowski,32,UEFA Euro Championship,2016,Forward,Poland,450,180,270,Poland,5,2,3,1,1,0,0,0,0,0,0,3,1,2,2,1,1,0,0,0.2,0,0.2,0.5,0,450,0.4,225,90,0,0,0
65 | Romelu Lukaku,27,UEFA Euro Championship,2016,Forward,Belgium,401,155,246,Belgium,5,2,3,2,2,0,0,0,0,0,0,3,1,2,3,1,2,0,0,0.45,0,0.45,1.16,0,201,0.67,134,80,0,0,0
66 | Sam Vokes,31,UEFA Euro Championship,2016,Forward,Wales,187,65,122,Wales,4,2,2,1,1,0,0,0,0,0,0,2,1,1,0,0,0,1,0,0.48,0,0.48,1.38,0,187,0,0,47,187,0,0.48
67 | Shkodran Mustafi,28,UEFA Euro Championship,2016,Defender,Germany,119,119,0,Germany,2,2,0,1,1,0,0,0,0,0,0,1,1,0,1,1,0,0,0,0.76,0,0.76,0.76,0,119,0.76,119,60,0,0,0
68 | Toby Alderweireld,32,UEFA Euro Championship,2016,Defender,Belgium,450,180,270,Belgium,5,2,3,1,0,1,0,0,0,0,0,3,1,2,4,1,3,1,0,0.2,0,0.2,0,0.33,450,0.8,113,90,450,0,0.2
69 | Tomas Necid,31,UEFA Euro Championship,2016,Forward,Czech Republic,169,94,75,Czech Republic,3,2,1,1,1,0,0,0,0,1,0,0,0,0,2,2,0,0,0,0.53,0,0.53,0.96,0,169,1.07,85,56,0,0,0
70 | Vasili Berezutski,38,UEFA Euro Championship,2016,Defender,Russia,226,136,90,Russia,3,2,1,1,0,1,0,0,0,0,0,0,0,0,5,4,1,0,0,0.4,0,0.4,0,1,226,1.99,45,75,0,0,0
71 | Vladimir Weiss,31,UEFA Euro Championship,2016,Forward,Slovakia,279,78,201,Slovakia,4,1,3,1,0,1,1,0,1,0,0,1,1,0,4,0,4,1,0,0.65,0.32,0.32,0,0.45,279,1.29,70,70,279,279,0.32
72 | Wayne Rooney,35,UEFA Euro Championship,2016,Forward,England,288,254,34,England,4,3,1,1,1,0,0,0,0,1,0,1,0,1,3,3,0,0,0,0.31,0,0.31,0.35,0,288,0.94,96,72,0,0,0
73 | Wes Hoolahan,38,UEFA Euro Championship,2016,Midfielder,Republic of Ireland,181,78,103,Republic of Ireland,4,1,3,1,1,0,1,0,1,0,0,1,0,1,4,1,3,0,0,0.99,0.5,0.5,1.15,0,181,1.99,45,45,0,181,0
74 | Xherdan Shaqiri,29,UEFA Euro Championship,2016,Midfielder,Switzerland,347,169,178,Switzerland,4,2,2,1,1,0,1,0,1,0,0,2,1,1,2,1,1,0,0,0.52,0.26,0.26,0.53,0,347,0.52,174,87,0,347,0
75 | Yannick Carrasco,27,UEFA Euro Championship,2016,Midfielder,Belgium,216,79,137,Belgium,5,2,3,1,0,1,0,0,0,0,0,3,1,2,1,0,1,0,0,0.42,0,0.42,0,0.66,216,0.42,216,43,0,0,0
76 | Zoltan Gera,41,UEFA Euro Championship,2016,Forward,Hungary,272,92,180,Hungary,4,2,2,1,1,0,0,0,0,0,0,1,0,1,3,2,1,1,0,0.33,0,0.33,0.98,0,272,0.99,91,68,272,0,0.33
77 | Zoltan Stieber,32,UEFA Euro Championship,2016,Midfielder,Hungary,84,7,77,Hungary,3,1,2,1,0,1,0,0,0,0,0,1,0,1,1,0,1,0,0,1.07,0,1.07,0,1.17,84,1.07,84,28,0,0,0


--------------------------------------------------------------------------------
/Pull_all_Comments_and_Replies_for_YouTube_Playlists.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyMjEo/RWEWra5YAbiEj5mDz",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/analyticswithadam/Python/blob/main/Pull_all_Comments_and_Replies_for_YouTube_Playlists.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "markdown",
 31 |       "source": [
 32 |         "### Imports"
 33 |       ],
 34 |       "metadata": {
 35 |         "id": "jF_ENh3atCvf"
 36 |       }
 37 |     },
 38 |     {
 39 |       "cell_type": "code",
 40 |       "source": [
 41 |         "from googleapiclient.discovery import build\n",
 42 |         "import pandas as pd\n",
 43 |         "from google.colab import files, drive\n",
 44 |         "import getpass"
 45 |       ],
 46 |       "metadata": {
 47 |         "id": "I3JYDedSB02s"
 48 |       },
 49 |       "execution_count": 7,
 50 |       "outputs": []
 51 |     },
 52 |     {
 53 |       "cell_type": "markdown",
 54 |       "source": [
 55 |         "## User Input"
 56 |       ],
 57 |       "metadata": {
 58 |         "id": "svxnefn2B5Yt"
 59 |       }
 60 |     },
 61 |     {
 62 |       "cell_type": "code",
 63 |       "source": [
 64 |         "api_key = getpass.getpass('Please enter your YouTube API key: ')\n",
 65 |         "playlist_ids = ['PLbHrOSG7nVN0iy3JQonGt6p6illtDhoqX']\n"
 66 |       ],
 67 |       "metadata": {
 68 |         "id": "shzPziGAsy_l",
 69 |         "colab": {
 70 |           "base_uri": "https://localhost:8080/"
 71 |         },
 72 |         "outputId": "2de0c531-4b0b-4dd3-f7ff-120c41debe12"
 73 |       },
 74 |       "execution_count": 8,
 75 |       "outputs": [
 76 |         {
 77 |           "name": "stdout",
 78 |           "output_type": "stream",
 79 |           "text": [
 80 |             "Please enter your YouTube API key: ··········\n"
 81 |           ]
 82 |         }
 83 |       ]
 84 |     },
 85 |     {
 86 |       "cell_type": "code",
 87 |       "execution_count": 9,
 88 |       "metadata": {
 89 |         "id": "hK5fMz-Zgg_F"
 90 |       },
 91 |       "outputs": [],
 92 |       "source": [
 93 |         "# Build the YouTube client\n",
 94 |         "youtube = build('youtube', 'v3', developerKey=api_key)"
 95 |       ]
 96 |     },
 97 |     {
 98 |       "cell_type": "markdown",
 99 |       "source": [
100 |         "## Get Video IDs for Playlist"
101 |       ],
102 |       "metadata": {
103 |         "id": "EGeNx0lTCUNL"
104 |       }
105 |     },
106 |     {
107 |       "cell_type": "code",
108 |       "source": [
109 |         "def get_all_video_ids_from_playlists(youtube, playlist_ids):\n",
110 |         "    \"\"\"Collect the video IDs of every item in each playlist of playlist_ids.\n",
111 |         "\n",
112 |         "    youtube is the API client created with build(); playlist_ids is an iterable\n",
113 |         "    of playlist IDs. Returns one flat list of video IDs in the order fetched.\n",
114 |         "    \"\"\"\n",
115 |         "    collected_ids = []\n",
116 |         "    for current_playlist in playlist_ids:\n",
117 |         "        page_token = None  # no token is sent with the first request\n",
118 |         "        more_pages = True\n",
119 |         "        while more_pages:\n",
120 |         "            response = youtube.playlistItems().list(\n",
121 |         "                part='contentDetails',\n",
122 |         "                playlistId=current_playlist,\n",
123 |         "                maxResults=50,\n",
124 |         "                pageToken=page_token).execute()\n",
125 |         "            for entry in response['items']:\n",
126 |         "                collected_ids.append(entry['contentDetails']['videoId'])\n",
127 |         "\n",
128 |         "            # Keep paging until a response carries no nextPageToken.\n",
129 |         "            page_token = response.get('nextPageToken')\n",
130 |         "            more_pages = page_token is not None\n",
131 |         "    return collected_ids\n",
132 |         "\n",
133 |         "# Fetch all video IDs from the specified playlists\n",
134 |         "video_ids = get_all_video_ids_from_playlists(youtube, playlist_ids)\n",
135 |         "\n",
136 |         "# Now you can pass video_ids to the next function\n",
137 |         "# next_function(video_ids)"
138 |       ],
139 |       "metadata": {
140 |         "id": "zL7LFvM4BO_a"
141 |       },
142 |       "execution_count": 10,
143 |       "outputs": []
144 |     },
145 |     {
146 |       "cell_type": "markdown",
147 |       "source": [
148 |         "## Get All Comments"
149 |       ],
150 |       "metadata": {
151 |         "id": "dQ-LTgQatXIi"
152 |       }
153 |     },
154 |     {
155 |       "cell_type": "code",
156 |       "source": [
157 |         "# Collect every reply posted under one top-level comment\n",
158 |         "def get_replies(youtube, parent_id, video_id):\n",
159 |         "    \"\"\"Fetch all replies to the comment parent_id as a list of row dicts.\n",
160 |         "\n",
161 |         "    video_id is not sent to the API; it is only copied into each row's 'VideoID'.\n",
162 |         "    \"\"\"\n",
163 |         "    def to_row(snippet):\n",
164 |         "        return {\n",
165 |         "            'Timestamp': snippet['publishedAt'],\n",
166 |         "            'Username': snippet['authorDisplayName'],\n",
167 |         "            'VideoID': video_id,\n",
168 |         "            'Comment': snippet['textDisplay'],\n",
169 |         "            # Prefer the update time when the snippet carries one\n",
170 |         "            'Date': snippet.get('updatedAt', snippet['publishedAt']),\n",
171 |         "        }\n",
172 |         "\n",
173 |         "    rows = []\n",
174 |         "    page_token = None\n",
175 |         "    while True:\n",
176 |         "        response = youtube.comments().list(\n",
177 |         "            part=\"snippet\",\n",
178 |         "            parentId=parent_id,\n",
179 |         "            textFormat=\"plainText\",\n",
180 |         "            maxResults=100,\n",
181 |         "            pageToken=page_token,\n",
182 |         "        ).execute()\n",
183 |         "        rows.extend(to_row(item['snippet']) for item in response['items'])\n",
184 |         "        page_token = response.get('nextPageToken')\n",
185 |         "        if not page_token:\n",
186 |         "            return rows\n",
187 |         "\n",
188 |         "# Function to get all comments (including replies) for a single video\n",
189 |         "def get_comments_for_video(youtube, video_id):\n",
190 |         "    \"\"\"Return every top-level comment and reply on video_id as a list of row dicts.\n",
191 |         "\n",
192 |         "    If the API reports that comments are disabled for the video (HTTP 403,\n",
193 |         "    reason commentsDisabled) the video is skipped with a printed notice and the\n",
194 |         "    rows gathered so far are returned; any other API error is re-raised.\n",
195 |         "    \"\"\"\n",
196 |         "    from googleapiclient.errors import HttpError  # raised by execute() on API errors\n",
197 |         "    all_comments = []\n",
198 |         "    next_page_token = None\n",
199 |         "    while True:\n",
200 |         "        try:\n",
201 |         "            comment_response = youtube.commentThreads().list(\n",
202 |         "                part=\"snippet\", videoId=video_id, pageToken=next_page_token,\n",
203 |         "                textFormat=\"plainText\", maxResults=100).execute()\n",
204 |         "        except HttpError as err:\n",
205 |         "            if err.resp.status != 403 or b'commentsDisabled' not in err.content:\n",
206 |         "                raise\n",
207 |         "            print(f'Skipping video {video_id}: comments are disabled')\n",
208 |         "            return all_comments\n",
209 |         "        for item in comment_response['items']:\n",
210 |         "            top_comment = item['snippet']['topLevelComment']['snippet']\n",
211 |         "            all_comments.append({\n",
212 |         "                'Timestamp': top_comment['publishedAt'],\n",
213 |         "                'Username': top_comment['authorDisplayName'],\n",
214 |         "                'VideoID': video_id,\n",
215 |         "                'Comment': top_comment['textDisplay'],\n",
216 |         "                'Date': top_comment['updatedAt'] if 'updatedAt' in top_comment else top_comment['publishedAt']})\n",
217 |         "            if item['snippet']['totalReplyCount'] > 0:  # replies are fetched with a separate call\n",
218 |         "                all_comments.extend(get_replies(youtube, item['snippet']['topLevelComment']['id'], video_id))\n",
219 |         "        next_page_token = comment_response.get('nextPageToken')\n",
220 |         "        if not next_page_token:\n",
221 |         "            return all_comments\n",
222 |         "\n",
223 |         "# Run the per-video fetch for each ID and pool the resulting rows in one list\n",
224 |         "all_comments = []\n",
225 |         "\n",
226 |         "for video_id in video_ids:\n",
227 |         "    video_comments = get_comments_for_video(youtube, video_id)\n",
228 |         "    all_comments += video_comments\n",
229 |         "\n",
230 |         "\n",
231 |         "# Tabulate the pooled rows: one DataFrame row per comment or reply\n",
232 |         "comments_df = pd.DataFrame(all_comments)\n"
233 |       ],
234 |       "metadata": {
235 |         "id": "vUdZCrxHmnL8"
236 |       },
237 |       "execution_count": 11,
238 |       "outputs": []
239 |     },
240 |     {
241 |       "cell_type": "markdown",
242 |       "source": [
243 |         "### Output to CSV"
244 |       ],
245 |       "metadata": {
246 |         "id": "sQeo2iTwDROo"
247 |       }
248 |     },
249 |     {
250 |       "cell_type": "code",
251 |       "source": [
252 |         "# Colab helper used below to hand a runtime file to the browser\n",
253 |         "from google.colab import files\n",
254 |         "\n",
255 |         "# Save the complete comment table as a CSV file (row index left out)\n",
256 |         "csv_file = 'comments_data.csv'\n",
257 |         "comments_df.to_csv(csv_file, index=False)\n",
258 |         "# Start the download of that file to the local machine\n",
259 |         "files.download(csv_file)"
260 |       ],
261 |       "metadata": {
262 |         "colab": {
263 |           "base_uri": "https://localhost:8080/",
264 |           "height": 17
265 |         },
266 |         "id": "-phDM_447hTR",
267 |         "outputId": "4f1c0f72-6c13-428a-ea1f-f1da855b9d2a"
268 |       },
269 |       "execution_count": 12,
270 |       "outputs": [
271 |         {
272 |           "output_type": "display_data",
273 |           "data": {
274 |             "text/plain": [
275 |               "<IPython.core.display.Javascript object>"
276 |             ],
277 |             "application/javascript": [
278 |               "\n",
279 |               "    async function download(id, filename, size) {\n",
280 |               "      if (!google.colab.kernel.accessAllowed) {\n",
281 |               "        return;\n",
282 |               "      }\n",
283 |               "      const div = document.createElement('div');\n",
284 |               "      const label = document.createElement('label');\n",
285 |               "      label.textContent = `Downloading \"${filename}\": `;\n",
286 |               "      div.appendChild(label);\n",
287 |               "      const progress = document.createElement('progress');\n",
288 |               "      progress.max = size;\n",
289 |               "      div.appendChild(progress);\n",
290 |               "      document.body.appendChild(div);\n",
291 |               "\n",
292 |               "      const buffers = [];\n",
293 |               "      let downloaded = 0;\n",
294 |               "\n",
295 |               "      const channel = await google.colab.kernel.comms.open(id);\n",
296 |               "      // Send a message to notify the kernel that we're ready.\n",
297 |               "      channel.send({})\n",
298 |               "\n",
299 |               "      for await (const message of channel.messages) {\n",
300 |               "        // Send a message to notify the kernel that we're ready.\n",
301 |               "        channel.send({})\n",
302 |               "        if (message.buffers) {\n",
303 |               "          for (const buffer of message.buffers) {\n",
304 |               "            buffers.push(buffer);\n",
305 |               "            downloaded += buffer.byteLength;\n",
306 |               "            progress.value = downloaded;\n",
307 |               "          }\n",
308 |               "        }\n",
309 |               "      }\n",
310 |               "      const blob = new Blob(buffers, {type: 'application/binary'});\n",
311 |               "      const a = document.createElement('a');\n",
312 |               "      a.href = window.URL.createObjectURL(blob);\n",
313 |               "      a.download = filename;\n",
314 |               "      div.appendChild(a);\n",
315 |               "      a.click();\n",
316 |               "      div.remove();\n",
317 |               "    }\n",
318 |               "  "
319 |             ]
320 |           },
321 |           "metadata": {}
322 |         },
323 |         {
324 |           "output_type": "display_data",
325 |           "data": {
326 |             "text/plain": [
327 |               "<IPython.core.display.Javascript object>"
328 |             ],
329 |             "application/javascript": [
330 |               "download(\"download_f7c3bd82-8b7f-41de-855e-8446a3de334c\", \"comments_data.csv\", 15130)"
331 |             ]
332 |           },
333 |           "metadata": {}
334 |         }
335 |       ]
336 |     }
337 |   ]
338 | }


--------------------------------------------------------------------------------
/Dummy_Data.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Dummy_Data.ipynb",
  7 |       "provenance": [],
  8 |       "mount_file_id": "1k9Ax4sAULozq-YN4aE-o1pvA6lINwjyM",
  9 |       "authorship_tag": "ABX9TyNFct9Zll2QFX8EjLcsRLsX",
 10 |       "include_colab_link": true
 11 |     },
 12 |     "kernelspec": {
 13 |       "name": "python3",
 14 |       "display_name": "Python 3"
 15 |     },
 16 |     "language_info": {
 17 |       "name": "python"
 18 |     }
 19 |   },
 20 |   "cells": [
 21 |     {
 22 |       "cell_type": "markdown",
 23 |       "metadata": {
 24 |         "id": "view-in-github",
 25 |         "colab_type": "text"
 26 |       },
 27 |       "source": [
 28 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/Dummy_Data.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 29 |       ]
 30 |     },
 31 |     {
 32 |       "cell_type": "code",
 33 |       "metadata": {
 34 |         "id": "GnwxoCXGqRxk"
 35 |       },
 36 |       "source": [
 37 |         "import pandas as pd\n",
 38 |         "import numpy as np\n",
 39 |         "import random\n",
 40 |         "import datetime"
 41 |       ],
 42 |       "execution_count": null,
 43 |       "outputs": []
 44 |     },
 45 |     {
 46 |       "cell_type": "code",
 47 |       "metadata": {
 48 |         "id": "lNqMiH23l1hf"
 49 |       },
 50 |       "source": [
 51 |         "cars = ['Audi', 'BMW', 'Kia', 'Honda', 'Skoda']\n",
 52 |         "# models[i] lists the model names for cars[i] (keyed by position in cars)\n",
 53 |         "models = {0:['A1', 'A3', 'A4', 'A5', 'A6'],\n",
 54 |         "          1:['1 Series','3 Series','5 Series','7 Series','X5'],\n",
 55 |         "          2:['Rio','Sportage','Sorento','Soul','Ceed'],\n",
 56 |         "          3:['Accord','Civic','Jazz','Fit'],\n",
 57 |         "          4:['Citigo','Scala','Octavia']}\n",
 58 |         "# prices[i][j] is the reference price for models[i][j]; keep both tables aligned\n",
 59 |         "prices =  {0:[30000, 32000, 55000, 60000, 70000],\n",
 60 |         "          1:[35000, 50000, 60000, 70000, 65000],\n",
 61 |         "          2:[19000, 40000, 58000, 37000, 25000],\n",
 62 |         "          3:[32000, 25000, 30000, 15000],\n",
 63 |         "          4:[15000, 20000, 35000]}\n",
 64 |         "# Model years a generated car can be assigned\n",
 65 |         "years = [2021, 2020, 2019, 2018, 2017]\n",
 66 |         "\n",
 67 |         "salespeople = ['Pat Byrne', 'Joe Fats', 'Tony Long', 'Jimmy The Fly', 'Santa Barry', 'Mary Black']"
 68 |       ],
 69 |       "execution_count": null,
 70 |       "outputs": []
 71 |     },
 72 |     {
 73 |       "cell_type": "markdown",
 74 |       "metadata": {
 75 |         "id": "-SJrBwAnqc6G"
 76 |       },
 77 |       "source": [
 78 |         "# Rules\n",
 79 |         "1: Pick a random manufacturer from our list<br>\n",
 80 |         "2: From that manufacturer pick a car from model list<br> \n",
 81 |         "3: Pick a Year between (2021 and 2017)<br>\n",
 82 |         "4: Get reference price for the model<br>\n",
 83 |         "5: Adjust price 10% down for each year<br>\n",
 84 |         "6: Take purchase price and randomise - 10% up or down<br>\n",
 85 |         "7: Randomise Repairs as percentage of purchase price 1 - 10%<br> \n",
 86 |         "8: Randomise sale price as purchase + repairs - 5% to plus 10%<br>\n",
 87 |         "9: Generate sales date within the month<br>\n",
 88 |         "10: Randomise days in stock between 1 - 90<br>\n",
 89 |         "11: Choose Random Sales Person"
 90 |       ]
 91 |     },
 92 |     {
 93 |       "cell_type": "code",
 94 |       "metadata": {
 95 |         "id": "8D-OW8JdseVi"
 96 |       },
 97 |       "source": [
 98 |         "car_data = pd.DataFrame()\n",
 99 |         "\n",
100 |         "for i in range(0, 250):\n",
101 |         "  make = random.choice(cars)\n",
102 |         "  modelindex = cars.index(make)\n",
103 |         "  model = random.choice(models.get(modelindex))\n",
104 |         "  priceindex = models.get(modelindex).index(model)\n",
105 |         "  price = prices.get(modelindex)[priceindex]\n",
106 |         "  year = random.choice(years)\n",
107 |         "  deval = (2021 - year) * .1\n",
108 |         "  purchase_price = round(price * (1 - (deval + random.uniform(-.05, .1))),2)\n",
109 |         "  repair_cost = round(purchase_price * random.uniform(.01, .1),2)\n",
110 |         "  sales_price = round((purchase_price + repair_cost) * random.uniform(.95, 1.1),2)\n",
111 |         "  stock_days = random.randint(1, 90)\n",
112 |         "  date_sold = datetime.datetime(2021, 9, 1) + datetime.timedelta(days = random.randint(0, 30))\n",
113 |         "  profit = sales_price - purchase_price - repair_cost\n",
114 |         "  salesperson = random.choice(salespeople)\n",
115 |         "\n",
116 |         "  linedict = {'Make': [make], 'Model': [model], 'Year':[year], 'Purchase Price':[purchase_price],'Repair Cost':[repair_cost],\n",
117 |         "          'Sales Price':[sales_price], 'Profit':[profit],'Stock Days':[stock_days], 'Date Sold':[date_sold], 'Sales Person':[salesperson]}\n",
118 |         "\n",
119 |         "  line = pd.DataFrame(linedict)\n",
120 |         "  car_data = pd.concat([car_data, line])\n",
121 |         "\n",
122 |         "#print('Make')\n",
123 |         "#print(make)\n",
124 |         "#print('\\nModel')\n",
125 |         "#print(model)\n",
126 |         "#print('\\nYear')\n",
127 |         "#print(year)\n",
128 |         "#print('\\nPurchase Price')\n",
129 |         "#print(purchase_price)\n",
130 |         "#print('\\nRepairs')\n",
131 |         "#print(repair_cost)\n",
132 |         "#print('\\nTotal Cost')\n",
133 |         "#print(round(purchase_price+repair_cost,2))\n",
134 |         "#print('\\nSales Price')\n",
135 |         "#print(sales_price)\n",
136 |         "#print('\\nProfit') \n",
137 |         "#print(round(sales_price - (purchase_price + repair_cost),2))\n",
138 |         "#print('\\nDays in Stock') \n",
139 |         "#print(stock_days)\n",
140 |         "#print('\\nDay Sold') \n",
141 |         "#print(date_sold)"
142 |       ],
143 |       "execution_count": null,
144 |       "outputs": []
145 |     },
146 |     {
147 |       "cell_type": "code",
148 |       "metadata": {
149 |         "id": "f9bNoz7PBZju"
150 |       },
151 |       "source": [
152 |         "car_data.to_csv('Sales_Data_Sept21.csv',index = False)"
153 |       ],
154 |       "execution_count": null,
155 |       "outputs": []
156 |     },
157 |     {
158 |       "cell_type": "code",
159 |       "metadata": {
160 |         "colab": {
161 |           "base_uri": "https://localhost:8080/",
162 |           "height": 419
163 |         },
164 |         "id": "n6ajV4fN_JgA",
165 |         "outputId": "744931b4-20e1-41f5-ec19-81959f5abd2f"
166 |       },
167 |       "source": [
168 |         "car_data"
169 |       ],
170 |       "execution_count": null,
171 |       "outputs": [
172 |         {
173 |           "output_type": "execute_result",
174 |           "data": {
175 |             "text/html": [
176 |               "<div>\n",
177 |               "<style scoped>\n",
178 |               "    .dataframe tbody tr th:only-of-type {\n",
179 |               "        vertical-align: middle;\n",
180 |               "    }\n",
181 |               "\n",
182 |               "    .dataframe tbody tr th {\n",
183 |               "        vertical-align: top;\n",
184 |               "    }\n",
185 |               "\n",
186 |               "    .dataframe thead th {\n",
187 |               "        text-align: right;\n",
188 |               "    }\n",
189 |               "</style>\n",
190 |               "<table border=\"1\" class=\"dataframe\">\n",
191 |               "  <thead>\n",
192 |               "    <tr style=\"text-align: right;\">\n",
193 |               "      <th></th>\n",
194 |               "      <th>Make</th>\n",
195 |               "      <th>Model</th>\n",
196 |               "      <th>Year</th>\n",
197 |               "      <th>Purchase Price</th>\n",
198 |               "      <th>Repair Cost</th>\n",
199 |               "      <th>Sales Price</th>\n",
200 |               "      <th>Profit</th>\n",
201 |               "      <th>Stock Days</th>\n",
202 |               "      <th>Date Sold</th>\n",
203 |               "      <th>Sales Person</th>\n",
204 |               "    </tr>\n",
205 |               "  </thead>\n",
206 |               "  <tbody>\n",
207 |               "    <tr>\n",
208 |               "      <th>0</th>\n",
209 |               "      <td>Kia</td>\n",
210 |               "      <td>Rio</td>\n",
211 |               "      <td>2019</td>\n",
212 |               "      <td>14721.94</td>\n",
213 |               "      <td>1060.18</td>\n",
214 |               "      <td>17126.53</td>\n",
215 |               "      <td>1344.41</td>\n",
216 |               "      <td>36</td>\n",
217 |               "      <td>2021-10-01</td>\n",
218 |               "      <td>Mary Black</td>\n",
219 |               "    </tr>\n",
220 |               "    <tr>\n",
221 |               "      <th>0</th>\n",
222 |               "      <td>Audi</td>\n",
223 |               "      <td>A5</td>\n",
224 |               "      <td>2017</td>\n",
225 |               "      <td>32775.52</td>\n",
226 |               "      <td>2850.40</td>\n",
227 |               "      <td>37672.37</td>\n",
228 |               "      <td>2046.45</td>\n",
229 |               "      <td>52</td>\n",
230 |               "      <td>2021-09-28</td>\n",
231 |               "      <td>Mary Black</td>\n",
232 |               "    </tr>\n",
233 |               "    <tr>\n",
234 |               "      <th>0</th>\n",
235 |               "      <td>Honda</td>\n",
236 |               "      <td>Accord</td>\n",
237 |               "      <td>2021</td>\n",
238 |               "      <td>32807.23</td>\n",
239 |               "      <td>515.73</td>\n",
240 |               "      <td>32895.45</td>\n",
241 |               "      <td>-427.51</td>\n",
242 |               "      <td>70</td>\n",
243 |               "      <td>2021-09-11</td>\n",
244 |               "      <td>Tony Long</td>\n",
245 |               "    </tr>\n",
246 |               "    <tr>\n",
247 |               "      <th>0</th>\n",
248 |               "      <td>Kia</td>\n",
249 |               "      <td>Rio</td>\n",
250 |               "      <td>2020</td>\n",
251 |               "      <td>16605.56</td>\n",
252 |               "      <td>1221.56</td>\n",
253 |               "      <td>19088.20</td>\n",
254 |               "      <td>1261.08</td>\n",
255 |               "      <td>67</td>\n",
256 |               "      <td>2021-09-27</td>\n",
257 |               "      <td>Mary Black</td>\n",
258 |               "    </tr>\n",
259 |               "    <tr>\n",
260 |               "      <th>0</th>\n",
261 |               "      <td>Audi</td>\n",
262 |               "      <td>A6</td>\n",
263 |               "      <td>2017</td>\n",
264 |               "      <td>39002.05</td>\n",
265 |               "      <td>1926.71</td>\n",
266 |               "      <td>43745.14</td>\n",
267 |               "      <td>2816.38</td>\n",
268 |               "      <td>2</td>\n",
269 |               "      <td>2021-09-16</td>\n",
270 |               "      <td>Santa Barry</td>\n",
271 |               "    </tr>\n",
272 |               "    <tr>\n",
273 |               "      <th>...</th>\n",
274 |               "      <td>...</td>\n",
275 |               "      <td>...</td>\n",
276 |               "      <td>...</td>\n",
277 |               "      <td>...</td>\n",
278 |               "      <td>...</td>\n",
279 |               "      <td>...</td>\n",
280 |               "      <td>...</td>\n",
281 |               "      <td>...</td>\n",
282 |               "      <td>...</td>\n",
283 |               "      <td>...</td>\n",
284 |               "    </tr>\n",
285 |               "    <tr>\n",
286 |               "      <th>0</th>\n",
287 |               "      <td>BMW</td>\n",
288 |               "      <td>1 Series</td>\n",
289 |               "      <td>2020</td>\n",
290 |               "      <td>29735.49</td>\n",
291 |               "      <td>2321.10</td>\n",
292 |               "      <td>31944.39</td>\n",
293 |               "      <td>-112.20</td>\n",
294 |               "      <td>32</td>\n",
295 |               "      <td>2021-09-13</td>\n",
296 |               "      <td>Tony Long</td>\n",
297 |               "    </tr>\n",
298 |               "    <tr>\n",
299 |               "      <th>0</th>\n",
300 |               "      <td>Skoda</td>\n",
301 |               "      <td>Scala</td>\n",
302 |               "      <td>2017</td>\n",
303 |               "      <td>10152.50</td>\n",
304 |               "      <td>191.72</td>\n",
305 |               "      <td>10982.91</td>\n",
306 |               "      <td>638.69</td>\n",
307 |               "      <td>79</td>\n",
308 |               "      <td>2021-09-07</td>\n",
309 |               "      <td>Santa Barry</td>\n",
310 |               "    </tr>\n",
311 |               "    <tr>\n",
312 |               "      <th>0</th>\n",
313 |               "      <td>Skoda</td>\n",
314 |               "      <td>Citigo</td>\n",
315 |               "      <td>2021</td>\n",
316 |               "      <td>14243.96</td>\n",
317 |               "      <td>1156.99</td>\n",
318 |               "      <td>16940.98</td>\n",
319 |               "      <td>1540.03</td>\n",
320 |               "      <td>66</td>\n",
321 |               "      <td>2021-09-19</td>\n",
322 |               "      <td>Tony Long</td>\n",
323 |               "    </tr>\n",
324 |               "    <tr>\n",
325 |               "      <th>0</th>\n",
326 |               "      <td>Audi</td>\n",
327 |               "      <td>A1</td>\n",
328 |               "      <td>2019</td>\n",
329 |               "      <td>25101.42</td>\n",
330 |               "      <td>2280.16</td>\n",
331 |               "      <td>26370.38</td>\n",
332 |               "      <td>-1011.20</td>\n",
333 |               "      <td>87</td>\n",
334 |               "      <td>2021-09-21</td>\n",
335 |               "      <td>Jimmy The Fly</td>\n",
336 |               "    </tr>\n",
337 |               "    <tr>\n",
338 |               "      <th>0</th>\n",
339 |               "      <td>Skoda</td>\n",
340 |               "      <td>Octavia</td>\n",
341 |               "      <td>2018</td>\n",
342 |               "      <td>22689.81</td>\n",
343 |               "      <td>1683.94</td>\n",
344 |               "      <td>23565.83</td>\n",
345 |               "      <td>-807.92</td>\n",
346 |               "      <td>53</td>\n",
347 |               "      <td>2021-09-27</td>\n",
348 |               "      <td>Jimmy The Fly</td>\n",
349 |               "    </tr>\n",
350 |               "  </tbody>\n",
351 |               "</table>\n",
352 |               "<p>250 rows × 10 columns</p>\n",
353 |               "</div>"
354 |             ],
355 |             "text/plain": [
356 |               "     Make     Model  Year  ...  Stock Days  Date Sold   Sales Person\n",
357 |               "0     Kia       Rio  2019  ...          36 2021-10-01     Mary Black\n",
358 |               "0    Audi        A5  2017  ...          52 2021-09-28     Mary Black\n",
359 |               "0   Honda    Accord  2021  ...          70 2021-09-11      Tony Long\n",
360 |               "0     Kia       Rio  2020  ...          67 2021-09-27     Mary Black\n",
361 |               "0    Audi        A6  2017  ...           2 2021-09-16    Santa Barry\n",
362 |               "..    ...       ...   ...  ...         ...        ...            ...\n",
363 |               "0     BMW  1 Series  2020  ...          32 2021-09-13      Tony Long\n",
364 |               "0   Skoda     Scala  2017  ...          79 2021-09-07    Santa Barry\n",
365 |               "0   Skoda    Citigo  2021  ...          66 2021-09-19      Tony Long\n",
366 |               "0    Audi        A1  2019  ...          87 2021-09-21  Jimmy The Fly\n",
367 |               "0   Skoda   Octavia  2018  ...          53 2021-09-27  Jimmy The Fly\n",
368 |               "\n",
369 |               "[250 rows x 10 columns]"
370 |             ]
371 |           },
372 |           "metadata": {
373 |             "tags": []
374 |           },
375 |           "execution_count": 5
376 |         }
377 |       ]
378 |     }
379 |   ]
380 | }


--------------------------------------------------------------------------------
/Lambda.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Lambda.ipynb",
  7 |       "provenance": [],
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/Lambda.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "markdown",
 31 |       "metadata": {
 32 |         "id": "8ym3nBQ8TeFG"
 33 |       },
 34 |       "source": [
 35 |         "Lambda Expressions vs. Regular Functions"
 36 |       ]
 37 |     },
 38 |     {
 39 |       "cell_type": "code",
 40 |       "metadata": {
 41 |         "id": "66nLT6T1fdj6",
 42 |         "outputId": "37667e86-abc1-4c37-a906-57eb6f09f98a",
 43 |         "colab": {
 44 |           "base_uri": "https://localhost:8080/"
 45 |         }
 46 |       },
 47 |       "source": [
 48 |         "# Creating a function \n",
 49 |         "def squared(num):\n",
 50 |         "  return num ** 2\n",
 51 |         "print(squared(4))\n",
 52 |         "\n",
 53 |         "# using a lambda expression\n",
 54 |         "(lambda x: x**2)(4)"
 55 |       ],
 56 |       "execution_count": 1,
 57 |       "outputs": [
 58 |         {
 59 |           "output_type": "stream",
 60 |           "name": "stdout",
 61 |           "text": [
 62 |             "16\n"
 63 |           ]
 64 |         },
 65 |         {
 66 |           "output_type": "execute_result",
 67 |           "data": {
 68 |             "text/plain": [
 69 |               "16"
 70 |             ]
 71 |           },
 72 |           "metadata": {},
 73 |           "execution_count": 1
 74 |         }
 75 |       ]
 76 |     },
 77 |     {
 78 |       "cell_type": "markdown",
 79 |       "metadata": {
 80 |         "id": "Sh1zOHnMlG3D"
 81 |       },
 82 |       "source": [
 83 |         "Lambda Expression vs. If Statement"
 84 |       ]
 85 |     },
 86 |     {
 87 |       "cell_type": "code",
 88 |       "metadata": {
 89 |         "id": "m5hfi561lMg4",
 90 |         "outputId": "08701972-f0ec-4a9d-8297-7103e3c5dfdd",
 91 |         "colab": {
 92 |           "base_uri": "https://localhost:8080/",
 93 |           "height": 53
 94 |         }
 95 |       },
 96 |       "source": [
 97 |         "num = 5\n",
 98 |         "\n",
 99 |         "if(num > 4):\n",
100 |         "   print('Greater') \n",
101 |         "else: \n",
102 |         "   print('Less')\n",
103 |         "\n",
104 |         "(lambda x: 'Greater' if x > 4 else 'Less')(num)\n"
105 |       ],
106 |       "execution_count": 2,
107 |       "outputs": [
108 |         {
109 |           "output_type": "stream",
110 |           "name": "stdout",
111 |           "text": [
112 |             "Greater\n"
113 |           ]
114 |         },
115 |         {
116 |           "output_type": "execute_result",
117 |           "data": {
118 |             "text/plain": [
119 |               "'Greater'"
120 |             ],
121 |             "application/vnd.google.colaboratory.intrinsic+json": {
122 |               "type": "string"
123 |             }
124 |           },
125 |           "metadata": {},
126 |           "execution_count": 2
127 |         }
128 |       ]
129 |     },
130 |     {
131 |       "cell_type": "markdown",
132 |       "metadata": {
133 |         "id": "Eg9iUYYZTgKT"
134 |       },
135 |       "source": [
136 |         "Lambda Expression in Dataframes with Apply"
137 |       ]
138 |     },
139 |     {
140 |       "cell_type": "code",
141 |       "metadata": {
142 |         "id": "N217pIwxTf4k",
143 |         "outputId": "1317f05f-3f70-4f3a-9972-9c9d03054e3b",
144 |         "colab": {
145 |           "base_uri": "https://localhost:8080/"
146 |         }
147 |       },
148 |       "source": [
149 |         "import pandas as pd\n",
150 |         "\n",
151 |         "df=pd.DataFrame({\n",
152 |         "    'id':[1,2,3,4,5],\n",
153 |         "    'name':['Paul','Tim','Mary','Tony','Sam'],\n",
154 |         "    'age':[31,45,90,20,35],\n",
155 |         "    'income':[50000,70000,25000,5000,100000],\n",
156 |         "    'married':[True, False, True, False, True]\n",
157 |         "})\n",
158 |         "\n",
159 |         "print(df.head())\n",
160 |         "df['lamdba cat'] = df['income'].apply(lambda x: 'high income' if x > 30000 else ('medium income' if x > 7500 else 'low income'))\n",
161 |         "\n",
162 |         "print(df.head())\n"
163 |       ],
164 |       "execution_count": 3,
165 |       "outputs": [
166 |         {
167 |           "output_type": "stream",
168 |           "name": "stdout",
169 |           "text": [
170 |             "   id  name  age  income  married\n",
171 |             "0   1  Paul   31   50000     True\n",
172 |             "1   2   Tim   45   70000    False\n",
173 |             "2   3  Mary   90   25000     True\n",
174 |             "3   4  Tony   20    5000    False\n",
175 |             "4   5   Sam   35  100000     True\n",
176 |             "   id  name  age  income  married     lamdba cat\n",
177 |             "0   1  Paul   31   50000     True    high income\n",
178 |             "1   2   Tim   45   70000    False    high income\n",
179 |             "2   3  Mary   90   25000     True  medium income\n",
180 |             "3   4  Tony   20    5000    False     low income\n",
181 |             "4   5   Sam   35  100000     True    high income\n"
182 |           ]
183 |         }
184 |       ]
185 |     },
186 |     {
187 |       "cell_type": "markdown",
188 |       "metadata": {
189 |         "id": "trHST7vVThVV"
190 |       },
191 |       "source": [
192 |         "Advanced Filtering "
193 |       ]
194 |     },
195 |     {
196 |       "cell_type": "code",
197 |       "metadata": {
198 |         "id": "q-F0KmiGptM8",
199 |         "outputId": "92971b33-5ad3-47ec-a6cb-527e11018254",
200 |         "colab": {
201 |           "base_uri": "https://localhost:8080/",
202 |           "height": 143
203 |         }
204 |       },
205 |       "source": [
206 |         "df[(df['age'] > 20) & (df['income'] * 3.5 > 100000)]"
207 |       ],
208 |       "execution_count": 4,
209 |       "outputs": [
210 |         {
211 |           "output_type": "execute_result",
212 |           "data": {
213 |             "text/plain": [
214 |               "   id  name  age  income  married   lamdba cat\n",
215 |               "0   1  Paul   31   50000     True  high income\n",
216 |               "1   2   Tim   45   70000    False  high income\n",
217 |               "4   5   Sam   35  100000     True  high income"
218 |             ],
219 |             "text/html": [
220 |               "\n",
221 |               "  <div id=\"df-52a53f5d-23a2-472e-810c-0cde9ca42a08\">\n",
222 |               "    <div class=\"colab-df-container\">\n",
223 |               "      <div>\n",
224 |               "<style scoped>\n",
225 |               "    .dataframe tbody tr th:only-of-type {\n",
226 |               "        vertical-align: middle;\n",
227 |               "    }\n",
228 |               "\n",
229 |               "    .dataframe tbody tr th {\n",
230 |               "        vertical-align: top;\n",
231 |               "    }\n",
232 |               "\n",
233 |               "    .dataframe thead th {\n",
234 |               "        text-align: right;\n",
235 |               "    }\n",
236 |               "</style>\n",
237 |               "<table border=\"1\" class=\"dataframe\">\n",
238 |               "  <thead>\n",
239 |               "    <tr style=\"text-align: right;\">\n",
240 |               "      <th></th>\n",
241 |               "      <th>id</th>\n",
242 |               "      <th>name</th>\n",
243 |               "      <th>age</th>\n",
244 |               "      <th>income</th>\n",
245 |               "      <th>married</th>\n",
246 |               "      <th>lamdba cat</th>\n",
247 |               "    </tr>\n",
248 |               "  </thead>\n",
249 |               "  <tbody>\n",
250 |               "    <tr>\n",
251 |               "      <th>0</th>\n",
252 |               "      <td>1</td>\n",
253 |               "      <td>Paul</td>\n",
254 |               "      <td>31</td>\n",
255 |               "      <td>50000</td>\n",
256 |               "      <td>True</td>\n",
257 |               "      <td>high income</td>\n",
258 |               "    </tr>\n",
259 |               "    <tr>\n",
260 |               "      <th>1</th>\n",
261 |               "      <td>2</td>\n",
262 |               "      <td>Tim</td>\n",
263 |               "      <td>45</td>\n",
264 |               "      <td>70000</td>\n",
265 |               "      <td>False</td>\n",
266 |               "      <td>high income</td>\n",
267 |               "    </tr>\n",
268 |               "    <tr>\n",
269 |               "      <th>4</th>\n",
270 |               "      <td>5</td>\n",
271 |               "      <td>Sam</td>\n",
272 |               "      <td>35</td>\n",
273 |               "      <td>100000</td>\n",
274 |               "      <td>True</td>\n",
275 |               "      <td>high income</td>\n",
276 |               "    </tr>\n",
277 |               "  </tbody>\n",
278 |               "</table>\n",
279 |               "</div>\n",
280 |               "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-52a53f5d-23a2-472e-810c-0cde9ca42a08')\"\n",
281 |               "              title=\"Convert this dataframe to an interactive table.\"\n",
282 |               "              style=\"display:none;\">\n",
283 |               "        \n",
284 |               "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
285 |               "       width=\"24px\">\n",
286 |               "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
287 |               "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
288 |               "  </svg>\n",
289 |               "      </button>\n",
290 |               "      \n",
291 |               "  <style>\n",
292 |               "    .colab-df-container {\n",
293 |               "      display:flex;\n",
294 |               "      flex-wrap:wrap;\n",
295 |               "      gap: 12px;\n",
296 |               "    }\n",
297 |               "\n",
298 |               "    .colab-df-convert {\n",
299 |               "      background-color: #E8F0FE;\n",
300 |               "      border: none;\n",
301 |               "      border-radius: 50%;\n",
302 |               "      cursor: pointer;\n",
303 |               "      display: none;\n",
304 |               "      fill: #1967D2;\n",
305 |               "      height: 32px;\n",
306 |               "      padding: 0 0 0 0;\n",
307 |               "      width: 32px;\n",
308 |               "    }\n",
309 |               "\n",
310 |               "    .colab-df-convert:hover {\n",
311 |               "      background-color: #E2EBFA;\n",
312 |               "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
313 |               "      fill: #174EA6;\n",
314 |               "    }\n",
315 |               "\n",
316 |               "    [theme=dark] .colab-df-convert {\n",
317 |               "      background-color: #3B4455;\n",
318 |               "      fill: #D2E3FC;\n",
319 |               "    }\n",
320 |               "\n",
321 |               "    [theme=dark] .colab-df-convert:hover {\n",
322 |               "      background-color: #434B5C;\n",
323 |               "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
324 |               "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
325 |               "      fill: #FFFFFF;\n",
326 |               "    }\n",
327 |               "  </style>\n",
328 |               "\n",
329 |               "      <script>\n",
330 |               "        const buttonEl =\n",
331 |               "          document.querySelector('#df-52a53f5d-23a2-472e-810c-0cde9ca42a08 button.colab-df-convert');\n",
332 |               "        buttonEl.style.display =\n",
333 |               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
334 |               "\n",
335 |               "        async function convertToInteractive(key) {\n",
336 |               "          const element = document.querySelector('#df-52a53f5d-23a2-472e-810c-0cde9ca42a08');\n",
337 |               "          const dataTable =\n",
338 |               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
339 |               "                                                     [key], {});\n",
340 |               "          if (!dataTable) return;\n",
341 |               "\n",
342 |               "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
343 |               "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
344 |               "            + ' to learn more about interactive tables.';\n",
345 |               "          element.innerHTML = '';\n",
346 |               "          dataTable['output_type'] = 'display_data';\n",
347 |               "          await google.colab.output.renderOutput(dataTable, element);\n",
348 |               "          const docLink = document.createElement('div');\n",
349 |               "          docLink.innerHTML = docLinkHtml;\n",
350 |               "          element.appendChild(docLink);\n",
351 |               "        }\n",
352 |               "      </script>\n",
353 |               "    </div>\n",
354 |               "  </div>\n",
355 |               "  "
356 |             ]
357 |           },
358 |           "metadata": {},
359 |           "execution_count": 4
360 |         }
361 |       ]
362 |     },
363 |     {
364 |       "cell_type": "code",
365 |       "metadata": {
366 |         "id": "p1rqtCVcptCR",
367 |         "outputId": "8c075c3f-2194-4703-c2a4-8b64859a048a",
368 |         "colab": {
369 |           "base_uri": "https://localhost:8080/",
370 |           "height": 143
371 |         }
372 |       },
373 |       "source": [
374 |         "df[df['name'].apply(len) > 3]"
375 |       ],
376 |       "execution_count": 31,
377 |       "outputs": [
378 |         {
379 |           "output_type": "execute_result",
380 |           "data": {
381 |             "text/plain": [
382 |               "   id  name  age  income  married     lamdba cat\n",
383 |               "0   1  Paul   31   50000     True    high income\n",
384 |               "2   3  Mary   90   25000     True  medium income\n",
385 |               "3   4  Tony   20    5000    False     low income"
386 |             ],
387 |             "text/html": [
388 |               "\n",
389 |               "  <div id=\"df-bdc0157b-648d-459c-bdbd-af78a0002ab1\">\n",
390 |               "    <div class=\"colab-df-container\">\n",
391 |               "      <div>\n",
392 |               "<style scoped>\n",
393 |               "    .dataframe tbody tr th:only-of-type {\n",
394 |               "        vertical-align: middle;\n",
395 |               "    }\n",
396 |               "\n",
397 |               "    .dataframe tbody tr th {\n",
398 |               "        vertical-align: top;\n",
399 |               "    }\n",
400 |               "\n",
401 |               "    .dataframe thead th {\n",
402 |               "        text-align: right;\n",
403 |               "    }\n",
404 |               "</style>\n",
405 |               "<table border=\"1\" class=\"dataframe\">\n",
406 |               "  <thead>\n",
407 |               "    <tr style=\"text-align: right;\">\n",
408 |               "      <th></th>\n",
409 |               "      <th>id</th>\n",
410 |               "      <th>name</th>\n",
411 |               "      <th>age</th>\n",
412 |               "      <th>income</th>\n",
413 |               "      <th>married</th>\n",
414 |               "      <th>lamdba cat</th>\n",
415 |               "    </tr>\n",
416 |               "  </thead>\n",
417 |               "  <tbody>\n",
418 |               "    <tr>\n",
419 |               "      <th>0</th>\n",
420 |               "      <td>1</td>\n",
421 |               "      <td>Paul</td>\n",
422 |               "      <td>31</td>\n",
423 |               "      <td>50000</td>\n",
424 |               "      <td>True</td>\n",
425 |               "      <td>high income</td>\n",
426 |               "    </tr>\n",
427 |               "    <tr>\n",
428 |               "      <th>2</th>\n",
429 |               "      <td>3</td>\n",
430 |               "      <td>Mary</td>\n",
431 |               "      <td>90</td>\n",
432 |               "      <td>25000</td>\n",
433 |               "      <td>True</td>\n",
434 |               "      <td>medium income</td>\n",
435 |               "    </tr>\n",
436 |               "    <tr>\n",
437 |               "      <th>3</th>\n",
438 |               "      <td>4</td>\n",
439 |               "      <td>Tony</td>\n",
440 |               "      <td>20</td>\n",
441 |               "      <td>5000</td>\n",
442 |               "      <td>False</td>\n",
443 |               "      <td>low income</td>\n",
444 |               "    </tr>\n",
445 |               "  </tbody>\n",
446 |               "</table>\n",
447 |               "</div>\n",
448 |               "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-bdc0157b-648d-459c-bdbd-af78a0002ab1')\"\n",
449 |               "              title=\"Convert this dataframe to an interactive table.\"\n",
450 |               "              style=\"display:none;\">\n",
451 |               "        \n",
452 |               "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
453 |               "       width=\"24px\">\n",
454 |               "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
455 |               "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
456 |               "  </svg>\n",
457 |               "      </button>\n",
458 |               "      \n",
459 |               "  <style>\n",
460 |               "    .colab-df-container {\n",
461 |               "      display:flex;\n",
462 |               "      flex-wrap:wrap;\n",
463 |               "      gap: 12px;\n",
464 |               "    }\n",
465 |               "\n",
466 |               "    .colab-df-convert {\n",
467 |               "      background-color: #E8F0FE;\n",
468 |               "      border: none;\n",
469 |               "      border-radius: 50%;\n",
470 |               "      cursor: pointer;\n",
471 |               "      display: none;\n",
472 |               "      fill: #1967D2;\n",
473 |               "      height: 32px;\n",
474 |               "      padding: 0 0 0 0;\n",
475 |               "      width: 32px;\n",
476 |               "    }\n",
477 |               "\n",
478 |               "    .colab-df-convert:hover {\n",
479 |               "      background-color: #E2EBFA;\n",
480 |               "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
481 |               "      fill: #174EA6;\n",
482 |               "    }\n",
483 |               "\n",
484 |               "    [theme=dark] .colab-df-convert {\n",
485 |               "      background-color: #3B4455;\n",
486 |               "      fill: #D2E3FC;\n",
487 |               "    }\n",
488 |               "\n",
489 |               "    [theme=dark] .colab-df-convert:hover {\n",
490 |               "      background-color: #434B5C;\n",
491 |               "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
492 |               "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
493 |               "      fill: #FFFFFF;\n",
494 |               "    }\n",
495 |               "  </style>\n",
496 |               "\n",
497 |               "      <script>\n",
498 |               "        const buttonEl =\n",
499 |               "          document.querySelector('#df-bdc0157b-648d-459c-bdbd-af78a0002ab1 button.colab-df-convert');\n",
500 |               "        buttonEl.style.display =\n",
501 |               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
502 |               "\n",
503 |               "        async function convertToInteractive(key) {\n",
504 |               "          const element = document.querySelector('#df-bdc0157b-648d-459c-bdbd-af78a0002ab1');\n",
505 |               "          const dataTable =\n",
506 |               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
507 |               "                                                     [key], {});\n",
508 |               "          if (!dataTable) return;\n",
509 |               "\n",
510 |               "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
511 |               "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
512 |               "            + ' to learn more about interactive tables.';\n",
513 |               "          element.innerHTML = '';\n",
514 |               "          dataTable['output_type'] = 'display_data';\n",
515 |               "          await google.colab.output.renderOutput(dataTable, element);\n",
516 |               "          const docLink = document.createElement('div');\n",
517 |               "          docLink.innerHTML = docLinkHtml;\n",
518 |               "          element.appendChild(docLink);\n",
519 |               "        }\n",
520 |               "      </script>\n",
521 |               "    </div>\n",
522 |               "  </div>\n",
523 |               "  "
524 |             ]
525 |           },
526 |           "metadata": {},
527 |           "execution_count": 31
528 |         }
529 |       ]
530 |     },
531 |     {
532 |       "cell_type": "code",
533 |       "metadata": {
534 |         "id": "D-aveCQQTh8r"
535 |       },
536 |       "source": [
537 |         "df[df.apply(lambda x : (x['age'] > 20) & (x['income'] * 3.5 > 100000) & (len(x['name']) <= 3),axis=1) & (df[\"age\"] < df[\"age\"].mean())]\n"
538 |       ],
539 |       "execution_count": null,
540 |       "outputs": []
541 |     },
542 |     {
543 |       "cell_type": "markdown",
544 |       "metadata": {
545 |         "id": "lN33xSFzk3FL"
546 |       },
547 |       "source": [
548 |         "Change Case"
549 |       ]
550 |     },
551 |     {
552 |       "cell_type": "code",
553 |       "metadata": {
554 |         "id": "RmOsxX3Xqv_P"
555 |       },
556 |       "source": [
557 |         "df['name'].apply(lambda x: x.upper())"
558 |       ],
559 |       "execution_count": null,
560 |       "outputs": []
561 |     },
562 |     {
563 |       "cell_type": "code",
564 |       "metadata": {
565 |         "id": "zRKQtyTqkwB8"
566 |       },
567 |       "source": [
568 |         "df['name'].apply(lambda x: x.lower())"
569 |       ],
570 |       "execution_count": null,
571 |       "outputs": []
572 |     }
573 |   ]
574 | }


--------------------------------------------------------------------------------
/YouTubeComments.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyP18S5koz98EuxKR0Vr96Kb",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/analyticswithadam/Python/blob/main/YouTubeComments.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "markdown",
 31 |       "source": [
 32 |         "## Pull Comments"
 33 |       ],
 34 |       "metadata": {
 35 |         "id": "ExqBG7nOSzQZ"
 36 |       }
 37 |     },
 38 |     {
 39 |       "cell_type": "code",
 40 |       "execution_count": 14,
 41 |       "metadata": {
 42 |         "colab": {
 43 |           "base_uri": "https://localhost:8080/"
 44 |         },
 45 |         "id": "LtsUFbQJOv-5",
 46 |         "outputId": "4ec8d1cb-e23f-462c-a4dc-9ca194e45ae7"
 47 |       },
 48 |       "outputs": [
 49 |         {
 50 |           "output_type": "stream",
 51 |           "name": "stdout",
 52 |           "text": [
 53 |             "ممنونم از به اشتراك گذاشتن اطلاعاتتون\n",
 54 |             "Hi Leila you are awesome ❤and I have a problem with this , I&#39;m not able to find data type although I check data menu and my office is 2021\n",
 55 |             "hey Leila~,<br>Not sure how I got unSub~d, <br>sure glad I had U in my e-mail\n",
 56 |             "How do you make tutorials on features we don&#39;t yet have?\n",
 57 |             "An amazing insight; I&#39;m not sure if i will be using this feature, but you&#39;ve done a great job. I have noticed you have switched your laptop from Surface to hp.\n",
 58 |             "Those salaries!\n",
 59 |             "Thank you for this video.  I have traveling and this was one of the first things I looked at.  I always enjoy and learn something from your videos.  I can&#39;t wait until it rolls out to me.\n",
 60 |             "I truly like all these ideas and suggestions from Leila, but I constantly run into issues when applying them in to a large corporate environment. Linking data via SharePoint gives enough trouble. But pivoting tables and images across multiple data sources and servers (SharePoint, which which are accessibly to everyone within the target audience)  always gives an error message like: &quot;This is not possible&quot;. Makes me feel like being back in 1998, which is also worth something.\n",
 61 |             "Hi. Im hoping if you can help pls. Having issues with conditional format<br><br>A1 = NOW(). example result <a href=\"https://www.youtube.com/watch?v=WNrB1Q9Rry0&amp;t=11m00s\">11:00</a><br>B1 = <a href=\"https://www.youtube.com/watch?v=WNrB1Q9Rry0&amp;t=7m00s\">7:00</a>. On time format<br>Conditiinal format formula is highlight if a1 &gt; b1.<br><br>May i ask why its not highlighting when the statement is true?\n",
 62 |             "Hi Leila, that&#39;s amazing, I have an issue: the Geography was not copied on pivot table. How can I fix it?\n",
 63 |             "This can be a life saver!👏🏼👏🏼👏🏼 My colleague just requested sth similar…will try when back in the office!🎊🎊🎊\n",
 64 |             "ok thanks so much on this explanation , as usual amazing work , and if i want to implement it on dashboard how can i proceed it\n",
 65 |             "Does this work on pivot chart as well?\n",
 66 |             "@LeilaGharani I hope you can really help me.<br>I have a csv file on excel and I have half-yearly data.<br>I want to have only yearly data so would you say I should cancel all the first half of the year data and keep the last one, or should I do like a merging of the two semesters?\n",
 67 |             "This is amazing!!!\n",
 68 |             "Brilliant!! I wish MS would hurry up and add these features to my 365!!\n",
 69 |             "i have a problem madam will u solve it? If C4=Holiday then merge cells C4:C10\n",
 70 |             "Good job.\n",
 71 |             "Let me know what I think???? Well, my jaw has dropped on the floor with rest of my brain to start with! Sorry I need to replay this again!!! Tell ya all later!! Ow! Yeah You’re magical!!! Unbelievable!!!!!❤\n",
 72 |             "if you please Leila share workbook  of  episode\n",
 73 |             "Excellent. Thanks for keeping us updated with the new features.\n",
 74 |             "Hey Leila just wanted to check if we have any similar kind of arrayformula function of Google sheets in excel as that gives us an option to enter formula in one cell which populates values in multiple rows and columns..\n",
 75 |             "Cool\n",
 76 |             "can ChatGPT do this???\n",
 77 |             "Thanks Leila  :)\n",
 78 |             "Thank god this feature is finally coming to excel.\n",
 79 |             "Thank you sincerely\n",
 80 |             "I have an excel file that I use for work, specifically car parks. The data I use are the following, license plate number, time of entry, time of exit, duration of stay, and based on these the price is calculated.<br>Sometimes I mistakenly click on the classification, as a result of which the numbers get confused, i.e. they get mixed up.  If I don&#39;t understand it right away to reverse it with undo, what options do I have?\n",
 81 |             "Very nice video 📹 with bramd new informstion\n",
 82 |             "i like power query  but i feel the more data you have the quicker it would be to do it with formulas and creating everything yourself. especially with data sets that continually increase such as if you&#39;re tracking multiple KPI&#39;s for each month and adding all the data to an annual folder. I don&#39;t know if that can be helped though.\n",
 83 |             "I lav ya\n",
 84 |             "your example is so cluttered and keeps me from wanting to implement this.\n",
 85 |             "Thank you very much, I m going to use it immediately. Great tutorial.\n",
 86 |             "Great additions to Excel\n",
 87 |             "Hi Leila, Is there a way to export or Link these table with custom formatting and images to PowerPoint? I Know it can be done as Image or entire workbook can be embedded but both are not desirable.\n",
 88 |             "Wow, awesome Leila. You have left some great ideas that can be turned into others that will give the business potential and a better form of control, in order to increase the efficiency of all its collaborators. Just great.<br>Thank you very much, Leila, for sharing such valuable information.\n",
 89 |             "Thanks teacher👌👍👍 ❤❤\n",
 90 |             "Ma&#39;am what is advance excel is any course is available at your website.<br>Please let me know ma&#39;am.<br>Thank you.\n",
 91 |             "Excel seems so easy when you teach it step by step\n",
 92 |             "Brilliant!!\n",
 93 |             "This is amazing. Could you please help me out when I drag the flag into pivot table rows section. I am not getting the flag instead of that I am getting text like Image of India, Image of United States etc..\n",
 94 |             "Absolutely cool stuff\n",
 95 |             "Looks like fun\n",
 96 |             "Interesting names...\n",
 97 |             "这功能很实用\n",
 98 |             "In excel 365 can we have ease of updating in web browser like we love on Google sheets.\n",
 99 |             "Leila ji plz make more videos on google sheet features.\n",
100 |             "Magic\n",
101 |             "I always use dynamic tables and it works fine, and I never need (pivot T), is that ok? I mean, dynamic tables do almost everything\n",
102 |             "Walter White bought your courses for data analysis!!!\n",
103 |             "But only available for 365\n",
104 |             "So cool, thanks Leila.  Can&#39;t wait to get this feature!\n",
105 |             "That&#39;s amazing Leila! Really can&#39;t wait to have this soon, and as usual you did what you EXCEL at; your way of explaining in the most simple yet effective manner! No wonder, you won an award at Global Excel Summit for all your great work!  Keep growing!🙂\n",
106 |             "Wow, this is so cool. It feels like big lifeline to Pivot Tables.\n",
107 |             "Ok, a little longer and we&#39;ll have power bi in Excel :)\n",
108 |             "Excellent video Leila <br><br>I will add lot of value with custom DATA TYPES 😀 🎉\n",
109 |             "Very nice video, but a super tip at the end, thanks alot teacher\n",
110 |             "Very much a welcome addition, this!<br><br>With regards to data types, I just remembered they removed Wolfram Data Types on 11 June 2023.\n",
111 |             "Holy shit, I was just thinking about this yesterday!\n",
112 |             "Well updated.......\n",
113 |             "My only comment would be...<br><a href=\"https://youtu.be/WNrB1Q9Rry0?t=497\">https://youtu.be/WNrB1Q9Rry0?t=497</a>\n",
114 |             "Love it! <a href=\"about:invalid#zCSafez\"></a>\n",
115 |             "I don&#39;t fly in a jet stream where I need country flags ... but Client logo&#39;s might be a clever idea ... rather than displaying a lengthy client name repeatedly. <br><br>Which is precisely why &quot;Thursdays With Leila&#39; is always a &#39;must watch&#39; for me  ... ideas ... ideas ... ideas!!! ...  🤔 ... 💡💡💡💡<br><br>😍😍😍😍 ... thank you ... thank you ... thank you ...\n",
116 |             "Thanks ma,am,\n",
117 |             "Leila, is AI taking our jobs away...?\n",
118 |             "Excellent, I’ve been missing your content for the longest in my algorithms but glad to see this recent video, Leila. 😊<br><br>Edit: I’d just be careful on the last section. Probably should’ve blurred out your employees’ PII data or used dummy data for this bit.\n",
119 |             "Thank you Leila 🎉\n",
120 |             "Nice job, mam. But please give me the files\n",
121 |             "How to enable beta version?\n",
122 |             "🤗\n"
123 |           ]
124 |         }
125 |       ],
126 |       "source": [
127 |         "import googleapiclient.discovery\n",
128 |         "import googleapiclient.errors\n",
129 |         "\n",
130 |         "api_service_name = \"youtube\"\n",
131 |         "api_version = \"v3\"\n",
132 |         "DEVELOPER_KEY = \"\"  # placeholder: supply your own YouTube Data API key before running\n",
133 |         "\n",
134 |         "youtube = googleapiclient.discovery.build(\n",
135 |         "    api_service_name, api_version, developerKey=DEVELOPER_KEY)\n",
136 |         "\n",
137 |         "request = youtube.commentThreads().list(\n",
138 |         "    part=\"snippet\",\n",
139 |         "    videoId=\"WNrB1Q9Rry0\",\n",
140 |         "    maxResults=100, textFormat=\"plainText\"  # plain text instead of the default HTML (&#39;, <br>, <a ...>)\n",
141 |         ")\n",
142 |         "response = request.execute()  # single request: only the first page of results is fetched\n",
143 |         "\n",
144 |         "for item in response['items']:  # one item per top-level comment thread\n",
145 |         "    print(item['snippet']['topLevelComment']['snippet']['textDisplay'])\n"
146 |       ]
147 |     },
148 |     {
149 |       "cell_type": "markdown",
150 |       "source": [
151 |         "## Output as Pandas DataFrame"
152 |       ],
153 |       "metadata": {
154 |         "id": "_6cNc7iWSfBS"
155 |       }
156 |     },
157 |     {
158 |       "cell_type": "code",
159 |       "source": [
160 |         "import googleapiclient.discovery\n",
161 |         "import pandas as pd\n",
162 |         "\n",
163 |         "api_service_name = \"youtube\"\n",
164 |         "api_version = \"v3\"\n",
165 |         "DEVELOPER_KEY = \"\"  # placeholder: supply your own YouTube Data API key before running\n",
166 |         "\n",
167 |         "youtube = googleapiclient.discovery.build(\n",
168 |         "    api_service_name, api_version, developerKey=DEVELOPER_KEY)\n",
169 |         "\n",
170 |         "request = youtube.commentThreads().list(\n",
171 |         "    part=\"snippet\",\n",
172 |         "    videoId=\"WNrB1Q9Rry0\",\n",
173 |         "    maxResults=100, textFormat=\"plainText\"  # plain text instead of the default HTML (&#39;, <br>, <a ...>)\n",
174 |         ")\n",
175 |         "response = request.execute()  # single request: only the first page of results is fetched\n",
176 |         "\n",
177 |         "comments = []  # one [author, published, updated, likes, text] row per top-level comment\n",
178 |         "\n",
179 |         "for item in response['items']:\n",
180 |         "    comment = item['snippet']['topLevelComment']['snippet']\n",
181 |         "    comments.append([\n",
182 |         "        comment['authorDisplayName'],\n",
183 |         "        comment['publishedAt'],\n",
184 |         "        comment['updatedAt'],\n",
185 |         "        comment['likeCount'],\n",
186 |         "        comment['textDisplay']\n",
187 |         "    ])\n",
188 |         "\n",
189 |         "df = pd.DataFrame(comments, columns=['author', 'published_at', 'updated_at', 'like_count', 'text'])  # column order matches the row order above\n",
190 |         "\n",
191 |         "df.head(10)\n"
192 |       ],
193 |       "metadata": {
194 |         "colab": {
195 |           "base_uri": "https://localhost:8080/",
196 |           "height": 363
197 |         },
198 |         "id": "lCdRNwqRRmbN",
199 |         "outputId": "93017fc5-51bf-477f-fb9f-9895871dcdb6"
200 |       },
201 |       "execution_count": 16,
202 |       "outputs": [
203 |         {
204 |           "output_type": "execute_result",
205 |           "data": {
206 |             "text/plain": [
207 |               "             author          published_at            updated_at  like_count  \\\n",
208 |               "0      Hadi Asghari  2023-07-10T02:44:43Z  2023-07-10T02:44:43Z           0   \n",
209 |               "1      omid shirazi  2023-07-09T23:13:36Z  2023-07-09T23:13:36Z           0   \n",
210 |               "2          Yachid _  2023-07-09T16:56:39Z  2023-07-09T16:56:39Z           1   \n",
211 |               "3      Enoch Arthur  2023-07-09T11:18:36Z  2023-07-09T11:18:36Z           0   \n",
212 |               "4   Farhan Merchant  2023-07-09T10:13:32Z  2023-07-09T10:13:32Z           0   \n",
213 |               "5        Jake Smith  2023-07-09T01:24:36Z  2023-07-09T01:24:36Z           0   \n",
214 |               "6            Gary S  2023-07-08T19:56:56Z  2023-07-08T19:56:56Z           0   \n",
215 |               "7         Moker_020  2023-07-08T19:31:12Z  2023-07-08T19:31:12Z           0   \n",
216 |               "8        mrjaydp123  2023-07-08T19:20:31Z  2023-07-08T19:20:31Z           0   \n",
217 |               "9  karim Tahernejad  2023-07-08T15:15:14Z  2023-07-08T15:15:14Z           1   \n",
218 |               "\n",
219 |               "                                                text  \n",
220 |               "0              ممنونم از به اشتراك گذاشتن اطلاعاتتون  \n",
221 |               "1  Hi Leila you are awesome ❤and I have a problem...  \n",
222 |               "2  hey Leila~,<br>Not sure how I got unSub~d, <br...  \n",
223 |               "3  How do you make tutorials on features we don&#...  \n",
224 |               "4  An amazing insight; I&#39;m not sure if i will...  \n",
225 |               "5                                    Those salaries!  \n",
226 |               "6  Thank you for this video.  I have traveling an...  \n",
227 |               "7  I truly like all these ideas and suggestions f...  \n",
228 |               "8  Hi. Im hoping if you can help pls. Having issu...  \n",
229 |               "9  Hi Leila, that&#39;s amazing, I have an issue:...  "
230 |             ],
231 |             "text/html": [
232 |               "\n",
233 |               "  <div id=\"df-f1be4cda-7e5c-4305-b56b-8963945fece0\">\n",
234 |               "    <div class=\"colab-df-container\">\n",
235 |               "      <div>\n",
236 |               "<style scoped>\n",
237 |               "    .dataframe tbody tr th:only-of-type {\n",
238 |               "        vertical-align: middle;\n",
239 |               "    }\n",
240 |               "\n",
241 |               "    .dataframe tbody tr th {\n",
242 |               "        vertical-align: top;\n",
243 |               "    }\n",
244 |               "\n",
245 |               "    .dataframe thead th {\n",
246 |               "        text-align: right;\n",
247 |               "    }\n",
248 |               "</style>\n",
249 |               "<table border=\"1\" class=\"dataframe\">\n",
250 |               "  <thead>\n",
251 |               "    <tr style=\"text-align: right;\">\n",
252 |               "      <th></th>\n",
253 |               "      <th>author</th>\n",
254 |               "      <th>published_at</th>\n",
255 |               "      <th>updated_at</th>\n",
256 |               "      <th>like_count</th>\n",
257 |               "      <th>text</th>\n",
258 |               "    </tr>\n",
259 |               "  </thead>\n",
260 |               "  <tbody>\n",
261 |               "    <tr>\n",
262 |               "      <th>0</th>\n",
263 |               "      <td>Hadi Asghari</td>\n",
264 |               "      <td>2023-07-10T02:44:43Z</td>\n",
265 |               "      <td>2023-07-10T02:44:43Z</td>\n",
266 |               "      <td>0</td>\n",
267 |               "      <td>ممنونم از به اشتراك گذاشتن اطلاعاتتون</td>\n",
268 |               "    </tr>\n",
269 |               "    <tr>\n",
270 |               "      <th>1</th>\n",
271 |               "      <td>omid shirazi</td>\n",
272 |               "      <td>2023-07-09T23:13:36Z</td>\n",
273 |               "      <td>2023-07-09T23:13:36Z</td>\n",
274 |               "      <td>0</td>\n",
275 |               "      <td>Hi Leila you are awesome ❤and I have a problem...</td>\n",
276 |               "    </tr>\n",
277 |               "    <tr>\n",
278 |               "      <th>2</th>\n",
279 |               "      <td>Yachid _</td>\n",
280 |               "      <td>2023-07-09T16:56:39Z</td>\n",
281 |               "      <td>2023-07-09T16:56:39Z</td>\n",
282 |               "      <td>1</td>\n",
283 |               "      <td>hey Leila~,&lt;br&gt;Not sure how I got unSub~d, &lt;br...</td>\n",
284 |               "    </tr>\n",
285 |               "    <tr>\n",
286 |               "      <th>3</th>\n",
287 |               "      <td>Enoch Arthur</td>\n",
288 |               "      <td>2023-07-09T11:18:36Z</td>\n",
289 |               "      <td>2023-07-09T11:18:36Z</td>\n",
290 |               "      <td>0</td>\n",
291 |               "      <td>How do you make tutorials on features we don&amp;#...</td>\n",
292 |               "    </tr>\n",
293 |               "    <tr>\n",
294 |               "      <th>4</th>\n",
295 |               "      <td>Farhan Merchant</td>\n",
296 |               "      <td>2023-07-09T10:13:32Z</td>\n",
297 |               "      <td>2023-07-09T10:13:32Z</td>\n",
298 |               "      <td>0</td>\n",
299 |               "      <td>An amazing insight; I&amp;#39;m not sure if i will...</td>\n",
300 |               "    </tr>\n",
301 |               "    <tr>\n",
302 |               "      <th>5</th>\n",
303 |               "      <td>Jake Smith</td>\n",
304 |               "      <td>2023-07-09T01:24:36Z</td>\n",
305 |               "      <td>2023-07-09T01:24:36Z</td>\n",
306 |               "      <td>0</td>\n",
307 |               "      <td>Those salaries!</td>\n",
308 |               "    </tr>\n",
309 |               "    <tr>\n",
310 |               "      <th>6</th>\n",
311 |               "      <td>Gary S</td>\n",
312 |               "      <td>2023-07-08T19:56:56Z</td>\n",
313 |               "      <td>2023-07-08T19:56:56Z</td>\n",
314 |               "      <td>0</td>\n",
315 |               "      <td>Thank you for this video.  I have traveling an...</td>\n",
316 |               "    </tr>\n",
317 |               "    <tr>\n",
318 |               "      <th>7</th>\n",
319 |               "      <td>Moker_020</td>\n",
320 |               "      <td>2023-07-08T19:31:12Z</td>\n",
321 |               "      <td>2023-07-08T19:31:12Z</td>\n",
322 |               "      <td>0</td>\n",
323 |               "      <td>I truly like all these ideas and suggestions f...</td>\n",
324 |               "    </tr>\n",
325 |               "    <tr>\n",
326 |               "      <th>8</th>\n",
327 |               "      <td>mrjaydp123</td>\n",
328 |               "      <td>2023-07-08T19:20:31Z</td>\n",
329 |               "      <td>2023-07-08T19:20:31Z</td>\n",
330 |               "      <td>0</td>\n",
331 |               "      <td>Hi. Im hoping if you can help pls. Having issu...</td>\n",
332 |               "    </tr>\n",
333 |               "    <tr>\n",
334 |               "      <th>9</th>\n",
335 |               "      <td>karim Tahernejad</td>\n",
336 |               "      <td>2023-07-08T15:15:14Z</td>\n",
337 |               "      <td>2023-07-08T15:15:14Z</td>\n",
338 |               "      <td>1</td>\n",
339 |               "      <td>Hi Leila, that&amp;#39;s amazing, I have an issue:...</td>\n",
340 |               "    </tr>\n",
341 |               "  </tbody>\n",
342 |               "</table>\n",
343 |               "</div>\n",
344 |               "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f1be4cda-7e5c-4305-b56b-8963945fece0')\"\n",
345 |               "              title=\"Convert this dataframe to an interactive table.\"\n",
346 |               "              style=\"display:none;\">\n",
347 |               "        \n",
348 |               "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
349 |               "       width=\"24px\">\n",
350 |               "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
351 |               "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
352 |               "  </svg>\n",
353 |               "      </button>\n",
354 |               "      \n",
355 |               "  <style>\n",
356 |               "    .colab-df-container {\n",
357 |               "      display:flex;\n",
358 |               "      flex-wrap:wrap;\n",
359 |               "      gap: 12px;\n",
360 |               "    }\n",
361 |               "\n",
362 |               "    .colab-df-convert {\n",
363 |               "      background-color: #E8F0FE;\n",
364 |               "      border: none;\n",
365 |               "      border-radius: 50%;\n",
366 |               "      cursor: pointer;\n",
367 |               "      display: none;\n",
368 |               "      fill: #1967D2;\n",
369 |               "      height: 32px;\n",
370 |               "      padding: 0 0 0 0;\n",
371 |               "      width: 32px;\n",
372 |               "    }\n",
373 |               "\n",
374 |               "    .colab-df-convert:hover {\n",
375 |               "      background-color: #E2EBFA;\n",
376 |               "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
377 |               "      fill: #174EA6;\n",
378 |               "    }\n",
379 |               "\n",
380 |               "    [theme=dark] .colab-df-convert {\n",
381 |               "      background-color: #3B4455;\n",
382 |               "      fill: #D2E3FC;\n",
383 |               "    }\n",
384 |               "\n",
385 |               "    [theme=dark] .colab-df-convert:hover {\n",
386 |               "      background-color: #434B5C;\n",
387 |               "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
388 |               "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
389 |               "      fill: #FFFFFF;\n",
390 |               "    }\n",
391 |               "  </style>\n",
392 |               "\n",
393 |               "      <script>\n",
394 |               "        const buttonEl =\n",
395 |               "          document.querySelector('#df-f1be4cda-7e5c-4305-b56b-8963945fece0 button.colab-df-convert');\n",
396 |               "        buttonEl.style.display =\n",
397 |               "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
398 |               "\n",
399 |               "        async function convertToInteractive(key) {\n",
400 |               "          const element = document.querySelector('#df-f1be4cda-7e5c-4305-b56b-8963945fece0');\n",
401 |               "          const dataTable =\n",
402 |               "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
403 |               "                                                     [key], {});\n",
404 |               "          if (!dataTable) return;\n",
405 |               "\n",
406 |               "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
407 |               "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
408 |               "            + ' to learn more about interactive tables.';\n",
409 |               "          element.innerHTML = '';\n",
410 |               "          dataTable['output_type'] = 'display_data';\n",
411 |               "          await google.colab.output.renderOutput(dataTable, element);\n",
412 |               "          const docLink = document.createElement('div');\n",
413 |               "          docLink.innerHTML = docLinkHtml;\n",
414 |               "          element.appendChild(docLink);\n",
415 |               "        }\n",
416 |               "      </script>\n",
417 |               "    </div>\n",
418 |               "  </div>\n",
419 |               "  "
420 |             ]
421 |           },
422 |           "metadata": {},
423 |           "execution_count": 16
424 |         }
425 |       ]
426 |     },
427 |     {
428 |       "cell_type": "code",
429 |       "source": [
430 |         "df.head()"
431 |       ],
432 |       "metadata": {
433 |         "id": "0LV_QAezR0nc"
434 |       },
435 |       "execution_count": null,
436 |       "outputs": []
437 |     }
438 |   ]
439 | }


--------------------------------------------------------------------------------
/Apex_Connect.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Apex_Connect.ipynb",
  7 |       "provenance": [],
  8 |       "authorship_tag": "ABX9TyMMyXHwzDIKtM/V90RqOOPo",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     }
 18 |   },
 19 |   "cells": [
 20 |     {
 21 |       "cell_type": "markdown",
 22 |       "metadata": {
 23 |         "id": "view-in-github",
 24 |         "colab_type": "text"
 25 |       },
 26 |       "source": [
 27 |         "<a href=\"https://colab.research.google.com/github/SuperDataWorld/Python/blob/main/Apex_Connect.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 28 |       ]
 29 |     },
 30 |     {
 31 |       "cell_type": "markdown",
 32 |       "metadata": {
 33 |         "id": "SQtcB2XRxVAz"
 34 |       },
 35 |       "source": [
 36 |         "## Imports"
 37 |       ]
 38 |     },
 39 |     {
 40 |       "cell_type": "code",
 41 |       "metadata": {
 42 |         "id": "9xe3X80_t6Kg"
 43 |       },
 44 |       "source": [
 45 |         "import pandas as pd\n",
 46 |         "import requests\n",
 47 |         "import matplotlib.pyplot as plt\n",
 48 |         "%matplotlib inline"
 49 |       ],
 50 |       "execution_count": 1,
 51 |       "outputs": []
 52 |     },
 53 |     {
 54 |       "cell_type": "markdown",
 55 |       "metadata": {
 56 |         "id": "Fv01ucEKxa1V"
 57 |       },
 58 |       "source": [
 59 |         "## Request Data"
 60 |       ]
 61 |     },
 62 |     {
 63 |       "cell_type": "code",
 64 |       "metadata": {
 65 |         "id": "5aifR2qzxeRQ"
 66 |       },
 67 |       "source": [
 68 |         "url = 'https://apex.oracle.com/pls/apex/superdataworld/book_view/'\n",
 69 |         "r = requests.get(url, timeout=30)  # requests has no default timeout; avoid hanging the notebook\n",
 70 |         "r.raise_for_status()  # stop on an HTTP error instead of failing later while parsing the body\n",
 71 |         "json = r.json()\ndf = pd.DataFrame(json['items'])  # rows come from the 'items' list of the response"
 72 |       ],
 73 |       "execution_count": 10,
 74 |       "outputs": []
 75 |     },
 76 |     {
 77 |       "cell_type": "code",
 78 |       "metadata": {
 79 |         "colab": {
 80 |           "base_uri": "https://localhost:8080/",
 81 |           "height": 362
 82 |         },
 83 |         "id": "gSuViNJUz52i",
 84 |         "outputId": "6de524cb-a4f1-4c1e-9488-bc6ba8c8a2ba"
 85 |       },
 86 |       "source": [
 87 |         "df.info()\n",
 88 |         "df.head()"
 89 |       ],
 90 |       "execution_count": 11,
 91 |       "outputs": [
 92 |         {
 93 |           "output_type": "stream",
 94 |           "text": [
 95 |             "<class 'pandas.core.frame.DataFrame'>\n",
 96 |             "RangeIndex: 25 entries, 0 to 24\n",
 97 |             "Data columns (total 2 columns):\n",
 98 |             " #   Column  Non-Null Count  Dtype \n",
 99 |             "---  ------  --------------  ----- \n",
100 |             " 0   entity  25 non-null     object\n",
101 |             " 1   year    25 non-null     int64 \n",
102 |             "dtypes: int64(1), object(1)\n",
103 |             "memory usage: 528.0+ bytes\n"
104 |           ],
105 |           "name": "stdout"
106 |         },
107 |         {
108 |           "output_type": "execute_result",
109 |           "data": {
110 |             "text/html": [
111 |               "<div>\n",
112 |               "<style scoped>\n",
113 |               "    .dataframe tbody tr th:only-of-type {\n",
114 |               "        vertical-align: middle;\n",
115 |               "    }\n",
116 |               "\n",
117 |               "    .dataframe tbody tr th {\n",
118 |               "        vertical-align: top;\n",
119 |               "    }\n",
120 |               "\n",
121 |               "    .dataframe thead th {\n",
122 |               "        text-align: right;\n",
123 |               "    }\n",
124 |               "</style>\n",
125 |               "<table border=\"1\" class=\"dataframe\">\n",
126 |               "  <thead>\n",
127 |               "    <tr style=\"text-align: right;\">\n",
128 |               "      <th></th>\n",
129 |               "      <th>entity</th>\n",
130 |               "      <th>year</th>\n",
131 |               "    </tr>\n",
132 |               "  </thead>\n",
133 |               "  <tbody>\n",
134 |               "    <tr>\n",
135 |               "      <th>0</th>\n",
136 |               "      <td>Australia</td>\n",
137 |               "      <td>1942</td>\n",
138 |               "    </tr>\n",
139 |               "    <tr>\n",
140 |               "      <th>1</th>\n",
141 |               "      <td>Australia</td>\n",
142 |               "      <td>1943</td>\n",
143 |               "    </tr>\n",
144 |               "    <tr>\n",
145 |               "      <th>2</th>\n",
146 |               "      <td>Australia</td>\n",
147 |               "      <td>1944</td>\n",
148 |               "    </tr>\n",
149 |               "    <tr>\n",
150 |               "      <th>3</th>\n",
151 |               "      <td>Australia</td>\n",
152 |               "      <td>1945</td>\n",
153 |               "    </tr>\n",
154 |               "    <tr>\n",
155 |               "      <th>4</th>\n",
156 |               "      <td>Australia</td>\n",
157 |               "      <td>1946</td>\n",
158 |               "    </tr>\n",
159 |               "  </tbody>\n",
160 |               "</table>\n",
161 |               "</div>"
162 |             ],
163 |             "text/plain": [
164 |               "      entity  year\n",
165 |               "0  Australia  1942\n",
166 |               "1  Australia  1943\n",
167 |               "2  Australia  1944\n",
168 |               "3  Australia  1945\n",
169 |               "4  Australia  1946"
170 |             ]
171 |           },
172 |           "metadata": {},
173 |           "execution_count": 11
174 |         }
175 |       ]
176 |     },
177 |     {
178 |       "cell_type": "markdown",
179 |       "metadata": {
180 |         "id": "vtH41Xa38VUD"
181 |       },
182 |       "source": [
183 |         "## Refine Data and Graph"
184 |       ]
185 |     },
186 |     {
187 |       "cell_type": "code",
188 |       "metadata": {
189 |         "colab": {
190 |           "base_uri": "https://localhost:8080/",
191 |           "height": 206
192 |         },
193 |         "id": "9N0NJb__yE6z",
194 |         "outputId": "28fda8f9-b8ef-4912-834b-a35e35a172e2"
195 |       },
196 |       "source": [
197 |         "df = df[['entity','year','books_per_million']]\n",
198 |         "df = df[df['entity'] == 'Austria']\n",
199 |         "df.head()\n"
200 |       ],
201 |       "execution_count": 8,
202 |       "outputs": [
203 |         {
204 |           "output_type": "execute_result",
205 |           "data": {
206 |             "text/html": [
207 |               "<div>\n",
208 |               "<style scoped>\n",
209 |               "    .dataframe tbody tr th:only-of-type {\n",
210 |               "        vertical-align: middle;\n",
211 |               "    }\n",
212 |               "\n",
213 |               "    .dataframe tbody tr th {\n",
214 |               "        vertical-align: top;\n",
215 |               "    }\n",
216 |               "\n",
217 |               "    .dataframe thead th {\n",
218 |               "        text-align: right;\n",
219 |               "    }\n",
220 |               "</style>\n",
221 |               "<table border=\"1\" class=\"dataframe\">\n",
222 |               "  <thead>\n",
223 |               "    <tr style=\"text-align: right;\">\n",
224 |               "      <th></th>\n",
225 |               "      <th>entity</th>\n",
226 |               "      <th>year</th>\n",
227 |               "      <th>books_per_million</th>\n",
228 |               "    </tr>\n",
229 |               "  </thead>\n",
230 |               "  <tbody>\n",
231 |               "    <tr>\n",
232 |               "      <th>47</th>\n",
233 |               "      <td>Austria</td>\n",
234 |               "      <td>1937</td>\n",
235 |               "      <td>219.002794</td>\n",
236 |               "    </tr>\n",
237 |               "    <tr>\n",
238 |               "      <th>48</th>\n",
239 |               "      <td>Austria</td>\n",
240 |               "      <td>1945</td>\n",
241 |               "      <td>411.685106</td>\n",
242 |               "    </tr>\n",
243 |               "    <tr>\n",
244 |               "      <th>49</th>\n",
245 |               "      <td>Austria</td>\n",
246 |               "      <td>1946</td>\n",
247 |               "      <td>411.226809</td>\n",
248 |               "    </tr>\n",
249 |               "    <tr>\n",
250 |               "      <th>50</th>\n",
251 |               "      <td>Austria</td>\n",
252 |               "      <td>1947</td>\n",
253 |               "      <td>410.831617</td>\n",
254 |               "    </tr>\n",
255 |               "    <tr>\n",
256 |               "      <th>51</th>\n",
257 |               "      <td>Austria</td>\n",
258 |               "      <td>1948</td>\n",
259 |               "      <td>410.508146</td>\n",
260 |               "    </tr>\n",
261 |               "  </tbody>\n",
262 |               "</table>\n",
263 |               "</div>"
264 |             ],
265 |             "text/plain": [
266 |               "     entity  year  books_per_million\n",
267 |               "47  Austria  1937         219.002794\n",
268 |               "48  Austria  1945         411.685106\n",
269 |               "49  Austria  1946         411.226809\n",
270 |               "50  Austria  1947         410.831617\n",
271 |               "51  Austria  1948         410.508146"
272 |             ]
273 |           },
274 |           "metadata": {},
275 |           "execution_count": 8
276 |         }
277 |       ]
278 |     },
279 |     {
280 |       "cell_type": "code",
281 |       "metadata": {
282 |         "colab": {
283 |           "base_uri": "https://localhost:8080/",
284 |           "height": 297
285 |         },
286 |         "id": "IyAe2kDw6P-_",
287 |         "outputId": "a801b218-2cfc-4e3d-cbeb-267498daf7a5"
288 |       },
289 |       "source": [
290 |         "df.plot(x = 'year', y= 'books_per_million')"
291 |       ],
292 |       "execution_count": 9,
293 |       "outputs": [
294 |         {
295 |           "output_type": "execute_result",
296 |           "data": {
297 |             "text/plain": [
298 |               "<matplotlib.axes._subplots.AxesSubplot at 0x7f4ceffd7990>"
299 |             ]
300 |           },
301 |           "metadata": {},
302 |           "execution_count": 9
303 |         },
304 |         {
305 |           "output_type": "display_data",
306 |           "data": {
307 |             "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEGCAYAAACJnEVTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxU1fn48c+ThYQdEiJbgIRddiRABBWVKogLaBVFsbi01hZ/VauAVFutltbauhS1KK24flWs4o4ioIgoW1gCYQ9JgIQtkLAEyCQz8/z+mBsMmIQkZJJJ5nm/XvPKvecucw5Dnnly7rn3iKpijDEmOITUdAWMMcZUHwv6xhgTRCzoG2NMELGgb4wxQcSCvjHGBJGwmq5AWVq0aKFxcXE1XQ1jjKlVVq1adUBVY0raFtBBPy4ujqSkpJquhjHG1CoisqO0bda9Y4wxQcSCvjHGBBEL+sYYE0QCuk+/JIWFhWRmZpKfn1/TVTHVJDIyktjYWMLDw2u6KsbUerUu6GdmZtK4cWPi4uIQkZqujvEzVeXgwYNkZmYSHx9f09UxptYrd/eOiISKyBoR+cxZjxeR5SKSKiKzRaSeUx7hrKc62+OKnWOqU75FREZUpsL5+flER0dbwA8SIkJ0dLT9ZWdMFalIn/69wKZi638HnlXVzkAucKdTfieQ65Q/6+yHiPQAbgJ6AiOBf4tIaGUqbQE/uNjnbUzVKVfQF5FY4Ergv866AJcC7zu7vA6McZZHO+s424c7+48G3lVVl6qmA6nAoKpohDHG1CUfrMrknRU7/XLu8mb6zwGTAa+zHg0cUlW3s54JtHWW2wK7AJzth539T5aXcMxJInKXiCSJSFJ2dnYFmmKMMXXDR2uzeH9Vpl/OfcagLyJXAftVdZVfanAaVZ2pqgmqmhATU+JdxDUuIyODXr16nfV54uLiOHDgQBXUqPr96U9/YsGCBQBcfPHFJ++cLt6mIUOG1Fj9jKnN8gs9RIT5Z0R9eUbvDAWuEZFRQCTQBPgX0ExEwpxsPhbIcvbPAtoBmSISBjQFDhYrL1L8GFPN3G43YWGVH7z1+OOPn3GfH374odLnNyaYudxeGkX4Z3DlGc+qqlOBqQAicjHwoKreIiL/A64H3gUmAB87h3zirC91tn+tqioinwBvi8gzQBugC7DibCr/5083sHH3kbM5xU/0aNOER6/uecb93G43t9xyC6tXr6Znz5688cYbLF26lAcffBC3283AgQOZMWMGERERLFy4sMTyIidOnOC6667juuuu4+abb2bs2LFkZmbi8Xj44x//yI033lhiHeLi4hg7dixffPEF9evX5+2336Zz585kZ2dz9913s3Onr0/wueeeY+jQoTz22GNs376dtLQ02rdvzzvvvPOTc7722mt89NFHHDt2jG3btvHggw9SUFDAm2++SUREBHPnziUqKorbbruNq666iuuvv77Uf6NGjRqRl5eHqjJ58mS++OILRIRHHnmEG2+8kUWLFvHYY4/RokULUlJSGDBgAG+99ZZduDVBL7/QQ2R4pca5nNHZ/P0wBfi9iKTi67N/xSl/BYh2yn8PPASgqhuA94CNwJfARFX1nMX716gtW7bw29/+lk2bNtGkSROeeeYZbrvtNmbPns369etxu93MmDGD/Pz8EsuL5OXlcfXVVzNu3Dh+9atf8eWXX9KmTRuSk5NJSUlh5MiRZdajadOmrF+/nnvuuYf77rsPgHvvvZf777+flStX8sEHH/DLX/7y5P4bN25kwYIFJQb8IikpKcyZM4eVK1fy8MMP06BBA9asWcP555/PG2+8UeF/qzlz5rB27VqSk5NZsGABkyZNYs+ePQCsWbOG5557jo0bN5KWlsb3339f4fMbU9e43N4a7d45SVUXAYuc5TRKGH2jqvnADaUcPw2YVtFKlqY8Gbm/tGvXjqFDhwIwfvx4
nnjiCeLj4+natSsAEyZM4MUXX+SSSy4psbwoQI8ePZrJkydzyy23ANC7d28eeOABpkyZwlVXXcWFF15YZj3GjRt38uf9998PwIIFC9i4cePJfY4cOUJeXh4A11xzDfXr1y/znJdccgmNGzemcePGNG3alKuvvvpk3datW1f+fyTHkiVLGDduHKGhobRs2ZJhw4axcuVKmjRpwqBBg4iNjQWgX79+ZGRkcMEFF1T4PYypSwI10w9qp3dBNGvWrFLnGTp0KF9++SWqCkDXrl1ZvXo1vXv35pFHHjlj33nxehQte71eli1bxtq1a1m7di1ZWVk0atQIgIYNG56xTsW7nkJCQk6uh4SE4Ha7SzusUoq/V2hoaJWf35jayJ+ZvgX9Stq5cydLly4F4O233yYhIYGMjAxSU1MBePPNNxk2bBjdunUrsbzI448/TvPmzZk4cSIAu3fvpkGDBowfP55JkyaxevXqMusxe/bskz/PP/98AC6//HKef/75k/usXbu2ilpdORdeeCGzZ8/G4/GQnZ3N4sWLGTTIbtEwpjT+zPRr3bN3AkW3bt148cUXueOOO+jRowfTp08nMTGRG2644eQF27vvvpuIiAheffXVn5QX969//Ys77riDyZMnM3z4cCZNmkRISAjh4eGn9P+XJDc3lz59+hAREXGyn3769OlMnDiRPn364Ha7ueiii3jppZf89m9xJtdeey1Lly6lb9++iAhPPfUUrVq1YvPmzTVWJ2MClar6NdOXom6FQJSQkKCnz5y1adMmzj333BqqUWApmlmsRYsWNV0Vv7PP3QSLAreXro98waQR3Zh4SedKnUNEVqlqQknbrHvHGGMCSL7bN6gxIEbvmJpx7bXXkp6efkrZ3//+dzIyMip9znnz5jFlypRTyuLj4/nwww8rfU5jzNlzFfqedhNhffo/UtWguoHHH4F4xIgRjBhRqadbV7tA7oI0pqrlF/oy/UgbveMTGRnJwYMHLRAEiaJJVCIjI2u6KsZUC5fbMv1TxMbGkpmZiT2BM3gUTZdoTDDwd6Zf64J+eHi4TZtnjKmz/J3p17ruHWOMqctc1qdvjDHBwzJ9Y4wJIi4/j9O3oG+MMQEk3xmnb0/ZNMaYIGCZvjHGBBHL9I0xJojUeKYvIpEiskJEkkVkg4j82Sl/TUTSRWSt8+rnlIuITBeRVBFZJyLnFTvXBBHZ5rwm+KVFxhhTixVl+jX5wDUXcKmq5olIOLBERL5wtk1S1fdP2/8KfJOedwEGAzOAwSISBTwKJAAKrBKRT1Q1tyoaYowxdYHL7SEsRAgLraFMX33ynNVw51XWg29GA284xy0DmolIa2AEMF9Vc5xAPx8oe9ZvY4wJMvmFXr/150M5+/RFJFRE1gL78QXu5c6maU4XzrMiUjTZaVtgV7HDM52y0spPf6+7RCRJRJLs+TrGmGDjcnv81rUD5Qz6qupR1X5ALDBIRHoBU4HuwEAgCphSxinKTVVnqmqCqibExMRUxSmNMabWcBX6b6pEqODoHVU9BHwDjFTVPU4Xjgt4FSia6ToLaFfssFinrLRyY4wxjnx3DXfviEiMiDRzlusDlwGbnX56xDebyRggxTnkE+AXziieROCwqu4B5gGXi0hzEWkOXO6UGWOMcbgKPdTzY6ZfntE7rYHXRSQU35fEe6r6mYh8LSIxgABrgbud/ecCo4BU4DhwO4Cq5ojIE8BKZ7/HVTWn6ppijDG1n78z/TMGfVVdB/QvofzSUvZXYGIp22YBsypYR2OMCRquwgC4kGuMMaZ61HifvjHGmOpjmb4xxgQRl2X6xhgTPCzTN8aYIOJye4kIt6BvjDFBIb/QQ2SYde8YY0xQsEzfGGOChNvjxe1Vy/SNMSYYuNzOBCqW6RtjTN2XX+ibKtGGbBpjTBA4menbkE1jjKn7LNM3xpggYpm+McYEkR+DvmX6xhhT5xV179joHWOMCQKW6RtjTBD58UJuDWb6IhIpIitEJFlENojI
n53yeBFZLiKpIjJbROo55RHOeqqzPa7YuaY65VtEZIS/GmWMMbVRoGT6LuBSVe0L9ANGOhOe/x14VlU7A7nAnc7+dwK5Tvmzzn6ISA/gJqAnMBL4tzPvrjHGGAIk01efPGc13HkpcCnwvlP+OjDGWR7trONsHy4i4pS/q6ouVU3HN3H6oCpphTHG1AGBkukjIqEishbYD8wHtgOHVNXt7JIJtHWW2wK7AJzth4Ho4uUlHGOMMUHPFSijd1TVo6r9gFh82Xl3f1VIRO4SkSQRScrOzvbX2xhjTMApyvQD5imbqnoI+AY4H2gmImHOplggy1nOAtoBONubAgeLl5dwTPH3mKmqCaqaEBMTU5HqGWNMreYq9CAC4aHit/coz+idGBFp5izXBy4DNuEL/tc7u00APnaWP3HWcbZ/rarqlN/kjO6JB7oAK6qqIcYYU9vlu71EhoXiuwzqH2Fn3oXWwOvOSJsQ4D1V/UxENgLvishfgDXAK87+rwBvikgqkINvxA6qukFE3gM2Am5goqp6qrY5xhhTe7kKPX7tz4dyBH1VXQf0L6E8jRJG36hqPnBDKeeaBkyreDWNMabuyy/0+rU/H+yOXGOMCRgut/8zfQv6xhgTICzTN8aYIGKZvjHGBJH8Qq9fJ1ABC/rGGBMwXG6PX6dKBAv6xhgTMFxuy/SNMSZo5Bd6iLBM3xhjgoNl+sYYE0TyC73Wp2+MMcHC5fZYpm+MMcHCZZm+McYEB69XKfBYn74xxgSF6pgqESzoG2NMQHC5/T8pOljQN8aYgGCZvjHGBJH8Qsv0jTEmaFimb4wxQSRgMn0RaSci34jIRhHZICL3OuWPiUiWiKx1XqOKHTNVRFJFZIuIjChWPtIpSxWRh/zTJGOMqX2qK9Mvz8TobuABVV0tIo2BVSIy39n2rKr+s/jOItID32ToPYE2wAIR6epsfhG4DMgEVorIJ6q6sSoaYowxtVl1ZfrlmRh9D7DHWT4qIpuAtmUcMhp4V1VdQLqIpPLjBOqpzoTqiMi7zr4W9I0xQc9VGIB9+iISB/QHljtF94jIOhGZJSLNnbK2wK5ih2U6ZaWVn/4ed4lIkogkZWdnV6R6xhhTa+U74/QDZrpEEWkEfADcp6pHgBlAJ6Afvr8Enq6KCqnqTFVNUNWEmJiYqjilMcYEvKJM398To5enTx8RCccX8P9PVecAqOq+Ytv/A3zmrGYB7YodHuuUUUa5McYEtZMXcms60xcRAV4BNqnqM8XKWxfb7VogxVn+BLhJRCJEJB7oAqwAVgJdRCReROrhu9j7SdU0wxhjareTF3IDINMfCtwKrBeRtU7ZH4BxItIPUCAD+DWAqm4QkffwXaB1AxNV1QMgIvcA84BQYJaqbqjCthhjTK1VXZl+eUbvLAGkhE1zyzhmGjCthPK5ZR1njDHBqijTt0crG2NMEHC5vdQLC8HXo+4/FvSNMSYA5Bf6f6pEsKBvjDEBweX2/1SJYEHfGGMCgssyfWOMCR6W6RtjTBBxuS3TN8aYoJFfaJm+McYEDcv0jTEmiFimb4wxQcQyfWOMCSL5hV4L+sYYEyxcbo917xhjTLCwTN8YY4KIZfrGGBMkVBWX2zJ9Y4wJCgUeL6oQYZm+McbUfSdnzQqETF9E2onINyKyUUQ2iMi9TnmUiMwXkW3Oz+ZOuYjIdBFJFZF1InJesXNNcPbfJiIT/NcsY4ypPU7Ojxsgmb4beEBVewCJwEQR6QE8BCxU1S7AQmcd4Ap8k6F3Ae4CZoDvSwJ4FBgMDAIeLfqiMMaYYOYqDKBMX1X3qOpqZ/kosAloC4wGXnd2ex0Y4yyPBt5Qn2VAMxFpDYwA5qtqjqrmAvOBkVXaGmOMqYVcbmd+3ADJ9E8SkTigP7AcaKmqe5xNe4GWznJbYFexwzKdstLKT3+Pu0QkSUSSsrOzK1I9Y4yplfKdTD8yEDL9IiLSCPgAuE9VjxTfpqoKaFVUSFVnqmqCqibExMRUxSmNMSag
BVymLyLh+AL+/6nqHKd4n9Ntg/Nzv1OeBbQrdnisU1ZauTHGBDVXIGX6IiLAK8AmVX2m2KZPgKIROBOAj4uV/8IZxZMIHHa6geYBl4tIc+cC7uVOmTHGBLWTQzarIdMPK8c+Q4FbgfUistYp+wPwJPCeiNwJ7ADGOtvmAqOAVOA4cDuAquaIyBPASme/x1U1p0paYYwxtdiPQzb9n+mfMeir6hJAStk8vIT9FZhYyrlmAbMqUkFjjKnrfrw5K0D69I0xxvhPUaYfEOP0jTHG+FdRph8od+QaY4zxI8v0jTEmiATUA9eMMcb4V36hh7AQISzUgr4xxtR5Lre3WvrzwYK+McbUuPxCT7V07YAFfWOMqXGW6RtjTBCprvlxwYK+McbUuPxCD/Us6BtjTHCw7h1jjAkidiHXGGOCiGX6xhgTRFyW6RtjTPCwTN8YY4KI9ekbY0wQCahMX0Rmich+EUkpVvaYiGSJyFrnNarYtqkikioiW0RkRLHykU5Zqog8VPVNMcaY2inQ+vRfA0aWUP6sqvZzXnMBRKQHcBPQ0znm3yISKiKhwIvAFUAPYJyzrzHGBL18t5eIapgfF8o3R+5iEYkr5/lGA++qqgtIF5FUYJCzLVVV0wBE5F1n340VrrExxtQhbo8Xj1eJrIb5ceHs+vTvEZF1TvdPc6esLbCr2D6ZTllp5T8hIneJSJKIJGVnZ59F9YwxJvDlF02gUk2ZfmXfZQbQCegH7AGerqoKqepMVU1Q1YSYmJiqOq0xxgQklzNVYnVdyD1j905JVHVf0bKI/Af4zFnNAtoV2zXWKaOMcmOMCVr51ThVIlQy0xeR1sVWrwWKRvZ8AtwkIhEiEg90AVYAK4EuIhIvIvXwXez9pPLVNsaYuiHgMn0ReQe4GGghIpnAo8DFItIPUCAD+DWAqm4QkffwXaB1AxNV1eOc5x5gHhAKzFLVDVXeGmOMqWXyC6s30y/P6J1xJRS/Usb+04BpJZTPBeZWqHbGGFPHudy+TD8iUG7OMsYY4z+u2tCnb4wxpmrkO336EbVgnL4xxpizVJTpRwb4OH1jjDFVwDJ9Y4wJIpbpG2NMEHFZpm+MMcHDMn1jjAki1qdvjDFBxOX2IgLhoVIt72dB3xhT6+09nM9f525i35H8mq5KhbncXiLDQhGxoG+MMWfk9ni55+3VzFycxpgXv2fj7iMVPscrS9L5asNeP9TuzPILPdX2LH2woG+MCWD5hR4O5LnK3OdfC7eRtCOXe4d3AeD6l35gwcZ9ZR5T3Bfr9/DEZxuZ/vW2s6prZbkKvdU2axZY0DfGBCi3x8utryxn2FPfsHhrybPo/ZB6gBe+SeWGAbHcf1lXPpo4lE4xjfjVm0m8siQdVS3zPTJzjzPlg3WEhggbdx/h8IlCfzSlTPluy/SNMYbpX6eyMiOXpvXDueO1lcxZnXnK9gN5Lu6dvZb4Fg358+ieALRsEsnsXycyokcrnvhsI498lEKhx1vi+d0eL/e+uxZV+Nu1vfEqJGXk+L1dp7NM3xgT9JanHeSFr7dx/YBYvrz/IgbFR/H795KZsWg7qorXqzz4v2QOnyjkhXHn0aDej0+Jb1AvjH/fch53D+vE/y3fyfUvLSXjwLGfvMe/Fm5j1Y5c/nJtL67p14Z6oSEsT6/+oG+ZvjEmqB06XsB9s9fSIbohf76mJ00iw3n19oFc07cNf/9yM499soGZ36WxaEs2f7zyXHq0afKTc4SECA9d0Z1/33IeGQeOMWr6d7y3ctfJ7p6l2w+e7BYa3a8tkeGh9GvXjGVpB6u7udWe6VdqjlxjjPEHVWXKB+s4kOdizm+G0jDCF6IiwkJ57sZ+tGwSwX++SwdgRM+WjE/sUOb5RvVuTf/2zfj97GQmf7COb7bsZ9KIbtw3e80p3UIAiR2jeOGbVI7mF9I4Mtx/jTxNvttDo4jqC8VnzPRFZJaI7BeRlGJlUSIyX0S2OT+bO+UiItNFJFVE1onIecWOmeDs
v01EJvinOcaY2uztFTuZt2Efk0d0p3ds01O2hYQID1/Zg8eu7sGQTtE89fO+5Rrb3rppff7vl4OZekV3Fmzax8+e+ZbcY4U8P67/Kd1CgztGO/36uVXerrK4Cr3VNoEKlK975zVg5GllDwELVbULsNBZB7gC32ToXYC7gBng+5LAN7fuYGAQ8GjRF4UxxgBs3XeUxz/dyIVdWnDnBfGl7nfb0Hje/lUiTRuUPxsPCRF+PawTH/52KP3bN+cvY3rRs82pXyrntW9OeKiwLL16u3hcbk+1TZUI5Zsjd7GIxJ1WPBrfZOkArwOLgClO+Rvq6zhbJiLNRKS1s+98Vc0BEJH5+L5I3jnrFhhjar38Qg+/e2cNjSPDeHpsX0JC/HN3aq+2TfngN0NK3Fa/Xih9Y5uxPM2/F3NVlfQDx/hu2wEWb81mx8HjnNe++nLgynYktVTVPc7yXqCls9wW2FVsv0ynrLTynxCRu/D9lUD79u0rWT1jTG3y17mb2Lz3KK/dPpBzGkfWWD0SO0Yz49vt5LncVd7Prqo8M38rc1ZnkXXoBAAdohtw06B23Dak9L9sqtpZt0pVVUTKvgOiYuebCcwESEhIqLLzGmMC0/yN+3hj6Q5+eUE8F3c7p0brMti5mLtqRy7DusZU6bmTMw/z/NepDOkUzW8u7sSFXVrQIbphlb5HeVT26sE+p9sG5+d+pzwLaFdsv1inrLRyY0wd4fUqOw7+dDx8WfYezmfS+8n0bNOESSO7+alm5TegQ3PCQsQvQzc/Td5NeKgwY/wAxid2qJGAD5UP+p8ARSNwJgAfFyv/hTOKJxE47HQDzQMuF5HmzgXcy50yY0wtt/dwPs8v3MZF//iGYf9YxCtL0st1nMer3D97LQVuL8+P619tz5MvS4N6YfSJbcryKg76Xq/y+bo9DOt6Dk3rV99w0JKcsXtHRN7BdyG2hYhk4huF8yTwnojcCewAxjq7zwVGAanAceB2AFXNEZEngJXOfo8XXdQ1xgSOlKzDuNwezmvfvMzhkB6v8s3m/by7cidfb96PV2Fo52g6RDdg2ucb6XxOozN2j7z07XaWph3kqev70DGmUVU3pdISO0Yzc3EaxwvcpwzpPBtJO3LZeySfqaO6V8n5zkZ5Ru+MK2XT8BL2VWBiKeeZBcyqUO2MMdUi91gBT36xmdlJvvEWCR2ac+/PunBB5xanBP9jLjfvJe1i1vfp7Mo5QUzjCO4e1okbB7ajQ3RDjrnc/HzGD9zz9uqTDz8ryeqduTwzfytX923DDQNiq6WN5TW4YzT/XrSdVTtyubBL1fTrf5q8m8jwEH52bssz7+xndkeuMUFMVfnfqkz+NncTR/Pd/HpYR9o2q8+MRdu59ZUV9G/fjN8N70L3Vo15/YcdvL18B0fy3Qzo0JypV5zLZT1aEh76Yy9xw4gw/jshgdEvfM8vX0/io98OPWU8vary2bo9PP7ZRlo3jWTatb2qbfKQ8kro0JxQp1+/KoK+2+Pli5Q9DO/e8uQdxjWp5mtgjKkR2/Yd5eEPU1iRkcPAuOb8ZUxvurVqDMCNA9vx/qpM/v3Ndm5/1dcrGyJwRa/W3HlhfJnjymObN+DlWwcw7j/LmPj2al67fSBhoSFs2H2YP3+6kRXpOfRs04Snx/alSTU+7qC8GkaE0btt0yobr78sLYcDeQVc3bd1lZzvbFnQNyYIfbctm7veWEVEeAhP/bwP1w+IPeWGqIiwUG4Z3IEbBrTjozVZZOYe54aEdrSLalCu8yfERTFtTG8mf7COP368gRCBd1bspFmDevztut6MTWhHqJ9uwKoKiR2jeWVJGicKPNSvd3YXmD9N3k3DeqE1Phy1iAV9Y4LMlyl7+N07a+kY05A37hxU5s1Q9cJCGDuwXanbyzJ2YDu27jvKf5ekExoiTBgSx33Du1bo8Qk1ZXDHKF76djurd+YytHMLwHfxOjnzEPmFHoZ0alGu8xS4vXy5YS+X92xFZDU+aqEsFvSN
CSL/S9rFlA/W0a9dM169bZDfA/DUUefSIboBiR2j6dKysV/fqyoV9esv2LSP3OMFfL1pP4u2ZpNzrAAR+PLei052hZVlSWo2h08UBkzXDtjz9I0JGq9+n86k99cxpFML3rxzcLVk3KEhwq3nx9WqgA/QODKcXm2b8ur3Gdzz9hq+3rKfi7q04J839KVRvTCe/mpLuc7zafIemtYP54LOVXt379mwTN+YOiy/0EPq/jw+Xbebl79NY0TPlkwPkBuhAt2UEd1YlnaQi7rG0L9985PXIHYfOsEz87eSvOsQfds1K/X4/EIP8zfu48reralXjY9OPhML+sbUEXkuN8m7DrFmZy6b9h5l854jpB84htd5gtX1A2J58rrehIUGTgAKZEM6t2BI55/23d9xQTyv/ZDBP7/awpt3Di71+EVb9pPncnN13zb+rGaFWdA3ppY6fLyQRVv3s2pHLkkZuWzee+RkgG8f1YDurRpzZe/WdG/dhO6tGhPfomHAjYmvjRpFhPHbizvxl883sXT7Qc7vFF3ifp8m7yG6YT0SO0ZVcw3LZkHflGnu+j18uyWbv13X22/POK+L8lxuDp8oJC/fTZ7LzTGXmxOFHgbGRRHVsF6lz+tye1i0JZsPV2fx9eb9FHi8NKwXSv/2zbnn0i4M6NCcfu2a1fjzXeq68Ykd+M93afzzqy28f/f5P/kyPZjnYuHmfdwwoF3A/WVlQd+UKv3AMR54L5kThR6GdI5mdL8Sp0Aw+H7JV6TnsCztIMvSctiy72iJ+zWOCOOeSzszYUhcuYfwnSjwsCIjh6827OWzdXs4fKKQFo3qcUtie67p24bebZsGXGCp6yLDQ/nd8C48/GEK32zZz6Xdf3y8QkrWYe5+axVer+8mt0AjRbPDB6KEhARNSkqq6WoEJbfHyw0vL2X7/jzOaRJJgdvLgt8PC6gLUoFg/sZ9PP3VFjbv9QX5+uGhJMQ1Z1BcFOc0iaBhRBgNI8JoFBGG16vMXJzGws37iW1en4eu6M6VvVv/JEv0eJVNe47w3bYDLEnNZmVGLgVuL5HhIYzo2Yox/dtyYecWFuhrWKHHy/Cnv6VRRBif/b8LCAkR/pe0i0c+SiGqYT1mjB9AvzIu9PqTiKxS1YSStlmmb0r08uI01oc1MhwAABCcSURBVOw8xL9u6kfjyDDueC2J2Um7uDWxQ01XLSAcL3DzxGebeGfFTrq1bMzkkd0YHB9Nn9impzyL5nSDO0azZNsB/vL5Ru55ew2z2qczpFMLdh86QZbz2ns4H7fTOd+9VWN+kdiBC7q0YFB8VJU99dGcvfDQEO6/rAv3z07m4+QsVu3I5a1lOxnSKZrnx/UnulFETVexRJbpB6lvNu8nqmG9EoecpWQdZsyL3zOyVyteuPk8VJWxLy9lx8HjfDvpkrO+Lb22W5d5iPveXUv6wWPcdVFHfn9Z1woPgfR4lQ9WZfKPr7aQc6yAVk0iadMskjbN6tO2WX26tGzE0M4tanTqQHNmHq8y8rnFbNufB8Cvh3Vk0uXdavyvsLIyfQv6QSbP5eZPH6cwZ7Vv4rKxCbE8dMW5Jy8u5hd6uOaFJRw6Xsi8+y6iuVO+MiOHG15aypSR3fnNxZ1qrP41yeNVZixK5bkF24hpHMHTY/uW+3b8ss6pqjUeJEzlfbs1mz/MWc/DV57LqN6Bceetde8YAJJ3HeJ3765hV85xfje8C65CD68sSWfehn1MHtmNcQPb8/RXW9i6L49Xbx94MuADDIyL4pJuMbz07XZuHtw+6EaHfLctm2mf+ybvvqpPa6aN6V0ld7T6bvixUVG12bCuMXz/0KU1XY1ys6AfBLxe5eXFaTz91RZaNolk9q/PZ2Ccb+zwzwfE8sePUnj4wxTeXLqDLfuOcsvg9lxSwhMBHxzRjSunL2Hm4u1MGlG5GYB2HDzGIx+lcGGXFtx1UeD/xbB131H+OncTi7Zk0y6qPi/efB6jerey8e6m1jqroC8iGcBRwAO4
VTVBRKKA2UAckAGMVdVc8f2W/AvfdIrHgdtUdfXZvL85M5fbw6/eWMXirdmM6t2Kv13b55QMtWvLxrx7VyIfr93NXz7fRHx0Q/4w6twSz9WzTVOu7tuGWUsymDAkrsL9zR+uyeSRD1M4Xujhu20HaB/VkJG9Wp1V+yqiwO1ldtIuerRuwoAOpT8PHuBAnotn52/lnRU7aRgRxh9GdWfCkDh7fIGp9aoi079EVQ8UW38IWKiqT4rIQ876FOAKoIvzGgzMcH4aP3ryi80s3prNE6N7Mj6xQ4kZqogwpn9bRvZqhVe1zBEiv7+sK3PX7+GFr1N5fHSvctUhz+XmTx+lMGdNFgPjmvP3n/fh/veSeeC9tXSMGUrXangY18qMHKbOWU+qc8Ft3KD2PHRF9590UxW4vbz2QzrPL0zlRKGHX5wfx++GdzmrG6qMCST+uHo0GnjdWX4dGFOs/A31WQY0E5HAuOpRR83bsJdXv8/gtiFx3Hp+3Bm7JCLDQ884JDC+RUPGJrTjnRU7Wbhp3xnrkLzrEFdO/46P1mZx38+68M6vEukY04iXxw+gQUQYv3ojicPHCyvUroo4fLyQqXPWccNLSzlR4OGl8QP45QXxzF65k+FPf8unybtR9V1MXbBxHyOeW8xf525mYHwU8+6/iMeu6WkB39QpZzV6R0TSgVxAgZdVdaaIHFLVZs52AXJVtZmIfAY8qapLnG0LgSmqmnTaOe8C7gJo3779gB07dlS6fsFsV85xrpz+HR2iG/L+b86v0m6J3GMF/GLWCjbuOcLfru1d4iQbXq/yn+/S+Me8LZzTOILnburPoPhTn0GyakcON81cxvmdWvDqbQOrdCYlj1f5bN1unvhsE7nHC7hjaBz3X9b15JdaStZhps5Zz/qsw1zcLQaPV/lu2wE6xTTkj1f1CJhZjoypDH+O3rlAVbNE5BxgvohsLr5RVVVEKvStoqozgZngG7J5lvULSgVuL//vnTWowgs3V/1jdJs3rMc7dyXym7dWMfmDdew/ms/ESzqf/Eti/5F8HvhfMt9tO8AVvVrx5HV9ShzpMqBDFI+P7sXUOet5at5mpl5R8rWE4tbszOWLlL2c174ZQzu3oPFpc6zmHitgdtIu3lq2g8zcE/SJbcprtw+kV9ump+zXq21TPpo4lNd/yODpr7YQGiI8enUPxid2KPPmKmNqu7MK+qqa5fzcLyIfAoOAfSLSWlX3ON03+53ds4DiKWGsU2aq2D+/2sLaXYd48ebz6BDd0C/v0SgijFcmDGTy+8n886ut7D/q4tGre7J4azYP/i+ZYwVu/nptb8YNaldmt9K4Qe1JyTrMy9+mERfdkJsGlry/qvLaDxlM+3zTybtVw0OFgXFRXNr9HLq1aszHa3fzSfJuCtxeBsdHMfWKcxnZq1Wpf0GEhgh3XBDPmP5tCQ2RoBuGaoJTpYO+iDQEQlT1qLN8OfA48AkwAXjS+fmxc8gnwD0i8i6+C7iHVXXP2VTe/NTXm/cxc3Ea4xPbc2Uf/14yqRcWwjNj+3FOk0hmLk5jRXoOm/cepXurxrw7LrHcsyU9enVPtmfnMXXOeuaszmTqqHM5r/2Po2vyXG6mfLCOz9ft4WfntuSp6/uwdd9Rvtmyn2827+cvn28CoEG9UMYmxHJrYly5prIrYn32JphUuk9fRDoCHzqrYcDbqjpNRKKB94D2wA58QzZznP79F4CR+IZs3n56f/7p6uIdufuP5rMiPYflaTmszMjhQJ5vzk0B56cUW/dlqCIlbytaDhEhxFnflXOc9tEN+fC3Q6p1Iub/fpfGX+du4tbEDkwddW6F39vt8Q2nfHb+Ng7kubiiVysmj+xOocfL3W+tIuPAMSaP7M6vL+r4k78EduUcZ9OeIyR2iqZJpGXrxthjGGpQ1qETrEg/yPK0HFak55B24Bjgy0oHdGhObPMGgKKK71W0zI/rnFzXYuXgdRa8qnjVd1xkeCgPXN7Vb906Zckv9Jz1F80x
l5v/fpfOy4u3U+D2EhYqNIoI5/lx/UudrMIYcyoL+tVEVdlx8LjvuepOoM86dAKAxpFhDIqLYnDHKAbFR9OrTRN73koZso+6mL5wG3uP5DNtTC/OaWIPHjOmvOzZO36iqqTuz2N5eg7L03NYkX6QfUdcgK+feFBcFL+8MJ5B8VF0b9WkSock1nUxjSN4Ykz5bv4yxpSfBf0K8HiVzXuPnOyqWZGRQ86xAgBaNolgcHw0g+KjSOwYRaeYRvZ8FmNMwLGgXwa3x0vK7iMsTzvIinTfhdcj+W4AYpvX5+JuMSTGRzO4YxTtoxpYkDfGBDwL+sW43B7WZR5medpBlqfnsGpHLscLPAB0bNGQK/u0ZlC8r0++bbP6NVxbY4ypuKAO+icKPKzZmcsypz9+zc5DuNxeALq1bMz1A2KdIB9lMxgZY+qEoAr6R/MLWbUj1zdOPj2HdZmHKPQoIQI92jRhfGIHX5CPizplAhFjjKkr6nTQP3y8kBUZvix+eXoOKVmH8SqEhQi9Y5ty5wUdGRwfxYC45nZTjzEmKNTJoL/70AnueG0lW/YdRdX3uIB+7ZpxzyWdGRQfzXkdmp3xEcLGGFMX1cnId07jCNo0q8+VvX0XXvu2a1atjyQwxphAVSeDflhoCLNuG1jT1TDGmIBjzwEwxpggYkHfGGOCiAV9Y4wJIhb0jTEmiFjQN8aYIGJB3xhjgogFfWOMCSIW9I0xJogE9HSJIpKNb3L1mtACOFBD7+0Pdak9daktULfaU5faArW3PR1UNaakDQEd9GuSiCSVNsdkbVSX2lOX2gJ1qz11qS1Q99oD1r1jjDFBxYK+McYEEQv6pZtZ0xWoYnWpPXWpLVC32lOX2gJ1rz3Wp2+MMcHEMn1jjAkiFvSNMSaIBFXQF5FZIrJfRFKKlfUVkaUisl5EPhWRJqcd015E8kTkwWJlI0Vki4ikishD1dmGYnUod1tEJE5ETojIWuf1UrFjBjj7p4rIdBGRQG+Ps62Ps22Dsz0yUNpTwc/mlmKfy1oR8YpIv0BpSyXaEy4irzvlm0RkarFjavz3xqlHRdpTT0RedcqTReTiYscExOdTYaoaNC/gIuA8IKVY2UpgmLN8B/DEace8D/wPeNBZDwW2Ax2BekAy0COQ2wLEFd/vtPOsABIBAb4Argj0zwbfjG/rgL7OejQQGijtqcz/M6e8N7C9ln82NwPvOssNgAzn/19A/N5Uoj0TgVed5XOAVUBIIH0+FX0FVaavqouBnNOKuwKLneX5wM+LNojIGCAd2FBs/0FAqqqmqWoB8C4w2m+VLkVF21ISEWkNNFHVZer7X/wGMKaq61oeFWzP5cA6VU12jj2oqp5Aac9ZfDbj8P1/qs2fjQINRSQMqA8UAEcIkN8bqHB7egBfO8ftBw4BCYH0+VRUUAX9Umzgx/98NwDtAESkETAF+PNp+7cFdhVbz3TKAkGJbXHEi8gaEflWRC50ytriq3+RQGoLlN6eroCKyDwRWS0ik53yQG5PWZ9NkRuBd5zlQG4LlN6e94FjwB5gJ/BPVc0hsH9voPT2JAPXiEiYiMQDA5xtgf75lMqCvu9Pud+KyCqgMb7MBOAx4FlVzaupilVCaW3ZA7RX1f7A74G3T792EaBKa08YcAFwi/PzWhEZXjNVLLfS2gKAiAwGjqtqSkkHB6DS2jMI8ABtgHjgARHpWDNVrJDS2jMLX0BPAp4DfsDXvlorrKYrUNNUdTO+7gJEpCtwpbNpMHC9iDwFNAO8IpKPr0+veJYWC2RVX41LV1pbVNUFuJzlVSKyHV+2nIWv/kUCpi1Q5meTCSxW1QPOtrn4+mjfIkDbU0ZbitzEj1k+1N7P5mbgS1UtBPaLyPdAAr4sPyB/b6DM3x03cH/RfiLyA7AVyCWAP5+yBH2mLyLnOD9DgEeAlwBU9UJVjVPVOHzf8H9V1RfwXfDpIiLxIlIP3y/rJzVS+dOU1hYRiRGRUGe5I9AFSFPVPcAREUl0Rh78
Avi4RipfgtLaA8wDeotIA6fveBiwMZDbU0ZbisrG4vTnAwRyW6DM9uwELnW2NcR3oXMzAfx7A2X+7jRw2oGIXAa4VTWg/6+dUU1fSa7OF75Mag9QiC9bvBO4F98391bgSZy7lE877jGc0TvO+ihn/+3Aw4HeFnwXpTYAa4HVwNXFzpMApDhteaGk9gdae5z9xzttSgGeCqT2VKItFwPLSjhPjbelEv/XGuEb7bYB2AhMKnaeGv+9qUR74oAtwCZgAb5HFgfU51PRlz2GwRhjgkjQd+8YY0wwsaBvjDFBxIK+McYEEQv6xhgTRCzoG2NMELGgb4wxQcSCvjF+VnRjnDGBwIK+McWIyOMicl+x9Wkicq+ITBKRlSKyTkT+XGz7RyKySnzP9b+rWHmeiDwtIsnA+dXcDGNKZUHfmFPNwndLfdEt+TcBe/E9umIQ0A8YICIXOfvfoaoD8N2d+TsRiXbKGwLLVbWvqi6pzgYYU5agf+CaMcWpaoaIHBSR/kBLYA0wEN/DuNY4uzXC9yWwGF+gv9Ypb+eUH8T3JMYPqrPuxpSHBX1jfuq/wG1AK3yZ/3Dgb6r6cvGdnKnzfgacr6rHRWQREOlszlfVWv0IXlM3WfeOMT/1ITASX4Y/z3nd4Uysg4i0dZ7K2BTIdQJ+d3xPlDQmoFmmb8xpVLVARL4BDjnZ+lcici6w1Jn7Og/fUz6/BO4WkU34nsS4rKbqbEx52VM2jTmNcwF3NXCDqm6r6foYU5Wse8eYYkSkB5AKLLSAb+oiy/SNMSaIWKZvjDFBxIK+McYEEQv6xhgTRCzoG2NMELGgb4wxQeT/A4WVuHKPCGRYAAAAAElFTkSuQmCC\n",
308 |             "text/plain": [
309 |               "<Figure size 432x288 with 1 Axes>"
310 |             ]
311 |           },
312 |           "metadata": {
313 |             "needs_background": "light"
314 |           }
315 |         }
316 |       ]
317 |     }
318 |   ]
319 | }


--------------------------------------------------------------------------------
/Web_Scraping_from_TrustPilot.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyOWtJ2tpj3chJADoVCotlOE",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/analyticswithadam/Python/blob/main/Web_Scraping_from_TrustPilot.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "execution_count": 2,
 32 |       "metadata": {
 33 |         "id": "JP6PLQnwDvlC"
 34 |       },
 35 |       "outputs": [],
 36 |       "source": [
 37 |         "from time import sleep\n",
 38 |         "import requests\n",
 39 |         "from google.colab import files\n",
 40 |         "import pandas as pd\n",
 41 |         "from bs4 import BeautifulSoup\n",
 42 |         "\n",
 43 |         "def soup2list(src, list_, attr=None):\n",
 44 |         "    if attr:\n",
 45 |         "        for val in src:\n",
 46 |         "            list_.append(val[attr])\n",
 47 |         "    else:\n",
 48 |         "        for val in src:\n",
 49 |         "            list_.append(val.get_text())\n",
 50 |         "\n",
 51 |         "users = []\n",
 52 |         "userReviewNum = []\n",
 53 |         "ratings = []\n",
 54 |         "locations = []\n",
 55 |         "dates = []\n",
 56 |         "reviews = []\n",
 57 |         "\n",
 58 |         "from_page = 1\n",
 59 |         "to_page = 6\n",
 60 |         "company = 'mefx.company'\n",
 61 |         "\n",
 62 |         "for i in range(from_page, to_page+1):\n",
 63 |         "\n",
 64 |         "   result = requests.get(fr\"https://www.trustpilot.com/review/{company}?page={i}\")\n",
 65 |         "   soup = BeautifulSoup(result.content)\n",
 66 |         "\n",
 67 |         "   # Trustpilot was set up in a way that's not friendly to scraping, so this hacky method will do.\n",
 68 |         "   soup2list(soup.find_all('span', {'class','typography_heading-xxs__QKBS8 typography_appearance-default__AAY17'}), users)\n",
 69 |         "   soup2list(soup.find_all('div', {'class','typography_body-m__xgxZ_ typography_appearance-subtle__8_H2l styles_detailsIcon__Fo_ua'}), locations)\n",
 70 |         "   soup2list(soup.find_all('span', {'class','typography_body-m__xgxZ_ typography_appearance-subtle__8_H2l'}), userReviewNum)\n",
 71 |         "   soup2list(soup.find_all('div', {'class','styles_reviewHeader__iU9Px'}), dates)\n",
 72 |         "   soup2list(soup.find_all('div', {'class','styles_reviewHeader__iU9Px'}), ratings, attr='data-service-review-rating')\n",
 73 |         "   soup2list(soup.find_all('div', {'class','styles_reviewContent__0Q2Tg'}), reviews)\n",
 74 |         "\n",
 75 |         "   # To avoid throttling\n",
 76 |         "   sleep(1)\n",
 77 |         "\n",
 78 |         "review_data = pd.DataFrame(\n",
 79 |         "{\n",
 80 |         "   'Username':users,\n",
 81 |         "   'Total reviews':userReviewNum,\n",
 82 |         "   'location':locations,\n",
 83 |         "   'date':dates,\n",
 84 |         "   'content':reviews,\n",
 85 |         "   'Rating': ratings\n",
 86 |         "})\n"
 87 |       ]
 88 |     },
 89 |     {
 90 |       "cell_type": "code",
 91 |       "source": [
 92 |         "review_data"
 93 |       ],
 94 |       "metadata": {
 95 |         "colab": {
 96 |           "base_uri": "https://localhost:8080/",
 97 |           "height": 423
 98 |         },
 99 |         "id": "_soYMTNmEDNF",
100 |         "outputId": "f7d6df63-61fc-45e9-e9af-8ed83e67235d"
101 |       },
102 |       "execution_count": 3,
103 |       "outputs": [
104 |         {
105 |           "output_type": "execute_result",
106 |           "data": {
107 |             "text/plain": [
108 |               "                   Username Total reviews location          date  \\\n",
109 |               "0    Jorge Guilherme Eggers     3 reviews       BR  Dec 13, 2022   \n",
110 |               "1             Darryl Hayton     4 reviews       GB  Dec 13, 2022   \n",
111 |               "2                Jude Crown      1 review       CA  Oct 25, 2022   \n",
112 |               "3           Stéphane MOTARD     4 reviews       ES   Aug 6, 2022   \n",
113 |               "4             Mahadev Kumar     2 reviews       NP  Oct 25, 2022   \n",
114 |               "..                      ...           ...      ...           ...   \n",
115 |               "114            Asling Elvis      1 review       US  Jan 29, 2022   \n",
116 |               "115         Elfrem Neftalem      1 review       GB   Jun 4, 2021   \n",
117 |               "116           Nabinur Islam      1 review       BD   Jun 2, 2021   \n",
118 |               "117             Byron Clark      1 review       US   Jun 1, 2021   \n",
119 |               "118              Rohan Smit      1 review       GB  May 21, 2021   \n",
120 |               "\n",
121 |               "                                               content Rating  \n",
122 |               "0    I have been a member of the MEFX…I have been a...      5  \n",
123 |               "1    MEFX Company LTD is the best thing to…MEFX Com...      5  \n",
124 |               "2    I have never believed in anything…I have never...      5  \n",
125 |               "3    Still with the same opinionStill with the same...      1  \n",
126 |               "4    I have never been this lucky in…I have never b...      5  \n",
127 |               "..                                                 ...    ...  \n",
128 |               "114  Amazing investment company with good…Amazing i...      5  \n",
129 |               "115  Amazing passive income projectDate of experien...      5  \n",
130 |               "116  Best Investment platformDate of experience: Ju...      5  \n",
131 |               "117  Awesome platform!Date of experience: June 01, ...      5  \n",
132 |               "118   Fast withdrawalsDate of experience: May 21, 2021      5  \n",
133 |               "\n",
134 |               "[119 rows x 6 columns]"
135 |             ],
136 |             "text/html": [
137 |               "\n",
138 |               "  <div id=\"df-2fb29c06-90b2-4fb6-a93a-a249d4e13bf0\" class=\"colab-df-container\">\n",
139 |               "    <div>\n",
140 |               "<style scoped>\n",
141 |               "    .dataframe tbody tr th:only-of-type {\n",
142 |               "        vertical-align: middle;\n",
143 |               "    }\n",
144 |               "\n",
145 |               "    .dataframe tbody tr th {\n",
146 |               "        vertical-align: top;\n",
147 |               "    }\n",
148 |               "\n",
149 |               "    .dataframe thead th {\n",
150 |               "        text-align: right;\n",
151 |               "    }\n",
152 |               "</style>\n",
153 |               "<table border=\"1\" class=\"dataframe\">\n",
154 |               "  <thead>\n",
155 |               "    <tr style=\"text-align: right;\">\n",
156 |               "      <th></th>\n",
157 |               "      <th>Username</th>\n",
158 |               "      <th>Total reviews</th>\n",
159 |               "      <th>location</th>\n",
160 |               "      <th>date</th>\n",
161 |               "      <th>content</th>\n",
162 |               "      <th>Rating</th>\n",
163 |               "    </tr>\n",
164 |               "  </thead>\n",
165 |               "  <tbody>\n",
166 |               "    <tr>\n",
167 |               "      <th>0</th>\n",
168 |               "      <td>Jorge Guilherme Eggers</td>\n",
169 |               "      <td>3 reviews</td>\n",
170 |               "      <td>BR</td>\n",
171 |               "      <td>Dec 13, 2022</td>\n",
172 |               "      <td>I have been a member of the MEFX…I have been a...</td>\n",
173 |               "      <td>5</td>\n",
174 |               "    </tr>\n",
175 |               "    <tr>\n",
176 |               "      <th>1</th>\n",
177 |               "      <td>Darryl Hayton</td>\n",
178 |               "      <td>4 reviews</td>\n",
179 |               "      <td>GB</td>\n",
180 |               "      <td>Dec 13, 2022</td>\n",
181 |               "      <td>MEFX Company LTD is the best thing to…MEFX Com...</td>\n",
182 |               "      <td>5</td>\n",
183 |               "    </tr>\n",
184 |               "    <tr>\n",
185 |               "      <th>2</th>\n",
186 |               "      <td>Jude Crown</td>\n",
187 |               "      <td>1 review</td>\n",
188 |               "      <td>CA</td>\n",
189 |               "      <td>Oct 25, 2022</td>\n",
190 |               "      <td>I have never believed in anything…I have never...</td>\n",
191 |               "      <td>5</td>\n",
192 |               "    </tr>\n",
193 |               "    <tr>\n",
194 |               "      <th>3</th>\n",
195 |               "      <td>Stéphane MOTARD</td>\n",
196 |               "      <td>4 reviews</td>\n",
197 |               "      <td>ES</td>\n",
198 |               "      <td>Aug 6, 2022</td>\n",
199 |               "      <td>Still with the same opinionStill with the same...</td>\n",
200 |               "      <td>1</td>\n",
201 |               "    </tr>\n",
202 |               "    <tr>\n",
203 |               "      <th>4</th>\n",
204 |               "      <td>Mahadev Kumar</td>\n",
205 |               "      <td>2 reviews</td>\n",
206 |               "      <td>NP</td>\n",
207 |               "      <td>Oct 25, 2022</td>\n",
208 |               "      <td>I have never been this lucky in…I have never b...</td>\n",
209 |               "      <td>5</td>\n",
210 |               "    </tr>\n",
211 |               "    <tr>\n",
212 |               "      <th>...</th>\n",
213 |               "      <td>...</td>\n",
214 |               "      <td>...</td>\n",
215 |               "      <td>...</td>\n",
216 |               "      <td>...</td>\n",
217 |               "      <td>...</td>\n",
218 |               "      <td>...</td>\n",
219 |               "    </tr>\n",
220 |               "    <tr>\n",
221 |               "      <th>114</th>\n",
222 |               "      <td>Asling Elvis</td>\n",
223 |               "      <td>1 review</td>\n",
224 |               "      <td>US</td>\n",
225 |               "      <td>Jan 29, 2022</td>\n",
226 |               "      <td>Amazing investment company with good…Amazing i...</td>\n",
227 |               "      <td>5</td>\n",
228 |               "    </tr>\n",
229 |               "    <tr>\n",
230 |               "      <th>115</th>\n",
231 |               "      <td>Elfrem Neftalem</td>\n",
232 |               "      <td>1 review</td>\n",
233 |               "      <td>GB</td>\n",
234 |               "      <td>Jun 4, 2021</td>\n",
235 |               "      <td>Amazing passive income projectDate of experien...</td>\n",
236 |               "      <td>5</td>\n",
237 |               "    </tr>\n",
238 |               "    <tr>\n",
239 |               "      <th>116</th>\n",
240 |               "      <td>Nabinur Islam</td>\n",
241 |               "      <td>1 review</td>\n",
242 |               "      <td>BD</td>\n",
243 |               "      <td>Jun 2, 2021</td>\n",
244 |               "      <td>Best Investment platformDate of experience: Ju...</td>\n",
245 |               "      <td>5</td>\n",
246 |               "    </tr>\n",
247 |               "    <tr>\n",
248 |               "      <th>117</th>\n",
249 |               "      <td>Byron Clark</td>\n",
250 |               "      <td>1 review</td>\n",
251 |               "      <td>US</td>\n",
252 |               "      <td>Jun 1, 2021</td>\n",
253 |               "      <td>Awesome platform!Date of experience: June 01, ...</td>\n",
254 |               "      <td>5</td>\n",
255 |               "    </tr>\n",
256 |               "    <tr>\n",
257 |               "      <th>118</th>\n",
258 |               "      <td>Rohan Smit</td>\n",
259 |               "      <td>1 review</td>\n",
260 |               "      <td>GB</td>\n",
261 |               "      <td>May 21, 2021</td>\n",
262 |               "      <td>Fast withdrawalsDate of experience: May 21, 2021</td>\n",
263 |               "      <td>5</td>\n",
264 |               "    </tr>\n",
265 |               "  </tbody>\n",
266 |               "</table>\n",
267 |               "<p>119 rows × 6 columns</p>\n",
268 |               "</div>\n",
269 |               "    <div class=\"colab-df-buttons\">\n",
270 |               "\n",
271 |               "  <div class=\"colab-df-container\">\n",
272 |               "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2fb29c06-90b2-4fb6-a93a-a249d4e13bf0')\"\n",
273 |               "            title=\"Convert this dataframe to an interactive table.\"\n",
274 |               "            style=\"display:none;\">\n",
275 |               "\n",
276 |               "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
277 |               "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
278 |               "  </svg>\n",
279 |               "    </button>\n",
280 |               "\n",
281 |               "  <style>\n",
282 |               "    .colab-df-container {\n",
283 |               "      display:flex;\n",
284 |               "      gap: 12px;\n",
285 |               "    }\n",
286 |               "\n",
287 |               "    .colab-df-convert {\n",
288 |               "      background-color: #E8F0FE;\n",
289 |               "      border: none;\n",
290 |               "      border-radius: 50%;\n",
291 |               "      cursor: pointer;\n",
292 |               "      display: none;\n",
293 |               "      fill: #1967D2;\n",
294 |               "      height: 32px;\n",
295 |               "      padding: 0 0 0 0;\n",
296 |               "      width: 32px;\n",
297 |               "    }\n",
298 |               "\n",
299 |               "    .colab-df-convert:hover {\n",
300 |               "      background-color: #E2EBFA;\n",
301 |               "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
302 |               "      fill: #174EA6;\n",
303 |               "    }\n",
304 |               "\n",
305 |               "    .colab-df-buttons div {\n",
306 |               "      margin-bottom: 4px;\n",
307 |               "    }\n",
308 |               "\n",
309 |               "    [theme=dark] .colab-df-convert {\n",
310 |               "      background-color: #3B4455;\n",
311 |               "      fill: #D2E3FC;\n",
312 |               "    }\n",
313 |               "\n",
314 |               "    [theme=dark] .colab-df-convert:hover {\n",
315 |               "      background-color: #434B5C;\n",
316 |               "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
317 |               "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
318 |               "      fill: #FFFFFF;\n",
319 |               "    }\n",
320 |               "  </style>\n",
321 |               "\n",
322 |               "    <script>\n",
323 |               "      const buttonEl =\n",
324 |               "        document.querySelector('#df-2fb29c06-90b2-4fb6-a93a-a249d4e13bf0 button.colab-df-convert');\n",
325 |               "      buttonEl.style.display =\n",
326 |               "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
327 |               "\n",
328 |               "      async function convertToInteractive(key) {\n",
329 |               "        const element = document.querySelector('#df-2fb29c06-90b2-4fb6-a93a-a249d4e13bf0');\n",
330 |               "        const dataTable =\n",
331 |               "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
332 |               "                                                    [key], {});\n",
333 |               "        if (!dataTable) return;\n",
334 |               "\n",
335 |               "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
336 |               "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
337 |               "          + ' to learn more about interactive tables.';\n",
338 |               "        element.innerHTML = '';\n",
339 |               "        dataTable['output_type'] = 'display_data';\n",
340 |               "        await google.colab.output.renderOutput(dataTable, element);\n",
341 |               "        const docLink = document.createElement('div');\n",
342 |               "        docLink.innerHTML = docLinkHtml;\n",
343 |               "        element.appendChild(docLink);\n",
344 |               "      }\n",
345 |               "    </script>\n",
346 |               "  </div>\n",
347 |               "\n",
348 |               "\n",
349 |               "<div id=\"df-49fa05e5-d30f-461c-b42e-066050c68b64\">\n",
350 |               "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-49fa05e5-d30f-461c-b42e-066050c68b64')\"\n",
351 |               "            title=\"Suggest charts\"\n",
352 |               "            style=\"display:none;\">\n",
353 |               "\n",
354 |               "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
355 |               "     width=\"24px\">\n",
356 |               "    <g>\n",
357 |               "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
358 |               "    </g>\n",
359 |               "</svg>\n",
360 |               "  </button>\n",
361 |               "\n",
362 |               "<style>\n",
363 |               "  .colab-df-quickchart {\n",
364 |               "      --bg-color: #E8F0FE;\n",
365 |               "      --fill-color: #1967D2;\n",
366 |               "      --hover-bg-color: #E2EBFA;\n",
367 |               "      --hover-fill-color: #174EA6;\n",
368 |               "      --disabled-fill-color: #AAA;\n",
369 |               "      --disabled-bg-color: #DDD;\n",
370 |               "  }\n",
371 |               "\n",
372 |               "  [theme=dark] .colab-df-quickchart {\n",
373 |               "      --bg-color: #3B4455;\n",
374 |               "      --fill-color: #D2E3FC;\n",
375 |               "      --hover-bg-color: #434B5C;\n",
376 |               "      --hover-fill-color: #FFFFFF;\n",
377 |               "      --disabled-bg-color: #3B4455;\n",
378 |               "      --disabled-fill-color: #666;\n",
379 |               "  }\n",
380 |               "\n",
381 |               "  .colab-df-quickchart {\n",
382 |               "    background-color: var(--bg-color);\n",
383 |               "    border: none;\n",
384 |               "    border-radius: 50%;\n",
385 |               "    cursor: pointer;\n",
386 |               "    display: none;\n",
387 |               "    fill: var(--fill-color);\n",
388 |               "    height: 32px;\n",
389 |               "    padding: 0;\n",
390 |               "    width: 32px;\n",
391 |               "  }\n",
392 |               "\n",
393 |               "  .colab-df-quickchart:hover {\n",
394 |               "    background-color: var(--hover-bg-color);\n",
395 |               "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
396 |               "    fill: var(--button-hover-fill-color);\n",
397 |               "  }\n",
398 |               "\n",
399 |               "  .colab-df-quickchart-complete:disabled,\n",
400 |               "  .colab-df-quickchart-complete:disabled:hover {\n",
401 |               "    background-color: var(--disabled-bg-color);\n",
402 |               "    fill: var(--disabled-fill-color);\n",
403 |               "    box-shadow: none;\n",
404 |               "  }\n",
405 |               "\n",
406 |               "  .colab-df-spinner {\n",
407 |               "    border: 2px solid var(--fill-color);\n",
408 |               "    border-color: transparent;\n",
409 |               "    border-bottom-color: var(--fill-color);\n",
410 |               "    animation:\n",
411 |               "      spin 1s steps(1) infinite;\n",
412 |               "  }\n",
413 |               "\n",
414 |               "  @keyframes spin {\n",
415 |               "    0% {\n",
416 |               "      border-color: transparent;\n",
417 |               "      border-bottom-color: var(--fill-color);\n",
418 |               "      border-left-color: var(--fill-color);\n",
419 |               "    }\n",
420 |               "    20% {\n",
421 |               "      border-color: transparent;\n",
422 |               "      border-left-color: var(--fill-color);\n",
423 |               "      border-top-color: var(--fill-color);\n",
424 |               "    }\n",
425 |               "    30% {\n",
426 |               "      border-color: transparent;\n",
427 |               "      border-left-color: var(--fill-color);\n",
428 |               "      border-top-color: var(--fill-color);\n",
429 |               "      border-right-color: var(--fill-color);\n",
430 |               "    }\n",
431 |               "    40% {\n",
432 |               "      border-color: transparent;\n",
433 |               "      border-right-color: var(--fill-color);\n",
434 |               "      border-top-color: var(--fill-color);\n",
435 |               "    }\n",
436 |               "    60% {\n",
437 |               "      border-color: transparent;\n",
438 |               "      border-right-color: var(--fill-color);\n",
439 |               "    }\n",
440 |               "    80% {\n",
441 |               "      border-color: transparent;\n",
442 |               "      border-right-color: var(--fill-color);\n",
443 |               "      border-bottom-color: var(--fill-color);\n",
444 |               "    }\n",
445 |               "    90% {\n",
446 |               "      border-color: transparent;\n",
447 |               "      border-bottom-color: var(--fill-color);\n",
448 |               "    }\n",
449 |               "  }\n",
450 |               "</style>\n",
451 |               "\n",
452 |               "  <script>\n",
453 |               "    async function quickchart(key) {\n",
454 |               "      const quickchartButtonEl =\n",
455 |               "        document.querySelector('#' + key + ' button');\n",
456 |               "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
457 |               "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
458 |               "      try {\n",
459 |               "        const charts = await google.colab.kernel.invokeFunction(\n",
460 |               "            'suggestCharts', [key], {});\n",
461 |               "      } catch (error) {\n",
462 |               "        console.error('Error during call to suggestCharts:', error);\n",
463 |               "      }\n",
464 |               "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
465 |               "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
466 |               "    }\n",
467 |               "    (() => {\n",
468 |               "      let quickchartButtonEl =\n",
469 |               "        document.querySelector('#df-49fa05e5-d30f-461c-b42e-066050c68b64 button');\n",
470 |               "      quickchartButtonEl.style.display =\n",
471 |               "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
472 |               "    })();\n",
473 |               "  </script>\n",
474 |               "</div>\n",
475 |               "    </div>\n",
476 |               "  </div>\n"
477 |             ]
478 |           },
479 |           "metadata": {},
480 |           "execution_count": 3
481 |         }
482 |       ]
483 |     },
484 |     {
485 |       "cell_type": "code",
486 |       "source": [
487 |         "# Convert the DataFrame to a CSV file and download it\n",
488 |         "review_data.to_csv('reviews.csv', index=False)\n",
489 |         "files.download('reviews.csv')"
490 |       ],
491 |       "metadata": {
492 |         "colab": {
493 |           "base_uri": "https://localhost:8080/",
494 |           "height": 17
495 |         },
496 |         "id": "XfeWnNaSVd02",
497 |         "outputId": "680e8a17-e0fd-4873-deb5-355d38772162"
498 |       },
499 |       "execution_count": 4,
500 |       "outputs": [
501 |         {
502 |           "output_type": "display_data",
503 |           "data": {
504 |             "text/plain": [
505 |               "<IPython.core.display.Javascript object>"
506 |             ],
507 |             "application/javascript": [
508 |               "\n",
509 |               "    async function download(id, filename, size) {\n",
510 |               "      if (!google.colab.kernel.accessAllowed) {\n",
511 |               "        return;\n",
512 |               "      }\n",
513 |               "      const div = document.createElement('div');\n",
514 |               "      const label = document.createElement('label');\n",
515 |               "      label.textContent = `Downloading \"${filename}\": `;\n",
516 |               "      div.appendChild(label);\n",
517 |               "      const progress = document.createElement('progress');\n",
518 |               "      progress.max = size;\n",
519 |               "      div.appendChild(progress);\n",
520 |               "      document.body.appendChild(div);\n",
521 |               "\n",
522 |               "      const buffers = [];\n",
523 |               "      let downloaded = 0;\n",
524 |               "\n",
525 |               "      const channel = await google.colab.kernel.comms.open(id);\n",
526 |               "      // Send a message to notify the kernel that we're ready.\n",
527 |               "      channel.send({})\n",
528 |               "\n",
529 |               "      for await (const message of channel.messages) {\n",
530 |               "        // Send a message to notify the kernel that we're ready.\n",
531 |               "        channel.send({})\n",
532 |               "        if (message.buffers) {\n",
533 |               "          for (const buffer of message.buffers) {\n",
534 |               "            buffers.push(buffer);\n",
535 |               "            downloaded += buffer.byteLength;\n",
536 |               "            progress.value = downloaded;\n",
537 |               "          }\n",
538 |               "        }\n",
539 |               "      }\n",
540 |               "      const blob = new Blob(buffers, {type: 'application/binary'});\n",
541 |               "      const a = document.createElement('a');\n",
542 |               "      a.href = window.URL.createObjectURL(blob);\n",
543 |               "      a.download = filename;\n",
544 |               "      div.appendChild(a);\n",
545 |               "      a.click();\n",
546 |               "      div.remove();\n",
547 |               "    }\n",
548 |               "  "
549 |             ]
550 |           },
551 |           "metadata": {}
552 |         },
553 |         {
554 |           "output_type": "display_data",
555 |           "data": {
556 |             "text/plain": [
557 |               "<IPython.core.display.Javascript object>"
558 |             ],
559 |             "application/javascript": [
560 |               "download(\"download_9c6b9184-994b-440a-9e86-267dc193321c\", \"reviews.csv\", 32399)"
561 |             ]
562 |           },
563 |           "metadata": {}
564 |         }
565 |       ]
566 |     }
567 |   ]
568 | }


--------------------------------------------------------------------------------