├── random
    ├── __init__.py
    ├── .ipynb_checkpoints
    │   ├── __init__-checkpoint.py
    │   ├── web_services-checkpoint.png
    │   ├── grouped_workers-checkpoint.png
    │   └── display_util-checkpoint.py
    ├── page_0.png
    ├── virat.png
    ├── messi_vs_nld.png
    ├── warn_report.pdf
    ├── web_services.png
    ├── grouped_workers.png
    ├── __pycache__
    │   ├── __init__.cpython-38.pyc
    │   └── display_util.cpython-38.pyc
    ├── display_util.py
    ├── download_youtube.ipynb
    ├── embeddings.ipynb
    ├── latexify.ipynb
    ├── matmul_operator.ipynb
    ├── shapely_polygon_intersection.ipynb
    ├── pivot_table_JS.ipynb
    ├── one_hot_encoding.ipynb
    ├── tf_decision_forests.ipynb
    └── python_for_sql.ipynb
├── .DS_Store
├── numpy
    ├── resources
    │   ├── cat.png
    │   └── dog.png
    ├── argsort_vs_argpartition.ipynb
    └── numpy_indexing_slicing.ipynb
├── computer_vision
    ├── lena.png
    └── squid_games.png
├── PyTorch
    ├── resources
    │   ├── .DS_Store
    │   ├── basic_neural_net.png
    │   └── computational_graph.png
    ├── torch_autograd.ipynb
    └── tensors.ipynb
├── PyTorch_Lightning
    ├── .DS_Store
    └── autoencoders.ipynb
├── azure_ML
    ├── source_dir
    │   ├── env.yml
    │   └── score.py
    └── deployment.ipynb
├── pandas
    ├── weather_data.csv
    ├── stocks2.csv
    ├── bessel_correction.ipynb
    ├── iterrows_vs_itertuples.ipynb
    ├── iris.csv
    ├── df_apply.ipynb
    ├── assigning_new_columns.ipynb
    └── df.loc_pandas.ipynb
├── maths_for_ml
    └── clt_streamlit.py
├── LICENSE
├── NLP
    ├── conformer_speech2text.ipynb
    ├── question_answering.ipynb
    └── tokenization.ipynb
├── CONTRIBUTING.md
├── random_stuff
    └── audiobook.ipynb
├── README.md
├── LLMs
    ├── APIChain_LangChain.ipynb
    ├── llamaindex_101.ipynb
    ├── langchain.ipynb
    └── openai_function_calling.ipynb
├── neat-tricks
    └── mem_cache.ipynb
├── Python
    └── decorators.ipynb
├── ml_from_scratch
    └── KNN.ipynb
└── unsupervised_learning
    └── dummy_data_clustering.ipynb


/random/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/random/.ipynb_checkpoints/__init__-checkpoint.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/.DS_Store


--------------------------------------------------------------------------------
/random/page_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/page_0.png


--------------------------------------------------------------------------------
/random/virat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/virat.png


--------------------------------------------------------------------------------
/numpy/resources/cat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/numpy/resources/cat.png


--------------------------------------------------------------------------------
/numpy/resources/dog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/numpy/resources/dog.png


--------------------------------------------------------------------------------
/random/messi_vs_nld.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/messi_vs_nld.png


--------------------------------------------------------------------------------
/random/warn_report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/warn_report.pdf


--------------------------------------------------------------------------------
/random/web_services.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/web_services.png


--------------------------------------------------------------------------------
/computer_vision/lena.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/computer_vision/lena.png


--------------------------------------------------------------------------------
/PyTorch/resources/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/PyTorch/resources/.DS_Store


--------------------------------------------------------------------------------
/PyTorch_Lightning/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/PyTorch_Lightning/.DS_Store


--------------------------------------------------------------------------------
/random/grouped_workers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/grouped_workers.png


--------------------------------------------------------------------------------
/computer_vision/squid_games.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/computer_vision/squid_games.png


--------------------------------------------------------------------------------
/PyTorch/resources/basic_neural_net.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/PyTorch/resources/basic_neural_net.png


--------------------------------------------------------------------------------
/PyTorch/resources/computational_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/PyTorch/resources/computational_graph.png


--------------------------------------------------------------------------------
/random/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/random/__pycache__/display_util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/__pycache__/display_util.cpython-38.pyc


--------------------------------------------------------------------------------
/random/.ipynb_checkpoints/web_services-checkpoint.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/.ipynb_checkpoints/web_services-checkpoint.png


--------------------------------------------------------------------------------
/random/.ipynb_checkpoints/grouped_workers-checkpoint.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/.ipynb_checkpoints/grouped_workers-checkpoint.png


--------------------------------------------------------------------------------
/azure_ML/source_dir/env.yml:
--------------------------------------------------------------------------------
 1 | channels:
 2 | - anaconda
 3 | - conda-forge
 4 | dependencies:
 5 | - python=3.6.2
 6 | - pip:
 7 |   - pandas==1.1.5
 8 |   - azureml-defaults
 9 |   - joblib==0.17.0
10 |   - scikit-learn==0.23.2


--------------------------------------------------------------------------------
/pandas/weather_data.csv:
--------------------------------------------------------------------------------
1 | day,city,temperature,windspeed,event
2 | 1/3/2022,new york,28,12,Snow
3 | 1/3/2022,mumbai,87,15,Fog
4 | 1/4/2022,new york,33,7,Sunny
5 | 1/4/2022,mumbai,92,5,Rain
6 | 1/1/2022,paris,45,20,Sunny
7 | 1/2/2022,paris,50,13,Cloudy
8 | 1/3/2022,paris,54,8,Cloudy


--------------------------------------------------------------------------------
/azure_ML/source_dir/score.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import joblib
 3 | from azureml.core import Model
 4 | 
 5 | 
 6 | def init():
 7 |     global model
 8 |     model_name = "irisclassifier"
 9 |     path = Model.get_model_path(model_name)
10 |     model = joblib.load(path)
11 | 
12 | 
13 | def run(data):
14 |     try:
15 |         data = json.loads(data)
16 |         result = model.predict(data["data"])
17 |         return {"data": result.tolist(), "message": "Prediction successful"}
18 |     except Exception as e:
19 |         return {"data": e, "message": "Failed to predict"}
20 | 


--------------------------------------------------------------------------------
/pandas/stocks2.csv:
--------------------------------------------------------------------------------
 1 | date,open,high,low,close,volume
 2 | 2014-06-30,57.5,57.76,57.18,57.44,1314906
 3 | 2015-12-17,41.55,41.63,40.77,40.78,2218837
 4 | 2017-09-27,64.27,64.66,64.05,64.39,1272526
 5 | 2016-11-08,45.48,45.9599,45.32,45.62,2136740
 6 | 2014-11-07,41.39,41.48,40.86,40.93,2458734
 7 | 2014-04-14,53.19,53.249,52.3075,,2840577
 8 | 2017-11-14,66.98,67.8,66.89,67.46,2426247
 9 | 2017-04-21,,53.39,52.8399,53.27,2189351
10 | 2014-04-08,54.21,,53.82,54.66,1842491
11 | 2015-01-05,40.32,40.46,39.7,39.8,2042240
12 | 2016-09-16,45.39,45.39,44.74,44.79,2592850
13 | 2015-04-06,41.68,42.2,41.51,41.93,2379808
14 | 2015-08-06,40.95,40.96,39.91,40.12,1932226
15 | 


--------------------------------------------------------------------------------
/random/display_util.py:
--------------------------------------------------------------------------------
 1 | # This util is just used for Display purpose
 2 | import cv2
 3 | from matplotlib import pyplot as plt
 4 | 
 5 | 
 6 | images = [cv2.cvtColor(cv2.imread('grouped_workers.png'), cv2.COLOR_BGR2RGB),  # read at import time; path is relative to the CWD
 7 |           cv2.cvtColor(cv2.imread('web_services.png'), cv2.COLOR_BGR2RGB)]  # converted BGR -> RGB before display
 8 | image_names = ['grouped_workers', 'web_services']  # titles, index-aligned with `images`
 9 | 
10 | 
11 | def display(images, image_names, fig_zize):
12 |     num_images = len(images) # Maximum number of images to display
13 |     num_cols = 2 # Number of columns in display
14 |     num_rows = num_images//num_cols # Number of rows in display
15 |     plt.figure(figsize=(fig_zize*2, fig_zize*num_cols))
16 |     for i in range(num_rows*num_cols):
17 | 
18 |         plt.subplot(num_rows, num_cols, i+1)
19 |         plt.imshow(images[i], cmap='gray')
20 |         plt.title(image_names[i], size=12)
21 |         plt.axis('off') 


--------------------------------------------------------------------------------
/maths_for_ml/clt_streamlit.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | 
 6 | st.title("Illustrating the Central Limit Theorem")
 7 | 
 8 | # Add a slider for sample size
 9 | sample_size = st.slider("Sample Size", min_value=1, max_value=100, value=5)
10 | 
11 | perc_heads = st.number_input(
12 |     label="Chance of Coins Landing on Heads", min_value=0.0, max_value=1.0, value=0.5
13 | )
14 | 
15 | binom_dist = np.random.binomial(1, perc_heads, 1000)
16 | 
17 | list_of_means = []
18 | 
19 | for i in range(0, 1000):
20 |     sample = np.random.choice(binom_dist, sample_size, replace=True)
21 |     list_of_means.append(sample.mean())
22 | 
23 | # Plotting
24 | fig, ax = plt.subplots()
25 | sns.histplot(list_of_means, ax=ax, color="cyan", stat="density")
26 | sns.kdeplot(list_of_means, ax=ax, color="hotpink", lw=2)
27 | st.pyplot(fig)
28 | 


--------------------------------------------------------------------------------
/random/.ipynb_checkpoints/display_util-checkpoint.py:
--------------------------------------------------------------------------------
 1 | # This util is just used for Display purpose
 2 | import cv2
 3 | from matplotlib import pyplot as plt
 4 | 
 5 | 
 6 | images = [cv2.cvtColor(cv2.imread('grouped_workers.png'), cv2.COLOR_BGR2RGB),  # read at import time; path is relative to the CWD
 7 |           cv2.cvtColor(cv2.imread('web_services.png'), cv2.COLOR_BGR2RGB)]  # converted BGR -> RGB before display
 8 | image_names = ['grouped_workers', 'web_services']  # titles, index-aligned with `images`
 9 | 
10 | 
11 | def display(images, image_names, fig_zize):
12 |     num_images = len(images) # Maximum number of images to display
13 |     num_cols = 2 # Number of columns in display
14 |     num_rows = num_images//num_cols # Number of rows in display
15 |     plt.figure(figsize=(fig_zize*2, fig_zize*num_cols))
16 |     for i in range(num_rows*num_cols):
17 | 
18 |         plt.subplot(num_rows, num_cols, i+1)
19 |         plt.imshow(images[i], cmap='gray')
20 |         plt.title(image_names[i], size=12)
21 |         plt.axis('off') 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 patchy631
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NLP/conformer_speech2text.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "### 🗣 Speech to Text! 🚀  "
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": null,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import assemblyai as aai\n",
17 |     "\n",
18 |     "# Replace with your API key\n",
19 |     "aai.settings.api_key = \"your_api_key\"\n",
20 |     "\n",
21 |     "# URL of the file to transcribe\n",
22 |     "FILE_URL = \"tinyurl.com/QuantumTheoryMP3\"\n",
23 |     "\n",
24 |     "# You can also transcribe a local file by passing in a file path\n",
25 |     "# FILE_URL = './path/to/file.mp3'\n",
26 |     "\n",
27 |     "transcriber = aai.Transcriber()\n",
28 |     "transcript = transcriber.transcribe(FILE_URL)\n",
29 |     "\n",
30 |     "if transcript.status == aai.TranscriptStatus.error:\n",
31 |     "    print(transcript.error)\n",
32 |     "else:\n",
33 |     "    print(transcript.text)"
34 |    ]
35 |   }
36 |  ],
37 |  "metadata": {
38 |   "kernelspec": {
39 |    "display_name": "env_twitter",
40 |    "language": "python",
41 |    "name": "env_twitter"
42 |   },
43 |   "language_info": {
44 |    "codemirror_mode": {
45 |     "name": "ipython",
46 |     "version": 3
47 |    },
48 |    "file_extension": ".py",
49 |    "mimetype": "text/x-python",
50 |    "name": "python",
51 |    "nbconvert_exporter": "python",
52 |    "pygments_lexer": "ipython3",
53 |    "version": "3.10.6"
54 |   }
55 |  },
56 |  "nbformat": 4,
57 |  "nbformat_minor": 4
58 | }
59 | 


--------------------------------------------------------------------------------
/random/download_youtube.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "#### ❇️ Downloading a YouTube video using Python 🐍 "
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 1,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import pytube"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "code",
21 |    "execution_count": null,
22 |    "metadata": {},
23 |    "outputs": [],
24 |    "source": [
25 |     "# Ask the user to enter url of YouTube video\n",
26 |     "video_url = input('Enter url: ')\n",
27 |     "\n",
28 |     "# Create an instance of YouTube video\n",
29 |     "video_instance = pytube.YouTube(video_url)\n",
30 |     "\n",
31 |     "stream = video_instance.streams.get_highest_resolution()\n",
32 |     "\n",
33 |     "# download 🚀 \n",
34 |     "stream.download()"
35 |    ]
36 |   },
37 |   {
38 |    "cell_type": "markdown",
39 |    "metadata": {},
40 |    "source": [
41 |     "##### ❇️ Hope you enjoyed reading!! 📖 \n",
42 |     "##### ❇️ follow → @akshay_pachaar  "
43 |    ]
44 |   },
45 |   {
46 |    "cell_type": "code",
47 |    "execution_count": null,
48 |    "metadata": {},
49 |    "outputs": [],
50 |    "source": []
51 |   }
52 |  ],
53 |  "metadata": {
54 |   "kernelspec": {
55 |    "display_name": "env_twitter",
56 |    "language": "python",
57 |    "name": "env_twitter"
58 |   },
59 |   "language_info": {
60 |    "codemirror_mode": {
61 |     "name": "ipython",
62 |     "version": 3
63 |    },
64 |    "file_extension": ".py",
65 |    "mimetype": "text/x-python",
66 |    "name": "python",
67 |    "nbconvert_exporter": "python",
68 |    "pygments_lexer": "ipython3",
69 |    "version": "3.10.5"
70 |   }
71 |  },
72 |  "nbformat": 4,
73 |  "nbformat_minor": 4
74 | }
75 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Contributing to Machine Learning Tutorials Repository
 3 | 
 4 | First off, thank you for considering contributing to this repository! Your contributions are what make the open-source community such an amazing place to be, learn, and grow.
 5 | 
 6 | ## How to Contribute?
 7 | 
 8 | 1. **Fork the Repository**: 
 9 |     - Click on the 'Fork' button at the top right corner of the repo page.
10 |     - This will create a copy of this repository in your account.
11 | 
12 | 2. **Clone the Forked Repository**:
13 |     ```bash
14 |     git clone https://github.com/YOUR_USERNAME/machine-learning
15 |     ```
16 | 
17 | 3. **Navigate to the Repository**:
18 |     ```bash
19 |     cd machine-learning
20 |     ```
21 | 
22 | 4. **Create a New Branch**:
23 |     ```bash
24 |     git checkout -b YOUR_BRANCH_NAME
25 |     ```
26 | 
27 | 5. **Make Necessary Changes**: Implement your feature or bugfix.
28 | 
29 | 6. **Commit Your Changes**:
30 |     ```bash
31 |     git add .
32 |     git commit -m "Add some feature or fix a bug"
33 |     ```
34 | 
35 | 7. **Push to the Branch**:
36 |     ```bash
37 |     git push origin YOUR_BRANCH_NAME
38 |     ```
39 | 
40 | 8. **Open a Pull Request**: 
41 |     - Go to the repository in your account.
42 |     - Click on the 'Pull Request' button.
43 |     - Click on 'New Pull Request'.
44 | 
45 | 9. **Ensure Your PR is Up-to-Date with Upstream Master**:
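46 |     - Ensure your branch is rebased to the latest `master` branch from upstream (if you have not added the remote yet, run `git remote add upstream https://github.com/patchy631/machine-learning` once).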
47 |     ```bash
48 |     git fetch upstream
49 |     git rebase upstream/master
50 |     ```
51 | 
52 | 10. **Describe Your PR**:
53 |     - Give your pull request a meaningful title.
54 |     - Provide a description of the changes you are making. Reference the issue(s) your PR resolves.
55 | 
56 | ## Need Help?
57 | 
58 | If you have any questions or need help with the process, please feel free to contact me on Twitter: [@akshay_pachaar](https://twitter.com/akshay_pachaar).
59 | 
60 | Thank you for your contribution!
61 | 


--------------------------------------------------------------------------------
/random_stuff/audiobook.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "9bf0dbd6",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "#### Create your own `Audiobook` 🎧 🚀 "
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": 1,
14 |    "id": "9ec08fba",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "import pdfplumber as pp\n",
19 |     "from gtts import gTTS"
20 |    ]
21 |   },
22 |   {
23 |    "cell_type": "markdown",
24 |    "id": "71b77cfa-30a4-48c3-8433-115925f6d5e2",
25 |    "metadata": {},
26 |    "source": [
27 |     "##### 1️⃣ Extract text from pdf"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": 2,
33 |    "id": "605fc3ed-cbb8-48c6-9776-b701eba4cb5a",
34 |    "metadata": {},
35 |    "outputs": [],
36 |    "source": [
37 |     "pdf_text = ''\n",
38 |     "\n",
39 |     "with pp.open('attention_is_all_you_need.pdf') as pdf:\n",
40 |     "    for page in pdf.pages:\n",
41 |     "        pdf_text += page.extract_text()"
42 |    ]
43 |   },
44 |   {
45 |    "cell_type": "markdown",
46 |    "id": "db3e6277-79b5-4b13-bcc4-3cb43b621fd1",
47 |    "metadata": {},
48 |    "source": [
49 |     "##### 2️⃣ Convert extracted text to speech"
50 |    ]
51 |   },
52 |   {
53 |    "cell_type": "code",
54 |    "execution_count": 3,
55 |    "id": "cdda44ac-8a0f-473e-abe2-29d4409dc2de",
56 |    "metadata": {},
57 |    "outputs": [],
58 |    "source": [
59 |     "tts = gTTS(text=pdf_text, lang='en')\n",
60 |     "tts.save('audio_book.mp3')"
61 |    ]
62 |   },
63 |   {
64 |    "cell_type": "code",
65 |    "execution_count": null,
66 |    "id": "df9e8880-a53c-4f00-8184-4106db3bf567",
67 |    "metadata": {},
68 |    "outputs": [],
69 |    "source": []
70 |   }
71 |  ],
72 |  "metadata": {
73 |   "kernelspec": {
74 |    "display_name": "env_twitter",
75 |    "language": "python",
76 |    "name": "env_twitter"
77 |   },
78 |   "language_info": {
79 |    "codemirror_mode": {
80 |     "name": "ipython",
81 |     "version": 3
82 |    },
83 |    "file_extension": ".py",
84 |    "mimetype": "text/x-python",
85 |    "name": "python",
86 |    "nbconvert_exporter": "python",
87 |    "pygments_lexer": "ipython3",
88 |    "version": "3.10.6"
89 |   }
90 |  },
91 |  "nbformat": 4,
92 |  "nbformat_minor": 5
93 | }
94 | 


--------------------------------------------------------------------------------
/random/embeddings.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## `Similarity Search` using `NumPy!` 🚀 "
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 1,
13 |    "metadata": {},
14 |    "outputs": [
15 |     {
16 |      "name": "stdout",
17 |      "output_type": "stream",
18 |      "text": [
19 |       "Index of the nearest tweet: 0\n",
20 |       "Embedding of the nearest tweet: [1 2 3]\n"
21 |      ]
22 |     }
23 |    ],
24 |    "source": [
25 |     "import numpy as np\n",
26 |     "\n",
27 |     "\n",
28 |     "# Query tweet\n",
29 |     "query_tweet = np.array([1, 2, 3])\n",
30 |     "\n",
31 |     "# A database of all the tweets stored in form of embeddings\n",
32 |     "vector_database = np.array([[1, 2, 3],\n",
33 |     "                            [4, 5, 6],\n",
34 |     "                            [7, 8, 9],\n",
35 |     "                            [2, 3, 4],\n",
36 |     "                            [5, 6, 7]])\n",
37 |     "\n",
38 |     "\n",
39 |     "\n",
40 |     "# Normalize each row of the vector database and the query tweet to unit length\n",
41 |     "vector_database_norm = vector_database / np.linalg.norm(vector_database, axis=1, keepdims=True)\n",
42 |     "query_tweet_norm = query_tweet / np.linalg.norm(query_tweet)\n",
43 |     "\n",
44 |     "# Dot product of the unit vectors, i.e. the cosine similarity of each database row with the query\n",
45 |     "dot_product = np.dot(vector_database_norm, query_tweet_norm)\n",
46 |     "\n",
47 |     "# Find the index of the database vector nearest to the query tweet\n",
48 |     "nearest_vector_index = np.argmax(dot_product)\n",
49 |     "\n",
50 |     "print(\"Index of the nearest tweet:\", nearest_vector_index)\n",
51 |     "print(\"Embedding of the nearest tweet:\", vector_database[nearest_vector_index])"
52 |    ]
53 |   },
54 |   {
55 |    "cell_type": "code",
56 |    "execution_count": null,
57 |    "metadata": {},
58 |    "outputs": [],
59 |    "source": []
60 |   }
61 |  ],
62 |  "metadata": {
63 |   "kernelspec": {
64 |    "display_name": "env_twitter",
65 |    "language": "python",
66 |    "name": "env_twitter"
67 |   },
68 |   "language_info": {
69 |    "codemirror_mode": {
70 |     "name": "ipython",
71 |     "version": 3
72 |    },
73 |    "file_extension": ".py",
74 |    "mimetype": "text/x-python",
75 |    "name": "python",
76 |    "nbconvert_exporter": "python",
77 |    "pygments_lexer": "ipython3",
78 |    "version": "3.10.6"
79 |   }
80 |  },
81 |  "nbformat": 4,
82 |  "nbformat_minor": 4
83 | }
84 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # 🤖 Machine Learning Tutorials Repository 🤖
 3 | 
 4 | Welcome to the **Machine Learning Tutorials Repository**! This is the go-to spot for all the code associated with my Twitter tutorials. If you're passionate about diving deep into the realms of Machine Learning and exploring various topics, you're in the right place!
 5 | 
 6 | ## Star history 🌟
 7 | 
 8 | [![Star History Chart](https://api.star-history.com/svg?repos=patchy631/machine-learning&type=Date)](https://star-history.com/#patchy631/machine-learning&Date)
 9 | 
10 | ## 📘 Topics Covered
11 | 
12 | 1. 🐍 **Python**: The core language for almost all things Machine Learning.
13 | 2. 🖼️ **Computer Vision**: Techniques, algorithms, and methods to give machines the ability to see and interpret visual data.
14 | 3. 📜 **NLP (Natural Language Processing)**: Delve into the world of words and understand how machines can comprehend, interpret, and respond to human languages.
15 | 4. 📊 **Matplotlib**: Visualize your data and results with one of the most popular plotting libraries.
16 | 5. 🔢 **NumPy**: Master the art of numerical computing with Python.
17 | 6. 🐼 **Pandas**: The ultimate tool for data analysis in Python.
18 | 7. 🚀 **MLOps**: Learn about the best practices, tools, and services to manage end-to-end ML lifecycle.
19 | 8. 🧠 **LLMs (Large Language Models)**: Dive deep into state-of-the-art models that understand and generate human-like text.
20 | 9. 🔥 **PyTorch/TensorFlow**: Get to grips with the two dominant deep learning frameworks.
21 | 
22 | ## 🚀 Getting Started
23 | 
24 | 1. **Clone the Repository**:
25 |     ```bash
26 |     git clone https://github.com/patchy631/machine-learning
27 |     ```
28 | 2. **Navigate to the Repository**:
29 |     ```bash
30 |     cd machine-learning
31 |     ```
32 | 3. **Install Required Libraries**:
33 |     ```bash
34 |     pip install -r requirements.txt
35 |     ```
36 | 
37 | > Note: Make sure you have Python installed on your system. If not, download and install [Python](https://www.python.org/downloads/).
38 | 
39 | ## 🤝 Contribution Guidelines
40 | 
41 | Contributions are always welcome! Whether it's fixing bugs, improving documentation, or adding new tutorials, your efforts will be appreciated. Please ensure you follow the contribution guidelines outlined in `CONTRIBUTING.md`.
42 | 
43 | ## 📱 Connect with Me
44 | 
45 | Follow me on Twitter for more tutorials and updates: [@akshay_pachaar](https://twitter.com/akshay_pachaar)
46 | 
47 | ## 📜 License
48 | 
49 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
50 | 


--------------------------------------------------------------------------------
/LLMs/APIChain_LangChain.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### Introducing `ApiChain`! 🚀 "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 5,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "from langchain.chains import APIChain\n",
 18 |     "from langchain.chat_models import ChatOpenAI\n",
 19 |     "from IPython.display import Markdown"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 6,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "# Set environment variables\n",
 29 |     "os.environ['OPENAI_API_KEY'] = '...'"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "user_message = 'Whats is the weather like in New Delhi today?'"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 11,
 44 |    "metadata": {},
 45 |    "outputs": [
 46 |     {
 47 |      "data": {
 48 |       "text/markdown": [
 49 |        "The weather in New Delhi today is moderate to heavy rain with thunder, with a temperature of 25 degrees Celsius (77 degrees Fahrenheit) and high humidity."
 50 |       ],
 51 |       "text/plain": [
 52 |        "<IPython.core.display.Markdown object>"
 53 |       ]
 54 |      },
 55 |      "execution_count": 11,
 56 |      "metadata": {},
 57 |      "output_type": "execute_result"
 58 |     }
 59 |    ],
 60 |    "source": [
 61 |     "llm = ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\", max_tokens=256, verbose=True)\n",
 62 |     "\n",
 63 |     "apiSpec = \"\"\"API documentation:\n",
 64 |     "Base URL:f\"http://api.weatherapi.com/\n",
 65 |     "Endpoint: /weather\n",
 66 |     "Example API call: http://api.weatherapi.com/v1/current.json?\\\n",
 67 |     "key=**your_api_key**&q=Delhi&aqi=no'\n",
 68 |     "\n",
 69 |     "This API is for retrieving weather information based on city name\n",
 70 |     "\n",
 71 |     "Request GET \n",
 72 |     "Query Parameter Name\tFormat\tRequired\tDescription\n",
 73 |     "q\tString\tYes\tName of city for which we want weather info\n",
 74 |     "\n",
 75 |     "INSTRUCTIONS FOR RESPONDING\n",
 76 |     "Respond in Natural Language\n",
 77 |     "\"\"\"\n",
 78 |     "\n",
 79 |     "chain = APIChain.from_llm_and_api_docs(llm, apiSpec, verbose=False)\n",
 80 |     "response = chain.run(user_message)\n",
 81 |     "Markdown(response)"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": []
 90 |   }
 91 |  ],
 92 |  "metadata": {
 93 |   "kernelspec": {
 94 |    "display_name": "env_twitter",
 95 |    "language": "python",
 96 |    "name": "env_twitter"
 97 |   },
 98 |   "language_info": {
 99 |    "codemirror_mode": {
100 |     "name": "ipython",
101 |     "version": 3
102 |    },
103 |    "file_extension": ".py",
104 |    "mimetype": "text/x-python",
105 |    "name": "python",
106 |    "nbconvert_exporter": "python",
107 |    "pygments_lexer": "ipython3",
108 |    "version": "3.10.6"
109 |   }
110 |  },
111 |  "nbformat": 4,
112 |  "nbformat_minor": 4
113 | }
114 | 


--------------------------------------------------------------------------------
/pandas/bessel_correction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### ❇️ Introducing `Bessel's correction` 🚀 "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import pandas as pd\n",
 17 |     "import numpy as np"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "data = [1, 2, 3, 4, 5]"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 3,
 32 |    "metadata": {},
 33 |    "outputs": [
 34 |     {
 35 |      "data": {
 36 |       "text/plain": [
 37 |        "1.5811388300841898"
 38 |       ]
 39 |      },
 40 |      "execution_count": 3,
 41 |      "metadata": {},
 42 |      "output_type": "execute_result"
 43 |     }
 44 |    ],
 45 |    "source": [
 46 |     "# Calculate the standard deviation using Pandas\n",
 47 |     "df = pd.DataFrame(data)\n",
 48 |     "float(df.std())"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 4,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "data": {
 58 |       "text/plain": [
 59 |        "1.4142135623730951"
 60 |       ]
 61 |      },
 62 |      "execution_count": 4,
 63 |      "metadata": {},
 64 |      "output_type": "execute_result"
 65 |     }
 66 |    ],
 67 |    "source": [
 68 |     "# Calculate the standard deviation using NumPy\n",
 69 |     "arr = np.array(data)\n",
 70 |     "np.std(arr)"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "##### How can you make them return same value ❓\n",
 78 |     "To make both return the same values, you can specify <br> the `ddof` parameter in both Pandas and NumPy <br>to either `1` (sample estimate, applies Bessel's correction) or `0` (population estimate, no correction)."
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 5,
 84 |    "metadata": {},
 85 |    "outputs": [
 86 |     {
 87 |      "data": {
 88 |       "text/plain": [
 89 |        "1.4142135623730951"
 90 |       ]
 91 |      },
 92 |      "execution_count": 5,
 93 |      "metadata": {},
 94 |      "output_type": "execute_result"
 95 |     }
 96 |    ],
 97 |    "source": [
 98 |     "float(df.std(ddof=0))"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 6,
104 |    "metadata": {},
105 |    "outputs": [
106 |     {
107 |      "data": {
108 |       "text/plain": [
109 |        "1.4142135623730951"
110 |       ]
111 |      },
112 |      "execution_count": 6,
113 |      "metadata": {},
114 |      "output_type": "execute_result"
115 |     }
116 |    ],
117 |    "source": [
118 |     "np.std(arr, ddof=0)"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": []
127 |   }
128 |  ],
129 |  "metadata": {
130 |   "kernelspec": {
131 |    "display_name": "env_twitter",
132 |    "language": "python",
133 |    "name": "env_twitter"
134 |   },
135 |   "language_info": {
136 |    "codemirror_mode": {
137 |     "name": "ipython",
138 |     "version": 3
139 |    },
140 |    "file_extension": ".py",
141 |    "mimetype": "text/x-python",
142 |    "name": "python",
143 |    "nbconvert_exporter": "python",
144 |    "pygments_lexer": "ipython3",
145 |    "version": "3.10.6"
146 |   }
147 |  },
148 |  "nbformat": 4,
149 |  "nbformat_minor": 4
150 | }
151 | 


--------------------------------------------------------------------------------
/random/latexify.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "##### ❇️ Latex for Python 🐍 "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import math\n",
 17 |     "import latexify"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [
 25 |     {
 26 |      "data": {
 27 |       "text/latex": [
 28 |        "$$ \\displaystyle \\mathrm{solve}(a, b, c)\\triangleq \\frac{-b + \\sqrt{b^{2} - 4ac}}{2a} $$"
 29 |       ],
 30 |       "text/plain": [
 31 |        "<latexify.core.with_latex.<locals>._LatexifiedFunction at 0x7fe8c0569cd0>"
 32 |       ]
 33 |      },
 34 |      "execution_count": 2,
 35 |      "metadata": {},
 36 |      "output_type": "execute_result"
 37 |     }
 38 |    ],
 39 |    "source": [
 40 |     "@latexify.with_latex\n",
 41 |     "def solve(a, b, c):\n",
 42 |     "  return (-b + math.sqrt(b**2 - 4*a*c)) / (2*a)\n",
 43 |     "solve"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 3,
 49 |    "metadata": {},
 50 |    "outputs": [
 51 |     {
 52 |      "data": {
 53 |       "text/latex": [
 54 |        "$$ \\displaystyle \\mathrm{fib}(x)\\triangleq \\left\\{ \\begin{array}{ll} 1, & \\mathrm{if} \\ x=0 \\\\ 1, & \\mathrm{if} \\ x=1 \\\\ \\mathrm{fib}\\left(x - 1\\right) + \\mathrm{fib}\\left(x - 2\\right), & \\mathrm{otherwise} \\end{array} \\right. $$"
 55 |       ],
 56 |       "text/plain": [
 57 |        "<latexify.core.with_latex.<locals>._LatexifiedFunction at 0x7fe8c0685340>"
 58 |       ]
 59 |      },
 60 |      "execution_count": 3,
 61 |      "metadata": {},
 62 |      "output_type": "execute_result"
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "# Automatically unrolls Elif or nested else-if ⬇️ 💥 \n",
 67 |     "\n",
 68 |     "@latexify.with_latex\n",
 69 |     "def fib(x):\n",
 70 |     "  if x == 0:\n",
 71 |     "    return 1\n",
 72 |     "  elif x == 1:\n",
 73 |     "    return 1\n",
 74 |     "  else:\n",
 75 |     "    return fib(x-1) + fib(x-2)\n",
 76 |     "fib"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 4,
 82 |    "metadata": {},
 83 |    "outputs": [
 84 |     {
 85 |      "data": {
 86 |       "text/latex": [
 87 |        "$$ \\displaystyle \\mathrm{greek}({\\alpha}, {\\beta}, {\\gamma}, {\\Omega})\\triangleq {\\alpha}{\\beta} + \\Gamma\\left({{\\gamma}}\\right) + {\\Omega} $$"
 88 |       ],
 89 |       "text/plain": [
 90 |        "<latexify.core.with_latex.<locals>._LatexifiedFunction at 0x7fe8c0569f10>"
 91 |       ]
 92 |      },
 93 |      "execution_count": 4,
 94 |      "metadata": {},
 95 |      "output_type": "execute_result"
 96 |     }
 97 |    ],
 98 |    "source": [
 99 |     "# Some math symbols are converted automatically.\n",
100 |     "@latexify.with_latex\n",
101 |     "def greek(alpha, beta, gamma, Omega):\n",
102 |     "  return alpha * beta + math.gamma(gamma) + Omega\n",
103 |     "greek"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "##### ❇️ Hope you enjoyed reading!! 📖 \n",
111 |     "##### ❇️ follow → @akshay_pachaar  "
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": null,
117 |    "metadata": {},
118 |    "outputs": [],
119 |    "source": []
120 |   }
121 |  ],
122 |  "metadata": {
123 |   "kernelspec": {
124 |    "display_name": "env_mld",
125 |    "language": "python",
126 |    "name": "env_mld"
127 |   },
128 |   "language_info": {
129 |    "codemirror_mode": {
130 |     "name": "ipython",
131 |     "version": 3
132 |    },
133 |    "file_extension": ".py",
134 |    "mimetype": "text/x-python",
135 |    "name": "python",
136 |    "nbconvert_exporter": "python",
137 |    "pygments_lexer": "ipython3",
138 |    "version": "3.8.5"
139 |   }
140 |  },
141 |  "nbformat": 4,
142 |  "nbformat_minor": 4
143 | }
144 | 


--------------------------------------------------------------------------------
/neat-tricks/mem_cache.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### ❇️ `Memory Cache`: Lazy evaluation of functions"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import time\n",
 17 |     "from joblib import Memory"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "cachedir = 'cache'\n",
 27 |     "mem = Memory(cachedir, verbose=0)"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 3,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "@mem.cache\n",
 37 |     "def f(x):\n",
 38 |     "    print('I am slow like a sloth... 🦥 ')\n",
 39 |     "    time.sleep(10)\n",
 40 |     "    return x"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 4,
 46 |    "metadata": {},
 47 |    "outputs": [
 48 |     {
 49 |      "name": "stdout",
 50 |      "output_type": "stream",
 51 |      "text": [
 52 |       "I am slow like a sloth... 🦥 \n",
 53 |       "CPU times: user 109 ms, sys: 31.8 ms, total: 141 ms\n",
 54 |       "Wall time: 10 s\n"
 55 |      ]
 56 |     },
 57 |     {
 58 |      "data": {
 59 |       "text/plain": [
 60 |        "1"
 61 |       ]
 62 |      },
 63 |      "execution_count": 4,
 64 |      "metadata": {},
 65 |      "output_type": "execute_result"
 66 |     }
 67 |    ],
 68 |    "source": [
 69 |     "%%time\n",
 70 |     "f(1)"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 5,
 76 |    "metadata": {},
 77 |    "outputs": [
 78 |     {
 79 |      "name": "stdout",
 80 |      "output_type": "stream",
 81 |      "text": [
 82 |       "CPU times: user 1.1 ms, sys: 1.01 ms, total: 2.11 ms\n",
 83 |       "Wall time: 1.3 ms\n"
 84 |      ]
 85 |     },
 86 |     {
 87 |      "data": {
 88 |       "text/plain": [
 89 |        "1"
 90 |       ]
 91 |      },
 92 |      "execution_count": 5,
 93 |      "metadata": {},
 94 |      "output_type": "execute_result"
 95 |     }
 96 |    ],
 97 |    "source": [
 98 |     "%%time\n",
 99 |     "f(1)"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "Observe that nothing is printed & function executes quickly <br> when you call f() with same argument again❗️ "
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "Let's call f() with a different argument ⬇️ "
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {},
120 |    "outputs": [
121 |     {
122 |      "name": "stdout",
123 |      "output_type": "stream",
124 |      "text": [
125 |       "I am slow like a sloth... 🦥 \n",
126 |       "CPU times: user 3.71 ms, sys: 3 ms, total: 6.71 ms\n",
127 |       "Wall time: 10 s\n"
128 |      ]
129 |     },
130 |     {
131 |      "data": {
132 |       "text/plain": [
133 |        "2"
134 |       ]
135 |      },
136 |      "execution_count": 6,
137 |      "metadata": {},
138 |      "output_type": "execute_result"
139 |     }
140 |    ],
141 |    "source": [
142 |     "%%time\n",
143 |     "f(2)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": null,
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": []
152 |   }
153 |  ],
154 |  "metadata": {
155 |   "kernelspec": {
156 |    "display_name": "env_twitter",
157 |    "language": "python",
158 |    "name": "env_twitter"
159 |   },
160 |   "language_info": {
161 |    "codemirror_mode": {
162 |     "name": "ipython",
163 |     "version": 3
164 |    },
165 |    "file_extension": ".py",
166 |    "mimetype": "text/x-python",
167 |    "name": "python",
168 |    "nbconvert_exporter": "python",
169 |    "pygments_lexer": "ipython3",
170 |    "version": "3.10.5"
171 |   }
172 |  },
173 |  "nbformat": 4,
174 |  "nbformat_minor": 4
175 | }
176 | 


--------------------------------------------------------------------------------
/pandas/iterrows_vs_itertuples.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### ❇️ Iterrows 🤜 🤛  Itertuples "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import pandas as pd"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "# Reading from a csv\n",
 26 |     "df_penguins = pd.read_csv('penguins.csv')"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 3,
 32 |    "metadata": {},
 33 |    "outputs": [
 34 |     {
 35 |      "name": "stdout",
 36 |      "output_type": "stream",
 37 |      "text": [
 38 |       "0\n",
 39 |       "_______________________________\n",
 40 |       "species                 Adelie\n",
 41 |       "island               Torgersen\n",
 42 |       "bill_length_mm            39.1\n",
 43 |       "bill_depth_mm             18.7\n",
 44 |       "flipper_length_mm        181.0\n",
 45 |       "body_mass_g             3750.0\n",
 46 |       "sex                       Male\n",
 47 |       "Name: 0, dtype: object\n",
 48 |       "__________time taken___________\n",
 49 |       "CPU times: user 12.6 ms, sys: 1.29 ms, total: 13.9 ms\n",
 50 |       "Wall time: 16 ms\n"
 51 |      ]
 52 |     }
 53 |    ],
 54 |    "source": [
 55 |     "%%time\n",
 56 |     "# 🔴 iterrows: Iterate over DataFrame rows as (index, Series) pairs.\n",
 57 |     "index_series_pairs = [tup for tup in df_penguins.iterrows()]\n",
 58 |     "idx, series = index_series_pairs[0]\n",
 59 |     "print(idx)\n",
 60 |     "print('_______________________________')\n",
 61 |     "print(series)\n",
 62 |     "print('__________time taken___________')"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 4,
 68 |    "metadata": {},
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "Pandas(Index=0, species='Adelie', island='Torgersen', bill_length_mm=39.1, bill_depth_mm=18.7, flipper_length_mm=181.0, body_mass_g=3750.0, sex='Male')\n",
 75 |       "__________time taken___________\n",
 76 |       "CPU times: user 1.36 ms, sys: 341 µs, total: 1.7 ms\n",
 77 |       "Wall time: 1.48 ms\n"
 78 |      ]
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "%%time\n",
 83 |     "# 🟡 itertuples: Iterate over DataFrame rows as namedtuples.\n",
 84 |     "# Wondering what a namedtuple is❓ I have added a tweet👇 in the thread 🧵 , don't worry 🙌\n",
 85 |     "named_tuples = [tup for tup in df_penguins.itertuples()]\n",
 86 |     "print(named_tuples[0])\n",
 87 |     "print('__________time taken___________')"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "#### ❇️ Concluding remarks 👇 \n",
 95 |     "##### Although, it's an anti pattern to iterate over dataframe rows in this manner\n",
 96 |     "##### (We will see better techniques in future post, stay tuned!)\n",
 97 |     "##### 👉 But, given the choice itertuples is way faster 🏎💨 than iterrows because iterrows has to typecast each\n",
 98 |     "##### row into a pandas Series, which is an overhead.\n",
 99 |     "#### ❇️ Cheers!! 🍻 "
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": null,
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": []
108 |   }
109 |  ],
110 |  "metadata": {
111 |   "kernelspec": {
112 |    "display_name": "env_twitter",
113 |    "language": "python",
114 |    "name": "env_twitter"
115 |   },
116 |   "language_info": {
117 |    "codemirror_mode": {
118 |     "name": "ipython",
119 |     "version": 3
120 |    },
121 |    "file_extension": ".py",
122 |    "mimetype": "text/x-python",
123 |    "name": "python",
124 |    "nbconvert_exporter": "python",
125 |    "pygments_lexer": "ipython3",
126 |    "version": "3.10.4"
127 |   }
128 |  },
129 |  "nbformat": 4,
130 |  "nbformat_minor": 5
131 | }
132 | 


--------------------------------------------------------------------------------
/numpy/argsort_vs_argpartition.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### ❇️ argsort 🤜 🤛 argpartition"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "# Let's say we want the smallest 3 numbers in array 👇 \n",
 26 |     "# in any order\n",
 27 |     "arr = np.array([4, 5, 6, 7, 1, 2, 3])"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "#### ❇️ Method 1: argsort"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 3,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "name": "stdout",
 44 |      "output_type": "stream",
 45 |      "text": [
 46 |       "indices: [4 5 6 0 1 2 3]\n",
 47 |       "smallest 3 elements: [1 2 3]\n"
 48 |      ]
 49 |     }
 50 |    ],
 51 |    "source": [
 52 |     "# argsort, Returns the indices that would sort an array.\n",
 53 |     "idx = np.argsort(arr)\n",
 54 |     "print(f'indices: {idx}')\n",
 55 |     "print(f'smallest 3 elements: {arr[idx[:3]]}')"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "#### ❇️ Method 2: argpartition"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 4,
 68 |    "metadata": {},
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "indices: [4 6 5 0 1 2 3]\n",
 75 |       "smallest 3 elements: [1 3 2]; ⬅️ notice the order\n"
 76 |      ]
 77 |     }
 78 |    ],
 79 |    "source": [
 80 |     "# It takes an array and the element index (kth) to partition by\n",
 81 |     "# The k-th element will be in its final sorted position and all smaller \n",
 82 |     "# elements will be moved before it and all larger elements behind it.\n",
 83 |     "# order of all the elements in each partition is undefined.\n",
 84 |     "idx = np.argpartition(a = arr, kth = 3)\n",
 85 |     "print(f'indices: {idx}')\n",
 86 |     "print(f'smallest 3 elements: {arr[idx[:3]]}; ⬅️ notice the order')"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {},
 92 |    "source": [
 93 |     "####  ❇️ Why use argpartition ⁉️\n",
 94 |     "##### ❇️ If the order is not necessary then argpartition is faster 🏎💨 "
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": 5,
100 |    "metadata": {},
101 |    "outputs": [
102 |     {
103 |      "name": "stdout",
104 |      "output_type": "stream",
105 |      "text": [
106 |       "1.53 ms ± 24.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
107 |      ]
108 |     }
109 |    ],
110 |    "source": [
111 |     "%%timeit\n",
112 |     "xs = np.random.normal(size=56000)\n",
113 |     "np.argpartition(xs, 10)[:10]\n"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {},
120 |    "outputs": [
121 |     {
122 |      "name": "stdout",
123 |      "output_type": "stream",
124 |      "text": [
125 |       "4.76 ms ± 37.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
126 |      ]
127 |     }
128 |    ],
129 |    "source": [
130 |     "%%timeit\n",
131 |     "xs = np.random.normal(size=56000)\n",
132 |     "np.argsort(xs)[:10]"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "markdown",
137 |    "metadata": {},
138 |    "source": [
139 |     "#### ❇️ Cheers!! 🍺 "
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": null,
145 |    "metadata": {},
146 |    "outputs": [],
147 |    "source": []
148 |   }
149 |  ],
150 |  "metadata": {
151 |   "kernelspec": {
152 |    "display_name": "env_twitter",
153 |    "language": "python",
154 |    "name": "env_twitter"
155 |   },
156 |   "language_info": {
157 |    "codemirror_mode": {
158 |     "name": "ipython",
159 |     "version": 3
160 |    },
161 |    "file_extension": ".py",
162 |    "mimetype": "text/x-python",
163 |    "name": "python",
164 |    "nbconvert_exporter": "python",
165 |    "pygments_lexer": "ipython3",
166 |    "version": "3.10.5"
167 |   }
168 |  },
169 |  "nbformat": 4,
170 |  "nbformat_minor": 4
171 | }
172 | 


--------------------------------------------------------------------------------
/Python/decorators.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### `Decorators` clearly explained! 🚀 "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 2,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "name": "stdout",
 17 |      "output_type": "stream",
 18 |      "text": [
 19 |       "Hello, Python! You are awesome!\n"
 20 |      ]
 21 |     }
 22 |    ],
 23 |    "source": [
 24 |     "# Functions are first class objects!\n",
 25 |     "\n",
 26 |     "def greet(name):\n",
 27 |     "  return f'Hello, {name}!'\n",
 28 |     "\n",
 29 |     "def cheer(fun, name):\n",
 30 |     "  return fun(name) + ' You are awesome!'\n",
 31 |     "  \n",
 32 |     "print(cheer(greet, 'Python'))"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 5,
 38 |    "metadata": {},
 39 |    "outputs": [
 40 |     {
 41 |      "name": "stdout",
 42 |      "output_type": "stream",
 43 |      "text": [
 44 |       "Before function call\n",
 45 |       "Hello, World!\n",
 46 |       "After function call\n"
 47 |      ]
 48 |     }
 49 |    ],
 50 |    "source": [
 51 |     "def decorate(fun):\n",
 52 |     "  def wrapper():\n",
 53 |     "    print(\"Before function call\")\n",
 54 |     "    fun()\n",
 55 |     "    print(\"After function call\")\n",
 56 |     "  return wrapper\n",
 57 |     "\n",
 58 |     "def greet():\n",
 59 |     "  print(\"Hello, World!\")\n",
 60 |     "\n",
 61 |     "greet = decorate(greet)\n",
 62 |     "greet()"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 11,
 68 |    "metadata": {},
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "Before function call\n",
 75 |       "Hello, Akshay!\n",
 76 |       "After function call\n"
 77 |      ]
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "def decorate(fun):\n",
 82 |     "  def wrapper(arg):\n",
 83 |     "    print(\"Before function call\")\n",
 84 |     "    fun(arg)\n",
 85 |     "    print(\"After function call\")\n",
 86 |     "  return wrapper\n",
 87 |     "\n",
 88 |     "@decorate\n",
 89 |     "def greet(name):\n",
 90 |     "  print(f\"Hello, {name}!\")\n",
 91 |     "\n",
 92 |     "greet('Akshay')"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 9,
 98 |    "metadata": {},
 99 |    "outputs": [
100 |     {
101 |      "name": "stderr",
102 |      "output_type": "stream",
103 |      "text": [
104 |       "INFO:root:Executed fibonacci in 9.5367431640625e-07 seconds\n",
105 |       "INFO:root:Executed fibonacci in 7.152557373046875e-07 seconds\n",
106 |       "INFO:root:Executed fibonacci in 0.0016901493072509766 seconds\n"
107 |      ]
108 |     },
109 |     {
110 |      "name": "stdout",
111 |      "output_type": "stream",
112 |      "text": [
113 |       "1\n"
114 |      ]
115 |     }
116 |    ],
117 |    "source": [
118 |     "import time\n",
119 |     "import logging\n",
120 |     "\n",
121 |     "logging.basicConfig(level=logging.INFO)\n",
122 |     "\n",
123 |     "def timer_decorator(func):\n",
124 |     "    def wrapper(*args, **kwargs):\n",
125 |     "        start_time = time.time()\n",
126 |     "        result = func(*args, **kwargs)\n",
127 |     "        end_time = time.time()\n",
128 |     "        execution_time = end_time - start_time\n",
129 |     "        logging.info(f\"Executed {func.__name__} in {execution_time} seconds\")\n",
130 |     "        return result\n",
131 |     "    return wrapper\n",
132 |     "\n",
133 |     "@timer_decorator\n",
134 |     "def fibonacci(n):\n",
135 |     "    if n <= 1:\n",
136 |     "       return n\n",
137 |     "    else:\n",
138 |     "       return (fibonacci(n-1) + fibonacci(n-2))\n",
139 |     "\n",
140 |     "print(fibonacci(2))"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": null,
146 |    "metadata": {},
147 |    "outputs": [],
148 |    "source": []
149 |   }
150 |  ],
151 |  "metadata": {
152 |   "kernelspec": {
153 |    "display_name": "env_twitter",
154 |    "language": "python",
155 |    "name": "env_twitter"
156 |   },
157 |   "language_info": {
158 |    "codemirror_mode": {
159 |     "name": "ipython",
160 |     "version": 3
161 |    },
162 |    "file_extension": ".py",
163 |    "mimetype": "text/x-python",
164 |    "name": "python",
165 |    "nbconvert_exporter": "python",
166 |    "pygments_lexer": "ipython3",
167 |    "version": "3.10.6"
168 |   }
169 |  },
170 |  "nbformat": 4,
171 |  "nbformat_minor": 4
172 | }
173 | 


--------------------------------------------------------------------------------
/random/matmul_operator.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "##### ❇️ Python matmul operator @"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "import tensorflow as tf"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "arr1 = np.random.rand(3, 3)\n",
 27 |     "arr2 = np.random.rand(3, 3)"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "##### ❇️ Matrix multiplication before Python 3.5"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 3,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "data": {
 44 |       "text/plain": [
 45 |        "array([[0.57104228, 0.6249187 , 0.75554827],\n",
 46 |        "       [0.84227088, 0.52005673, 0.96547005],\n",
 47 |        "       [1.51418666, 1.04428351, 1.77612534]])"
 48 |       ]
 49 |      },
 50 |      "execution_count": 3,
 51 |      "metadata": {},
 52 |      "output_type": "execute_result"
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "np.matmul(arr1, arr2)"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 4,
 62 |    "metadata": {},
 63 |    "outputs": [
 64 |     {
 65 |      "data": {
 66 |       "text/plain": [
 67 |        "<tf.Tensor: shape=(3, 3), dtype=float64, numpy=\n",
 68 |        "array([[0.57104228, 0.6249187 , 0.75554827],\n",
 69 |        "       [0.84227088, 0.52005673, 0.96547005],\n",
 70 |        "       [1.51418666, 1.04428351, 1.77612534]])>"
 71 |       ]
 72 |      },
 73 |      "execution_count": 4,
 74 |      "metadata": {},
 75 |      "output_type": "execute_result"
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "tf.linalg.matmul(arr1, arr2)"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "##### ❇️ Matrix multiplication after Python 3.5"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 5,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "data": {
 96 |       "text/plain": [
 97 |        "array([[0.57104228, 0.6249187 , 0.75554827],\n",
 98 |        "       [0.84227088, 0.52005673, 0.96547005],\n",
 99 |        "       [1.51418666, 1.04428351, 1.77612534]])"
100 |       ]
101 |      },
102 |      "execution_count": 5,
103 |      "metadata": {},
104 |      "output_type": "execute_result"
105 |     }
106 |    ],
107 |    "source": [
108 |     "arr1@arr2"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "##### ❇️ Matmul operator can be defined for any class using the `__matmul__` dunder method."
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 6,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "class CustomInt:\n",
125 |     "  def __init__(self, val):\n",
126 |     "    self.val = val\n",
127 |     "  def __matmul__(self, input_int):\n",
128 |     "    return self.val**input_int.val"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 7,
134 |    "metadata": {},
135 |    "outputs": [
136 |     {
137 |      "data": {
138 |       "text/plain": [
139 |        "8"
140 |       ]
141 |      },
142 |      "execution_count": 7,
143 |      "metadata": {},
144 |      "output_type": "execute_result"
145 |     }
146 |    ],
147 |    "source": [
148 |     "int_1, int_2 = CustomInt(2), CustomInt(3)\n",
149 |     "int_1@int_2"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "##### ❇️ Hope you enjoyed reading!! 📖 \n",
157 |     "##### ❇️ follow → @akshay_pachaar  "
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": null,
163 |    "metadata": {},
164 |    "outputs": [],
165 |    "source": []
166 |   }
167 |  ],
168 |  "metadata": {
169 |   "kernelspec": {
170 |    "display_name": "env_3D",
171 |    "language": "python",
172 |    "name": "env_3d"
173 |   },
174 |   "language_info": {
175 |    "codemirror_mode": {
176 |     "name": "ipython",
177 |     "version": 3
178 |    },
179 |    "file_extension": ".py",
180 |    "mimetype": "text/x-python",
181 |    "name": "python",
182 |    "nbconvert_exporter": "python",
183 |    "pygments_lexer": "ipython3",
184 |    "version": "3.6.13"
185 |   }
186 |  },
187 |  "nbformat": 4,
188 |  "nbformat_minor": 4
189 | }
190 | 


--------------------------------------------------------------------------------
/random/shapely_polygon_intersection.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "##### ❇️ Finding the intersection between polygons and its area"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "from shapely.geometry import Polygon"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "Note: the polygon can be of any arbitrary shape <br>\n",
 24 |     "Here we consider simpler cases for the demo"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {},
 31 |    "outputs": [
 32 |     {
 33 |      "name": "stdout",
 34 |      "output_type": "stream",
 35 |      "text": [
 36 |       "Area1: 1.0\n"
 37 |      ]
 38 |     },
 39 |     {
 40 |      "data": {
 41 |       "image/svg+xml": [
 42 |        "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-0.04 -0.04 1.08 1.08\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,1.0)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.0216\" opacity=\"0.6\" d=\"M 0.0,0.0 L 1.0,0.0 L 1.0,1.0 L 0.0,1.0 L 0.0,0.0 z\" /></g></svg>"
 43 |       ],
 44 |       "text/plain": [
 45 |        "<shapely.geometry.polygon.Polygon at 0x7ff1da1e0dc0>"
 46 |       ]
 47 |      },
 48 |      "execution_count": 2,
 49 |      "metadata": {},
 50 |      "output_type": "execute_result"
 51 |     }
 52 |    ],
 53 |    "source": [
 54 |     "polygon1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])\n",
 55 |     "print(f'Area1: {polygon1.area}')\n",
 56 |     "polygon1"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 3,
 62 |    "metadata": {},
 63 |    "outputs": [
 64 |     {
 65 |      "name": "stdout",
 66 |      "output_type": "stream",
 67 |      "text": [
 68 |       "Area2: 0.5\n"
 69 |      ]
 70 |     },
 71 |     {
 72 |      "data": {
 73 |       "image/svg+xml": [
 74 |        "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-0.04 -0.04 1.08 1.08\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,1.0)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.0216\" opacity=\"0.6\" d=\"M 0.0,0.0 L 1.0,0.0 L 1.0,1.0 L 0.0,0.0 z\" /></g></svg>"
 75 |       ],
 76 |       "text/plain": [
 77 |        "<shapely.geometry.polygon.Polygon at 0x7ff1da1ecbb0>"
 78 |       ]
 79 |      },
 80 |      "execution_count": 3,
 81 |      "metadata": {},
 82 |      "output_type": "execute_result"
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "polygon2 = Polygon([(0, 0), (1, 0), (1, 1), (0, 0)])\n",
 87 |     "print(f'Area2: {polygon2.area}')\n",
 88 |     "polygon2"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 4,
 94 |    "metadata": {},
 95 |    "outputs": [
 96 |     {
 97 |      "name": "stdout",
 98 |      "output_type": "stream",
 99 |      "text": [
100 |       "Area of intersection: 0.5\n"
101 |      ]
102 |     },
103 |     {
104 |      "data": {
105 |       "image/svg+xml": [
106 |        "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-0.04 -0.04 1.08 1.08\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,1.0)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.0216\" opacity=\"0.6\" d=\"M 0.0,0.0 L 1.0,1.0 L 1.0,0.0 L 0.0,0.0 z\" /></g></svg>"
107 |       ],
108 |       "text/plain": [
109 |        "<shapely.geometry.polygon.Polygon at 0x7ff1d65d8d90>"
110 |       ]
111 |      },
112 |      "execution_count": 4,
113 |      "metadata": {},
114 |      "output_type": "execute_result"
115 |     }
116 |    ],
117 |    "source": [
118 |     "intersection = polygon1.intersection(polygon2)\n",
119 |     "print(f'Area of intersection: {intersection.area}')\n",
120 |     "intersection"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {},
126 |    "source": [
127 |     "##### ❇️ Hope you enjoyed reading!! 📖 \n",
128 |     "##### ❇️ follow → @akshay_pachaar  "
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": []
137 |   }
138 |  ],
139 |  "metadata": {
140 |   "kernelspec": {
141 |    "display_name": "env_poi",
142 |    "language": "python",
143 |    "name": "env_poi"
144 |   },
145 |   "language_info": {
146 |    "codemirror_mode": {
147 |     "name": "ipython",
148 |     "version": 3
149 |    },
150 |    "file_extension": ".py",
151 |    "mimetype": "text/x-python",
152 |    "name": "python",
153 |    "nbconvert_exporter": "python",
154 |    "pygments_lexer": "ipython3",
155 |    "version": "3.8.13"
156 |   }
157 |  },
158 |  "nbformat": 4,
159 |  "nbformat_minor": 4
160 | }
161 | 


--------------------------------------------------------------------------------
/PyTorch/torch_autograd.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### ❇️ Automatic differentiation in PyTorch"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Back propagation is widely used for training neural nets. During back propagation we adjust model<br> parameters (weights & biases) based on the gradient of the loss function w.r.t. the given parameter.<br> ∂loss/∂w (gradient w.r.t weight w); ∂loss/∂b (gradient w.r.t. bias b)"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "##### 🔴 Let's consider the simplest neural net with 3 inputs and 2 outputs 👇 "
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "![](./resources/basic_neural_net.png)"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "##### 🟡 Let's code ⬆️  this using pytorch ⬇️ "
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 1,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "import torch\n",
 45 |     "\n",
 46 |     "x = torch.ones(3)  # input tensor\n",
 47 |     "y = torch.zeros(2)  # expected output\n",
 48 |     "# Notice the use of requires_grad = True ⬇️ \n",
 49 |     "w = torch.randn(3, 2, requires_grad=True) # weights \n",
 50 |     "b = torch.randn(2, requires_grad=True) # biases \n",
 51 |     "z = torch.matmul(x, w)+b # output\n",
 52 |     "loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "The code above ⬆️ represents the following ⬇️ computational graph <br>\n",
 60 |     "In this graph, w and b are parameters, which we need to optimize. <br> \n",
 61 |     "Thus, we need to be able to compute the gradients of loss function with respect to those variables. <br>\n",
 62 |     "In order to do that, we set the <b>requires_grad</b> property of those tensors. <br>"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "metadata": {},
 68 |    "source": [
 69 |     "![](./resources/computational_graph.png)"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "##### 🟢 grad_fn:\n",
 77 |     "grad_fn is an object of class Function that is applied to tensors to construct the computational graph ⬆️ .<br> This object knows how to compute the function in the forward direction, and also how to compute its <br>derivative during the backward propagation step.<br> grad_fn is stored as a property of the resulting tensor. Check this out 👇 "
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 6,
 83 |    "metadata": {},
 84 |    "outputs": [
 85 |     {
 86 |      "name": "stdout",
 87 |      "output_type": "stream",
 88 |      "text": [
 89 |       "grad_function for z = <AddBackward0 object at 0x7fd9d92fe748>\n",
 90 |       "grad_function for loss = <BinaryCrossEntropyWithLogitsBackward object at 0x7fd9d92fe5c0>\n"
 91 |      ]
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "print(f\"grad_function for z = {z.grad_fn}\")\n",
 96 |     "print(f\"grad_function for loss = {loss.grad_fn}\")"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "##### 🔵 Computing Gradients\n",
104 |     "To optimize the parameters of our neural network, we need to compute ∂loss/∂w (gradient of loss w.r.t. weight w) and<br> ∂loss/∂b (gradient of loss w.r.t. bias b) under some fixed values of x and y. <br>\n",
105 |     "This is how we do it 👇 "
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 3,
111 |    "metadata": {},
112 |    "outputs": [
113 |     {
114 |      "name": "stdout",
115 |      "output_type": "stream",
116 |      "text": [
117 |       "tensor([[0.2980, 0.4528],\n",
118 |       "        [0.2980, 0.4528],\n",
119 |       "        [0.2980, 0.4528]])\n",
120 |       "tensor([0.2980, 0.4528])\n"
121 |      ]
122 |     }
123 |    ],
124 |    "source": [
125 |     "# loss.backward() Computes the gradient of current tensor w.r.t. graph leaves.\n",
126 |     "# In the graph we see that the leaves are w and b (the ones for which requires_grad = True)\n",
127 |     "loss.backward()\n",
128 |     "print(w.grad)\n",
129 |     "print(b.grad)"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": []
138 |   }
139 |  ],
140 |  "metadata": {
141 |   "kernelspec": {
142 |    "display_name": "env_torch",
143 |    "language": "python",
144 |    "name": "env_torch"
145 |   },
146 |   "language_info": {
147 |    "codemirror_mode": {
148 |     "name": "ipython",
149 |     "version": 3
150 |    },
151 |    "file_extension": ".py",
152 |    "mimetype": "text/x-python",
153 |    "name": "python",
154 |    "nbconvert_exporter": "python",
155 |    "pygments_lexer": "ipython3",
156 |    "version": "3.6.13"
157 |   }
158 |  },
159 |  "nbformat": 4,
160 |  "nbformat_minor": 4
161 | }
162 | 


--------------------------------------------------------------------------------
/NLP/question_answering.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "924e7dcf-fa8c-491c-99d5-a95b49e83094",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import warnings\n",
 11 |     "warnings.filterwarnings('ignore')"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "id": "6854adc2-d7b1-4524-960d-f4f33140277e",
 17 |    "metadata": {},
 18 |    "source": [
 19 |     "### 🔘 Question Answering using `HuggingFace` 🤗 "
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "id": "bec8941f-537e-4cbf-9750-bb283b21c5af",
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "from transformers import pipeline"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 3,
 35 |    "id": "d3e0c1d1-91a5-4e12-a309-262f1fb1377a",
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "# Provide the context text over which we will ask questions\n",
 40 |     "\n",
 41 |     "text = \"Leo Messi, is an Argentine professional footballer who \\\n",
 42 |     "        plays as a forward for Paris Saint-Germain and captains \\\n",
 43 |     "        the Argentina national team. Widely regarded as one of \\\n",
 44 |     "        the greatest players of all time, Messi has won a record \\\n",
 45 |     "        seven Ballon d'Or awards\""
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 4,
 51 |    "id": "f2f1a75f-8863-4414-90ae-2f5eeac25527",
 52 |    "metadata": {},
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "# Setup the pipeline for question answering\n",
 56 |     "reader = pipeline(task = \"question-answering\", \n",
 57 |     "                  model = \"distilbert-base-cased-distilled-squad\")"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 5,
 63 |    "id": "b2a69df0-154c-48fa-88aa-2ef34757583e",
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "# Question that we want to ask\n",
 68 |     "question = \"Who is the GOAT of football?\""
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 6,
 74 |    "id": "5aa2c4d8-9ac3-4d0a-8bb9-907f70809b61",
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "outputs = reader(question=question, context=text)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 7,
 84 |    "id": "3f45f111-0d66-4944-9b9b-4d029e00feeb",
 85 |    "metadata": {},
 86 |    "outputs": [
 87 |     {
 88 |      "data": {
 89 |       "text/html": [
 90 |        "<div>\n",
 91 |        "<style scoped>\n",
 92 |        "    .dataframe tbody tr th:only-of-type {\n",
 93 |        "        vertical-align: middle;\n",
 94 |        "    }\n",
 95 |        "\n",
 96 |        "    .dataframe tbody tr th {\n",
 97 |        "        vertical-align: top;\n",
 98 |        "    }\n",
 99 |        "\n",
100 |        "    .dataframe thead th {\n",
101 |        "        text-align: right;\n",
102 |        "    }\n",
103 |        "</style>\n",
104 |        "<table border=\"1\" class=\"dataframe\">\n",
105 |        "  <thead>\n",
106 |        "    <tr style=\"text-align: right;\">\n",
107 |        "      <th></th>\n",
108 |        "      <th>score</th>\n",
109 |        "      <th>start</th>\n",
110 |        "      <th>end</th>\n",
111 |        "      <th>answer</th>\n",
112 |        "    </tr>\n",
113 |        "  </thead>\n",
114 |        "  <tbody>\n",
115 |        "    <tr>\n",
116 |        "      <th>0</th>\n",
117 |        "      <td>0.919524</td>\n",
118 |        "      <td>0</td>\n",
119 |        "      <td>9</td>\n",
120 |        "      <td>Leo Messi</td>\n",
121 |        "    </tr>\n",
122 |        "  </tbody>\n",
123 |        "</table>\n",
124 |        "</div>"
125 |       ],
126 |       "text/plain": [
127 |        "      score  start  end     answer\n",
128 |        "0  0.919524      0    9  Leo Messi"
129 |       ]
130 |      },
131 |      "execution_count": 7,
132 |      "metadata": {},
133 |      "output_type": "execute_result"
134 |     }
135 |    ],
136 |    "source": [
137 |     "# Put the results in a DataFrame\n",
138 |     "\n",
139 |     "import pandas as pd\n",
140 |     "pd.DataFrame([outputs])"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "markdown",
145 |    "id": "0ce27d9c-65b7-4359-90ab-8db8a897fc6b",
146 |    "metadata": {},
147 |    "source": [
148 |     "And we know who is the GOAT!! 😀"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "id": "8747eac2-842d-4b97-b83e-f10b2134db87",
155 |    "metadata": {},
156 |    "outputs": [],
157 |    "source": []
158 |   }
159 |  ],
160 |  "metadata": {
161 |   "kernelspec": {
162 |    "display_name": "env_twitter",
163 |    "language": "python",
164 |    "name": "env_twitter"
165 |   },
166 |   "language_info": {
167 |    "codemirror_mode": {
168 |     "name": "ipython",
169 |     "version": 3
170 |    },
171 |    "file_extension": ".py",
172 |    "mimetype": "text/x-python",
173 |    "name": "python",
174 |    "nbconvert_exporter": "python",
175 |    "pygments_lexer": "ipython3",
176 |    "version": "3.10.5"
177 |   }
178 |  },
179 |  "nbformat": 4,
180 |  "nbformat_minor": 5
181 | }
182 | 


--------------------------------------------------------------------------------
/pandas/iris.csv:
--------------------------------------------------------------------------------
  1 | SepalLength,SepalWidth,PetalLength,PetalWidth,Name
  2 | 5.1,3.5,1.4,0.2,Iris-setosa
  3 | 4.9,3.0,1.4,0.2,Iris-setosa
  4 | 4.7,3.2,1.3,0.2,Iris-setosa
  5 | 4.6,3.1,1.5,0.2,Iris-setosa
  6 | 5.0,3.6,1.4,0.2,Iris-setosa
  7 | 5.4,3.9,1.7,0.4,Iris-setosa
  8 | 4.6,3.4,1.4,0.3,Iris-setosa
  9 | 5.0,3.4,1.5,0.2,Iris-setosa
 10 | 4.4,2.9,1.4,0.2,Iris-setosa
 11 | 4.9,3.1,1.5,0.1,Iris-setosa
 12 | 5.4,3.7,1.5,0.2,Iris-setosa
 13 | 4.8,3.4,1.6,0.2,Iris-setosa
 14 | 4.8,3.0,1.4,0.1,Iris-setosa
 15 | 4.3,3.0,1.1,0.1,Iris-setosa
 16 | 5.8,4.0,1.2,0.2,Iris-setosa
 17 | 5.7,4.4,1.5,0.4,Iris-setosa
 18 | 5.4,3.9,1.3,0.4,Iris-setosa
 19 | 5.1,3.5,1.4,0.3,Iris-setosa
 20 | 5.7,3.8,1.7,0.3,Iris-setosa
 21 | 5.1,3.8,1.5,0.3,Iris-setosa
 22 | 5.4,3.4,1.7,0.2,Iris-setosa
 23 | 5.1,3.7,1.5,0.4,Iris-setosa
 24 | 4.6,3.6,1.0,0.2,Iris-setosa
 25 | 5.1,3.3,1.7,0.5,Iris-setosa
 26 | 4.8,3.4,1.9,0.2,Iris-setosa
 27 | 5.0,3.0,1.6,0.2,Iris-setosa
 28 | 5.0,3.4,1.6,0.4,Iris-setosa
 29 | 5.2,3.5,1.5,0.2,Iris-setosa
 30 | 5.2,3.4,1.4,0.2,Iris-setosa
 31 | 4.7,3.2,1.6,0.2,Iris-setosa
 32 | 4.8,3.1,1.6,0.2,Iris-setosa
 33 | 5.4,3.4,1.5,0.4,Iris-setosa
 34 | 5.2,4.1,1.5,0.1,Iris-setosa
 35 | 5.5,4.2,1.4,0.2,Iris-setosa
 36 | 4.9,3.1,1.5,0.1,Iris-setosa
 37 | 5.0,3.2,1.2,0.2,Iris-setosa
 38 | 5.5,3.5,1.3,0.2,Iris-setosa
 39 | 4.9,3.1,1.5,0.1,Iris-setosa
 40 | 4.4,3.0,1.3,0.2,Iris-setosa
 41 | 5.1,3.4,1.5,0.2,Iris-setosa
 42 | 5.0,3.5,1.3,0.3,Iris-setosa
 43 | 4.5,2.3,1.3,0.3,Iris-setosa
 44 | 4.4,3.2,1.3,0.2,Iris-setosa
 45 | 5.0,3.5,1.6,0.6,Iris-setosa
 46 | 5.1,3.8,1.9,0.4,Iris-setosa
 47 | 4.8,3.0,1.4,0.3,Iris-setosa
 48 | 5.1,3.8,1.6,0.2,Iris-setosa
 49 | 4.6,3.2,1.4,0.2,Iris-setosa
 50 | 5.3,3.7,1.5,0.2,Iris-setosa
 51 | 5.0,3.3,1.4,0.2,Iris-setosa
 52 | 7.0,3.2,4.7,1.4,Iris-versicolor
 53 | 6.4,3.2,4.5,1.5,Iris-versicolor
 54 | 6.9,3.1,4.9,1.5,Iris-versicolor
 55 | 5.5,2.3,4.0,1.3,Iris-versicolor
 56 | 6.5,2.8,4.6,1.5,Iris-versicolor
 57 | 5.7,2.8,4.5,1.3,Iris-versicolor
 58 | 6.3,3.3,4.7,1.6,Iris-versicolor
 59 | 4.9,2.4,3.3,1.0,Iris-versicolor
 60 | 6.6,2.9,4.6,1.3,Iris-versicolor
 61 | 5.2,2.7,3.9,1.4,Iris-versicolor
 62 | 5.0,2.0,3.5,1.0,Iris-versicolor
 63 | 5.9,3.0,4.2,1.5,Iris-versicolor
 64 | 6.0,2.2,4.0,1.0,Iris-versicolor
 65 | 6.1,2.9,4.7,1.4,Iris-versicolor
 66 | 5.6,2.9,3.6,1.3,Iris-versicolor
 67 | 6.7,3.1,4.4,1.4,Iris-versicolor
 68 | 5.6,3.0,4.5,1.5,Iris-versicolor
 69 | 5.8,2.7,4.1,1.0,Iris-versicolor
 70 | 6.2,2.2,4.5,1.5,Iris-versicolor
 71 | 5.6,2.5,3.9,1.1,Iris-versicolor
 72 | 5.9,3.2,4.8,1.8,Iris-versicolor
 73 | 6.1,2.8,4.0,1.3,Iris-versicolor
 74 | 6.3,2.5,4.9,1.5,Iris-versicolor
 75 | 6.1,2.8,4.7,1.2,Iris-versicolor
 76 | 6.4,2.9,4.3,1.3,Iris-versicolor
 77 | 6.6,3.0,4.4,1.4,Iris-versicolor
 78 | 6.8,2.8,4.8,1.4,Iris-versicolor
 79 | 6.7,3.0,5.0,1.7,Iris-versicolor
 80 | 6.0,2.9,4.5,1.5,Iris-versicolor
 81 | 5.7,2.6,3.5,1.0,Iris-versicolor
 82 | 5.5,2.4,3.8,1.1,Iris-versicolor
 83 | 5.5,2.4,3.7,1.0,Iris-versicolor
 84 | 5.8,2.7,3.9,1.2,Iris-versicolor
 85 | 6.0,2.7,5.1,1.6,Iris-versicolor
 86 | 5.4,3.0,4.5,1.5,Iris-versicolor
 87 | 6.0,3.4,4.5,1.6,Iris-versicolor
 88 | 6.7,3.1,4.7,1.5,Iris-versicolor
 89 | 6.3,2.3,4.4,1.3,Iris-versicolor
 90 | 5.6,3.0,4.1,1.3,Iris-versicolor
 91 | 5.5,2.5,4.0,1.3,Iris-versicolor
 92 | 5.5,2.6,4.4,1.2,Iris-versicolor
 93 | 6.1,3.0,4.6,1.4,Iris-versicolor
 94 | 5.8,2.6,4.0,1.2,Iris-versicolor
 95 | 5.0,2.3,3.3,1.0,Iris-versicolor
 96 | 5.6,2.7,4.2,1.3,Iris-versicolor
 97 | 5.7,3.0,4.2,1.2,Iris-versicolor
 98 | 5.7,2.9,4.2,1.3,Iris-versicolor
 99 | 6.2,2.9,4.3,1.3,Iris-versicolor
100 | 5.1,2.5,3.0,1.1,Iris-versicolor
101 | 5.7,2.8,4.1,1.3,Iris-versicolor
102 | 6.3,3.3,6.0,2.5,Iris-virginica
103 | 5.8,2.7,5.1,1.9,Iris-virginica
104 | 7.1,3.0,5.9,2.1,Iris-virginica
105 | 6.3,2.9,5.6,1.8,Iris-virginica
106 | 6.5,3.0,5.8,2.2,Iris-virginica
107 | 7.6,3.0,6.6,2.1,Iris-virginica
108 | 4.9,2.5,4.5,1.7,Iris-virginica
109 | 7.3,2.9,6.3,1.8,Iris-virginica
110 | 6.7,2.5,5.8,1.8,Iris-virginica
111 | 7.2,3.6,6.1,2.5,Iris-virginica
112 | 6.5,3.2,5.1,2.0,Iris-virginica
113 | 6.4,2.7,5.3,1.9,Iris-virginica
114 | 6.8,3.0,5.5,2.1,Iris-virginica
115 | 5.7,2.5,5.0,2.0,Iris-virginica
116 | 5.8,2.8,5.1,2.4,Iris-virginica
117 | 6.4,3.2,5.3,2.3,Iris-virginica
118 | 6.5,3.0,5.5,1.8,Iris-virginica
119 | 7.7,3.8,6.7,2.2,Iris-virginica
120 | 7.7,2.6,6.9,2.3,Iris-virginica
121 | 6.0,2.2,5.0,1.5,Iris-virginica
122 | 6.9,3.2,5.7,2.3,Iris-virginica
123 | 5.6,2.8,4.9,2.0,Iris-virginica
124 | 7.7,2.8,6.7,2.0,Iris-virginica
125 | 6.3,2.7,4.9,1.8,Iris-virginica
126 | 6.7,3.3,5.7,2.1,Iris-virginica
127 | 7.2,3.2,6.0,1.8,Iris-virginica
128 | 6.2,2.8,4.8,1.8,Iris-virginica
129 | 6.1,3.0,4.9,1.8,Iris-virginica
130 | 6.4,2.8,5.6,2.1,Iris-virginica
131 | 7.2,3.0,5.8,1.6,Iris-virginica
132 | 7.4,2.8,6.1,1.9,Iris-virginica
133 | 7.9,3.8,6.4,2.0,Iris-virginica
134 | 6.4,2.8,5.6,2.2,Iris-virginica
135 | 6.3,2.8,5.1,1.5,Iris-virginica
136 | 6.1,2.6,5.6,1.4,Iris-virginica
137 | 7.7,3.0,6.1,2.3,Iris-virginica
138 | 6.3,3.4,5.6,2.4,Iris-virginica
139 | 6.4,3.1,5.5,1.8,Iris-virginica
140 | 6.0,3.0,4.8,1.8,Iris-virginica
141 | 6.9,3.1,5.4,2.1,Iris-virginica
142 | 6.7,3.1,5.6,2.4,Iris-virginica
143 | 6.9,3.1,5.1,2.3,Iris-virginica
144 | 5.8,2.7,5.1,1.9,Iris-virginica
145 | 6.8,3.2,5.9,2.3,Iris-virginica
146 | 6.7,3.3,5.7,2.5,Iris-virginica
147 | 6.7,3.0,5.2,2.3,Iris-virginica
148 | 6.3,2.5,5.0,1.9,Iris-virginica
149 | 6.5,3.0,5.2,2.0,Iris-virginica
150 | 6.2,3.4,5.4,2.3,Iris-virginica
151 | 5.9,3.0,5.1,1.8,Iris-virginica
152 | 


--------------------------------------------------------------------------------
/ml_from_scratch/KNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### `KNN` from scratch! 🚀"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "\n",
 18 |     "class KNN:\n",
 19 |     "    def __init__(self, k=3, task='classification'):\n",
 20 |     "        self.k = k\n",
 21 |     "        self.task = task\n",
 22 |     "\n",
 23 |     "    def _euclidean_distance(self, a, b):\n",
 24 |     "        # Calculate the Euclidean distance from point a to every row of b\n",
 25 |     "        return np.sqrt(np.sum((a - b)**2, axis=1))\n",
 26 |     "\n",
 27 |     "    def fit(self, X, y):\n",
 28 |     "        # Store training data and labels\n",
 29 |     "        self.X_train = X\n",
 30 |     "        self.y_train = y\n",
 31 |     "\n",
 32 |     "    def predict(self, X):\n",
 33 |     "        # Predict the class labels or target values for a set of data points\n",
 34 |     "        y_pred = [self._predict_single(x) for x in X]\n",
 35 |     "        return np.array(y_pred)\n",
 36 |     "\n",
 37 |     "    def _predict_single(self, x):\n",
 38 |     "        # Predict the class label or target value for a single data point\n",
 39 |     "        distances = self._euclidean_distance(x, self.X_train)\n",
 40 |     "        # Find K closest data points\n",
 41 |     "        k_indices = np.argsort(distances)[:self.k]\n",
 42 |     "        \n",
 43 |     "        # Get nearest neighbours\n",
 44 |     "        nn = [self.X_train[i].tolist() for i in k_indices] \n",
 45 |     "        print('Nearest_neighbours: ', nn)\n",
 46 |     "        \n",
 47 |     "        # Get their labels\n",
 48 |     "        k_nearest_labels = [self.y_train[i] for i in k_indices]  \n",
 49 |     "        print('Labels for Nearest_neighbours: ', k_nearest_labels)\n",
 50 |     "        \n",
 51 |     "        if self.task == 'classification':\n",
 52 |     "            return self._majority_vote(k_nearest_labels)\n",
 53 |     "        elif self.task == 'regression':\n",
 54 |     "            return self._average(k_nearest_labels)\n",
 55 |     "\n",
 56 |     "    def _majority_vote(self, labels):\n",
 57 |     "        # Determine the majority class label from a list of labels\n",
 58 |     "        return np.argmax(np.bincount(labels))\n",
 59 |     "\n",
 60 |     "    def _average(self, values):\n",
 61 |     "        # Calculate the average of a list of values\n",
 62 |     "        return np.mean(values)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "metadata": {},
 68 |    "source": [
 69 |     "#### Let's test it for classification & regression 🚀 "
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 2,
 75 |    "metadata": {},
 76 |    "outputs": [
 77 |     {
 78 |      "name": "stdout",
 79 |      "output_type": "stream",
 80 |      "text": [
 81 |       "Nearest_neighbours:  [[0, 0], [1, 1], [2, 2]]\n",
 82 |       "Labels for Nearest_neighbours:  [0, 0, 1]\n",
 83 |       "Predicted labels: [0]\n"
 84 |      ]
 85 |     }
 86 |    ],
 87 |    "source": [
 88 |     "# Test the KNN implementation\n",
 89 |     "if __name__ == \"__main__\":\n",
 90 |     "    X_train = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])\n",
 91 |     "\n",
 92 |     "    # class labels 👇 \n",
 93 |     "    y_train = np.array([0, 0, 1, 1])\n",
 94 |     "    \n",
 95 |     "    X_test = np.array([[0.5, 0.5]])\n",
 96 |     "\n",
 97 |     "    knn = KNN(k=3, task='classification')\n",
 98 |     "    knn.fit(X_train, y_train)\n",
 99 |     "    y_pred = knn.predict(X_test)\n",
100 |     "\n",
101 |     "    print(\"Predicted labels:\", y_pred)"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 3,
107 |    "metadata": {},
108 |    "outputs": [
109 |     {
110 |      "name": "stdout",
111 |      "output_type": "stream",
112 |      "text": [
113 |       "Nearest_neighbours:  [[2, 2], [1, 1], [3, 3]]\n",
114 |       "Labels for Nearest_neighbours:  [1, 0, 1]\n",
115 |       "Predicted labels: [0.66666667]\n"
116 |      ]
117 |     }
118 |    ],
119 |    "source": [
120 |     "# Test the KNN implementation\n",
121 |     "if __name__ == \"__main__\":\n",
122 |     "    X_train = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])\n",
123 |     "\n",
124 |     "    # target values (the same 0/1 labels, treated as numbers) 👇 \n",
125 |     "    y_train = np.array([0, 0, 1, 1])\n",
126 |     "    \n",
127 |     "    X_test = np.array([[2, 2]])\n",
128 |     "\n",
129 |     "    knn = KNN(k=3, task='regression')\n",
130 |     "    knn.fit(X_train, y_train)\n",
131 |     "    y_pred = knn.predict(X_test)\n",
132 |     "\n",
133 |     "    print(\"Predicted labels:\", y_pred)"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": null,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": []
142 |   }
143 |  ],
144 |  "metadata": {
145 |   "kernelspec": {
146 |    "display_name": "env_twitter",
147 |    "language": "python",
148 |    "name": "env_twitter"
149 |   },
150 |   "language_info": {
151 |    "codemirror_mode": {
152 |     "name": "ipython",
153 |     "version": 3
154 |    },
155 |    "file_extension": ".py",
156 |    "mimetype": "text/x-python",
157 |    "name": "python",
158 |    "nbconvert_exporter": "python",
159 |    "pygments_lexer": "ipython3",
160 |    "version": "3.10.6"
161 |   }
162 |  },
163 |  "nbformat": 4,
164 |  "nbformat_minor": 4
165 | }
166 | 


--------------------------------------------------------------------------------
/PyTorch_Lightning/autoencoders.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "2fd74713-1332-4a0b-a55d-3e0d1662b843",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import os\n",
 11 |     "from torch import optim, nn, utils, Tensor\n",
 12 |     "from torchvision.datasets import MNIST\n",
 13 |     "from torchvision.transforms import ToTensor\n",
 14 |     "import lightning.pytorch as pl\n",
 15 |     "\n",
 16 |     "# define any number of nn.Modules (or use your current ones)\n",
 17 |     "encoder = nn.Sequential(nn.Linear(28 * 28, 128), nn.ReLU(), nn.Linear(128, 64))\n",
 18 |     "decoder = nn.Sequential(nn.Linear(64, 128), nn.ReLU(), nn.Linear(128, 28 * 28))\n",
 19 |     "\n",
 20 |     "\n",
 21 |     "# define the LightningModule\n",
 22 |     "class LitAutoEncoder(pl.LightningModule):\n",
 23 |     "    def __init__(self, encoder, decoder):\n",
 24 |     "        super().__init__()\n",
 25 |     "        self.encoder = encoder\n",
 26 |     "        self.decoder = decoder\n",
 27 |     "\n",
 28 |     "    def training_step(self, batch, batch_idx):\n",
 29 |     "        # training_step defines the train loop.\n",
 30 |     "        # it is independent of forward\n",
 31 |     "        x, y = batch\n",
 32 |     "        x = x.view(x.size(0), -1)\n",
 33 |     "        z = self.encoder(x)\n",
 34 |     "        x_hat = self.decoder(z)\n",
 35 |     "        loss = nn.functional.mse_loss(x_hat, x)\n",
 36 |     "        # Logging to TensorBoard (if installed) by default\n",
 37 |     "        self.log(\"train_loss\", loss)\n",
 38 |     "        return loss\n",
 39 |     "\n",
 40 |     "    def configure_optimizers(self):\n",
 41 |     "        optimizer = optim.Adam(self.parameters(), lr=1e-3)\n",
 42 |     "        return optimizer\n",
 43 |     "\n",
 44 |     "\n",
 45 |     "# init the autoencoder\n",
 46 |     "autoencoder = LitAutoEncoder(encoder, decoder)"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": null,
 52 |    "id": "d35dd91a-8295-4404-aa60-de81cf639c0e",
 53 |    "metadata": {},
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "# setup data\n",
 57 |     "dataset = MNIST(os.getcwd(), download=True, transform=ToTensor())\n",
 58 |     "train_loader = utils.data.DataLoader(dataset)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": null,
 64 |    "id": "96e71aaf-dfa8-43b1-b8a0-bc030693518a",
 65 |    "metadata": {},
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "# train the model (hint: here are some helpful Trainer arguments for rapid idea iteration)\n",
 69 |     "trainer = pl.Trainer(limit_train_batches=100, max_epochs=50)\n",
 70 |     "trainer.fit(model=autoencoder, train_dataloaders=train_loader)"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "id": "2bbc796d-5254-4e6b-ab55-7148ba2a85f8",
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "import torch\n",
 81 |     "\n",
 82 |     "# load checkpoint\n",
 83 |     "checkpoint = \"./lightning_logs/version_7/checkpoints/epoch=49-step=5000.ckpt\"\n",
 84 |     "autoencoder = LitAutoEncoder.load_from_checkpoint(checkpoint, encoder=encoder, decoder=decoder)\n",
 85 |     "\n",
 86 |     "# choose your trained nn.Module\n",
 87 |     "encoder = autoencoder.encoder\n",
 88 |     "encoder.eval()\n",
 89 |     "\n",
 90 |     "# embed 4 fake images!\n",
 91 |     "fake_image_batch = torch.rand(4, 28 * 28, device=autoencoder.device)\n",
 92 |     "embeddings = encoder(fake_image_batch)\n",
 93 |     "print(\"⚡\" * 20, \"\\nPredictions (4 image embeddings):\\n\", embeddings, \"\\n\", \"⚡\" * 20)"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": null,
 99 |    "id": "355f7837-8e5b-4d1a-8ff6-96650d6ea4f8",
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "# Ensure the autoencoder is on the correct device\n",
104 |     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
105 |     "autoencoder = autoencoder.to(device)"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": null,
111 |    "id": "0eb2d3c7-4f97-450d-867c-568d60482e4f",
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "import matplotlib.pyplot as plt\n",
116 |     "from torchvision.utils import make_grid\n",
117 |     "\n",
118 |     "def show_images(images, title=\"Images\"):\n",
119 |     "    \"\"\"Utility function to display a batch of images.\"\"\"\n",
120 |     "    grid_img = make_grid(images, nrow=4)\n",
121 |     "    plt.figure(figsize=(8, 8))\n",
122 |     "    plt.imshow(grid_img.permute(1, 2, 0))\n",
123 |     "    plt.title(title)\n",
124 |     "    plt.axis('off')\n",
125 |     "    plt.show()\n",
126 |     "\n",
127 |     "# Load a batch of images from the dataset\n",
128 |     "images, _ = next(iter(train_loader))\n",
129 |     "show_images(images, title=\"Original Images\")\n",
130 |     "\n",
131 |     "# Preprocess the images\n",
132 |     "images = images.view(images.size(0), -1)\n",
133 |     "\n",
134 |     "# Encode the images, then reconstruct them from the embeddings\n",
135 |     "embeddings = autoencoder.encoder(images)\n",
136 |     "reconstructed_images =autoencoder.decoder(embeddings).view(-1, 1, 28, 28)\n",
137 |     "show_images(reconstructed_images, title=\"Reconstructed Images\")"
138 |    ]
139 |   }
140 |  ],
141 |  "metadata": {
142 |   "kernelspec": {
143 |    "display_name": "env_twitter",
144 |    "language": "python",
145 |    "name": "env_twitter"
146 |   },
147 |   "language_info": {
148 |    "codemirror_mode": {
149 |     "name": "ipython",
150 |     "version": 3
151 |    },
152 |    "file_extension": ".py",
153 |    "mimetype": "text/x-python",
154 |    "name": "python",
155 |    "nbconvert_exporter": "python",
156 |    "pygments_lexer": "ipython3",
157 |    "version": "3.10.6"
158 |   }
159 |  },
160 |  "nbformat": 4,
161 |  "nbformat_minor": 5
162 | }
163 | 


--------------------------------------------------------------------------------
/LLMs/llamaindex_101.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "079206aa-322f-422f-9efe-7a840ff5d4db",
  7 |    "metadata": {},
  8 |    "outputs": [
  9 |     {
 10 |      "name": "stderr",
 11 |      "output_type": "stream",
 12 |      "text": [
 13 |       "<frozen importlib._bootstrap>:241: RuntimeWarning: scipy._lib.messagestream.MessageStream size changed, may indicate binary incompatibility. Expected 56 from C header, got 64 from PyObject\n",
 14 |       "/Users/pachaar/opt/anaconda3/envs/env_twitter/lib/python3.10/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.8.20) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
 15 |       "  warnings.warn(\n"
 16 |      ]
 17 |     }
 18 |    ],
 19 |    "source": [
 20 |     "import os\n",
 21 |     "import textwrap\n",
 22 |     "from dotenv import load_dotenv\n",
 23 |     "import re\n",
 24 |     "\n",
 25 |     "# Load environment variables\n",
 26 |     "load_dotenv()\n",
 27 |     "\n",
 28 |     "# Fetch and set API keys\n",
 29 |     "openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
 30 |     "active_loop_token = os.getenv(\"ACTIVELOOP_TOKEN\")\n",
 31 |     "dataset_path = os.getenv(\"DATASET_PATH\")"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 3,
 37 |    "id": "5c3db9fb-1afc-44b3-9538-04cd6881afae",
 38 |    "metadata": {},
 39 |    "outputs": [
 40 |     {
 41 |      "name": "stdout",
 42 |      "output_type": "stream",
 43 |      "text": [
 44 |       "2\n"
 45 |      ]
 46 |     }
 47 |    ],
 48 |    "source": [
 49 |     "from llama_index import download_loader\n",
 50 |     "\n",
 51 |     "WikipediaReader = download_loader(\"WikipediaReader\")\n",
 52 |     "\n",
 53 |     "loader = WikipediaReader()\n",
 54 |     "\n",
 55 |     "documents = loader.load_data(pages=['Delhi', 'Mumbai'])\n",
 56 |     "print(len(documents))"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 4,
 62 |    "id": "23f20252-46bb-41fd-b9c2-7c3a861268aa",
 63 |    "metadata": {},
 64 |    "outputs": [
 65 |     {
 66 |      "name": "stdout",
 67 |      "output_type": "stream",
 68 |      "text": [
 69 |       "146\n"
 70 |      ]
 71 |     }
 72 |    ],
 73 |    "source": [
 74 |     "from llama_index.node_parser import SimpleNodeParser\n",
 75 |     "\n",
 76 |     "\n",
 77 |     "# Initialize the parser\n",
 78 |     "parser = SimpleNodeParser.from_defaults(chunk_size=512, chunk_overlap=20)\n",
 79 |     "\n",
 80 |     "# Parse documents into nodes\n",
 81 |     "nodes = parser.get_nodes_from_documents(documents)\n",
 82 |     "print(len(nodes))"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 5,
 88 |    "id": "cfd9dc04-d4a9-4d48-8ec0-13acd860fd5d",
 89 |    "metadata": {},
 90 |    "outputs": [
 91 |     {
 92 |      "name": "stdout",
 93 |      "output_type": "stream",
 94 |      "text": [
 95 |       "Your Deep Lake dataset has been successfully created!\n"
 96 |      ]
 97 |     },
 98 |     {
 99 |      "name": "stderr",
100 |      "output_type": "stream",
101 |      "text": [
102 |       "-"
103 |      ]
104 |     }
105 |    ],
106 |    "source": [
107 |     "from llama_index.vector_stores import DeepLakeVectorStore\n",
108 |     "\n",
109 |     "my_activeloop_org_id = \"\"\n",
110 |     "my_activeloop_dataset_name = \"LlamaIndex-101\"\n",
111 |     "dataset_path = f\"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}\"\n",
112 |     "\n",
113 |     "# Create an index over the documents\n",
114 |     "vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=False)"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 9,
120 |    "id": "e761aa7f-0e49-4904-b663-25ea6956bd73",
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "from llama_index.storage.storage_context import StorageContext\n",
125 |     "from llama_index import VectorStoreIndex\n",
126 |     "\n",
127 |     "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
128 |     "\n",
129 |     "index = VectorStoreIndex.from_documents(\n",
130 |     "    documents, storage_context=storage_context\n",
131 |     ")"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 8,
137 |    "id": "9832ed1c-d83a-48af-9795-be02ce9ed9e5",
138 |    "metadata": {},
139 |    "outputs": [
140 |     {
141 |      "name": "stdout",
142 |      "output_type": "stream",
143 |      "text": [
144 |       "Delhi has been historically significant as it has served as the capital of various empires and kingdoms throughout history. It has been a prominent political, cultural, and commercial center in India for centuries. Delhi's historical significance is rooted in its role as a seat of power, witnessing the rise and fall of different dynasties and playing a crucial part in shaping the country's history.\n"
145 |      ]
146 |     }
147 |    ],
148 |    "source": [
149 |     "query_engine = index.as_query_engine()\n",
150 |     "response = query_engine.query(\"What is historical significance of Delhi?\")\n",
151 |     "print( response.response )"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "id": "a1643cfc-7bc1-44b8-921b-2c1806488ec5",
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": []
161 |   }
162 |  ],
163 |  "metadata": {
164 |   "kernelspec": {
165 |    "display_name": "env_twitter",
166 |    "language": "python",
167 |    "name": "env_twitter"
168 |   },
169 |   "language_info": {
170 |    "codemirror_mode": {
171 |     "name": "ipython",
172 |     "version": 3
173 |    },
174 |    "file_extension": ".py",
175 |    "mimetype": "text/x-python",
176 |    "name": "python",
177 |    "nbconvert_exporter": "python",
178 |    "pygments_lexer": "ipython3",
179 |    "version": "3.10.6"
180 |   }
181 |  },
182 |  "nbformat": 4,
183 |  "nbformat_minor": 5
184 | }
185 | 


--------------------------------------------------------------------------------
/random/pivot_table_JS.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "c50bc614-b608-415b-a0ae-03a7189b02fe",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "#### PivotTableJS 🚀 "
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 1,
 14 |    "id": "78fb524c-b199-4107-90e8-1a85cfe92b58",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import pandas as pd\n",
 19 |     "from pivottablejs import pivot_ui"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "id": "6b732b88-7eef-4a9d-bea8-6cf9953e38a8",
 26 |    "metadata": {},
 27 |    "outputs": [
 28 |     {
 29 |      "data": {
 30 |       "text/html": [
 31 |        "<div>\n",
 32 |        "<style scoped>\n",
 33 |        "    .dataframe tbody tr th:only-of-type {\n",
 34 |        "        vertical-align: middle;\n",
 35 |        "    }\n",
 36 |        "\n",
 37 |        "    .dataframe tbody tr th {\n",
 38 |        "        vertical-align: top;\n",
 39 |        "    }\n",
 40 |        "\n",
 41 |        "    .dataframe thead th {\n",
 42 |        "        text-align: right;\n",
 43 |        "    }\n",
 44 |        "</style>\n",
 45 |        "<table border=\"1\" class=\"dataframe\">\n",
 46 |        "  <thead>\n",
 47 |        "    <tr style=\"text-align: right;\">\n",
 48 |        "      <th></th>\n",
 49 |        "      <th>Name</th>\n",
 50 |        "      <th>Party</th>\n",
 51 |        "      <th>Province</th>\n",
 52 |        "      <th>Age</th>\n",
 53 |        "      <th>Gender</th>\n",
 54 |        "    </tr>\n",
 55 |        "  </thead>\n",
 56 |        "  <tbody>\n",
 57 |        "    <tr>\n",
 58 |        "      <th>0</th>\n",
 59 |        "      <td>Liu, Laurin</td>\n",
 60 |        "      <td>NDP</td>\n",
 61 |        "      <td>Quebec</td>\n",
 62 |        "      <td>22.0</td>\n",
 63 |        "      <td>Female</td>\n",
 64 |        "    </tr>\n",
 65 |        "    <tr>\n",
 66 |        "      <th>1</th>\n",
 67 |        "      <td>Mourani, Maria</td>\n",
 68 |        "      <td>Bloc Quebecois</td>\n",
 69 |        "      <td>Quebec</td>\n",
 70 |        "      <td>43.0</td>\n",
 71 |        "      <td>Female</td>\n",
 72 |        "    </tr>\n",
 73 |        "  </tbody>\n",
 74 |        "</table>\n",
 75 |        "</div>"
 76 |       ],
 77 |       "text/plain": [
 78 |        "             Name           Party Province   Age  Gender\n",
 79 |        "0     Liu, Laurin             NDP   Quebec  22.0  Female\n",
 80 |        "1  Mourani, Maria  Bloc Quebecois   Quebec  43.0  Female"
 81 |       ]
 82 |      },
 83 |      "execution_count": 2,
 84 |      "metadata": {},
 85 |      "output_type": "execute_result"
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "df = pd.read_csv(\"mps.csv\")\n",
 90 |     "df.head(2)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 3,
 96 |    "id": "fe23d5a9-f422-4b36-b2e0-e539b8f65791",
 97 |    "metadata": {},
 98 |    "outputs": [
 99 |     {
100 |      "data": {
101 |       "text/html": [
102 |        "\n",
103 |        "        <iframe\n",
104 |        "            width=\"100%\"\n",
105 |        "            height=\"500\"\n",
106 |        "            src=\"pivottablejs.html\"\n",
107 |        "            frameborder=\"0\"\n",
108 |        "            allowfullscreen\n",
109 |        "            \n",
110 |        "        ></iframe>\n",
111 |        "        "
112 |       ],
113 |       "text/plain": [
114 |        "<IPython.lib.display.IFrame at 0x10d2daaa0>"
115 |       ]
116 |      },
117 |      "execution_count": 3,
118 |      "metadata": {},
119 |      "output_type": "execute_result"
120 |     }
121 |    ],
122 |    "source": [
123 |     "pivot_ui(df)"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 5,
129 |    "id": "ae919931-6888-4a3b-b0a8-f863a1ae62a3",
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "data": {
134 |       "text/html": [
135 |        "\n",
136 |        "        <iframe\n",
137 |        "            width=\"100%\"\n",
138 |        "            height=\"500\"\n",
139 |        "            src=\"pivottablejs.html\"\n",
140 |        "            frameborder=\"0\"\n",
141 |        "            allowfullscreen\n",
142 |        "            \n",
143 |        "        ></iframe>\n",
144 |        "        "
145 |       ],
146 |       "text/plain": [
147 |        "<IPython.lib.display.IFrame at 0x117007be0>"
148 |       ]
149 |      },
150 |      "execution_count": 5,
151 |      "metadata": {},
152 |      "output_type": "execute_result"
153 |     }
154 |    ],
155 |    "source": [
156 |     "\n",
157 |     "\n",
158 |     "pivot_ui(df,      \n",
159 |     "    cols= [\"Party\"],\n",
160 |     "    rows= [\"Province\"],\n",
161 |     "    rendererName= \"Horizontal Stacked Bar Chart\",\n",
162 |     "    rowOrder= \"value_z_to_a\", \n",
163 |     "    colOrder= \"value_z_to_a\",\n",
164 |     "    rendererOptions= {\n",
165 |     "        \"c3\": { \"data\": {\"colors\": {\n",
166 |     "            \"Liberal\": '#dc3912', \"Conservative\": '#3366cc', \"NDP\": '#ff9900',\n",
167 |     "            \"Green\":'#109618', 'Bloc Quebecois': '#990099'\n",
168 |     "        }}}\n",
169 |     "    }\n",
170 |     ")"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": null,
176 |    "id": "6129fd7e-d89d-4586-b346-bf6b8a9a6456",
177 |    "metadata": {},
178 |    "outputs": [],
179 |    "source": []
180 |   }
181 |  ],
182 |  "metadata": {
183 |   "kernelspec": {
184 |    "display_name": "env_twitter",
185 |    "language": "python",
186 |    "name": "env_twitter"
187 |   },
188 |   "language_info": {
189 |    "codemirror_mode": {
190 |     "name": "ipython",
191 |     "version": 3
192 |    },
193 |    "file_extension": ".py",
194 |    "mimetype": "text/x-python",
195 |    "name": "python",
196 |    "nbconvert_exporter": "python",
197 |    "pygments_lexer": "ipython3",
198 |    "version": "3.10.5"
199 |   }
200 |  },
201 |  "nbformat": 4,
202 |  "nbformat_minor": 5
203 | }
204 | 


--------------------------------------------------------------------------------
/LLMs/langchain.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### The power of `Langchain 🦜` "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "#!pip install langchain\n",
 17 |     "#!pip install openai\n",
 18 |     "#!pip install google-api-python-client\n",
 19 |     "#!pip install wikipedia"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 19,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "import os\n",
 29 |     "import openai\n",
 30 |     "from langchain.chat_models import ChatOpenAI\n",
 31 |     "from langchain.agents import load_tools, initialize_agent\n",
 32 |     "from langchain.memory import ConversationBufferMemory\n",
 33 |     "from IPython.display import Markdown"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 21,
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# Set environment variables\n",
 43 |     "os.environ['GOOGLE_API_KEY'] = '...'\n",
 44 |     "os.environ['OPENAI_API_KEY'] = '...'\n",
 45 |     "os.environ['GOOGLE_CSE_ID'] = '...'"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 22,
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "# Create a conversation buffer (memory) & instantiate the llm of choice\n",
 55 |     "memory = ConversationBufferMemory()\n",
 56 |     "llm = ChatOpenAI()"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 23,
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "# Provide access to a list of tools that the agents will use\n",
 66 |     "tools = load_tools(['wikipedia', 'google-search', 'llm-math'], llm=llm)"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 27,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "# initialise the agent & make all the tools and llm available to it\n",
 76 |     "agent = initialize_agent(tools, llm, agent='zero-shot-react-description', verbose=True, memory=memory)"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 31,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "# provide a prompt and you are done!\n",
 86 |     "agent.run(\"Find the number of IPL titles won by MS Dhoni & find it's cube root\")"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 29,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "name": "stdout",
 96 |      "output_type": "stream",
 97 |      "text": [
 98 |       "\n",
 99 |       "\n",
100 |       "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
101 |       "\u001b[32;1m\u001b[1;3mI need to find information about IPL titles won by Virat Kohli. Since this is a sports-related question, I should use Google Search.\n",
102 |       "Action: Google Search\n",
103 |       "Action Input: \"Virat Kohli IPL titles\"\u001b[0m\n",
104 |       "Observation: \u001b[33;1m\u001b[1;3mVirat Kohli, the captain of the Royal Challengers Bangalore (RCB) team in the Indian Premier League (IPL), has not won the IPL title so far. Virat Kohli is an Indian international cricketer and the former captain of the Indian national cricket team who plays as a right-handed batsman for Royal ... Jun 27, 2021 ... As India lost the World Test Championship final to New Zealand by eight wickets, it marked the third instance that under Virat Kohli, ... May 22, 2022 ... For all his exploits for India, he has never won the IPL title since joining Bangalore before the inaugural contest in 2008, including as ... Apr 23, 2023 ... It's a welcome reminder that Kohli is still chasing an IPL title, which if successful would be a deserved triumph for the most important player ... Mar 30, 2023 ... Sanjay Manjrekar believes Virat Kohli and Royal Challengers ... their first IPL title in the upcoming season of the Indian Premier League. May 14, 2023 ... Talisman Virat Kohli might be cricket's biggest superstar, but his performances in ... and somehow are still chasing an elusive IPL title. May 10, 2023 ... Virat Kohli's dream of winning the Indian Premier League trophy is almost over after RCB's defeat to Mumbai Indians. Oct 12, 2021 ... It was to be Virat Kohli's last match as captain of Royal Challengers Bangalore. Or of any IPL team. RCB put up a sub par performance ... 3 days ago ... Virat Kohli has dominated the Indian Premier League (IPL) as well as ... reach the playoffs as the wait for a maiden IPL title continues.\u001b[0m\n",
105 |       "Thought:\u001b[32;1m\u001b[1;3mBased on the observation, it seems that Virat Kohli has not won any IPL titles yet.\n",
106 |       "Final Answer: Virat Kohli has not won any IPL titles.\u001b[0m\n",
107 |       "\n",
108 |       "\u001b[1m> Finished chain.\u001b[0m\n"
109 |      ]
110 |     },
111 |     {
112 |      "data": {
113 |       "text/plain": [
114 |        "'Virat Kohli has not won any IPL titles.'"
115 |       ]
116 |      },
117 |      "execution_count": 29,
118 |      "metadata": {},
119 |      "output_type": "execute_result"
120 |     }
121 |    ],
122 |    "source": [
123 |     "agent.run(\"What is the number of IPL titles won by Virat Kohli?\")"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 18,
129 |    "metadata": {},
130 |    "outputs": [],
131 |    "source": [
132 |     "#!pip install google-api-python-client\n",
133 |     "# !pip install wikipedia"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": null,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": []
142 |   }
143 |  ],
144 |  "metadata": {
145 |   "kernelspec": {
146 |    "display_name": "env_twitter",
147 |    "language": "python",
148 |    "name": "env_twitter"
149 |   },
150 |   "language_info": {
151 |    "codemirror_mode": {
152 |     "name": "ipython",
153 |     "version": 3
154 |    },
155 |    "file_extension": ".py",
156 |    "mimetype": "text/x-python",
157 |    "name": "python",
158 |    "nbconvert_exporter": "python",
159 |    "pygments_lexer": "ipython3",
160 |    "version": "3.10.6"
161 |   }
162 |  },
163 |  "nbformat": 4,
164 |  "nbformat_minor": 4
165 | }
166 | 


--------------------------------------------------------------------------------
/azure_ML/deployment.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "##### ❇️ Azure ML: Deploying your model as a Web Service"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import pandas as pd\n",
 17 |     "import sklearn\n",
 18 |     "from sklearn.svm import SVC\n",
 19 |     "import pickle\n",
 20 |     "import joblib\n",
 21 |     "from azureml.core import Workspace\n",
 22 |     "from sklearn.model_selection import train_test_split"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "##### ❇️ Let's quickly train & save an iris classifier model \n",
 30 |     "##### The model here is just a placeholder; you can train <br> and deploy a model of your choice "
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": null,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "# Download dataset\n",
 40 |     "dataset = pd.read_csv(\n",
 41 |     "    \"https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data\"\n",
 42 |     ")\n",
 43 |     "dataset.columns = [\"Petal Length\", \"Petal Width\", \"Sepal Length\", \"Sepal Width\", \"Species\"]"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "dataset = dataset.replace(\n",
 53 |     "    {\"Species\": {\"Iris-setosa\": 1, \"Iris-versicolor\": 2, \"Iris-virginica\": 3}}\n",
 54 |     ")\n",
 55 |     "dataset.head(5)"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {},
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "# Train test split the data\n",
 65 |     "X = dataset.drop(['Species'], axis=1)\n",
 66 |     "y = dataset['Species']\n",
 67 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "metadata": {},
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "# Instantiate a classifier\n",
 77 |     "classifier = SVC(kernel = 'linear', random_state = 0)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": null,
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "# Train classifier\n",
 87 |     "classifier.fit(X_train, y_train)"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "# Do prediction\n",
 97 |     "y_pred = classifier.predict(X_test)"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "## Save as a pickle file\n",
107 |     "filename= 'saved_model_v1.pkl'\n",
108 |     "joblib.dump(classifier,open(filename, 'wb'))"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "##### ❇️ Create a Workspace"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "from azureml.core import Workspace\n",
125 |     "ws = Workspace.create(name='AzureML_Deployment_WS',\n",
126 |     "               subscription_id='2f##b8*****2',\n",
127 |     "               resource_group='AzureML_Deployment_RG',\n",
128 |     "               create_resource_group=True,\n",
129 |     "               location='eastus'\n",
130 |     "               )"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "markdown",
135 |    "metadata": {},
136 |    "source": [
137 |     "##### ❇️ Register Model"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": null,
143 |    "metadata": {},
144 |    "outputs": [],
145 |    "source": [
146 |     "from azureml.core.model import Model\n",
147 |     "\n",
148 |     "model = Model.register(ws, model_name=\"classifier\", model_path=\"saved_model_v1.pkl\")"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "metadata": {},
154 |    "source": [
155 |     "##### ❇️ Setup Inference config"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": null,
161 |    "metadata": {},
162 |    "outputs": [],
163 |    "source": [
164 |     "from azureml.core.model import InferenceConfig\n",
165 |     "\n",
166 |     "inference_config = InferenceConfig(\n",
167 |     "    conda_file='./env.yml',\n",
168 |     "    source_directory=\"./source_dir\",\n",
169 |     "    entry_script=\"./score.py\",\n",
170 |     ")"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "metadata": {},
176 |    "source": [
177 |     "##### ❇️ Setup Deployment config"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": null,
183 |    "metadata": {},
184 |    "outputs": [],
185 |    "source": [
186 |     "from azureml.core.webservice import AciWebservice\n",
187 |     "\n",
188 |     "deployment_config = AciWebservice.deploy_configuration(\n",
189 |     "    cpu_cores=2, memory_gb=3, auth_enabled=True\n",
190 |     ")"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "metadata": {},
196 |    "source": [
197 |     "##### ❇️ Deploy the service"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": null,
203 |    "metadata": {},
204 |    "outputs": [],
205 |    "source": [
206 |     "from azureml.core.model import Model\n",
207 |     "\n",
208 |     "service = Model.deploy(\n",
209 |     "    ws, # The instance of workspace created above\n",
210 |     "    \"myservice\",\n",
211 |     "    [Model(ws, 'bannerdetector')],\n",
212 |     "    inference_config,\n",
213 |     "    deployment_config,\n",
214 |     "    overwrite=True,\n",
215 |     ")\n",
216 |     "service.wait_for_deployment(show_output=True)"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "metadata": {},
223 |    "outputs": [],
224 |    "source": []
225 |   }
226 |  ],
227 |  "metadata": {
228 |   "kernelspec": {
229 |    "display_name": "env_poi",
230 |    "language": "python",
231 |    "name": "env_poi"
232 |   },
233 |   "language_info": {
234 |    "codemirror_mode": {
235 |     "name": "ipython",
236 |     "version": 3
237 |    },
238 |    "file_extension": ".py",
239 |    "mimetype": "text/x-python",
240 |    "name": "python",
241 |    "nbconvert_exporter": "python",
242 |    "pygments_lexer": "ipython3",
243 |    "version": "3.8.13"
244 |   }
245 |  },
246 |  "nbformat": 4,
247 |  "nbformat_minor": 4
248 | }
249 | 


--------------------------------------------------------------------------------
/pandas/df_apply.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "1d3e4889-a4a0-4167-82cc-086698ca9a4e",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "##### ❇️ Pandas 🐼: df.apply(func, axis)\n",
  9 |     "Objects passed to the function are Series objects whose index is either <br> the DataFrame’s index (axis=0) or the DataFrame’s columns (axis=1)."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "id": "b034ae9d-d565-4ca1-bfb9-67079b76786c",
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "import pandas as pd\n",
 20 |     "import numpy as np"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 2,
 26 |    "id": "e69b9a5c-a4b5-45c8-a3b5-f04d73449df5",
 27 |    "metadata": {},
 28 |    "outputs": [
 29 |     {
 30 |      "data": {
 31 |       "text/html": [
 32 |        "<div>\n",
 33 |        "<style scoped>\n",
 34 |        "    .dataframe tbody tr th:only-of-type {\n",
 35 |        "        vertical-align: middle;\n",
 36 |        "    }\n",
 37 |        "\n",
 38 |        "    .dataframe tbody tr th {\n",
 39 |        "        vertical-align: top;\n",
 40 |        "    }\n",
 41 |        "\n",
 42 |        "    .dataframe thead th {\n",
 43 |        "        text-align: right;\n",
 44 |        "    }\n",
 45 |        "</style>\n",
 46 |        "<table border=\"1\" class=\"dataframe\">\n",
 47 |        "  <thead>\n",
 48 |        "    <tr style=\"text-align: right;\">\n",
 49 |        "      <th></th>\n",
 50 |        "      <th>A</th>\n",
 51 |        "      <th>B</th>\n",
 52 |        "    </tr>\n",
 53 |        "  </thead>\n",
 54 |        "  <tbody>\n",
 55 |        "    <tr>\n",
 56 |        "      <th>0</th>\n",
 57 |        "      <td>1</td>\n",
 58 |        "      <td>4</td>\n",
 59 |        "    </tr>\n",
 60 |        "    <tr>\n",
 61 |        "      <th>1</th>\n",
 62 |        "      <td>2</td>\n",
 63 |        "      <td>5</td>\n",
 64 |        "    </tr>\n",
 65 |        "  </tbody>\n",
 66 |        "</table>\n",
 67 |        "</div>"
 68 |       ],
 69 |       "text/plain": [
 70 |        "   A  B\n",
 71 |        "0  1  4\n",
 72 |        "1  2  5"
 73 |       ]
 74 |      },
 75 |      "execution_count": 2,
 76 |      "metadata": {},
 77 |      "output_type": "execute_result"
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "df = pd.DataFrame({'A': [1, 2], 'B': [4, 5]})\n",
 82 |     "df"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 3,
 88 |    "id": "fee22636-3ab1-404d-99b0-1041f83d2f9c",
 89 |    "metadata": {},
 90 |    "outputs": [
 91 |     {
 92 |      "data": {
 93 |       "text/plain": [
 94 |        "A    3\n",
 95 |        "B    9\n",
 96 |        "dtype: int64"
 97 |       ]
 98 |      },
 99 |      "execution_count": 3,
100 |      "metadata": {},
101 |      "output_type": "execute_result"
102 |     }
103 |    ],
104 |    "source": [
105 |     "# When axis = 0, every dataframe column is passed as a series to func\n",
106 |     "df.apply(func = np.sum, axis=0)"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 4,
112 |    "id": "a18e0851-9f47-4436-aeef-4acf9d07574c",
113 |    "metadata": {},
114 |    "outputs": [
115 |     {
116 |      "data": {
117 |       "text/plain": [
118 |        "0    5\n",
119 |        "1    7\n",
120 |        "dtype: int64"
121 |       ]
122 |      },
123 |      "execution_count": 4,
124 |      "metadata": {},
125 |      "output_type": "execute_result"
126 |     }
127 |    ],
128 |    "source": [
129 |     "# When axis = 1, every dataframe row is passed as a series to func\n",
130 |     "df.apply(func = np.sum, axis=1)"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 5,
136 |    "id": "4fb4dacb-5351-4095-9715-4440913de146",
137 |    "metadata": {},
138 |    "outputs": [
139 |     {
140 |      "data": {
141 |       "text/html": [
142 |        "<div>\n",
143 |        "<style scoped>\n",
144 |        "    .dataframe tbody tr th:only-of-type {\n",
145 |        "        vertical-align: middle;\n",
146 |        "    }\n",
147 |        "\n",
148 |        "    .dataframe tbody tr th {\n",
149 |        "        vertical-align: top;\n",
150 |        "    }\n",
151 |        "\n",
152 |        "    .dataframe thead th {\n",
153 |        "        text-align: right;\n",
154 |        "    }\n",
155 |        "</style>\n",
156 |        "<table border=\"1\" class=\"dataframe\">\n",
157 |        "  <thead>\n",
158 |        "    <tr style=\"text-align: right;\">\n",
159 |        "      <th></th>\n",
160 |        "      <th>A</th>\n",
161 |        "      <th>B</th>\n",
162 |        "      <th>A_plus_B</th>\n",
163 |        "    </tr>\n",
164 |        "  </thead>\n",
165 |        "  <tbody>\n",
166 |        "    <tr>\n",
167 |        "      <th>0</th>\n",
168 |        "      <td>1</td>\n",
169 |        "      <td>4</td>\n",
170 |        "      <td>5</td>\n",
171 |        "    </tr>\n",
172 |        "    <tr>\n",
173 |        "      <th>1</th>\n",
174 |        "      <td>2</td>\n",
175 |        "      <td>5</td>\n",
176 |        "      <td>7</td>\n",
177 |        "    </tr>\n",
178 |        "  </tbody>\n",
179 |        "</table>\n",
180 |        "</div>"
181 |       ],
182 |       "text/plain": [
183 |        "   A  B  A_plus_B\n",
184 |        "0  1  4         5\n",
185 |        "1  2  5         7"
186 |       ]
187 |      },
188 |      "execution_count": 5,
189 |      "metadata": {},
190 |      "output_type": "execute_result"
191 |     }
192 |    ],
193 |    "source": [
194 |     "# using a custom lambda function; axis = 1\n",
195 |     "# Notice axis = 1, which means each row is passed as a series whose index \n",
 196 |     "# is the DataFrame's columns; that's why we can access values as row['A'], row['B'], etc.\n",
197 |     "A_plus_B = df.apply(func = lambda row: row['A'] + row['B'], axis=1)\n",
198 |     "df['A_plus_B'] = A_plus_B\n",
199 |     "df"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": 6,
205 |    "id": "ca087042-d8f4-4ea8-9eea-5c722804956d",
206 |    "metadata": {},
207 |    "outputs": [
208 |     {
209 |      "data": {
210 |       "text/plain": [
211 |        "A           1.5\n",
212 |        "B           4.5\n",
213 |        "A_plus_B    6.0\n",
214 |        "dtype: float64"
215 |       ]
216 |      },
217 |      "execution_count": 6,
218 |      "metadata": {},
219 |      "output_type": "execute_result"
220 |     }
221 |    ],
222 |    "source": [
223 |     "# calculating average of each column\n",
224 |     "# Notice axis = 0, which means each column is passed as a series whose index \n",
 225 |     "# is the DataFrame's index; that's why we can access values as col[0], col[1], etc.\n",
226 |     "df.apply(func = lambda col: (col[0] + col[1])/2, axis=0)"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "markdown",
231 |    "id": "27ea53dc-39e9-40b5-9567-e2796f466bab",
232 |    "metadata": {},
233 |    "source": [
234 |     "##### ❇️ Hope you enjoyed reading!! 📖 \n",
235 |     "##### ❇️ follow → @akshay_pachaar  "
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": null,
241 |    "id": "b87a73e5-a4e5-4d01-8df5-b1fcc888b1d4",
242 |    "metadata": {},
243 |    "outputs": [],
244 |    "source": []
245 |   }
246 |  ],
247 |  "metadata": {
248 |   "kernelspec": {
249 |    "display_name": "env_twitter",
250 |    "language": "python",
251 |    "name": "env_twitter"
252 |   },
253 |   "language_info": {
254 |    "codemirror_mode": {
255 |     "name": "ipython",
256 |     "version": 3
257 |    },
258 |    "file_extension": ".py",
259 |    "mimetype": "text/x-python",
260 |    "name": "python",
261 |    "nbconvert_exporter": "python",
262 |    "pygments_lexer": "ipython3",
263 |    "version": "3.10.5"
264 |   }
265 |  },
266 |  "nbformat": 4,
267 |  "nbformat_minor": 5
268 | }
269 | 


--------------------------------------------------------------------------------
/random/one_hot_encoding.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "##### ❇️ OneHotEncoding"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import pandas as pd\n",
 17 |     "from sklearn.preprocessing import OneHotEncoder"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [
 25 |     {
 26 |      "data": {
 27 |       "text/html": [
 28 |        "<div>\n",
 29 |        "<style scoped>\n",
 30 |        "    .dataframe tbody tr th:only-of-type {\n",
 31 |        "        vertical-align: middle;\n",
 32 |        "    }\n",
 33 |        "\n",
 34 |        "    .dataframe tbody tr th {\n",
 35 |        "        vertical-align: top;\n",
 36 |        "    }\n",
 37 |        "\n",
 38 |        "    .dataframe thead th {\n",
 39 |        "        text-align: right;\n",
 40 |        "    }\n",
 41 |        "</style>\n",
 42 |        "<table border=\"1\" class=\"dataframe\">\n",
 43 |        "  <thead>\n",
 44 |        "    <tr style=\"text-align: right;\">\n",
 45 |        "      <th></th>\n",
 46 |        "      <th>Species</th>\n",
 47 |        "      <th>Sex</th>\n",
 48 |        "    </tr>\n",
 49 |        "  </thead>\n",
 50 |        "  <tbody>\n",
 51 |        "    <tr>\n",
 52 |        "      <th>Penguin1</th>\n",
 53 |        "      <td>Adelie</td>\n",
 54 |        "      <td>Male</td>\n",
 55 |        "    </tr>\n",
 56 |        "    <tr>\n",
 57 |        "      <th>Penguin2</th>\n",
 58 |        "      <td>Chinstrap</td>\n",
 59 |        "      <td>Female</td>\n",
 60 |        "    </tr>\n",
 61 |        "    <tr>\n",
 62 |        "      <th>Penguin3</th>\n",
 63 |        "      <td>Gentoo</td>\n",
 64 |        "      <td>Male</td>\n",
 65 |        "    </tr>\n",
 66 |        "  </tbody>\n",
 67 |        "</table>\n",
 68 |        "</div>"
 69 |       ],
 70 |       "text/plain": [
 71 |        "            Species     Sex\n",
 72 |        "Penguin1     Adelie    Male\n",
 73 |        "Penguin2  Chinstrap  Female\n",
 74 |        "Penguin3     Gentoo    Male"
 75 |       ]
 76 |      },
 77 |      "execution_count": 2,
 78 |      "metadata": {},
 79 |      "output_type": "execute_result"
 80 |     }
 81 |    ],
 82 |    "source": [
 83 |     "df = pd.DataFrame({'Species': ['Adelie', 'Chinstrap', 'Gentoo'], \n",
 84 |     "                   'Sex': ['Male', 'Female', 'Male']},\n",
 85 |     "                 index=['Penguin1', 'Penguin2', 'Penguin3'])\n",
 86 |     "df"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 3,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "data": {
 96 |       "text/plain": [
 97 |        "array([[1., 0., 0., 0., 1.],\n",
 98 |        "       [0., 1., 0., 1., 0.],\n",
 99 |        "       [0., 0., 1., 0., 1.]])"
100 |       ]
101 |      },
102 |      "execution_count": 3,
103 |      "metadata": {},
104 |      "output_type": "execute_result"
105 |     }
106 |    ],
107 |    "source": [
108 |     "# Define OneHotEncoder & apply transformation over df\n",
109 |     "encoder = OneHotEncoder(sparse=False)\n",
110 |     "encoded = encoder.fit_transform(df)\n",
111 |     "encoded"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 4,
117 |    "metadata": {},
118 |    "outputs": [
119 |     {
120 |      "data": {
121 |       "text/plain": [
122 |        "array(['Species', 'Sex'], dtype=object)"
123 |       ]
124 |      },
125 |      "execution_count": 4,
126 |      "metadata": {},
127 |      "output_type": "execute_result"
128 |     }
129 |    ],
130 |    "source": [
131 |     "# input columns to encoder\n",
132 |     "encoder.feature_names_in_"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 5,
138 |    "metadata": {},
139 |    "outputs": [
140 |     {
141 |      "data": {
142 |       "text/plain": [
143 |        "array(['Species_Adelie', 'Species_Chinstrap', 'Species_Gentoo',\n",
144 |        "       'Sex_Female', 'Sex_Male'], dtype=object)"
145 |       ]
146 |      },
147 |      "execution_count": 5,
148 |      "metadata": {},
149 |      "output_type": "execute_result"
150 |     }
151 |    ],
152 |    "source": [
153 |     "# Output columns generated after OneHotEncoding\n",
154 |     "encoder.get_feature_names_out()"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 6,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "data": {
164 |       "text/html": [
165 |        "<div>\n",
166 |        "<style scoped>\n",
167 |        "    .dataframe tbody tr th:only-of-type {\n",
168 |        "        vertical-align: middle;\n",
169 |        "    }\n",
170 |        "\n",
171 |        "    .dataframe tbody tr th {\n",
172 |        "        vertical-align: top;\n",
173 |        "    }\n",
174 |        "\n",
175 |        "    .dataframe thead th {\n",
176 |        "        text-align: right;\n",
177 |        "    }\n",
178 |        "</style>\n",
179 |        "<table border=\"1\" class=\"dataframe\">\n",
180 |        "  <thead>\n",
181 |        "    <tr style=\"text-align: right;\">\n",
182 |        "      <th></th>\n",
183 |        "      <th>Species_Adelie</th>\n",
184 |        "      <th>Species_Chinstrap</th>\n",
185 |        "      <th>Species_Gentoo</th>\n",
186 |        "      <th>Sex_Female</th>\n",
187 |        "      <th>Sex_Male</th>\n",
188 |        "    </tr>\n",
189 |        "  </thead>\n",
190 |        "  <tbody>\n",
191 |        "    <tr>\n",
192 |        "      <th>Penguin1</th>\n",
193 |        "      <td>1.0</td>\n",
194 |        "      <td>0.0</td>\n",
195 |        "      <td>0.0</td>\n",
196 |        "      <td>0.0</td>\n",
197 |        "      <td>1.0</td>\n",
198 |        "    </tr>\n",
199 |        "    <tr>\n",
200 |        "      <th>Penguin2</th>\n",
201 |        "      <td>0.0</td>\n",
202 |        "      <td>1.0</td>\n",
203 |        "      <td>0.0</td>\n",
204 |        "      <td>1.0</td>\n",
205 |        "      <td>0.0</td>\n",
206 |        "    </tr>\n",
207 |        "    <tr>\n",
208 |        "      <th>Penguin3</th>\n",
209 |        "      <td>0.0</td>\n",
210 |        "      <td>0.0</td>\n",
211 |        "      <td>1.0</td>\n",
212 |        "      <td>0.0</td>\n",
213 |        "      <td>1.0</td>\n",
214 |        "    </tr>\n",
215 |        "  </tbody>\n",
216 |        "</table>\n",
217 |        "</div>"
218 |       ],
219 |       "text/plain": [
220 |        "          Species_Adelie  Species_Chinstrap  Species_Gentoo  Sex_Female  \\\n",
221 |        "Penguin1             1.0                0.0             0.0         0.0   \n",
222 |        "Penguin2             0.0                1.0             0.0         1.0   \n",
223 |        "Penguin3             0.0                0.0             1.0         0.0   \n",
224 |        "\n",
225 |        "          Sex_Male  \n",
226 |        "Penguin1       1.0  \n",
227 |        "Penguin2       0.0  \n",
228 |        "Penguin3       1.0  "
229 |       ]
230 |      },
231 |      "execution_count": 6,
232 |      "metadata": {},
233 |      "output_type": "execute_result"
234 |     }
235 |    ],
236 |    "source": [
 237 |     "# Create a new DataFrame with the categorical features one-hot encoded\n",
238 |     "df_encoded = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(), index=df.index)\n",
239 |     "df_encoded"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "markdown",
244 |    "metadata": {},
245 |    "source": [
246 |     "##### ❇️ Hope you enjoyed reading!! 📖 \n",
247 |     "##### ❇️ follow → @akshay_pachaar  "
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "code",
252 |    "execution_count": null,
253 |    "metadata": {},
254 |    "outputs": [],
255 |    "source": []
256 |   }
257 |  ],
258 |  "metadata": {
259 |   "kernelspec": {
260 |    "display_name": "env_mld",
261 |    "language": "python",
262 |    "name": "env_mld"
263 |   },
264 |   "language_info": {
265 |    "codemirror_mode": {
266 |     "name": "ipython",
267 |     "version": 3
268 |    },
269 |    "file_extension": ".py",
270 |    "mimetype": "text/x-python",
271 |    "name": "python",
272 |    "nbconvert_exporter": "python",
273 |    "pygments_lexer": "ipython3",
274 |    "version": "3.8.5"
275 |   }
276 |  },
277 |  "nbformat": 4,
278 |  "nbformat_minor": 4
279 | }
280 | 


--------------------------------------------------------------------------------
/random/tf_decision_forests.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "d81d63f4-a12f-4aa0-956e-a3f8702a1904",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import tensorflow_decision_forests as tfdf\n",
 11 |     "from sklearn.model_selection import train_test_split\n",
 12 |     "import pandas as pd"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "markdown",
 17 |    "id": "e6c8c1db-8fc9-497e-b5e1-875d8a43635e",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "##### ❇️ Setup"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 2,
 26 |    "id": "db35588c-df20-4b42-95de-77b7f733a503",
 27 |    "metadata": {},
 28 |    "outputs": [
 29 |     {
 30 |      "name": "stdout",
 31 |      "output_type": "stream",
 32 |      "text": [
 33 |       "Label classes: ['Adelie', 'Gentoo', 'Chinstrap']\n"
 34 |      ]
 35 |     },
 36 |     {
 37 |      "data": {
 38 |       "text/html": [
 39 |        "<div>\n",
 40 |        "<style scoped>\n",
 41 |        "    .dataframe tbody tr th:only-of-type {\n",
 42 |        "        vertical-align: middle;\n",
 43 |        "    }\n",
 44 |        "\n",
 45 |        "    .dataframe tbody tr th {\n",
 46 |        "        vertical-align: top;\n",
 47 |        "    }\n",
 48 |        "\n",
 49 |        "    .dataframe thead th {\n",
 50 |        "        text-align: right;\n",
 51 |        "    }\n",
 52 |        "</style>\n",
 53 |        "<table border=\"1\" class=\"dataframe\">\n",
 54 |        "  <thead>\n",
 55 |        "    <tr style=\"text-align: right;\">\n",
 56 |        "      <th></th>\n",
 57 |        "      <th>species</th>\n",
 58 |        "      <th>island</th>\n",
 59 |        "      <th>bill_length_mm</th>\n",
 60 |        "      <th>bill_depth_mm</th>\n",
 61 |        "      <th>flipper_length_mm</th>\n",
 62 |        "      <th>body_mass_g</th>\n",
 63 |        "      <th>sex</th>\n",
 64 |        "      <th>year</th>\n",
 65 |        "    </tr>\n",
 66 |        "  </thead>\n",
 67 |        "  <tbody>\n",
 68 |        "    <tr>\n",
 69 |        "      <th>0</th>\n",
 70 |        "      <td>0</td>\n",
 71 |        "      <td>Torgersen</td>\n",
 72 |        "      <td>39.1</td>\n",
 73 |        "      <td>18.7</td>\n",
 74 |        "      <td>181.0</td>\n",
 75 |        "      <td>3750.0</td>\n",
 76 |        "      <td>male</td>\n",
 77 |        "      <td>2007</td>\n",
 78 |        "    </tr>\n",
 79 |        "    <tr>\n",
 80 |        "      <th>1</th>\n",
 81 |        "      <td>0</td>\n",
 82 |        "      <td>Torgersen</td>\n",
 83 |        "      <td>39.5</td>\n",
 84 |        "      <td>17.4</td>\n",
 85 |        "      <td>186.0</td>\n",
 86 |        "      <td>3800.0</td>\n",
 87 |        "      <td>female</td>\n",
 88 |        "      <td>2007</td>\n",
 89 |        "    </tr>\n",
 90 |        "  </tbody>\n",
 91 |        "</table>\n",
 92 |        "</div>"
 93 |       ],
 94 |       "text/plain": [
 95 |        "   species     island  bill_length_mm  bill_depth_mm  flipper_length_mm  \\\n",
 96 |        "0        0  Torgersen            39.1           18.7              181.0   \n",
 97 |        "1        0  Torgersen            39.5           17.4              186.0   \n",
 98 |        "\n",
 99 |        "   body_mass_g     sex  year  \n",
100 |        "0       3750.0    male  2007  \n",
101 |        "1       3800.0  female  2007  "
102 |       ]
103 |      },
104 |      "execution_count": 2,
105 |      "metadata": {},
106 |      "output_type": "execute_result"
107 |     }
108 |    ],
109 |    "source": [
 110 |     "# Load and prepare data; our task is to predict the species of the penguin\n",
111 |     "dataset_df = pd.read_csv('penguins.csv')\n",
112 |     "label = \"species\"\n",
113 |     "classes = dataset_df[label].unique().tolist()\n",
114 |     "print(f\"Label classes: {classes}\")\n",
115 |     "dataset_df[label] = dataset_df[label].map(classes.index)\n",
116 |     "dataset_df.head(2)"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 3,
122 |    "id": "6cb0f048-989f-479b-8da4-7a9b4f3c87b8",
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "# Split data into train/test\n",
127 |     "train_ds_pd, test_ds_pd = train_test_split(dataset_df, test_size=0.3)"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 6,
133 |    "id": "d280fe66-5228-4564-b1a4-082ee9bbfae8",
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "# Converting pandas dataframes to tensorflow datasets\n",
138 |     "train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_ds_pd, label=label)\n",
139 |     "test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(test_ds_pd, label=label)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "markdown",
144 |    "id": "e11b12bb-c619-4136-aacd-5a632fa5493d",
145 |    "metadata": {},
146 |    "source": [
147 |     "##### ❇️ Training"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 7,
153 |    "id": "2472e857-ef42-4adc-970c-f57cb397fc59",
154 |    "metadata": {},
155 |    "outputs": [],
156 |    "source": [
157 |     "# Load and train model\n",
158 |     "model = tfdf.keras.RandomForestModel()\n",
159 |     "model.fit(x=train_ds)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "id": "c2be95d1-17ba-4cd6-a3e2-58aeef052dc0",
165 |    "metadata": {},
166 |    "source": [
167 |     "##### ❇️ Evaluation"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 8,
173 |    "id": "39765bdf-b0af-453a-82f2-c8632ae31455",
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "1/1 [==============================] - 0s 339ms/step - loss: 0.0000e+00 - accuracy: 0.9615\n",
181 |       "loss: 0.0000\n",
182 |       "accuracy: 0.9615\n"
183 |      ]
184 |     }
185 |    ],
186 |    "source": [
187 |     "model.compile(metrics=[\"accuracy\"])\n",
188 |     "evaluation = model.evaluate(test_ds, return_dict=True)\n",
189 |     "for name, value in evaluation.items():\n",
190 |     "    print(f\"{name}: {value:.4f}\")"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "id": "13e9eb33-2782-4198-a3e1-ca0a3d3a6adc",
196 |    "metadata": {},
197 |    "source": [
198 |     "##### ❇️ Save model; ready to be served using tf-serving"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 13,
204 |    "id": "e22ac43b-4925-4c5f-9618-cd1f1f0f327a",
205 |    "metadata": {},
206 |    "outputs": [],
207 |    "source": [
208 |     "model.save(\"/path_to_save_model_directory\")"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "markdown",
213 |    "id": "2b1d3d79-9a8b-48a5-82b2-6ad40056e09f",
214 |    "metadata": {},
215 |    "source": [
216 |     "##### ❇️ Hope you enjoyed reading!! 📖 \n",
217 |     "##### ❇️ follow → @akshay_pachaar  "
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "code",
222 |    "execution_count": null,
223 |    "id": "2511c8e5-391d-4ab4-be8d-3892093e133b",
224 |    "metadata": {},
225 |    "outputs": [],
226 |    "source": []
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "id": "5f4e802a-99e1-499c-9063-b25f2c6fdb68",
232 |    "metadata": {},
233 |    "outputs": [],
234 |    "source": []
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": null,
239 |    "id": "15e91cef-d4e9-40af-b61b-814b3a7f5c0f",
240 |    "metadata": {},
241 |    "outputs": [],
242 |    "source": []
243 |   },
244 |   {
245 |    "cell_type": "code",
246 |    "execution_count": null,
247 |    "id": "1e2a41e7-594d-4d54-9a6f-47ead8adf871",
248 |    "metadata": {},
249 |    "outputs": [],
250 |    "source": []
251 |   },
252 |   {
253 |    "cell_type": "code",
254 |    "execution_count": 5,
255 |    "id": "1e815d2a-ae92-4e91-9856-d0a0afcb3508",
256 |    "metadata": {},
257 |    "outputs": [],
258 |    "source": [
259 |     "import warnings\n",
260 |     "warnings.filterwarnings('ignore')"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": null,
266 |    "id": "78456c13-b03e-497e-9e3e-1b7b48565cdc",
267 |    "metadata": {},
268 |    "outputs": [],
269 |    "source": []
270 |   }
271 |  ],
272 |  "metadata": {
273 |   "kernelspec": {
274 |    "display_name": "Python 3",
275 |    "language": "python",
276 |    "name": "python3"
277 |   },
278 |   "language_info": {
279 |    "codemirror_mode": {
280 |     "name": "ipython",
281 |     "version": 3
282 |    },
283 |    "file_extension": ".py",
284 |    "mimetype": "text/x-python",
285 |    "name": "python",
286 |    "nbconvert_exporter": "python",
287 |    "pygments_lexer": "ipython3",
288 |    "version": "3.8.12"
289 |   }
290 |  },
291 |  "nbformat": 4,
292 |  "nbformat_minor": 5
293 | }
294 | 


--------------------------------------------------------------------------------
/PyTorch/tensors.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### ❇️ Tensors "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
  14 |     "Tensors are data structures similar to arrays and matrices which are used to encode inputs, outputs and parameters of models <br>\n",
 15 |     "Tensors are similar to NumPy’s ndarrays, except that tensors can run on GPUs or other hardware accelerators"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 1,
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import torch\n",
 25 |     "import numpy as np"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "#### Initializing a Tensor"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 2,
 38 |    "metadata": {},
 39 |    "outputs": [],
 40 |    "source": [
 41 |     "# Directly from data\n",
 42 |     "data = [[1, 2],[3, 4]]\n",
 43 |     "x_data = torch.tensor(data)"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 3,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "# From a NumPy array\n",
 53 |     "np_array = np.array(data)\n",
 54 |     "x_np = torch.from_numpy(np_array)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 4,
 60 |    "metadata": {},
 61 |    "outputs": [
 62 |     {
 63 |      "name": "stdout",
 64 |      "output_type": "stream",
 65 |      "text": [
 66 |       "Random Tensor: \n",
 67 |       " tensor([[0.2710, 0.9181],\n",
 68 |       "        [0.0190, 0.6447]]) \n",
 69 |       "\n",
 70 |       "Ones Tensor: \n",
 71 |       " tensor([[1., 1.],\n",
 72 |       "        [1., 1.]]) \n",
 73 |       "\n",
 74 |       "Zeros Tensor: \n",
 75 |       " tensor([[0., 0.],\n",
 76 |       "        [0., 0.]])\n"
 77 |      ]
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "# With random or constant values:\n",
 82 |     "shape=(2, 2)\n",
 83 |     "rand_tensor = torch.rand(shape)\n",
 84 |     "ones_tensor = torch.ones(shape)\n",
 85 |     "zeros_tensor = torch.zeros(shape)\n",
 86 |     "\n",
 87 |     "print(f\"Random Tensor: \\n {rand_tensor} \\n\")\n",
 88 |     "print(f\"Ones Tensor: \\n {ones_tensor} \\n\")\n",
 89 |     "print(f\"Zeros Tensor: \\n {zeros_tensor}\")"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "metadata": {},
 95 |    "source": [
 96 |     "#### Moving tensors to GPU"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
 103 |     "By default, tensors are created on the CPU. We need to explicitly move tensors to the GPU using the .to method."
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 5,
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": [
112 |     "# We move our tensor to the GPU if available\n",
113 |     "tensor = torch.rand(2, 3)\n",
114 |     "if torch.cuda.is_available():\n",
115 |     "    tensor = tensor.to(\"cuda\")"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "metadata": {},
121 |    "source": [
122 |     "#### Attributes of a Tensor"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 6,
128 |    "metadata": {},
129 |    "outputs": [
130 |     {
131 |      "name": "stdout",
132 |      "output_type": "stream",
133 |      "text": [
134 |       "Shape of tensor: torch.Size([2, 3])\n",
135 |       "Datatype of tensor: torch.float32\n",
136 |       "Tensor is stored on: cpu\n"
137 |      ]
138 |     }
139 |    ],
140 |    "source": [
141 |     "tensor = torch.rand(2, 3)\n",
142 |     "\n",
143 |     "print(f\"Shape of tensor: {tensor.shape}\")\n",
144 |     "print(f\"Datatype of tensor: {tensor.dtype}\")\n",
145 |     "print(f\"Tensor is stored on: {tensor.device}\")"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "#### Operations on Tensors\n"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 7,
158 |    "metadata": {},
159 |    "outputs": [
160 |     {
161 |      "name": "stdout",
162 |      "output_type": "stream",
163 |      "text": [
164 |       "First row: tensor([1., 1., 1., 1.])\n",
165 |       "First column: tensor([1., 1., 1., 1.])\n",
166 |       "Last column: tensor([1., 1., 1., 1.])\n",
167 |       "tensor([[1., 0., 1., 1.],\n",
168 |       "        [1., 0., 1., 1.],\n",
169 |       "        [1., 0., 1., 1.],\n",
170 |       "        [1., 0., 1., 1.]])\n"
171 |      ]
172 |     }
173 |    ],
174 |    "source": [
175 |     "# Standard indexing and slicing just like NumPy\n",
176 |     "tensor = torch.ones(4, 4)\n",
177 |     "print(f\"First row: {tensor[0]}\")\n",
178 |     "print(f\"First column: {tensor[:, 0]}\")\n",
179 |     "print(f\"Last column: {tensor[..., -1]}\")\n",
180 |     "tensor[:,1] = 0\n",
181 |     "print(tensor)"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": 8,
187 |    "metadata": {},
188 |    "outputs": [],
189 |    "source": [
190 |     "# Arithmetic Ops\n",
191 |     "# This computes the matrix multiplication between two tensors.\n",
192 |     "y1 = tensor.matmul(tensor.T)\n",
193 |     "\n",
194 |     "# This computes the element-wise product.\n",
195 |     "z1 = tensor * tensor"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 9,
201 |    "metadata": {},
202 |    "outputs": [
203 |     {
204 |      "name": "stdout",
205 |      "output_type": "stream",
206 |      "text": [
207 |       "12.0 <class 'float'>\n"
208 |      ]
209 |     }
210 |    ],
211 |    "source": [
212 |     "# Converting single value tensors to a Python \n",
213 |     "# Numerical value\n",
214 |     "agg = tensor.sum()\n",
215 |     "agg_item = agg.item()\n",
216 |     "print(agg_item, type(agg_item))"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": 10,
222 |    "metadata": {},
223 |    "outputs": [
224 |     {
225 |      "name": "stdout",
226 |      "output_type": "stream",
227 |      "text": [
228 |       "tensor([[1., 0., 1., 1.],\n",
229 |       "        [1., 0., 1., 1.],\n",
230 |       "        [1., 0., 1., 1.],\n",
231 |       "        [1., 0., 1., 1.]]) \n",
232 |       "\n",
233 |       "tensor([[6., 5., 6., 6.],\n",
234 |       "        [6., 5., 6., 6.],\n",
235 |       "        [6., 5., 6., 6.],\n",
236 |       "        [6., 5., 6., 6.]])\n"
237 |      ]
238 |     }
239 |    ],
240 |    "source": [
241 |     "# Inplace Ops\n",
242 |     "print(f\"{tensor} \\n\")\n",
243 |     "tensor.add_(5)\n",
244 |     "print(tensor)"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "markdown",
249 |    "metadata": {},
250 |    "source": [
251 |     "#### Bridge with NumPy"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "markdown",
256 |    "metadata": {},
257 |    "source": [
258 |     "Tensors on the CPU and NumPy arrays can share their underlying memory locations, and changing one will change the other.\n",
259 |     "\n"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": 11,
265 |    "metadata": {},
266 |    "outputs": [
267 |     {
268 |      "name": "stdout",
269 |      "output_type": "stream",
270 |      "text": [
271 |       "tensor: tensor([1., 1., 1., 1., 1.])\n",
272 |       "numpy array: [1. 1. 1. 1. 1.]\n"
273 |      ]
274 |     }
275 |    ],
276 |    "source": [
277 |     "t = torch.ones(5)\n",
278 |     "print(f\"tensor: {t}\")\n",
279 |     "n = t.numpy()\n",
280 |     "print(f\"numpy array: {n}\")"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "code",
285 |    "execution_count": 12,
286 |    "metadata": {},
287 |    "outputs": [
288 |     {
289 |      "name": "stdout",
290 |      "output_type": "stream",
291 |      "text": [
292 |       "tensor: tensor([2., 2., 2., 2., 2.])\n",
293 |       "numpy array: [2. 2. 2. 2. 2.]\n"
294 |      ]
295 |     }
296 |    ],
297 |    "source": [
298 |     "t.add_(1)\n",
299 |     "print(f\"tensor: {t}\")\n",
300 |     "print(f\"numpy array: {n}\")"
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "code",
305 |    "execution_count": null,
306 |    "metadata": {},
307 |    "outputs": [],
308 |    "source": []
309 |   }
310 |  ],
311 |  "metadata": {
312 |   "kernelspec": {
313 |    "display_name": "env_torch",
314 |    "language": "python",
315 |    "name": "env_torch"
316 |   },
317 |   "language_info": {
318 |    "codemirror_mode": {
319 |     "name": "ipython",
320 |     "version": 3
321 |    },
322 |    "file_extension": ".py",
323 |    "mimetype": "text/x-python",
324 |    "name": "python",
325 |    "nbconvert_exporter": "python",
326 |    "pygments_lexer": "ipython3",
327 |    "version": "3.6.13"
328 |   }
329 |  },
330 |  "nbformat": 4,
331 |  "nbformat_minor": 4
332 | }
333 | 


--------------------------------------------------------------------------------
/random/python_for_sql.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "c168d69e-4ec2-4a56-b203-8da87e406e7b",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "### ⚙️ SQL Queries using `Python` 🐍 \n",
  9 |     "#### Loading the results in a Pandas `DataFrame` 🐼 "
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "id": "a43dfc56-ad0b-43c9-8002-05f311f73a5b",
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "# !pip install psycopg2"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "id": "7561660d-9f24-4395-9444-16e20ad8c571",
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "import psycopg2\n",
 30 |     "import pandas as pd"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 3,
 36 |    "id": "fb3f2af1-c019-4c33-b01e-ef14e5feb696",
 37 |    "metadata": {},
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "# Setting up connection; database is publicly available\n",
 41 |     "conn = psycopg2.connect(host='hh-pgsql-public.ebi.ac.uk', \n",
 42 |     "                        dbname='pfmegrnargs',\n",
 43 |     "                        user='reader', \n",
 44 |     "                        password='NWDMCE5xdipIjRrp')"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 4,
 50 |    "id": "66c0be6a-032a-49f9-994d-cdb1178451c2",
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "# SQL Query\n",
 55 |     "query = \"SELECT * FROM rnc_database\""
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 5,
 61 |    "id": "8ab80cc8-8c60-4921-a653-f8294ae6ef2b",
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "# Executing the query\n",
 66 |     "cursor = conn.cursor()\n",
 67 |     "cursor.execute(query)"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 6,
 73 |    "id": "aea4b0cf-c74c-468e-9b5a-ff6584367281",
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "# Loading the results in a pandas DataFrame\n",
 78 |     "df = pd.DataFrame(cursor.fetchall(), \n",
 79 |     "                  columns=[desc[0] for desc in cursor.description])"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 7,
 85 |    "id": "7f897634-a95a-4e83-ace1-61abf3da922c",
 86 |    "metadata": {},
 87 |    "outputs": [
 88 |     {
 89 |      "data": {
 90 |       "text/html": [
 91 |        "<div>\n",
 92 |        "<style scoped>\n",
 93 |        "    .dataframe tbody tr th:only-of-type {\n",
 94 |        "        vertical-align: middle;\n",
 95 |        "    }\n",
 96 |        "\n",
 97 |        "    .dataframe tbody tr th {\n",
 98 |        "        vertical-align: top;\n",
 99 |        "    }\n",
100 |        "\n",
101 |        "    .dataframe thead th {\n",
102 |        "        text-align: right;\n",
103 |        "    }\n",
104 |        "</style>\n",
105 |        "<table border=\"1\" class=\"dataframe\">\n",
106 |        "  <thead>\n",
107 |        "    <tr style=\"text-align: right;\">\n",
108 |        "      <th></th>\n",
109 |        "      <th>id</th>\n",
110 |        "      <th>timestamp</th>\n",
111 |        "      <th>userstamp</th>\n",
112 |        "      <th>descr</th>\n",
113 |        "      <th>current_release</th>\n",
114 |        "      <th>full_descr</th>\n",
115 |        "      <th>alive</th>\n",
116 |        "      <th>for_release</th>\n",
117 |        "      <th>display_name</th>\n",
118 |        "      <th>project_id</th>\n",
119 |        "      <th>avg_length</th>\n",
120 |        "      <th>min_length</th>\n",
121 |        "      <th>max_length</th>\n",
122 |        "      <th>num_sequences</th>\n",
123 |        "      <th>num_organisms</th>\n",
124 |        "    </tr>\n",
125 |        "  </thead>\n",
126 |        "  <tbody>\n",
127 |        "    <tr>\n",
128 |        "      <th>0</th>\n",
129 |        "      <td>51</td>\n",
130 |        "      <td>2022-09-22 15:14:59.500143</td>\n",
131 |        "      <td>RNACEN</td>\n",
132 |        "      <td>EXPRESSIONATLAS</td>\n",
133 |        "      <td>636</td>\n",
134 |        "      <td>Expression Atlas</td>\n",
135 |        "      <td>Y</td>\n",
136 |        "      <td>None</td>\n",
137 |        "      <td>Expression Atlas</td>\n",
138 |        "      <td>None</td>\n",
139 |        "      <td>953.0</td>\n",
140 |        "      <td>72.0</td>\n",
141 |        "      <td>32709.0</td>\n",
142 |        "      <td>11030</td>\n",
143 |        "      <td>3</td>\n",
144 |        "    </tr>\n",
145 |        "    <tr>\n",
146 |        "      <th>1</th>\n",
147 |        "      <td>5</td>\n",
148 |        "      <td>2017-05-17 00:00:00.000000</td>\n",
149 |        "      <td>RNACEN</td>\n",
150 |        "      <td>VEGA</td>\n",
151 |        "      <td>98</td>\n",
152 |        "      <td>VEGA</td>\n",
153 |        "      <td>N</td>\n",
154 |        "      <td>None</td>\n",
155 |        "      <td>VEGA</td>\n",
156 |        "      <td>PRJEB4568</td>\n",
157 |        "      <td>NaN</td>\n",
158 |        "      <td>NaN</td>\n",
159 |        "      <td>NaN</td>\n",
160 |        "      <td>0</td>\n",
161 |        "      <td>0</td>\n",
162 |        "    </tr>\n",
163 |        "    <tr>\n",
164 |        "      <th>2</th>\n",
165 |        "      <td>24</td>\n",
166 |        "      <td>2017-05-02 00:00:00.000000</td>\n",
167 |        "      <td>RNACEN</td>\n",
168 |        "      <td>FLYBASE</td>\n",
169 |        "      <td>614</td>\n",
170 |        "      <td>FlyBase</td>\n",
171 |        "      <td>Y</td>\n",
172 |        "      <td>None</td>\n",
173 |        "      <td>FlyBase</td>\n",
174 |        "      <td>PRJ_FLY</td>\n",
175 |        "      <td>765.0</td>\n",
176 |        "      <td>18.0</td>\n",
177 |        "      <td>21216.0</td>\n",
178 |        "      <td>4210</td>\n",
179 |        "      <td>1</td>\n",
180 |        "    </tr>\n",
181 |        "    <tr>\n",
182 |        "      <th>3</th>\n",
183 |        "      <td>50</td>\n",
184 |        "      <td>2022-08-16 15:52:33.990145</td>\n",
185 |        "      <td>RNACEN</td>\n",
186 |        "      <td>PLNCDB</td>\n",
187 |        "      <td>606</td>\n",
188 |        "      <td>PLncDB</td>\n",
189 |        "      <td>Y</td>\n",
190 |        "      <td>None</td>\n",
191 |        "      <td>PLncDB</td>\n",
192 |        "      <td>None</td>\n",
193 |        "      <td>6659.0</td>\n",
194 |        "      <td>199.0</td>\n",
195 |        "      <td>985945.0</td>\n",
196 |        "      <td>936926</td>\n",
197 |        "      <td>80</td>\n",
198 |        "    </tr>\n",
199 |        "  </tbody>\n",
200 |        "</table>\n",
201 |        "</div>"
202 |       ],
203 |       "text/plain": [
204 |        "   id                  timestamp userstamp            descr  current_release  \\\n",
205 |        "0  51 2022-09-22 15:14:59.500143    RNACEN  EXPRESSIONATLAS              636   \n",
206 |        "1   5 2017-05-17 00:00:00.000000    RNACEN             VEGA               98   \n",
207 |        "2  24 2017-05-02 00:00:00.000000    RNACEN          FLYBASE              614   \n",
208 |        "3  50 2022-08-16 15:52:33.990145    RNACEN           PLNCDB              606   \n",
209 |        "\n",
210 |        "         full_descr alive for_release      display_name project_id  \\\n",
211 |        "0  Expression Atlas     Y        None  Expression Atlas       None   \n",
212 |        "1              VEGA     N        None              VEGA  PRJEB4568   \n",
213 |        "2           FlyBase     Y        None           FlyBase    PRJ_FLY   \n",
214 |        "3            PLncDB     Y        None            PLncDB       None   \n",
215 |        "\n",
216 |        "   avg_length  min_length  max_length  num_sequences  num_organisms  \n",
217 |        "0       953.0        72.0     32709.0          11030              3  \n",
218 |        "1         NaN         NaN         NaN              0              0  \n",
219 |        "2       765.0        18.0     21216.0           4210              1  \n",
220 |        "3      6659.0       199.0    985945.0         936926             80  "
221 |       ]
222 |      },
223 |      "execution_count": 7,
224 |      "metadata": {},
225 |      "output_type": "execute_result"
226 |     }
227 |    ],
228 |    "source": [
229 |     "df.head(4)"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": 8,
235 |    "id": "b31d46e9-7790-482d-b435-8c210c8316e2",
236 |    "metadata": {},
237 |    "outputs": [],
238 |    "source": [
239 |     "# Close connection\n",
240 |     "conn.close()"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": null,
246 |    "id": "977e0793-ebae-4d37-8ff1-5180f44b5a8c",
247 |    "metadata": {},
248 |    "outputs": [],
249 |    "source": []
250 |   }
251 |  ],
252 |  "metadata": {
253 |   "kernelspec": {
254 |    "display_name": "env_poi",
255 |    "language": "python",
256 |    "name": "env_poi"
257 |   },
258 |   "language_info": {
259 |    "codemirror_mode": {
260 |     "name": "ipython",
261 |     "version": 3
262 |    },
263 |    "file_extension": ".py",
264 |    "mimetype": "text/x-python",
265 |    "name": "python",
266 |    "nbconvert_exporter": "python",
267 |    "pygments_lexer": "ipython3",
268 |    "version": "3.8.13"
269 |   }
270 |  },
271 |  "nbformat": 4,
272 |  "nbformat_minor": 5
273 | }
274 | 


--------------------------------------------------------------------------------
/numpy/numpy_indexing_slicing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "#### ❇️ Basic Indexing"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [
 24 |     {
 25 |      "name": "stdout",
 26 |      "output_type": "stream",
 27 |      "text": [
 28 |       "0\n",
 29 |       "7\n"
 30 |      ]
 31 |     }
 32 |    ],
 33 |    "source": [
 34 |     "# A regular 1D array\n",
 35 |     "x = np.arange(10)\n",
 36 |     "print(x[0])\n",
 37 |     "print(x[-3])"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 3,
 43 |    "metadata": {},
 44 |    "outputs": [
 45 |     {
 46 |      "name": "stdout",
 47 |      "output_type": "stream",
 48 |      "text": [
 49 |       "[[0 1 2 3 4]\n",
 50 |       " [5 6 7 8 9]]\n"
 51 |      ]
 52 |     }
 53 |    ],
 54 |    "source": [
 55 |     "# Let's reshape x and make it a 2D array\n",
 56 |     "x.shape = (2, 5)\n",
 57 |     "print(x)"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 4,
 63 |    "metadata": {},
 64 |    "outputs": [
 65 |     {
 66 |      "name": "stdout",
 67 |      "output_type": "stream",
 68 |      "text": [
 69 |       "8\n",
 70 |       "9\n"
 71 |      ]
 72 |     }
 73 |    ],
 74 |    "source": [
 75 |     "# No need to separate each dimension’s index into its own set of square brackets.\n",
 76 |     "# check this out 👇\n",
 77 |     "print(x[1, 3])\n",
 78 |     "print(x[1, -1])"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 5,
 84 |    "metadata": {},
 85 |    "outputs": [
 86 |     {
 87 |      "data": {
 88 |       "text/plain": [
 89 |        "array([0, 1, 2, 3, 4])"
 90 |       ]
 91 |      },
 92 |      "execution_count": 5,
 93 |      "metadata": {},
 94 |      "output_type": "execute_result"
 95 |     }
 96 |    ],
 97 |    "source": [
 98 |     "# If fewer indices are passed than the array has dimensions,\n",
 99 |     "# a sub-dimensional array is obtained 👇 \n",
100 |     "x[0]"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "metadata": {},
106 |    "source": [
107 |     "#### ❇️ Slicing and striding"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 6,
113 |    "metadata": {},
114 |    "outputs": [
115 |     {
116 |      "data": {
117 |       "text/plain": [
118 |        "array([1, 3, 5])"
119 |       ]
120 |      },
121 |      "execution_count": 6,
122 |      "metadata": {},
123 |      "output_type": "execute_result"
124 |     }
125 |    ],
126 |    "source": [
127 |     "# The basic slice syntax is i:j:k where i is the starting index,\n",
128 |     "# j is the stopping index, and k is the step (k should be non-zero)\n",
129 |     "# Consider 👇 \n",
130 |     "x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
131 |     "x[1:7:2]"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 7,
137 |    "metadata": {},
138 |    "outputs": [
139 |     {
140 |      "name": "stdout",
141 |      "output_type": "stream",
142 |      "text": [
143 |       "[7 8 9]\n"
144 |      ]
145 |     }
146 |    ],
147 |    "source": [
148 |     "# Negative i and j are interpreted as n + i and n + j where n is the \n",
149 |     "# number of elements in the corresponding dimension.\n",
150 |     "print(x[-3:10]) # i = -3; j = 10; k = 1 (if not given k defaults to 1)"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 8,
156 |    "metadata": {},
157 |    "outputs": [
158 |     {
159 |      "name": "stdout",
160 |      "output_type": "stream",
161 |      "text": [
162 |       "[7 6 5 4]\n"
163 |      ]
164 |     }
165 |    ],
166 |    "source": [
167 |     "# Negative k makes stepping go towards smaller indices\n",
168 |     "print(x[-3:3:-1]) # i = -3; j = 3; k = -1 "
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 9,
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "[0 1 2 3 4]\n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "# If i is not given it defaults to 0 for k > 0 and n - 1 for k < 0 .\n",
186 |     "print(x[:5])"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "code",
191 |    "execution_count": 10,
192 |    "metadata": {},
193 |    "outputs": [
194 |     {
195 |      "name": "stdout",
196 |      "output_type": "stream",
197 |      "text": [
198 |       "[5 6 7 8 9]\n"
199 |      ]
200 |     }
201 |    ],
202 |    "source": [
203 |     "# If j is not given it defaults to n for k > 0 and -n-1 for k < 0 . \n",
204 |     "print(x[5:])"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": 11,
210 |    "metadata": {},
211 |    "outputs": [
212 |     {
213 |      "name": "stdout",
214 |      "output_type": "stream",
215 |      "text": [
216 |       "[9 8 7 6 5 4 3 2 1 0]\n"
217 |      ]
218 |     }
219 |    ],
220 |    "source": [
221 |     "# Let's reverse the array\n",
222 |     "# Since k < 0, an omitted i defaults to n - 1 = 9 and an omitted j to -n - 1 = -11 (an explicit start of 10 is clipped to 9)\n",
223 |     "print(x[::-1]) # ⬅️ is equivalent to x[10:-11:-1]; check next shell ⬇️ "
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 12,
229 |    "metadata": {},
230 |    "outputs": [
231 |     {
232 |      "data": {
233 |       "text/plain": [
234 |        "array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])"
235 |       ]
236 |      },
237 |      "execution_count": 12,
238 |      "metadata": {},
239 |      "output_type": "execute_result"
240 |     }
241 |    ],
242 |    "source": [
243 |     "x[10:-11:-1]"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "markdown",
248 |    "metadata": {},
249 |    "source": [
250 |     "#### ❇️ Integer array indexing"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": 13,
256 |    "metadata": {},
257 |    "outputs": [
258 |     {
259 |      "data": {
260 |       "text/plain": [
261 |        "array([10,  9,  8,  7,  6,  5,  4,  3,  2])"
262 |       ]
263 |      },
264 |      "execution_count": 13,
265 |      "metadata": {},
266 |      "output_type": "execute_result"
267 |     }
268 |    ],
269 |    "source": [
270 |     "x = np.arange(10, 1, -1)\n",
271 |     "x"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": 14,
277 |    "metadata": {},
278 |    "outputs": [
279 |     {
280 |      "data": {
281 |       "text/plain": [
282 |        "array([7, 7, 4, 2])"
283 |       ]
284 |      },
285 |      "execution_count": 14,
286 |      "metadata": {},
287 |      "output_type": "execute_result"
288 |     }
289 |    ],
290 |    "source": [
291 |     "# One can directly access the elements at indices\n",
292 |     "# specified by integer array; Check this out 👇 \n",
293 |     "x[np.array([3, 3, -3, 8])]"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "markdown",
298 |    "metadata": {},
299 |    "source": [
300 |     "#### ❇️ Boolean array Indexing"
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "code",
305 |    "execution_count": 15,
306 |    "metadata": {},
307 |    "outputs": [
308 |     {
309 |      "data": {
310 |       "text/plain": [
311 |        "array([False,  True,  True, False])"
312 |       ]
313 |      },
314 |      "execution_count": 15,
315 |      "metadata": {},
316 |      "output_type": "execute_result"
317 |     }
318 |    ],
319 |    "source": [
320 |     "# When a boolean array is used as an index, the elements at positions\n",
321 |     "# corresponding to True values in the boolean array are accessed from array x \n",
322 |     "x = np.array([1., -1., -2., 3])\n",
323 |     "\n",
324 |     "# a boolean array 👇 \n",
325 |     "x < 0 # ⬅️ True where elements in x < 0"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "code",
330 |    "execution_count": 16,
331 |    "metadata": {},
332 |    "outputs": [
333 |     {
334 |      "data": {
335 |       "text/plain": [
336 |        "array([-1., -2.])"
337 |       ]
338 |      },
339 |      "execution_count": 16,
340 |      "metadata": {},
341 |      "output_type": "execute_result"
342 |     }
343 |    ],
344 |    "source": [
345 |     "# accessing the elements based on a boolean array\n",
346 |     "x[x<0]"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "code",
351 |    "execution_count": 17,
352 |    "metadata": {},
353 |    "outputs": [
354 |     {
355 |      "data": {
356 |       "text/plain": [
357 |        "array([ 1., 19., 18.,  3.])"
358 |       ]
359 |      },
360 |      "execution_count": 17,
361 |      "metadata": {},
362 |      "output_type": "execute_result"
363 |     }
364 |    ],
365 |    "source": [
366 |     "# adding 20 to all elements < 0\n",
367 |     "x[x < 0] += 20\n",
368 |     "x"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "code",
373 |    "execution_count": null,
374 |    "metadata": {},
375 |    "outputs": [],
376 |    "source": []
377 |   }
378 |  ],
379 |  "metadata": {
380 |   "kernelspec": {
381 |    "display_name": "env_twitter",
382 |    "language": "python",
383 |    "name": "env_twitter"
384 |   },
385 |   "language_info": {
386 |    "codemirror_mode": {
387 |     "name": "ipython",
388 |     "version": 3
389 |    },
390 |    "file_extension": ".py",
391 |    "mimetype": "text/x-python",
392 |    "name": "python",
393 |    "nbconvert_exporter": "python",
394 |    "pygments_lexer": "ipython3",
395 |    "version": "3.10.5"
396 |   }
397 |  },
398 |  "nbformat": 4,
399 |  "nbformat_minor": 4
400 | }
401 | 


--------------------------------------------------------------------------------
/LLMs/openai_function_calling.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### The power of `OpenAI function calling` 🚀  "
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 10,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import json\n",
 18 |     "import openai\n",
 19 |     "import requests\n",
 20 |     "from ast import literal_eval\n",
 21 |     "from IPython.display import JSON"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 2,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "# Set environment variables\n",
 31 |     "os.environ['OPENAI_API_KEY'] = '...'\n",
 32 |     "os.environ['WEATHER_API_KEY'] = '...'"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 3,
 38 |    "metadata": {},
 39 |    "outputs": [],
 40 |    "source": [
 41 |     "open_ai_url = \"https://api.openai.com/v1/chat/completions\"\n",
 42 |     "model = \"gpt-3.5-turbo-0613\"\n",
 43 |     "user_message = \"What is the weather like in Delhi?\""
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 4,
 49 |    "metadata": {},
 50 |    "outputs": [
 51 |     {
 52 |      "data": {
 53 |       "application/json": {
 54 |        "choices": [
 55 |         {
 56 |          "finish_reason": "function_call",
 57 |          "index": 0,
 58 |          "message": {
 59 |           "content": null,
 60 |           "function_call": {
 61 |            "arguments": "{\n  \"location\": \"Delhi\"\n}",
 62 |            "name": "get_current_weather"
 63 |           },
 64 |           "role": "assistant"
 65 |          }
 66 |         }
 67 |        ],
 68 |        "created": 1687324647,
 69 |        "id": "chatcmpl-7TkVbj3AUg1PJDGQOAqE3MNQQ3AP3",
 70 |        "model": "gpt-3.5-turbo-0613",
 71 |        "object": "chat.completion",
 72 |        "usage": {
 73 |         "completion_tokens": 17,
 74 |         "prompt_tokens": 82,
 75 |         "total_tokens": 99
 76 |        }
 77 |       },
 78 |       "text/plain": [
 79 |        "<IPython.core.display.JSON object>"
 80 |       ]
 81 |      },
 82 |      "execution_count": 4,
 83 |      "metadata": {
 84 |       "application/json": {
 85 |        "expanded": false,
 86 |        "root": "root"
 87 |       }
 88 |      },
 89 |      "output_type": "execute_result"
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "# A natural language request to the OpenAI API, asking about the weather in Delhi\n",
 94 |     "\n",
 95 |     "headers = {\n",
 96 |     "    \"Content-Type\": \"application/json\",\n",
 97 |     "    \"Authorization\": f\"Bearer {os.getenv('OPENAI_API_KEY')}\",\n",
 98 |     "}\n",
 99 |     "\n",
100 |     "data = {\n",
101 |     "   \"model\": model,\n",
102 |     "   \"messages\":[\n",
103 |     "      {\n",
104 |     "         \"role\":\"user\",\n",
105 |     "         \"content\": user_message\n",
106 |     "      }\n",
107 |     "   ],\n",
108 |     "   \"functions\":[\n",
109 |     "      {\n",
110 |     "         \"name\":\"get_current_weather\",\n",
111 |     "         \"description\":\"Get the current weather in a given location\",\n",
112 |     "         \"parameters\":{\n",
113 |     "            \"type\":\"object\",\n",
114 |     "            \"properties\":{\n",
115 |     "               \"location\":{\n",
116 |     "                  \"type\":\"string\",\n",
117 |     "                  \"description\":\"The city and state, e.g. San Francisco, CA\"\n",
118 |     "               },\n",
119 |     "               \"unit\":{\n",
120 |     "                  \"type\":\"string\",\n",
121 |     "                  \"enum\":[\n",
122 |     "                     \"celsius\",\n",
123 |     "                     \"fahrenheit\"\n",
124 |     "                  ]\n",
125 |     "               }\n",
126 |     "            },\n",
127 |     "            \"required\":[\n",
128 |     "               \"location\"\n",
129 |     "            ]\n",
130 |     "         }\n",
131 |     "      }\n",
132 |     "   ]\n",
133 |     "}\n",
134 |     "\n",
135 |     "response = requests.post(open_ai_url, headers=headers, data=json.dumps(data)).json()\n",
136 |     "\n",
137 |     "JSON(response)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 12,
143 |    "metadata": {},
144 |    "outputs": [
145 |     {
146 |      "data": {
147 |       "application/json": {
148 |        "base": "stations",
149 |        "clouds": {
150 |         "all": 75
151 |        },
152 |        "cod": 200,
153 |        "coord": {
154 |         "lat": 28.6667,
155 |         "lon": 77.2167
156 |        },
157 |        "dt": 1687324475,
158 |        "id": 1273294,
159 |        "main": {
160 |         "feels_like": 308.94,
161 |         "humidity": 84,
162 |         "pressure": 1004,
163 |         "temp": 302.2,
164 |         "temp_max": 302.2,
165 |         "temp_min": 302.2
166 |        },
167 |        "name": "Delhi",
168 |        "sys": {
169 |         "country": "IN",
170 |         "id": 9165,
171 |         "sunrise": 1687305220,
172 |         "sunset": 1687355512,
173 |         "type": 1
174 |        },
175 |        "timezone": 19800,
176 |        "visibility": 3500,
177 |        "weather": [
178 |         {
179 |          "description": "mist",
180 |          "icon": "50d",
181 |          "id": 701,
182 |          "main": "Mist"
183 |         }
184 |        ],
185 |        "wind": {
186 |         "deg": 220,
187 |         "speed": 1.54
188 |        }
189 |       },
190 |       "text/plain": [
191 |        "<IPython.core.display.JSON object>"
192 |       ]
193 |      },
194 |      "execution_count": 12,
195 |      "metadata": {
196 |       "application/json": {
197 |        "expanded": false,
198 |        "root": "root"
199 |       }
200 |      },
201 |      "output_type": "execute_result"
202 |     }
203 |    ],
204 |    "source": [
205 |     "# Based on the response above we extract the necessary information required to call Weather API\n",
206 |     "arguments = response['choices'][0]['message']['function_call']['arguments']\n",
207 |     "city = literal_eval(arguments)['location']\n",
208 |     "\n",
209 |     "base_url = f\"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={os.getenv('WEATHER_API_KEY')}\"\n",
210 |     "\n",
211 |     "weather_api_response = requests.get(base_url)\n",
212 |     "weather_api_response = weather_api_response.json()\n",
213 |     "\n",
214 |     "JSON(weather_api_response)"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 15,
220 |    "metadata": {},
221 |    "outputs": [
222 |     {
223 |      "name": "stdout",
224 |      "output_type": "stream",
225 |      "text": [
226 |       "{'id': 'chatcmpl-7TkYby42BwPDCBAnhKqpd0oAaZLPt', 'object': 'chat.completion', 'created': 1687324833, 'model': 'gpt-3.5-turbo-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'The current weather in Delhi is misty. The temperature is 302.2 Kelvin (approximately 29.05 degrees Celsius) with a humidity of 84%. The visibility is 3500 meters.'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 330, 'completion_tokens': 42, 'total_tokens': 372}}\n"
227 |      ]
228 |     }
229 |    ],
230 |    "source": [
231 |     "# Converting the weather API response back to Natural Language\n",
232 |     "\n",
233 |     "headers = {\n",
234 |     "    \"Content-Type\": \"application/json\",\n",
235 |     "    \"Authorization\": f\"Bearer {os.getenv('OPENAI_API_KEY')}\",\n",
236 |     "}\n",
237 |     "\n",
238 |     "data = {\n",
239 |     "  \"model\": model,\n",
240 |     "  \"messages\": [\n",
241 |     "    {\"role\": \"user\", \"content\": user_message},\n",
242 |     "    {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": arguments}},\n",
243 |     "    {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": str(weather_api_response)}\n",
244 |     "  ],\n",
245 |     "  \"functions\": [\n",
246 |     "    {\n",
247 |     "      \"name\": \"get_current_weather\",\n",
248 |     "      \"description\": \"Get the current weather in a given location\",\n",
249 |     "      \"parameters\": {\n",
250 |     "        \"type\": \"object\",\n",
251 |     "        \"properties\": {\n",
252 |     "          \"location\": {\n",
253 |     "            \"type\": \"string\",\n",
254 |     "            \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
255 |     "          },\n",
256 |     "          \"unit\": {\n",
257 |     "            \"type\": \"string\",\n",
258 |     "            \"enum\": [\"celsius\", \"fahrenheit\"]\n",
259 |     "          }\n",
260 |     "        },\n",
261 |     "        \"required\": [\"location\"]\n",
262 |     "      }\n",
263 |     "    }\n",
264 |     "  ]\n",
265 |     "}\n",
266 |     "\n",
267 |     "response = requests.post(open_ai_url, headers=headers, data=json.dumps(data))\n",
268 |     "\n",
269 |     "print(response.json())"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": 20,
275 |    "metadata": {},
276 |    "outputs": [
277 |     {
278 |      "data": {
279 |       "text/plain": [
280 |        "'The current weather in Delhi is misty. The temperature is 302.2 Kelvin (approximately 29.05 degrees Celsius) with a humidity of 84%. The visibility is 3500 meters.'"
281 |       ]
282 |      },
283 |      "execution_count": 20,
284 |      "metadata": {},
285 |      "output_type": "execute_result"
286 |     }
287 |    ],
288 |    "source": [
289 |     "response.json()['choices'][0]['message']['content']"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "code",
294 |    "execution_count": null,
295 |    "metadata": {},
296 |    "outputs": [],
297 |    "source": []
298 |   }
299 |  ],
300 |  "metadata": {
301 |   "kernelspec": {
302 |    "display_name": "env_twitter",
303 |    "language": "python",
304 |    "name": "env_twitter"
305 |   },
306 |   "language_info": {
307 |    "codemirror_mode": {
308 |     "name": "ipython",
309 |     "version": 3
310 |    },
311 |    "file_extension": ".py",
312 |    "mimetype": "text/x-python",
313 |    "name": "python",
314 |    "nbconvert_exporter": "python",
315 |    "pygments_lexer": "ipython3",
316 |    "version": "3.10.6"
317 |   }
318 |  },
319 |  "nbformat": 4,
320 |  "nbformat_minor": 4
321 | }
322 | 


--------------------------------------------------------------------------------
/pandas/assigning_new_columns.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "##### ❇️ Adding new columns: 🐼 `df.assign(**kwargs)`"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Returns a new dataframe with all original columns in addition to new ones. <br>\n",
 15 |     "kwargs: <br>\n",
 16 |     "- The column names are keywords. If the values are callable, they are computed on the DataFrame <br>\n",
 17 |     "and assigned to the new columns. <br>\n",
 18 |     "- If the values are not callable, (e.g. a Series, scalar, or array), <br>\n",
 19 |     "they are simply assigned."
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 1,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "import pandas as pd"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {},
 35 |    "outputs": [
 36 |     {
 37 |      "data": {
 38 |       "text/html": [
 39 |        "<div>\n",
 40 |        "<style scoped>\n",
 41 |        "    .dataframe tbody tr th:only-of-type {\n",
 42 |        "        vertical-align: middle;\n",
 43 |        "    }\n",
 44 |        "\n",
 45 |        "    .dataframe tbody tr th {\n",
 46 |        "        vertical-align: top;\n",
 47 |        "    }\n",
 48 |        "\n",
 49 |        "    .dataframe thead th {\n",
 50 |        "        text-align: right;\n",
 51 |        "    }\n",
 52 |        "</style>\n",
 53 |        "<table border=\"1\" class=\"dataframe\">\n",
 54 |        "  <thead>\n",
 55 |        "    <tr style=\"text-align: right;\">\n",
 56 |        "      <th></th>\n",
 57 |        "      <th>temp_celcius</th>\n",
 58 |        "    </tr>\n",
 59 |        "  </thead>\n",
 60 |        "  <tbody>\n",
 61 |        "    <tr>\n",
 62 |        "      <th>Delhi</th>\n",
 63 |        "      <td>35.0</td>\n",
 64 |        "    </tr>\n",
 65 |        "    <tr>\n",
 66 |        "      <th>Mumbai</th>\n",
 67 |        "      <td>25.0</td>\n",
 68 |        "    </tr>\n",
 69 |        "  </tbody>\n",
 70 |        "</table>\n",
 71 |        "</div>"
 72 |       ],
 73 |       "text/plain": [
 74 |        "        temp_celcius\n",
 75 |        "Delhi           35.0\n",
 76 |        "Mumbai          25.0"
 77 |       ]
 78 |      },
 79 |      "execution_count": 2,
 80 |      "metadata": {},
 81 |      "output_type": "execute_result"
 82 |     }
 83 |    ],
 84 |    "source": [
 85 |     "df = pd.DataFrame({'temp_celcius': [35.0, 25.0]}, index=['Delhi', 'Mumbai'])\n",
 86 |     "df"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 3,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "data": {
 96 |       "text/html": [
 97 |        "<div>\n",
 98 |        "<style scoped>\n",
 99 |        "    .dataframe tbody tr th:only-of-type {\n",
100 |        "        vertical-align: middle;\n",
101 |        "    }\n",
102 |        "\n",
103 |        "    .dataframe tbody tr th {\n",
104 |        "        vertical-align: top;\n",
105 |        "    }\n",
106 |        "\n",
107 |        "    .dataframe thead th {\n",
108 |        "        text-align: right;\n",
109 |        "    }\n",
110 |        "</style>\n",
111 |        "<table border=\"1\" class=\"dataframe\">\n",
112 |        "  <thead>\n",
113 |        "    <tr style=\"text-align: right;\">\n",
114 |        "      <th></th>\n",
115 |        "      <th>temp_celcius</th>\n",
116 |        "      <th>wind_speed_kmph</th>\n",
117 |        "    </tr>\n",
118 |        "  </thead>\n",
119 |        "  <tbody>\n",
120 |        "    <tr>\n",
121 |        "      <th>Delhi</th>\n",
122 |        "      <td>35.0</td>\n",
123 |        "      <td>26.0</td>\n",
124 |        "    </tr>\n",
125 |        "    <tr>\n",
126 |        "      <th>Mumbai</th>\n",
127 |        "      <td>25.0</td>\n",
128 |        "      <td>31.0</td>\n",
129 |        "    </tr>\n",
130 |        "  </tbody>\n",
131 |        "</table>\n",
132 |        "</div>"
133 |       ],
134 |       "text/plain": [
135 |        "        temp_celcius  wind_speed_kmph\n",
136 |        "Delhi           35.0             26.0\n",
137 |        "Mumbai          25.0             31.0"
138 |       ]
139 |      },
140 |      "execution_count": 3,
141 |      "metadata": {},
142 |      "output_type": "execute_result"
143 |     }
144 |    ],
145 |    "source": [
146 |     "# Let's add a new column wind_speed_kmph; where we take values from a list\n",
147 |     "speed_values = [26.0, 31.0]\n",
148 |     "df = df.assign(wind_speed_kmph=speed_values) # kmph: kilometers per hour\n",
149 |     "df"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 4,
155 |    "metadata": {},
156 |    "outputs": [
157 |     {
158 |      "data": {
159 |       "text/html": [
160 |        "<div>\n",
161 |        "<style scoped>\n",
162 |        "    .dataframe tbody tr th:only-of-type {\n",
163 |        "        vertical-align: middle;\n",
164 |        "    }\n",
165 |        "\n",
166 |        "    .dataframe tbody tr th {\n",
167 |        "        vertical-align: top;\n",
168 |        "    }\n",
169 |        "\n",
170 |        "    .dataframe thead th {\n",
171 |        "        text-align: right;\n",
172 |        "    }\n",
173 |        "</style>\n",
174 |        "<table border=\"1\" class=\"dataframe\">\n",
175 |        "  <thead>\n",
176 |        "    <tr style=\"text-align: right;\">\n",
177 |        "      <th></th>\n",
178 |        "      <th>temp_celcius</th>\n",
179 |        "      <th>wind_speed_kmph</th>\n",
180 |        "      <th>wind_speed_mph</th>\n",
181 |        "    </tr>\n",
182 |        "  </thead>\n",
183 |        "  <tbody>\n",
184 |        "    <tr>\n",
185 |        "      <th>Delhi</th>\n",
186 |        "      <td>35.0</td>\n",
187 |        "      <td>26.0</td>\n",
188 |        "      <td>16.146</td>\n",
189 |        "    </tr>\n",
190 |        "    <tr>\n",
191 |        "      <th>Mumbai</th>\n",
192 |        "      <td>25.0</td>\n",
193 |        "      <td>31.0</td>\n",
194 |        "      <td>19.251</td>\n",
195 |        "    </tr>\n",
196 |        "  </tbody>\n",
197 |        "</table>\n",
198 |        "</div>"
199 |       ],
200 |       "text/plain": [
201 |        "        temp_celcius  wind_speed_kmph  wind_speed_mph\n",
202 |        "Delhi           35.0             26.0          16.146\n",
203 |        "Mumbai          25.0             31.0          19.251"
204 |       ]
205 |      },
206 |      "execution_count": 4,
207 |      "metadata": {},
208 |      "output_type": "execute_result"
209 |     }
210 |    ],
211 |    "source": [
212 |     "# Using a callable to calculate values of new column based on existing column values\n",
213 |     "df = df.assign(wind_speed_mph = lambda x: x['wind_speed_kmph']*0.621) # mph: miles per hour\n",
214 |     "df"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 5,
220 |    "metadata": {},
221 |    "outputs": [
222 |     {
223 |      "data": {
224 |       "text/html": [
225 |        "<div>\n",
226 |        "<style scoped>\n",
227 |        "    .dataframe tbody tr th:only-of-type {\n",
228 |        "        vertical-align: middle;\n",
229 |        "    }\n",
230 |        "\n",
231 |        "    .dataframe tbody tr th {\n",
232 |        "        vertical-align: top;\n",
233 |        "    }\n",
234 |        "\n",
235 |        "    .dataframe thead th {\n",
236 |        "        text-align: right;\n",
237 |        "    }\n",
238 |        "</style>\n",
239 |        "<table border=\"1\" class=\"dataframe\">\n",
240 |        "  <thead>\n",
241 |        "    <tr style=\"text-align: right;\">\n",
242 |        "      <th></th>\n",
243 |        "      <th>temp_celcius</th>\n",
244 |        "      <th>wind_speed_kmph</th>\n",
245 |        "      <th>wind_speed_mph</th>\n",
246 |        "      <th>temp_fahrenheit</th>\n",
247 |        "      <th>temp_kelvin</th>\n",
248 |        "    </tr>\n",
249 |        "  </thead>\n",
250 |        "  <tbody>\n",
251 |        "    <tr>\n",
252 |        "      <th>Delhi</th>\n",
253 |        "      <td>35.0</td>\n",
254 |        "      <td>26.0</td>\n",
255 |        "      <td>16.146</td>\n",
256 |        "      <td>95.0</td>\n",
257 |        "      <td>308.15</td>\n",
258 |        "    </tr>\n",
259 |        "    <tr>\n",
260 |        "      <th>Mumbai</th>\n",
261 |        "      <td>25.0</td>\n",
262 |        "      <td>31.0</td>\n",
263 |        "      <td>19.251</td>\n",
264 |        "      <td>77.0</td>\n",
265 |        "      <td>298.15</td>\n",
266 |        "    </tr>\n",
267 |        "  </tbody>\n",
268 |        "</table>\n",
269 |        "</div>"
270 |       ],
271 |       "text/plain": [
272 |        "        temp_celcius  wind_speed_kmph  wind_speed_mph  temp_fahrenheit  \\\n",
273 |        "Delhi           35.0             26.0          16.146             95.0   \n",
274 |        "Mumbai          25.0             31.0          19.251             77.0   \n",
275 |        "\n",
276 |        "        temp_kelvin  \n",
277 |        "Delhi        308.15  \n",
278 |        "Mumbai       298.15  "
279 |       ]
280 |      },
281 |      "execution_count": 5,
282 |      "metadata": {},
283 |      "output_type": "execute_result"
284 |     }
285 |    ],
286 |    "source": [
287 |     "# You can create multiple columns within the same assign \n",
288 |     "# where one of the columns depends on another one defined within the same assign 💥 \n",
289 |     "df = df.assign(temp_fahrenheit=lambda x: x['temp_celcius'] * 9 / 5 + 32, temp_kelvin=lambda x: (x['temp_fahrenheit'] +  459.67) * 5 / 9)\n",
290 |     "df"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "metadata": {},
296 |    "source": [
297 |     "##### ❇️ Hope you enjoyed reading!! 📖 \n",
298 |     "##### ❇️ follow → @akshay_pachaar  "
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "code",
303 |    "execution_count": null,
304 |    "metadata": {},
305 |    "outputs": [],
306 |    "source": []
307 |   }
308 |  ],
309 |  "metadata": {
310 |   "kernelspec": {
311 |    "display_name": "env_twitter",
312 |    "language": "python",
313 |    "name": "env_twitter"
314 |   },
315 |   "language_info": {
316 |    "codemirror_mode": {
317 |     "name": "ipython",
318 |     "version": 3
319 |    },
320 |    "file_extension": ".py",
321 |    "mimetype": "text/x-python",
322 |    "name": "python",
323 |    "nbconvert_exporter": "python",
324 |    "pygments_lexer": "ipython3",
325 |    "version": "3.10.5"
326 |   }
327 |  },
328 |  "nbformat": 4,
329 |  "nbformat_minor": 4
330 | }
331 | 


--------------------------------------------------------------------------------
/NLP/tokenization.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "#### 🔴 Understanding `Tokenization` in NLP!"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "1️⃣ Character Tokenization"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 49,
 20 |    "metadata": {},
 21 |    "outputs": [
 22 |     {
 23 |      "name": "stdout",
 24 |      "output_type": "stream",
 25 |      "text": [
 26 |       "['W', 'e', ' ', 'l', 'o', 'v', 'e', ' ', 'N', 'L', 'P', '!']\n"
 27 |      ]
 28 |     }
 29 |    ],
 30 |    "source": [
 31 |     "raw_text = \"We love NLP!\"\n",
 32 |     "tokens = list(raw_text)\n",
 33 |     "print(tokens)"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 50,
 39 |    "metadata": {},
 40 |    "outputs": [
 41 |     {
 42 |      "name": "stdout",
 43 |      "output_type": "stream",
 44 |      "text": [
 45 |       "{' ': 0, '!': 1, 'L': 2, 'N': 3, 'P': 4, 'W': 5, 'e': 6, 'l': 7, 'o': 8, 'v': 9}\n"
 46 |      ]
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "# Numerical encoding of individual character\n",
 51 |     "token2idx = {char: idx for idx, char in enumerate(sorted(set(tokens)))}\n",
 52 |     "print(token2idx)"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 51,
 58 |    "metadata": {},
 59 |    "outputs": [
 60 |     {
 61 |      "name": "stdout",
 62 |      "output_type": "stream",
 63 |      "text": [
 64 |       "[5, 6, 0, 7, 8, 9, 6, 0, 3, 2, 4, 1]\n"
 65 |      ]
 66 |     }
 67 |    ],
 68 |    "source": [
 69 |     "# Using token2idx to map our tokenized text to integers\n",
 70 |     "integer_tokens = [token2idx[token] for token in tokens]\n",
 71 |     "print(integer_tokens)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 52,
 77 |    "metadata": {},
 78 |    "outputs": [
 79 |     {
 80 |      "data": {
 81 |       "text/plain": [
 82 |        "torch.Size([12, 10])"
 83 |       ]
 84 |      },
 85 |      "execution_count": 52,
 86 |      "metadata": {},
 87 |      "output_type": "execute_result"
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "# One-hot encoding the numbers\n",
 92 |     "import torch\n",
 93 |     "import torch.nn.functional as F\n",
 94 |     "\n",
 95 |     "integer_tokens = torch.tensor(integer_tokens)\n",
 96 |     "one_hot_encode_tokens = F.one_hot(integer_tokens, num_classes=len(token2idx))\n",
 97 |     "one_hot_encode_tokens.shape"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 53,
103 |    "metadata": {},
104 |    "outputs": [
105 |     {
106 |      "name": "stdout",
107 |      "output_type": "stream",
108 |      "text": [
109 |       "Token = W\n",
110 |       "Integer Encoded Token = 5\n",
111 |       "One hot encoded Token = tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])\n"
112 |      ]
113 |     }
114 |    ],
115 |    "source": [
116 |     "print(f\"Token = {tokens[0]}\")\n",
117 |     "print(f\"Integer Encoded Token = {integer_tokens[0]}\")\n",
118 |     "print(f\"One hot encoded Token = {one_hot_encode_tokens[0]}\")"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "2️⃣ Word tokenization"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 54,
131 |    "metadata": {},
132 |    "outputs": [
133 |     {
134 |      "name": "stdout",
135 |      "output_type": "stream",
136 |      "text": [
137 |       "['We', 'love', 'NLP!']\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "# Splitting raw text based on whitespaces\n",
143 |     "word_tokens = raw_text.split()\n",
144 |     "print(word_tokens)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "3️⃣ Subword Tokenization"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 55,
157 |    "metadata": {},
158 |    "outputs": [],
159 |    "source": [
160 |     "from transformers import AutoTokenizer"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 56,
166 |    "metadata": {},
167 |    "outputs": [],
168 |    "source": [
169 |     "model_ckpt = 'distilbert-base-uncased'\n",
170 |     "tokenizer = AutoTokenizer.from_pretrained(model_ckpt)"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 57,
176 |    "metadata": {},
177 |    "outputs": [],
178 |    "source": [
179 |     "from transformers import DistilBertTokenizer\n",
180 |     "distilbert_tokenizer = DistilBertTokenizer.from_pretrained(model_ckpt)"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 58,
186 |    "metadata": {},
187 |    "outputs": [
188 |     {
189 |      "name": "stdout",
190 |      "output_type": "stream",
191 |      "text": [
192 |       "{'input_ids': [101, 2057, 2293, 17953, 2361, 999, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n"
193 |      ]
194 |     }
195 |    ],
196 |    "source": [
197 |     "# Let's see the tokenizer in action now \n",
198 |     "encoded_text = tokenizer(raw_text)\n",
199 |     "print(encoded_text)"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": 59,
205 |    "metadata": {},
206 |    "outputs": [
207 |     {
208 |      "name": "stdout",
209 |      "output_type": "stream",
210 |      "text": [
211 |       "['[CLS]', 'we', 'love', 'nl', '##p', '!', '[SEP]']\n"
212 |      ]
213 |     }
214 |    ],
215 |    "source": [
216 |     "tokens = tokenizer.convert_ids_to_tokens(encoded_text.input_ids)\n",
217 |     "print(tokens)"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "markdown",
222 |    "metadata": {},
223 |    "source": [
224 |     "4️⃣ Tokenizing the entire dataset"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": 32,
230 |    "metadata": {},
231 |    "outputs": [],
232 |    "source": [
233 |     "# !pip install datasets"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": 60,
239 |    "metadata": {},
240 |    "outputs": [],
241 |    "source": [
242 |     "from datasets import load_dataset"
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": 66,
248 |    "metadata": {},
249 |    "outputs": [
250 |     {
251 |      "name": "stderr",
252 |      "output_type": "stream",
253 |      "text": [
254 |       "Found cached dataset emotion (/Users/pachaar/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705)\n"
255 |      ]
256 |     },
257 |     {
258 |      "data": {
259 |       "application/vnd.jupyter.widget-view+json": {
260 |        "model_id": "ee4202f445ef41b7bd91cc51ccfe27dc",
261 |        "version_major": 2,
262 |        "version_minor": 0
263 |       },
264 |       "text/plain": [
265 |        "  0%|          | 0/3 [00:00<?, ?it/s]"
266 |       ]
267 |      },
268 |      "metadata": {},
269 |      "output_type": "display_data"
270 |     },
271 |     {
272 |      "data": {
273 |       "text/plain": [
274 |        "DatasetDict({\n",
275 |        "    train: Dataset({\n",
276 |        "        features: ['text', 'label'],\n",
277 |        "        num_rows: 16000\n",
278 |        "    })\n",
279 |        "    validation: Dataset({\n",
280 |        "        features: ['text', 'label'],\n",
281 |        "        num_rows: 2000\n",
282 |        "    })\n",
283 |        "    test: Dataset({\n",
284 |        "        features: ['text', 'label'],\n",
285 |        "        num_rows: 2000\n",
286 |        "    })\n",
287 |        "})"
288 |       ]
289 |      },
290 |      "execution_count": 66,
291 |      "metadata": {},
292 |      "output_type": "execute_result"
293 |     }
294 |    ],
295 |    "source": [
296 |     "# we will load the tweet emotions dataset\n",
297 |     "tweet_emotions = load_dataset(\"emotion\")\n",
298 |     "tweet_emotions"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "code",
303 |    "execution_count": 67,
304 |    "metadata": {},
305 |    "outputs": [],
306 |    "source": [
307 |     "# Let's define a function for tokenization\n",
308 |     "def tokenize(batch):\n",
309 |     "    return tokenizer(batch[\"text\"], padding=True, truncation=True)"
310 |    ]
311 |   },
312 |   {
313 |    "cell_type": "code",
314 |    "execution_count": 68,
315 |    "metadata": {},
316 |    "outputs": [
317 |     {
318 |      "name": "stdout",
319 |      "output_type": "stream",
320 |      "text": [
321 |       "{'input_ids': [[101, 1045, 2134, 2102, 2514, 26608, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 1045, 2064, 2175, 2013, 3110, 2061, 20625, 2000, 2061, 9636, 17772, 2074, 2013, 2108, 2105, 2619, 2040, 14977, 1998, 2003, 8300, 102]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}\n"
322 |      ]
323 |     }
324 |    ],
325 |    "source": [
326 |     "print(tokenize(tweet_emotions[\"train\"][:2]))"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "code",
331 |    "execution_count": 69,
332 |    "metadata": {},
333 |    "outputs": [
334 |     {
335 |      "data": {
336 |       "application/vnd.jupyter.widget-view+json": {
337 |        "model_id": "0e3e83bd4e2f4328bbda2dcba85d46af",
338 |        "version_major": 2,
339 |        "version_minor": 0
340 |       },
341 |       "text/plain": [
342 |        "  0%|          | 0/1 [00:00<?, ?ba/s]"
343 |       ]
344 |      },
345 |      "metadata": {},
346 |      "output_type": "display_data"
347 |     },
348 |     {
349 |      "data": {
350 |       "application/vnd.jupyter.widget-view+json": {
351 |        "model_id": "01780754e110423da5df6e8294b5ea39",
352 |        "version_major": 2,
353 |        "version_minor": 0
354 |       },
355 |       "text/plain": [
356 |        "  0%|          | 0/1 [00:00<?, ?ba/s]"
357 |       ]
358 |      },
359 |      "metadata": {},
360 |      "output_type": "display_data"
361 |     },
362 |     {
363 |      "data": {
364 |       "application/vnd.jupyter.widget-view+json": {
365 |        "model_id": "918f15064a20417d92d7c42d389e2646",
366 |        "version_major": 2,
367 |        "version_minor": 0
368 |       },
369 |       "text/plain": [
370 |        "  0%|          | 0/1 [00:00<?, ?ba/s]"
371 |       ]
372 |      },
373 |      "metadata": {},
374 |      "output_type": "display_data"
375 |     },
376 |     {
377 |      "name": "stdout",
378 |      "output_type": "stream",
379 |      "text": [
380 |       "['text', 'label', 'input_ids', 'attention_mask']\n"
381 |      ]
382 |     }
383 |    ],
384 |    "source": [
385 |     "# Applying tokenization across entire data set\n",
386 |     "tweet_emotions_encoded = tweet_emotions.map(tokenize, batched=True, batch_size=None)\n",
387 |     "\n",
388 |     "print(tweet_emotions_encoded['test'].column_names)"
389 |    ]
390 |   },
391 |   {
392 |    "cell_type": "code",
393 |    "execution_count": null,
394 |    "metadata": {},
395 |    "outputs": [],
396 |    "source": []
397 |   }
398 |  ],
399 |  "metadata": {
400 |   "kernelspec": {
401 |    "display_name": "env_twitter",
402 |    "language": "python",
403 |    "name": "env_twitter"
404 |   },
405 |   "language_info": {
406 |    "codemirror_mode": {
407 |     "name": "ipython",
408 |     "version": 3
409 |    },
410 |    "file_extension": ".py",
411 |    "mimetype": "text/x-python",
412 |    "name": "python",
413 |    "nbconvert_exporter": "python",
414 |    "pygments_lexer": "ipython3",
415 |    "version": "3.10.5"
416 |   }
417 |  },
418 |  "nbformat": 4,
419 |  "nbformat_minor": 4
420 | }
421 | 


--------------------------------------------------------------------------------
/unsupervised_learning/dummy_data_clustering.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "672ee0e6",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## 🔘 Create dummy data for `Clustering`\n",
  9 |     "\n",
 10 |     "`sklearn.datasets.make_blobs()` generates isotropic Gaussian blobs for clustering."
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "id": "2e343277",
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "from sklearn.datasets import make_blobs\n",
 21 |     "import matplotlib.pyplot as plt"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "id": "d0b4791b",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "**Make Dataset**"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "id": "1104be50",
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "# Generate 100 samples: features (X) and the cluster label of each sample (y)\n",
 40 |     "X, y = make_blobs(n_samples = 100,\n",
 41 |     "                  n_features = 2, # dimension of each data point,\n",
 42 |     "                  centers = 3, # number of clusters,\n",
 43 |     "                  cluster_std = 0.5, # std deviation of each cluster\n",
 44 |     "                 )"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "id": "81072c52",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "**Plot Blobs**"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "id": "4f7785d5",
 59 |    "metadata": {},
 60 |    "outputs": [
 61 |     {
 62 |      "data": {
 63 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAb5UlEQVR4nO3df5Dc9X3f8edbp8WssKvD4WyqBUVqYx82VtGZC3aixqkE4exA4CrbMW7dcdpM1Mm4DhDPeaTCGPAklZLr1PakmbQa7CQdK1gY5LNdWh8mIs6EqUhP3BEhi0vSgAQrO5wHzmnRxqyOd/+43dPe7vf73V/33e/ud1+PGYa73b39vHexX9/Pvj+f73fN3RERkXRal3QBIiISH4W8iEiKKeRFRFJMIS8ikmIKeRGRFFufdAGVLrvsMt+yZUvSZYiI9JTjx4//wN2Hgu7rqpDfsmULMzMzSZchItJTzOx02H1q14iIpJhCXkQkxRTyIiIpppAXEUmx2EPezAbN7CEze9bMTpnZT8U9poiILOvE7povAN9y9w+Z2UXAhg6MKSIJmJrNMzk9z9nFApsGs0yMDTM+kku6rL4Wa8ib2UbgfcAvAbj7a8BrcY4pIsm4e+oEh46doXxd2/xigX1HTgAo6BMUd7tmK7AA/L6ZzZrZ/WZ2SeUDzGyPmc2Y2czCwkLM5YhI2dRsnh0HjrJ17yPsOHCUqdl8W89VGfBlheISk9Pz7RUqbYk75NcD7wZ+z91HgFeBvZUPcPeD7j7q7qNDQ4EnbInIGpuazTPx0NPkFws4y7PuiYeebjnoJ6fnawK+7OxioeU6YW0PRv0o7p78i8CL7v5k6feHqAp5EWlNq/3vqdk8v/7gHK9XpXJxybnvmydbaq1EBfnGbKahmoJey9Rsnn1HTlAoLgH1W0BaE6gVa8i7+/fN7AUzG3b3eeB64LtxjinSD5oNv+q/qw74slfOFZuqoRyo68xYCvmWObP6zxP2Wian51duLyu3gKpfZ6vvSdp1Yp/8J4FDZvYXwHbgP3RgTJFUiwq/Zv+uFeVALbd7wgIeYLHOgSPqtYR9Qgi6vdX3JO1i30Lp7nPAaNzjiPSTZsKvmfsHK1orUa2PZg4WmwazLdVUHjcfcH9QC6jV9yTtdMarSA8KC856gVrv/ntvuRqonannFwvceXiOu6eW2x+NBqeV/nbHgaPcPXUicAE16rVMjA2TWVfb71ksFNnSxPP0M4W8SA+aGBsmmxlYdVs2M8DE2HDdvwtrkV+6IRM5U3fg0LEzTM3mGw7Oyj3zXz52ZvVunq8+zchnHyW/WKipqfxaxkdyvPHi8IZDue8+NZtv+T1Ju666nryINKYyjJvZSTI+kmPm9Ms1e9rLs+Wtex8JbZHAcmjf982Ta/ESKL7uKwu91R39izMX5p/1evrlvvsTe3cBzb8naWcesWDSaaOjo64vDRGJX2W/fWM2w6uvnae4dCELjNrg7bRsZoD9u7cxOT0fetApM+C5Azd1prAuZGbH3T1w7VPtGpE+ND6S44m9u3juwE1c8ob1qwIekg94uDBDD2rDVItqH/X7yVRq14j0uHZPAIpaRG11Rp8ZMPDllkw7zi4WVrWmyv37ymeN6rtr77xm8iI9LWgXTHkhslFhs+DcYJbPfWQ7ucEsVvp9MOLsVav4u8kPXcPkh69Z9bcfe+/mld8Hs5nlA0GDtZU/eTx/4Kaamj54bY7J6fnAmbr2zmsmL9LTmjkjNMzE2PCq2S4sz453XjVU8wkBqHlsmbMcuuUFUKh/9m3UukDYDH18JLfyvPVm6to7r5m8SE9bixAbH8mxf/e2mtnxw8fzNZ8QAPbv3tZ0PWHjltcF5u65kckPrZ7579+9re6Bqt5MXXvnNZMX6Wlh2x2bDbHK2THAjgNHQ8OzvNc+qNseNm4j6wbVNTSi3kEu7FNKP+2d10xepIfFdQJQVHiGXVbYSvVUW4t1gz
D1ZupBn1Ia+YSQJprJi/SwVk+KqlY9096YzbBYqD0JadNgNvQA4AT34Ndi3SBMIzP1Vj4hpIlCXqTHRYVYI22SoMXLzICRWWertkCWwzPs5KRcyKw6zsXPtTrIpZlCXiSlGt0jHjTTLi45l27IsOGi9YHh2Uyfe63WDcL0+0y9HoW8SEo12iYJm1Evnisy+5kba25vdvasxc9kKeRFUqrRNkkrM+1mZs9qqSQr9pA3swFgBsi7+81xjyeSRq1cuqDR8O7ETFstleR0Ygvl7cCpDowjkkqtbkFsdHulthmmW6wzeTO7ArgJ+E3g1+McSyStWt2C2EybRDPt9Iq7XfN54NPAm8IeYGZ7gD0Amzdvjrkckd7TzhZEhbfE1q4xs5uBl9z9eNTj3P2gu4+6++jQ0FBc5Yj0LF1/RdoRZ09+B3CLmT0PfAXYZWZfjnE8kVTSd5dKO2ILeXff5+5XuPsW4DbgqLt/LK7xRNJKC6PSDu2TF+kB6q1LqzoS8u7+J8CfdGIsERG5QJcaFhFJMYW8iEiKKeRFRFJMIS8ikmIKeRGRFFPIi4ikmEJeRCTFFPIiIimmkBcRSTGFvIhIiinkRURSTCEvIpJiCnkRkRRTyIuIpJhCXkQkxWINeTO70sweN7PvmtlJM7s9zvFERGS1uL805DzwKXd/yszeBBw3s2+7+3djHldERIh5Ju/u33P3p0o//1/gFKDvMBMR6ZCO9eTNbAswAjxZdfseM5sxs5mFhYVOlSMi0hc6EvJm9kbgYeAOd/+7yvvc/aC7j7r76NDQUCfKERHpG7GHvJllWA74Q+5+JO7xRETkgrh31xjwReCUu/+nOMcSEZFacc/kdwD/CthlZnOlf34+5jFFRKQk1i2U7v5ngMU5hoiIhNMZryIiKaaQFxFJMYW8iEiKKeRFRFJMIS8ikmJxX6BMRCQWU7N5JqfnObtYYNNglomxYcZHcqG39yuFvIgkqpVQnprNs+/ICQrFJQDyiwX2HTnBzOmXefh4vuZ2oG+DXu0aEUlMOazziwWcC6E8NZuP/LvJ6fmVIC8rFJd44MkXAm+fnJ5f69J7hkJeRBITFtb1QvnsYiHw9iX3ph7fD9SuEZFYNNKGCQvfeqG8aTBLPuAxA2aBQb9pMNtE5emimbyItGxqNs+OA0fZuvcRdhw4utJmabQNExa+9UJ5YmyYbGZg1W3ZzAAffc+VgbdPjA2vyevqRZrJi0hLwhY/IboNUzmbnxgbXvUcUD+Uy58QCsWllZl7ruKTwuiPv7mt3TVRr6vyeXplF49CXkRaEhXkjbZhyqHYaFhWB/CS+8pBofw34yO5tsK2kQNUoweCRsR9sFDIi0hLwoI8v1jg0g0ZXjlXrLkvqA3TTCiHBfCnHnyamdMv8/izC22HZSMHqKg67jw81/ZWUFi7LZ8KeRFpSdjiJ8D/+/vzZAaM4tKFRdBGeuOVs9qN2QxmsHiuuBKaUbtqvnzszMrv7YRl2OsqH6CmZvOhr7u86Nvo+I22tdqhhVcRaUnQ4mdZ8XXnkovWkxvMYkBuMMv+3dsig+vuqRPceXhuZbF2sVDklXPFVQu3G7OZhutrdX982KLuxNjwysx7rcZvdXdRM2KfyZvZ+4EvAAPA/e5+IO4xRSR+5cC+4/Bc4P0/LBSZu+fGhp5rajbPoWNnCN7lvqxQXOLizDqymYGa2W+YsBl3lKB1gp1XDTE5Pd/087W6FXQtt3zG/R2vA8DvAh8A3gl81MzeGeeYItI54yM5ci1ug6w0OT0fGfBli+eK7N+9jQFr7AvnDFra/jg+kuOJvbt47sBNTIwN8/Dx8BZNlFa3gja75TNK3O2a64C/dve/cffXgK8At8Y8poh00FoEVaPtCWf5gBC0Hz7q8e0I6ptXG8xmWnoPxkdy7N+9ram2VrPibtfkgBcqfn8ReE/lA8xsD7AHYPPmzTGXIyJrrdltkGWVi6zrQs5UDZJfLPDw8TwfvDa3ajdN2Ey73f52vb
/PZga495argebfA2h/y2c9ie+ucfeDwEGA0dHRxv4ri0hXqRdU1XvBd141tOpqkWEBf8lFA7z6Wu0sulBc4vFnF3hi766V23YcOBra325nL3rUASRX9VzdeDJU3O2aPHBlxe9XlG4TkT4RdImDQ8fORLZABrMZPv+R7Zz87PsJ675Xz7DD2kY7rxpq6UqX9Z738x/ZzhN7d3VlsFeKeyb/v4G3mdlWlsP9NuBfxDymiHSRoJ52vY/si4UidxyeY3J6no3ZDIuF+idWhbWNmtmLHjXj74VLGASJNeTd/byZ/TtgmuUtlF9y95Nxjiki3aWdnnh+sUBmwMisM4qv1z+xKqhtdGfIFs/quuqdfdoroV4t9pOh3P1/uPvb3f0fu/tvxj2eiHSXsG2EjW2ChOKS88aLmzuxqpHxq29v9dr23S7xhVcRSbewK02Wd8c0sv988VyR2c80dmJVo+NXfxLoxNmnSVDIi0isV0Ks19Me+eyjgRczq1Q9626m3kZ76p04+zQJ5g3uTe2E0dFRn5mZSboMkb5S3YuG5ZnuWp+UEzX+xENPr7qYWaXqWtqpN+rgkPT70A4zO+7uo0H3aSYv0uc6cSXEKNUz7aCrT1bW0Wy95WDPLxYwLuzsCVtY7dVdNGEU8iJ9Luq68FOz+Y4FfaPjNNM7r56dV39WqD449PIumjC61LBIn4vqOTdz0lCnhNW7zqym1kauO9PrC6v1KORF+lzUdeG7cQthWL1L7jUHpUYCvNcXVutRyIv0ufKVEMN020y3XG/Q5YarD0r1AnytL+vbjRTyIrJm14XvlPGRHK+H7AysPCgFzfrLh4Y4LuvbjbTwKiJA4ycNdYtG9rWndcdMMxTyIgL0XiA2elBK446ZZijkRWRFLwVirx2UkqKQF5Ge1UsHpaRo4VVEJMUU8iIiKaaQFxFJsdhC3swmzexZM/sLM/uamQ3GNZaIiASLcyb/beBd7v5PgL8E9sU4loiIBIgt5N39UXc/X/r1GHBFXGOJiEiwTvXk/w3wP4PuMLM9ZjZjZjMLCwsdKkdEpD+0tU/ezB4DLg+46y53/3rpMXcB54FDQc/h7geBg7D8zVDt1CMiIqu1FfLufkPU/Wb2S8DNwPXeTd8zKCLSJ2I749XM3g98GvhZdz8X1zgiIhIuzp78fwbeBHzbzObM7L/EOJaIiASIbSbv7j8R13OLiEhjdMariEiKKeRFRFJMIS8ikmIKeRGRFFPIi4ikmEJeRCTFFPIiIimmkBcRSTGFvIhIiinkRURSTCEvIpJiCnkRkRRTyIuIpJhCXkQkxRTyIiIpppAXEUmx2EPezD5lZm5ml8U9loiIrBZryJvZlcCNwJk4xxERkWBxz+Q/x/KXeXvM44iISIDYQt7MbgXy7v50ncftMbMZM5tZWFiIqxwRkb7U1hd5m9ljwOUBd90F/HuWWzWR3P0gcBBgdHRUM34RkTXUVsi7+w1Bt5vZNmAr8LSZAVwBPGVm17n799sZU0REGtdWyIdx9xPAW8q/m9nzwKi7/yCO8UREJJj2yYuIpFgsM/lq7r6lE+OIiMhqmsmLiKSYQl5EJMUU8iIiKaaQFxFJMYW8iEiKKeRFRFJMIS8ikmIKeRGRFFPIi4ikmEJeRCTFFPIiIimmkBcRSTGFvIhIiinkRURSTCEvIpJisYa8mX3SzJ41s5Nm9ttxjiUiIrVi+9IQM9sJ3Apc4+4/MrO31PsbERFZW3HO5H8VOODuPwJw95diHEtERALEGfJvB37GzJ40s++Y2U8GPcjM9pjZjJnNLCwsxFiOiEj/aatdY2aPAZcH3HVX6bnfDLwX+EngQTP7R+7ulQ9094PAQYDR0VGvfiIREWldWyHv7jeE3WdmvwocKYX6n5vZ68BlgKbrIiIdEme7ZgrYCWBmbwcuAn4Q43giIlIltt01wJeAL5nZM8BrwMerWzUiIhKv2ELe3V8DPhbX84uISH0641VEJMUU8iIiKaaQFxFJMYW8iE
iKKeRFRFJMIS8ikmIKeRGRFFPIi4ikmEJeRCTFFPIiIimmkBcRSTGFvIhIisV5FcquMjWbZ3J6nrOLBTYNZpkYG2Z8JJd0WSIiseqLkJ+azbPvyAkKxSUA8osF9h05AaCgF5FU64t2zeT0/ErAlxWKS0xOzydUkYhIZ/RFyJ9dLDR1u4hIWsQW8ma23cyOmdmcmc2Y2XVxjVXPpsFsU7eLiKRFnDP53wbuc/ftwGdKvydiYmyYbGZg1W3ZzAATY8MJVdRdpmbz7DhwlK17H2HHgaNMzeaTLklE1kicC68O/IPSzxuBszGOFam8uKrdNbW0KC2SbhbXd2ub2TuAacBY/sTw0+5+OuBxe4A9AJs3b7729Omah0iMdhw4Sj5gbSI3mOWJvbsSqEhEmmVmx919NPC+dkLezB4DLg+46y7geuA77v6wmf0isMfdb4h6vtHRUZ+ZmWm5njTvhY/rtW3d+whh/wvIpew9FEmrqJBvq10TFdpm9t+A20u/fhW4v52x6klz2yHO17ZpMBs4k1/rcUQkGXEuvJ4Ffrb08y7gr2Ica832wjeyCNnsQmWrC5vlv7vj8Fxs+/yDFqXjGEdEkhHnwuuvAF8ws/XA31Pqu8dlLfbCB82Y7zw8x8zpl/mN8W2hj4ma7bY6C6/+u7DX1m4bp3JROmxGr/MJRHpXbDN5d/8zd7/W3a9x9/e4+/G4xoK12Qsf9GnAgUPHzqzMvsM+Mdz3zZOBs/V7v3GypVl40DjVBjdk2HfkBPnFAs6FA0iznywAnti7i5zOJxBJndSc8boWe+HDZqwOK6Ec9phXzhVrwvbuqRMsFopNjdXo/dnMAO40fQApf0IIOjDofAKR9ElNyI+P5Ni/exu5wSzG8s6Q/bu3NdW6iJqxlkO30VltobjEA0++EDlWVK8+apzyaws7gIS1XSB67WIt3kMR6S6pugrl+EiurUCaGBvmzsNzgVsKy6E7MTZct1dethSxPXXnVUORvfqgcbKZgVWh+6kHnw4cY8AsdNx6axftvoci0l1SM5NfC+MjOf7lezdTHZGVLYvK2S5EB2rYXZduyPD4swuRrZZGZtVhB5Gog4uu4yPSXxTyVX5jfBuf+8j2yHAtz7SzmYHIQDUgM7A66bOZAe75havrzqgb2TUTtlBavj2oHaS+u0h/ie2yBq1o94zXTgq7HEC1wWyGS96wviasoy4n0EirZmo2z33fPMkr51b35cuPA0KfA3QdH5E0ie2M137W6N7xHxaKzN1zY83tYUE+MTZcd3E0bA/9YDbDvbdcvXIQCXuOJ/buUqiL9Am1a1rUaA877HFRPfd6rZywPfSXvGH9Snjri1JEBBTyQGuXHWi0h/3qj86HXhqhfJbpOjPyiwUmp+eZms3XXRxtJMC1wCoi0MchXw72LXsf4c7Dc02fNTo+kuPSDZm64ywWijXPV3lCElzYDVMee+dVQ5GLo40EuBZYRQT6NOSrQ7Z66bnRi3Ld8wtX1+yeCVL9fFGXLCgUl3j82YXI7ZM7rxqK3OYJa3NymIj0vp5feG3lAl2NXBemkd71+EiOe79xMvTM07Dna+SSBmEnJU3N5nn4eH7VgcmAD15b+3id2CQiPR3yrV7hsZEAb7R3/cMGAr76+aKu4V5v7LCLqD3+7EJDdcQtzV/cItKLerpd0+o15OsFeDO960YOBtXPV+8a7mGLtdDdu2aiLn4mIsno6ZBvNfCCQrbc447qXTd6BmlmnXHphkzkGbOVl0aovvxB0GJtWTfvmlmrL24RkbXT0+2asLZHvcCr/KKMRtsKYa2h/bu3sX/3tqZbFJX98qCzXytPfqoUdRJV0rr5U4ZIv2or5M3sw8C9wDuA69x9puK+fcAvA0vAr7n7dDtjBWkn8JpdlIyapbZ7Bmkz4djKAapTWj3oikh82p3JPwPsBv5r5Y1m9k7gNuBqYBPwmJm93d3rX5+3CZ0MvLAgzpe+gq+dMZsNx27dNdPNnzJE+lVbIe/upw
Cs9pq6twJfcfcfAc+Z2V8D1wH/q53xgnQq8KJ2xDSyoydKWsKxmz9liPSruHryOeBYxe8vlm6rYWZ7KH3J9+bNm2Mqp31RXxYS1j9vVJrCsVs/ZYj0q7ohb2aPAZcH3HWXu3+93QLc/SBwEJYvNdzu88WlHFx3HJ4LvL/dxUWFo4jEoW7Iu/sNLTxvHriy4vcrSrf1tPGR3MpFxappcVFEulFc++S/AdxmZm8ws63A24A/j2msjtKFv0Skl7S7hfKfA78DDAGPmNmcu4+5+0kzexD4LnAe+MRa76xJSpr65yKSfvr6PxGRHhf19X89fVkDERGJppAXEUkxhbyISIop5EVEUkwhLyKSYl21u8bMFoDTHRzyMuAHHRyvVb1QZy/UCL1RZy/UCL1RZy/UCO3X+ePuPhR0R1eFfKeZ2UzYtqNu0gt19kKN0Bt19kKN0Bt19kKNEG+dateIiKSYQl5EJMX6PeQPJl1Ag3qhzl6oEXqjzl6oEXqjzl6oEWKss6978iIiadfvM3kRkVRTyIuIpFhfh7yZDZjZrJn996RrCWJmz5vZCTObM7OuvTynmQ2a2UNm9qyZnTKzn0q6pkpmNlx6D8v//J2Z3ZF0XUHM7E4zO2lmz5jZA2Z2cdI1VTOz20v1neym99HMvmRmL5nZMxW3vdnMvm1mf1X696VdWOOHS+/l62a25tso+zrkgduBU0kXUcdOd9/e5Xt9vwB8y92vAq6hy95Td58vvYfbgWuBc8DXkq2qlpnlgF8DRt39XcAAcFuyVa1mZu8CfgW4juX/1jeb2U8kW9WKPwDeX3XbXuCP3f1twB+Xfk/SH1Bb4zPAbuBP4xiwb0PezK4AbgLuT7qWXmZmG4H3AV8EcPfX3H0x0aKiXQ/8H3fv5JnVzVgPZM1sPbABOJtwPdXeATzp7ufc/TzwHZYDKnHu/qfAy1U33wr8YennPwTGO1lTtaAa3f2Uu8/HNWbfhjzweeDTwOsJ1xHFgUfN7LiZ7Um6mBBbgQXg90utr/vN7JKki4pwG/BA0kUEcfc88B+BM8D3gB+6+6PJVlXjGeBnzOzHzGwD8POs/j7nbvNWd/9e6efvA29Nspgk9GXIm9nNwEvufjzpWur4p+7+buADwCfM7H1JFxRgPfBu4PfcfQR4leQ/Egcys4uAW4CvJl1LkFK/+FaWD5ybgEvM7GPJVrWau58Cfgt4FPgWMAf0xFd7+vJ+8b7bM96XIQ/sAG4xs+eBrwC7zOzLyZZUqzSzw91fYrmHfF2yFQV6EXjR3Z8s/f4Qy6HfjT4APOXuf5t0ISFuAJ5z9wV3LwJHgJ9OuKYa7v5Fd7/W3d8HvAL8ZdI1RfhbM/uHAKV/v5RwPR3XlyHv7vvc/Qp338Lyx/ej7t5VMyYzu8TM3lT+GbiR5Y/KXcXdvw+8YGbDpZuuZ/kL3LvRR+nSVk3JGeC9ZrbBzIzl97KrFrEBzOwtpX9vZrkf/0fJVhTpG8DHSz9/HPh6grUkYn3SBUiotwJfW/7/OuuBP3L3byVbUqhPAodK7ZC/Af51wvXUKB0ofw74t0nXEsbdnzSzh4CngPPALN15Wv7DZvZjQBH4RLcstJvZA8A/Ay4zsxeBe4ADwINm9sssX8b8F5OrMLTGl4HfAYaAR8xszt3H1mxMXdZARCS9+rJdIyLSLxTyIiIpppAXEUkxhbyISIop5EVEUkwhLyKSYgp5EZEU+//GN9+2H/JQAQAAAABJRU5ErkJggg==\n",
 64 |       "text/plain": [
 65 |        "<Figure size 432x288 with 1 Axes>"
 66 |       ]
 67 |      },
 68 |      "metadata": {
 69 |       "needs_background": "light"
 70 |      },
 71 |      "output_type": "display_data"
 72 |     }
 73 |    ],
 74 |    "source": [
 75 |     "plt.scatter(X[:,0], X[:,1])\n",
 76 |     "plt.show()"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "id": "df3f68aa",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "🔘 Hope you enjoyed reading!! 📖 <br>\n",
 85 |     "🔘 follow → `@akshay_pachaar`  "
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "id": "694ef174",
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": []
 95 |   }
 96 |  ],
 97 |  "metadata": {
 98 |   "kernelspec": {
 99 |    "display_name": "env_twitter",
100 |    "language": "python",
101 |    "name": "env_twitter"
102 |   },
103 |   "language_info": {
104 |    "codemirror_mode": {
105 |     "name": "ipython",
106 |     "version": 3
107 |    },
108 |    "file_extension": ".py",
109 |    "mimetype": "text/x-python",
110 |    "name": "python",
111 |    "nbconvert_exporter": "python",
112 |    "pygments_lexer": "ipython3",
113 |    "version": "3.10.5"
114 |   }
115 |  },
116 |  "nbformat": 4,
117 |  "nbformat_minor": 5
118 | }
119 | 


--------------------------------------------------------------------------------
/pandas/df.loc_pandas.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "##### ❇️ df.loc : Access a group of rows and columns by label(s) or a boolean array."
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import pandas as pd"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [
 24 |     {
 25 |      "data": {
 26 |       "text/html": [
 27 |        "<div>\n",
 28 |        "<style scoped>\n",
 29 |        "    .dataframe tbody tr th:only-of-type {\n",
 30 |        "        vertical-align: middle;\n",
 31 |        "    }\n",
 32 |        "\n",
 33 |        "    .dataframe tbody tr th {\n",
 34 |        "        vertical-align: top;\n",
 35 |        "    }\n",
 36 |        "\n",
 37 |        "    .dataframe thead th {\n",
 38 |        "        text-align: right;\n",
 39 |        "    }\n",
 40 |        "</style>\n",
 41 |        "<table border=\"1\" class=\"dataframe\">\n",
 42 |        "  <thead>\n",
 43 |        "    <tr style=\"text-align: right;\">\n",
 44 |        "      <th></th>\n",
 45 |        "      <th>species</th>\n",
 46 |        "      <th>island</th>\n",
 47 |        "      <th>bill_length_mm</th>\n",
 48 |        "      <th>bill_depth_mm</th>\n",
 49 |        "      <th>flipper_length_mm</th>\n",
 50 |        "      <th>body_mass_g</th>\n",
 51 |        "      <th>sex</th>\n",
 52 |        "    </tr>\n",
 53 |        "  </thead>\n",
 54 |        "  <tbody>\n",
 55 |        "    <tr>\n",
 56 |        "      <th>0</th>\n",
 57 |        "      <td>Adelie</td>\n",
 58 |        "      <td>Torgersen</td>\n",
 59 |        "      <td>39.1</td>\n",
 60 |        "      <td>18.7</td>\n",
 61 |        "      <td>181.0</td>\n",
 62 |        "      <td>3750.0</td>\n",
 63 |        "      <td>Male</td>\n",
 64 |        "    </tr>\n",
 65 |        "    <tr>\n",
 66 |        "      <th>1</th>\n",
 67 |        "      <td>Adelie</td>\n",
 68 |        "      <td>Torgersen</td>\n",
 69 |        "      <td>39.5</td>\n",
 70 |        "      <td>17.4</td>\n",
 71 |        "      <td>186.0</td>\n",
 72 |        "      <td>3800.0</td>\n",
 73 |        "      <td>Female</td>\n",
 74 |        "    </tr>\n",
 75 |        "    <tr>\n",
 76 |        "      <th>2</th>\n",
 77 |        "      <td>Adelie</td>\n",
 78 |        "      <td>Torgersen</td>\n",
 79 |        "      <td>40.3</td>\n",
 80 |        "      <td>18.0</td>\n",
 81 |        "      <td>195.0</td>\n",
 82 |        "      <td>3250.0</td>\n",
 83 |        "      <td>Female</td>\n",
 84 |        "    </tr>\n",
 85 |        "    <tr>\n",
 86 |        "      <th>3</th>\n",
 87 |        "      <td>Adelie</td>\n",
 88 |        "      <td>Torgersen</td>\n",
 89 |        "      <td>NaN</td>\n",
 90 |        "      <td>NaN</td>\n",
 91 |        "      <td>NaN</td>\n",
 92 |        "      <td>NaN</td>\n",
 93 |        "      <td>NaN</td>\n",
 94 |        "    </tr>\n",
 95 |        "  </tbody>\n",
 96 |        "</table>\n",
 97 |        "</div>"
 98 |       ],
 99 |       "text/plain": [
100 |        "  species     island  bill_length_mm  bill_depth_mm  flipper_length_mm  \\\n",
101 |        "0  Adelie  Torgersen            39.1           18.7              181.0   \n",
102 |        "1  Adelie  Torgersen            39.5           17.4              186.0   \n",
103 |        "2  Adelie  Torgersen            40.3           18.0              195.0   \n",
104 |        "3  Adelie  Torgersen             NaN            NaN                NaN   \n",
105 |        "\n",
106 |        "   body_mass_g     sex  \n",
107 |        "0       3750.0    Male  \n",
108 |        "1       3800.0  Female  \n",
109 |        "2       3250.0  Female  \n",
110 |        "3          NaN     NaN  "
111 |       ]
112 |      },
113 |      "execution_count": 2,
114 |      "metadata": {},
115 |      "output_type": "execute_result"
116 |     }
117 |    ],
118 |    "source": [
119 |     "df = pd.read_csv('penguins.csv')\n",
120 |     "df.head(4)"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 3,
126 |    "metadata": {},
127 |    "outputs": [
128 |     {
129 |      "data": {
130 |       "text/plain": [
131 |        "species                 Adelie\n",
132 |        "island               Torgersen\n",
133 |        "bill_length_mm            40.3\n",
134 |        "bill_depth_mm             18.0\n",
135 |        "flipper_length_mm        195.0\n",
136 |        "body_mass_g             3250.0\n",
137 |        "sex                     Female\n",
138 |        "Name: 2, dtype: object"
139 |       ]
140 |      },
141 |      "execution_count": 3,
142 |      "metadata": {},
143 |      "output_type": "execute_result"
144 |     }
145 |    ],
146 |    "source": [
147 |     "# 🔴 using an index\n",
148 |     "df.loc[2]"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 4,
154 |    "metadata": {},
155 |    "outputs": [
156 |     {
157 |      "data": {
158 |       "text/html": [
159 |        "<div>\n",
160 |        "<style scoped>\n",
161 |        "    .dataframe tbody tr th:only-of-type {\n",
162 |        "        vertical-align: middle;\n",
163 |        "    }\n",
164 |        "\n",
165 |        "    .dataframe tbody tr th {\n",
166 |        "        vertical-align: top;\n",
167 |        "    }\n",
168 |        "\n",
169 |        "    .dataframe thead th {\n",
170 |        "        text-align: right;\n",
171 |        "    }\n",
172 |        "</style>\n",
173 |        "<table border=\"1\" class=\"dataframe\">\n",
174 |        "  <thead>\n",
175 |        "    <tr style=\"text-align: right;\">\n",
176 |        "      <th></th>\n",
177 |        "      <th>species</th>\n",
178 |        "      <th>island</th>\n",
179 |        "      <th>bill_length_mm</th>\n",
180 |        "      <th>bill_depth_mm</th>\n",
181 |        "      <th>flipper_length_mm</th>\n",
182 |        "      <th>body_mass_g</th>\n",
183 |        "      <th>sex</th>\n",
184 |        "    </tr>\n",
185 |        "  </thead>\n",
186 |        "  <tbody>\n",
187 |        "    <tr>\n",
188 |        "      <th>0</th>\n",
189 |        "      <td>Adelie</td>\n",
190 |        "      <td>Torgersen</td>\n",
191 |        "      <td>39.1</td>\n",
192 |        "      <td>18.7</td>\n",
193 |        "      <td>181.0</td>\n",
194 |        "      <td>3750.0</td>\n",
195 |        "      <td>Male</td>\n",
196 |        "    </tr>\n",
197 |        "    <tr>\n",
198 |        "      <th>2</th>\n",
199 |        "      <td>Adelie</td>\n",
200 |        "      <td>Torgersen</td>\n",
201 |        "      <td>40.3</td>\n",
202 |        "      <td>18.0</td>\n",
203 |        "      <td>195.0</td>\n",
204 |        "      <td>3250.0</td>\n",
205 |        "      <td>Female</td>\n",
206 |        "    </tr>\n",
207 |        "  </tbody>\n",
208 |        "</table>\n",
209 |        "</div>"
210 |       ],
211 |       "text/plain": [
212 |        "  species     island  bill_length_mm  bill_depth_mm  flipper_length_mm  \\\n",
213 |        "0  Adelie  Torgersen            39.1           18.7              181.0   \n",
214 |        "2  Adelie  Torgersen            40.3           18.0              195.0   \n",
215 |        "\n",
216 |        "   body_mass_g     sex  \n",
217 |        "0       3750.0    Male  \n",
218 |        "2       3250.0  Female  "
219 |       ]
220 |      },
221 |      "execution_count": 4,
222 |      "metadata": {},
223 |      "output_type": "execute_result"
224 |     }
225 |    ],
226 |    "source": [
227 |     "# 🟡 Slicing along indices with step size\n",
228 |     "# ⭕️ df.loc[start:stop:step]\n",
229 |     "df.loc[0:3:2]"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": 5,
235 |    "metadata": {},
236 |    "outputs": [
237 |     {
238 |      "data": {
239 |       "text/html": [
240 |        "<div>\n",
241 |        "<style scoped>\n",
242 |        "    .dataframe tbody tr th:only-of-type {\n",
243 |        "        vertical-align: middle;\n",
244 |        "    }\n",
245 |        "\n",
246 |        "    .dataframe tbody tr th {\n",
247 |        "        vertical-align: top;\n",
248 |        "    }\n",
249 |        "\n",
250 |        "    .dataframe thead th {\n",
251 |        "        text-align: right;\n",
252 |        "    }\n",
253 |        "</style>\n",
254 |        "<table border=\"1\" class=\"dataframe\">\n",
255 |        "  <thead>\n",
256 |        "    <tr style=\"text-align: right;\">\n",
257 |        "      <th></th>\n",
258 |        "      <th>species</th>\n",
259 |        "      <th>sex</th>\n",
260 |        "    </tr>\n",
261 |        "  </thead>\n",
262 |        "  <tbody>\n",
263 |        "    <tr>\n",
264 |        "      <th>1</th>\n",
265 |        "      <td>Adelie</td>\n",
266 |        "      <td>Female</td>\n",
267 |        "    </tr>\n",
268 |        "    <tr>\n",
269 |        "      <th>2</th>\n",
270 |        "      <td>Adelie</td>\n",
271 |        "      <td>Female</td>\n",
272 |        "    </tr>\n",
273 |        "  </tbody>\n",
274 |        "</table>\n",
275 |        "</div>"
276 |       ],
277 |       "text/plain": [
278 |        "  species     sex\n",
279 |        "1  Adelie  Female\n",
280 |        "2  Adelie  Female"
281 |       ]
282 |      },
283 |      "execution_count": 5,
284 |      "metadata": {},
285 |      "output_type": "execute_result"
286 |     }
287 |    ],
288 |    "source": [
289 |     "# 🟢 Using a slice object along indices and list of column labels\n",
290 |     "df.loc[1:2, ['species', 'sex']] # ⬅️ Notice that .loc slicing includes both the start and stop labels. "
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": 6,
296 |    "metadata": {},
297 |    "outputs": [
298 |     {
299 |      "data": {
300 |       "text/html": [
301 |        "<div>\n",
302 |        "<style scoped>\n",
303 |        "    .dataframe tbody tr th:only-of-type {\n",
304 |        "        vertical-align: middle;\n",
305 |        "    }\n",
306 |        "\n",
307 |        "    .dataframe tbody tr th {\n",
308 |        "        vertical-align: top;\n",
309 |        "    }\n",
310 |        "\n",
311 |        "    .dataframe thead th {\n",
312 |        "        text-align: right;\n",
313 |        "    }\n",
314 |        "</style>\n",
315 |        "<table border=\"1\" class=\"dataframe\">\n",
316 |        "  <thead>\n",
317 |        "    <tr style=\"text-align: right;\">\n",
318 |        "      <th></th>\n",
319 |        "      <th>species</th>\n",
320 |        "      <th>island</th>\n",
321 |        "      <th>bill_length_mm</th>\n",
322 |        "      <th>bill_depth_mm</th>\n",
323 |        "      <th>flipper_length_mm</th>\n",
324 |        "      <th>body_mass_g</th>\n",
325 |        "      <th>sex</th>\n",
326 |        "    </tr>\n",
327 |        "  </thead>\n",
328 |        "  <tbody>\n",
329 |        "    <tr>\n",
330 |        "      <th>169</th>\n",
331 |        "      <td>Chinstrap</td>\n",
332 |        "      <td>Dream</td>\n",
333 |        "      <td>58.0</td>\n",
334 |        "      <td>17.8</td>\n",
335 |        "      <td>181.0</td>\n",
336 |        "      <td>3700.0</td>\n",
337 |        "      <td>Female</td>\n",
338 |        "    </tr>\n",
339 |        "    <tr>\n",
340 |        "      <th>196</th>\n",
341 |        "      <td>Chinstrap</td>\n",
342 |        "      <td>Dream</td>\n",
343 |        "      <td>50.9</td>\n",
344 |        "      <td>17.9</td>\n",
345 |        "      <td>196.0</td>\n",
346 |        "      <td>3675.0</td>\n",
347 |        "      <td>Female</td>\n",
348 |        "    </tr>\n",
349 |        "  </tbody>\n",
350 |        "</table>\n",
351 |        "</div>"
352 |       ],
353 |       "text/plain": [
354 |        "       species island  bill_length_mm  bill_depth_mm  flipper_length_mm  \\\n",
355 |        "169  Chinstrap  Dream            58.0           17.8              181.0   \n",
356 |        "196  Chinstrap  Dream            50.9           17.9              196.0   \n",
357 |        "\n",
358 |        "     body_mass_g     sex  \n",
359 |        "169       3700.0  Female  \n",
360 |        "196       3675.0  Female  "
361 |       ]
362 |      },
363 |      "execution_count": 6,
364 |      "metadata": {},
365 |      "output_type": "execute_result"
366 |     }
367 |    ],
368 |    "source": [
369 |     "# 🟣 using a boolean array 👇\n",
370 |     "df.loc[(df['bill_length_mm'] > 50.5) & (df['sex'] == 'Female')]"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "markdown",
375 |    "metadata": {},
376 |    "source": [
377 |     "##### ❇️ Hope you enjoyed reading!! 📖 \n",
378 |     "##### ❇️ follow → @akshay_pachaar  "
379 |    ]
380 |   },
381 |   {
382 |    "cell_type": "code",
383 |    "execution_count": null,
384 |    "metadata": {},
385 |    "outputs": [],
386 |    "source": []
387 |   }
388 |  ],
389 |  "metadata": {
390 |   "kernelspec": {
391 |    "display_name": "env_twitter",
392 |    "language": "python",
393 |    "name": "env_twitter"
394 |   },
395 |   "language_info": {
396 |    "codemirror_mode": {
397 |     "name": "ipython",
398 |     "version": 3
399 |    },
400 |    "file_extension": ".py",
401 |    "mimetype": "text/x-python",
402 |    "name": "python",
403 |    "nbconvert_exporter": "python",
404 |    "pygments_lexer": "ipython3",
405 |    "version": "3.10.5"
406 |   }
407 |  },
408 |  "nbformat": 4,
409 |  "nbformat_minor": 4
410 | }
411 | 


--------------------------------------------------------------------------------