├── random ├── __init__.py ├── .ipynb_checkpoints │ ├── __init__-checkpoint.py │ ├── web_services-checkpoint.png │ ├── grouped_workers-checkpoint.png │ └── display_util-checkpoint.py ├── page_0.png ├── virat.png ├── messi_vs_nld.png ├── warn_report.pdf ├── web_services.png ├── grouped_workers.png ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── display_util.cpython-38.pyc ├── display_util.py ├── download_youtube.ipynb ├── embeddings.ipynb ├── latexify.ipynb ├── matmul_operator.ipynb ├── shapely_polygon_intersection.ipynb ├── pivot_table_JS.ipynb ├── one_hot_encoding.ipynb ├── tf_decision_forests.ipynb └── python_for_sql.ipynb ├── .DS_Store ├── numpy ├── resources │ ├── cat.png │ └── dog.png ├── argsort_vs_argpartition.ipynb └── numpy_indexing_slicing.ipynb ├── computer_vision ├── lena.png └── squid_games.png ├── PyTorch ├── resources │ ├── .DS_Store │ ├── basic_neural_net.png │ └── computational_graph.png ├── torch_autograd.ipynb └── tensors.ipynb ├── PyTorch_Lightning ├── .DS_Store └── autoencoders.ipynb ├── azure_ML ├── source_dir │ ├── env.yml │ └── score.py └── deployment.ipynb ├── pandas ├── weather_data.csv ├── stocks2.csv ├── bessel_correction.ipynb ├── iterrows_vs_itertuples.ipynb ├── iris.csv ├── df_apply.ipynb ├── assigning_new_columns.ipynb └── df.loc_pandas.ipynb ├── maths_for_ml └── clt_streamlit.py ├── LICENSE ├── NLP ├── conformer_speech2text.ipynb ├── question_answering.ipynb └── tokenization.ipynb ├── CONTRIBUTING.md ├── random_stuff └── audiobook.ipynb ├── README.md ├── LLMs ├── APIChain_LangChain.ipynb ├── llamaindex_101.ipynb ├── langchain.ipynb └── openai_function_calling.ipynb ├── neat-tricks └── mem_cache.ipynb ├── Python └── decorators.ipynb ├── ml_from_scratch └── KNN.ipynb └── unsupervised_learning └── dummy_data_clustering.ipynb /random/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/random/.ipynb_checkpoints/__init__-checkpoint.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/.DS_Store -------------------------------------------------------------------------------- /random/page_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/page_0.png -------------------------------------------------------------------------------- /random/virat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/virat.png -------------------------------------------------------------------------------- /numpy/resources/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/numpy/resources/cat.png -------------------------------------------------------------------------------- /numpy/resources/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/numpy/resources/dog.png -------------------------------------------------------------------------------- /random/messi_vs_nld.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/messi_vs_nld.png -------------------------------------------------------------------------------- /random/warn_report.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/warn_report.pdf -------------------------------------------------------------------------------- /random/web_services.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/web_services.png -------------------------------------------------------------------------------- /computer_vision/lena.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/computer_vision/lena.png -------------------------------------------------------------------------------- /PyTorch/resources/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/PyTorch/resources/.DS_Store -------------------------------------------------------------------------------- /PyTorch_Lightning/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/PyTorch_Lightning/.DS_Store -------------------------------------------------------------------------------- /random/grouped_workers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/grouped_workers.png -------------------------------------------------------------------------------- /computer_vision/squid_games.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/computer_vision/squid_games.png -------------------------------------------------------------------------------- /PyTorch/resources/basic_neural_net.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/PyTorch/resources/basic_neural_net.png -------------------------------------------------------------------------------- /PyTorch/resources/computational_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/PyTorch/resources/computational_graph.png -------------------------------------------------------------------------------- /random/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /random/__pycache__/display_util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/__pycache__/display_util.cpython-38.pyc -------------------------------------------------------------------------------- /random/.ipynb_checkpoints/web_services-checkpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/.ipynb_checkpoints/web_services-checkpoint.png -------------------------------------------------------------------------------- /random/.ipynb_checkpoints/grouped_workers-checkpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patchy631/machine-learning/HEAD/random/.ipynb_checkpoints/grouped_workers-checkpoint.png -------------------------------------------------------------------------------- /azure_ML/source_dir/env.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - anaconda 3 | - conda-forge 4 | dependencies: 5 
| - python=3.6.2 6 | - pip: 7 | - pandas==1.1.5 8 | - azureml-defaults 9 | - joblib==0.17.0 10 | - scikit-learn==0.23.2 -------------------------------------------------------------------------------- /pandas/weather_data.csv: -------------------------------------------------------------------------------- 1 | day,city,temperature,windspeed,event 2 | 1/3/2022,new york,28,12,Snow 3 | 1/3/2022,mumbai,87,15,Fog 4 | 1/4/2022,new york,33,7,Sunny 5 | 1/4/2022,mumbai,92,5,Rain 6 | 1/1/2022,paris,45,20,Sunny 7 | 1/2/2022,paris,50,13,Cloudy 8 | 1/3/2022,paris,54,8,Cloudy -------------------------------------------------------------------------------- /azure_ML/source_dir/score.py: -------------------------------------------------------------------------------- 1 | import json 2 | import joblib 3 | from azureml.core import Model 4 | 5 | 6 | def init(): 7 | global model 8 | model_name = "irisclassifier" 9 | path = Model.get_model_path(model_name) 10 | model = joblib.load(path) 11 | 12 | 13 | def run(data): 14 | try: 15 | data = json.loads(data) 16 | result = model.predict(data["data"]) 17 | return {"data": result.tolist(), "message": "Prediction successful"} 18 | except Exception as e: 19 | return {"data": e, "message": "Failed to predict"} 20 | -------------------------------------------------------------------------------- /pandas/stocks2.csv: -------------------------------------------------------------------------------- 1 | date,open,high,low,close,volume 2 | 2014-06-30,57.5,57.76,57.18,57.44,1314906 3 | 2015-12-17,41.55,41.63,40.77,40.78,2218837 4 | 2017-09-27,64.27,64.66,64.05,64.39,1272526 5 | 2016-11-08,45.48,45.9599,45.32,45.62,2136740 6 | 2014-11-07,41.39,41.48,40.86,40.93,2458734 7 | 2014-04-14,53.19,53.249,52.3075,,2840577 8 | 2017-11-14,66.98,67.8,66.89,67.46,2426247 9 | 2017-04-21,,53.39,52.8399,53.27,2189351 10 | 2014-04-08,54.21,,53.82,54.66,1842491 11 | 2015-01-05,40.32,40.46,39.7,39.8,2042240 12 | 2016-09-16,45.39,45.39,44.74,44.79,2592850 13 | 
2015-04-06,41.68,42.2,41.51,41.93,2379808 14 | 2015-08-06,40.95,40.96,39.91,40.12,1932226 15 | -------------------------------------------------------------------------------- /random/display_util.py: -------------------------------------------------------------------------------- 1 | # This util is just used for Display purpose 2 | import cv2 3 | from matplotlib import pyplot as plt 4 | 5 | 6 | images = [cv2.cvtColor(cv2.imread('grouped_workers.png'), cv2.COLOR_BGR2RGB), 7 | cv2.cvtColor(cv2.imread('web_services.png'), cv2.COLOR_BGR2RGB)] 8 | image_names = ['grouped_workers', 'web_services'] 9 | 10 | 11 | def display(images, image_names, fig_zize): 12 | num_images = len(images) # Maximum number of images to display 13 | num_cols = 2 # Number of columns in display 14 | num_rows = num_images//num_cols # Number of rows in display 15 | plt.figure(figsize=(fig_zize*2, fig_zize*num_cols)) 16 | for i in range(num_rows*num_cols): 17 | 18 | plt.subplot(num_rows, num_cols, i+1) 19 | plt.imshow(images[i], cmap='gray') 20 | plt.title(image_names[i], size=12) 21 | plt.axis('off') -------------------------------------------------------------------------------- /maths_for_ml/clt_streamlit.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | 6 | st.title("Illustrating the Central Limit Theorem") 7 | 8 | # Add a slider for sample size 9 | sample_size = st.slider("Sample Size", min_value=1, max_value=100, value=5) 10 | 11 | perc_heads = st.number_input( 12 | label="Chance of Coins Landing on Heads", min_value=0.0, max_value=1.0, value=0.5 13 | ) 14 | 15 | binom_dist = np.random.binomial(1, perc_heads, 1000) 16 | 17 | list_of_means = [] 18 | 19 | for i in range(0, 1000): 20 | sample = np.random.choice(binom_dist, sample_size, replace=True) 21 | list_of_means.append(sample.mean()) 22 | 23 | # Plotting 24 | fig, ax = plt.subplots() 25 | 
sns.histplot(list_of_means, ax=ax, color="cyan", stat="density") 26 | sns.kdeplot(list_of_means, ax=ax, color="hotpink", lw=2) 27 | st.pyplot(fig) 28 | -------------------------------------------------------------------------------- /random/.ipynb_checkpoints/display_util-checkpoint.py: -------------------------------------------------------------------------------- 1 | # This util is just used for Display purpose 2 | import cv2 3 | from matplotlib import pyplot as plt 4 | 5 | 6 | images = [cv2.cvtColor(cv2.imread('grouped_workers.png'), cv2.COLOR_BGR2RGB), 7 | cv2.cvtColor(cv2.imread('web_services.png'), cv2.COLOR_BGR2RGB)] 8 | image_names = ['grouped_workers', 'web_services'] 9 | 10 | 11 | def display(images, image_names, fig_zize): 12 | num_images = len(images) # Maximum number of images to display 13 | num_cols = 2 # Number of columns in display 14 | num_rows = num_images//num_cols # Number of rows in display 15 | plt.figure(figsize=(fig_zize*2, fig_zize*num_cols)) 16 | for i in range(num_rows*num_cols): 17 | 18 | plt.subplot(num_rows, num_cols, i+1) 19 | plt.imshow(images[i], cmap='gray') 20 | plt.title(image_names[i], size=12) 21 | plt.axis('off') -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 patchy631 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the 
Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NLP/conformer_speech2text.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 🗣 Speech to Text! 🚀 " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import assemblyai as aai\n", 17 | "\n", 18 | "# Replace with your API key\n", 19 | "aai.settings.api_key = \"your_api_key\"\n", 20 | "\n", 21 | "# URL of the file to transcribe\n", 22 | "FILE_URL = \"tinyurl.com/QuantumTheoryMP3\"\n", 23 | "\n", 24 | "# You can also transcribe a local file by passing in a file path\n", 25 | "# FILE_URL = './path/to/file.mp3'\n", 26 | "\n", 27 | "transcriber = aai.Transcriber()\n", 28 | "transcript = transcriber.transcribe(FILE_URL)\n", 29 | "\n", 30 | "if transcript.status == aai.TranscriptStatus.error:\n", 31 | " print(transcript.error)\n", 32 | "else:\n", 33 | " print(transcript.text)" 34 | ] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "env_twitter", 40 | "language": "python", 41 | "name": "env_twitter" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | 
"pygments_lexer": "ipython3", 53 | "version": "3.10.6" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 4 58 | } 59 | -------------------------------------------------------------------------------- /random/download_youtube.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### ❇️ Downloading a YouTube video using Python 🐍 " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pytube" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Ask the user to enter url of YouTube video\n", 26 | "video_url = input('Enter url: ')\n", 27 | "\n", 28 | "# Create an instance of YouTube video\n", 29 | "video_instance = pytube.YouTube(video_url)\n", 30 | "\n", 31 | "stream = video_instance.streams.get_highest_resolution()\n", 32 | "\n", 33 | "# download 🚀 \n", 34 | "stream.download()" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "##### ❇️ Hope you enjoyed reading!! 
📖 \n", 42 | "##### ❇️ follow → @akshay_pachaar " 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [] 51 | } 52 | ], 53 | "metadata": { 54 | "kernelspec": { 55 | "display_name": "env_twitter", 56 | "language": "python", 57 | "name": "env_twitter" 58 | }, 59 | "language_info": { 60 | "codemirror_mode": { 61 | "name": "ipython", 62 | "version": 3 63 | }, 64 | "file_extension": ".py", 65 | "mimetype": "text/x-python", 66 | "name": "python", 67 | "nbconvert_exporter": "python", 68 | "pygments_lexer": "ipython3", 69 | "version": "3.10.5" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 4 74 | } 75 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributing to Machine Learning Tutorials Repository 3 | 4 | First off, thank you for considering contributing to this repository! Your contributions are what make the open-source community such an amazing place to be, learn, and grow. 5 | 6 | ## How to Contribute? 7 | 8 | 1. **Fork the Repository**: 9 | - Click on the 'Fork' button at the top right corner of the repo page. 10 | - This will create a copy of this repository in your account. 11 | 12 | 2. **Clone the Forked Repository**: 13 | ```bash 14 | git clone https://github.com/YOUR_USERNAME/machine-learning 15 | ``` 16 | 17 | 3. **Navigate to the Repository**: 18 | ```bash 19 | cd machine-learning 20 | ``` 21 | 22 | 4. **Create a New Branch**: 23 | ```bash 24 | git checkout -b YOUR_BRANCH_NAME 25 | ``` 26 | 27 | 5. **Make Necessary Changes**: Implement your feature or bugfix. 28 | 29 | 6. **Commit Your Changes**: 30 | ```bash 31 | git add . 32 | git commit -m "Add some feature or fix a bug" 33 | ``` 34 | 35 | 7. **Push to the Branch**: 36 | ```bash 37 | git push origin YOUR_BRANCH_NAME 38 | ``` 39 | 40 | 8. 
**Open a Pull Request**: 41 | - Go to the repository in your account. 42 | - Click on the 'Pull Request' button. 43 | - Click on 'New Pull Request'. 44 | 45 | 9. **Ensure Your PR is Up-to-Date with Upstream Master**: 46 | - Ensure your branch is rebased to the latest `master` branch from upstream. 47 | ```bash 48 | git fetch upstream 49 | git rebase upstream/master 50 | ``` 51 | 52 | 10. **Describe Your PR**: 53 | - Give your pull request a meaningful title. 54 | - Provide a description of the changes you are making. Reference the issue(s) your PR resolves. 55 | 56 | ## Need Help? 57 | 58 | If you have any questions or need help with the process, please feel free to contact me on Twitter: [@akshay_pachaar](https://twitter.com/akshay_pachaar). 59 | 60 | Thank you for your contribution! 61 | -------------------------------------------------------------------------------- /random_stuff/audiobook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "9bf0dbd6", 6 | "metadata": {}, 7 | "source": [ 8 | "#### Create your own `Audiobook` 🎧 🚀 " 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "9ec08fba", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pdfplumber as pp\n", 19 | "from gtts import gTTS" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "id": "71b77cfa-30a4-48c3-8433-115925f6d5e2", 25 | "metadata": {}, 26 | "source": [ 27 | "##### 1️⃣ Extract text from pdf" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "id": "605fc3ed-cbb8-48c6-9776-b701eba4cb5a", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "pdf_text = ''\n", 38 | "\n", 39 | "with pp.open('attention_is_all_you_need.pdf') as pdf:\n", 40 | " for page in pdf.pages:\n", 41 | " pdf_text += page.extract_text()" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "id": 
"db3e6277-79b5-4b13-bcc4-3cb43b621fd1", 47 | "metadata": {}, 48 | "source": [ 49 | "##### 2️⃣ Convert extracted text to speech" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "id": "cdda44ac-8a0f-473e-abe2-29d4409dc2de", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "tts = gTTS(text=pdf_text, lang='en')\n", 60 | "tts.save('audio_book.mp3')" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "id": "df9e8880-a53c-4f00-8184-4106db3bf567", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "env_twitter", 75 | "language": "python", 76 | "name": "env_twitter" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.10.6" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 5 93 | } 94 | -------------------------------------------------------------------------------- /random/embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## `Similarity Search` using `NumPy!` 🚀 " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "Index of the nearest tweet: 0\n", 20 | "Embedding of the nearest tweet: [1 2 3]\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "import numpy as np\n", 26 | "\n", 27 | "\n", 28 | "# Query tweet\n", 29 | "query_tweet = np.array([1, 2, 3])\n", 30 | "\n", 31 | "# A database of all the tweets stored in form of embeddings\n", 32 | "vector_database = np.array([[1, 2, 3],\n", 33 | " [4, 5, 
6],\n", 34 | " [7, 8, 9],\n", 35 | " [2, 3, 4],\n", 36 | " [5, 6, 7]])\n", 37 | "\n", 38 | "\n", 39 | "\n", 40 | "# Normalize the matrix M and the target vector v\n", 41 | "vector_database_norm = vector_database / np.linalg.norm(vector_database, axis=1, keepdims=True)\n", 42 | "query_tweet_norm = query_tweet / np.linalg.norm(query_tweet)\n", 43 | "\n", 44 | "# Compute the dot product of the normalized matrix M and the normalized target vector v\n", 45 | "dot_product = np.dot(vector_database_norm, query_tweet_norm)\n", 46 | "\n", 47 | "# Find the index of the nearest to query tweet in the vector data base\n", 48 | "nearest_vector_index = np.argmax(dot_product)\n", 49 | "\n", 50 | "print(\"Index of the nearest tweet:\", nearest_vector_index)\n", 51 | "print(\"Embedding of the nearest tweet:\", vector_database[nearest_vector_index])" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [] 60 | } 61 | ], 62 | "metadata": { 63 | "kernelspec": { 64 | "display_name": "env_twitter", 65 | "language": "python", 66 | "name": "env_twitter" 67 | }, 68 | "language_info": { 69 | "codemirror_mode": { 70 | "name": "ipython", 71 | "version": 3 72 | }, 73 | "file_extension": ".py", 74 | "mimetype": "text/x-python", 75 | "name": "python", 76 | "nbconvert_exporter": "python", 77 | "pygments_lexer": "ipython3", 78 | "version": "3.10.6" 79 | } 80 | }, 81 | "nbformat": 4, 82 | "nbformat_minor": 4 83 | } 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # 🤖 Machine Learning Tutorials Repository 🤖 3 | 4 | Welcome to the **Machine Learning Tutorials Repository**! This is the go-to spot for all the code associated with my Twitter tutorials. If you're passionate about diving deep into the realms of Machine Learning and exploring various topics, you're in the right place! 
5 | 6 | ## Star history 🌟 7 | 8 | [![Star History Chart](https://api.star-history.com/svg?repos=patchy631/machine-learning&type=Date)](https://star-history.com/#patchy631/machine-learning&Date) 9 | 10 | ## 📘 Topics Covered 11 | 12 | 1. 🐍 **Python**: The core language for almost all things Machine Learning. 13 | 2. 🖼️ **Computer Vision**: Techniques, algorithms, and methods to give machines the ability to see and interpret visual data. 14 | 3. 📜 **NLP (Natural Language Processing)**: Delve into the world of words and understand how machines can comprehend, interpret, and respond to human languages. 15 | 4. 📊 **Matplotlib**: Visualize your data and results with one of the most popular plotting libraries. 16 | 5. 🔢 **NumPy**: Master the art of numerical computing with Python. 17 | 6. 🐼 **Pandas**: The ultimate tool for data analysis in Python. 18 | 7. 🚀 **MLOps**: Learn about the best practices, tools, and services to manage end-to-end ML lifecycle. 19 | 8. 🧠 **LLMs (Large Language Models)**: Dive deep into state-of-the-art models that understand and generate human-like text. 20 | 9. 🔥 **PyTorch/TensorFlow**: Get to grips with the two dominant deep learning frameworks. 21 | 22 | ## 🚀 Getting Started 23 | 24 | 1. **Clone the Repository**: 25 | ```bash 26 | git clone https://github.com/patchy631/machine-learning 27 | ``` 28 | 2. **Navigate to the Repository**: 29 | ```bash 30 | cd machine-learning 31 | ``` 32 | 3. **Install Required Libraries**: 33 | ```bash 34 | pip install -r requirements.txt 35 | ``` 36 | 37 | > Note: Make sure you have Python installed on your system. If not, download and install [Python](https://www.python.org/downloads/). 38 | 39 | ## 🤝 Contribution Guidelines 40 | 41 | Contributions are always welcome! Whether it's fixing bugs, improving documentation, or adding new tutorials, your efforts will be appreciated. Please ensure you follow the contribution guidelines outlined in `CONTRIBUTING.md`. 
42 | 43 | ## 📱 Connect with Me 44 | 45 | Follow me on Twitter for more tutorials and updates: [@akshay_pachaar](https://twitter.com/akshay_pachaar) 46 | 47 | ## 📜 License 48 | 49 | This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. 50 | -------------------------------------------------------------------------------- /LLMs/APIChain_LangChain.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Introducing `ApiChain`! 🚀 " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 5, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "from langchain.chains import APIChain\n", 18 | "from langchain.chat_models import ChatOpenAI\n", 19 | "from IPython.display import Markdown" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 6, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# Set environment variables\n", 29 | "os.environ['OPENAI_API_KEY'] = '...'" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "user_message = 'Whats is the weather like in New Delhi today?'" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 11, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/markdown": [ 49 | "The weather in New Delhi today is moderate to heavy rain with thunder, with a temperature of 25 degrees Celsius (77 degrees Fahrenheit) and high humidity." 
50 | ], 51 | "text/plain": [ 52 | "" 53 | ] 54 | }, 55 | "execution_count": 11, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "llm = ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\", max_tokens=256, verbose=True)\n", 62 | "\n", 63 | "apiSpec = \"\"\"API documentation:\n", 64 | "Base URL:f\"http://api.weatherapi.com/\n", 65 | "Endpoint: /weather\n", 66 | "Example API call: http://api.weatherapi.com/v1/current.json?\\\n", 67 | "key=**your_api_key**&q=Delhi&aqi=no'\n", 68 | "\n", 69 | "This API is for retrieving weather information based on city name\n", 70 | "\n", 71 | "Request GET \n", 72 | "Query Parameter Name\tFormat\tRequired\tDescription\n", 73 | "q\tString\tYes\tName of city for which we want weather info\n", 74 | "\n", 75 | "INSTRUCTIONS FOR RESPONDING\n", 76 | "Respond in Natural Language\n", 77 | "\"\"\"\n", 78 | "\n", 79 | "chain = APIChain.from_llm_and_api_docs(llm, apiSpec, verbose=False)\n", 80 | "response = chain.run(user_message)\n", 81 | "Markdown(response)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [] 90 | } 91 | ], 92 | "metadata": { 93 | "kernelspec": { 94 | "display_name": "env_twitter", 95 | "language": "python", 96 | "name": "env_twitter" 97 | }, 98 | "language_info": { 99 | "codemirror_mode": { 100 | "name": "ipython", 101 | "version": 3 102 | }, 103 | "file_extension": ".py", 104 | "mimetype": "text/x-python", 105 | "name": "python", 106 | "nbconvert_exporter": "python", 107 | "pygments_lexer": "ipython3", 108 | "version": "3.10.6" 109 | } 110 | }, 111 | "nbformat": 4, 112 | "nbformat_minor": 4 113 | } 114 | -------------------------------------------------------------------------------- /pandas/bessel_correction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | 
"#### ❇️ Introducing `Bessel's correction` 🚀 " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "data = [1, 2, 3, 4, 5]" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "1.5811388300841898" 38 | ] 39 | }, 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "# Calculate the standard deviation using Pandas\n", 47 | "df = pd.DataFrame(data)\n", 48 | "float(df.std())" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "1.4142135623730951" 60 | ] 61 | }, 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "# Calculate the standard deviation using NumPy\n", 69 | "arr = np.array(data)\n", 70 | "np.std(arr)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "##### How can you make them return same value ❓\n", 78 | "To make both return the same values, you can specify
the `ddof` parameter in both Pandas and NumPy
to either `1` (unbiased, i.e. with Bessel's correction) or `0` (biased)." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "1.4142135623730951" 90 | ] 91 | }, 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "float(df.std(ddof=0))" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 6, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "1.4142135623730951" 110 | ] 111 | }, 112 | "execution_count": 6, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "np.std(arr, ddof=0)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "env_twitter", 132 | "language": "python", 133 | "name": "env_twitter" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.10.6" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 4 150 | } 151 | -------------------------------------------------------------------------------- /random/latexify.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "##### ❇️ Latex for Python 🐍 " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import math\n", 17 | "import latexify" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | 
"metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/latex": [ 28 | "$$ \\displaystyle \\mathrm{solve}(a, b, c)\\triangleq \\frac{-b + \\sqrt{b^{2} - 4ac}}{2a} $$" 29 | ], 30 | "text/plain": [ 31 | "._LatexifiedFunction at 0x7fe8c0569cd0>" 32 | ] 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "@latexify.with_latex\n", 41 | "def solve(a, b, c):\n", 42 | " return (-b + math.sqrt(b**2 - 4*a*c)) / (2*a)\n", 43 | "solve" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/latex": [ 54 | "$$ \\displaystyle \\mathrm{fib}(x)\\triangleq \\left\\{ \\begin{array}{ll} 1, & \\mathrm{if} \\ x=0 \\\\ 1, & \\mathrm{if} \\ x=1 \\\\ \\mathrm{fib}\\left(x - 1\\right) + \\mathrm{fib}\\left(x - 2\\right), & \\mathrm{otherwise} \\end{array} \\right. $$" 55 | ], 56 | "text/plain": [ 57 | "._LatexifiedFunction at 0x7fe8c0685340>" 58 | ] 59 | }, 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "# Automatically unrolls Elif or nested else-if ⬇️ 💥 \n", 67 | "\n", 68 | "@latexify.with_latex\n", 69 | "def fib(x):\n", 70 | " if x == 0:\n", 71 | " return 1\n", 72 | " elif x == 1:\n", 73 | " return 1\n", 74 | " else:\n", 75 | " return fib(x-1) + fib(x-2)\n", 76 | "fib" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 4, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/latex": [ 87 | "$$ \\displaystyle \\mathrm{greek}({\\alpha}, {\\beta}, {\\gamma}, {\\Omega})\\triangleq {\\alpha}{\\beta} + \\Gamma\\left({{\\gamma}}\\right) + {\\Omega} $$" 88 | ], 89 | "text/plain": [ 90 | "._LatexifiedFunction at 0x7fe8c0569f10>" 91 | ] 92 | }, 93 | "execution_count": 4, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "# Some math symbols are converted 
automatically.\n", 100 | "@latexify.with_latex\n", 101 | "def greek(alpha, beta, gamma, Omega):\n", 102 | " return alpha * beta + math.gamma(gamma) + Omega\n", 103 | "greek" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "##### ❇️ Hope you enjoyed reading!! 📖 \n", 111 | "##### ❇️ follow → @akshay_pachaar " 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "env_mld", 125 | "language": "python", 126 | "name": "env_mld" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.8.5" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 4 143 | } 144 | -------------------------------------------------------------------------------- /neat-tricks/mem_cache.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### ❇️ `Memory Cache`: Lazy evaluation of functions" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import time\n", 17 | "from joblib import Memory" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "cachedir = 'cache'\n", 27 | "mem = Memory(cachedir, verbose=0)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "@mem.cache\n", 37 | "def f(x):\n", 38 | " print('I am slow like a sloth... 
🦥 ')\n", 39 | " time.sleep(10)\n", 40 | " return x" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 4, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "I am slow like a sloth... 🦥 \n", 53 | "CPU times: user 109 ms, sys: 31.8 ms, total: 141 ms\n", 54 | "Wall time: 10 s\n" 55 | ] 56 | }, 57 | { 58 | "data": { 59 | "text/plain": [ 60 | "1" 61 | ] 62 | }, 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": [ 69 | "%%time\n", 70 | "f(1)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "CPU times: user 1.1 ms, sys: 1.01 ms, total: 2.11 ms\n", 83 | "Wall time: 1.3 ms\n" 84 | ] 85 | }, 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "1" 90 | ] 91 | }, 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "%%time\n", 99 | "f(1)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "Observe that nothing is printed & function executes quickly
when you call f() with same argument again❗️ " 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Let's call f() with a different argument ⬇️ " 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "I am slow like a sloth... 🦥 \n", 126 | "CPU times: user 3.71 ms, sys: 3 ms, total: 6.71 ms\n", 127 | "Wall time: 10 s\n" 128 | ] 129 | }, 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "2" 134 | ] 135 | }, 136 | "execution_count": 6, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "%%time\n", 143 | "f(2)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [] 152 | } 153 | ], 154 | "metadata": { 155 | "kernelspec": { 156 | "display_name": "env_twitter", 157 | "language": "python", 158 | "name": "env_twitter" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | "file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.10.5" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 4 175 | } 176 | -------------------------------------------------------------------------------- /pandas/iterrows_vs_itertuples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### ❇️ Iterrows 🤜 🤛 Itertuples " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | 
"execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Reading from a csv\n", 26 | "df_penguins = pd.read_csv('penguins.csv')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "0\n", 39 | "_______________________________\n", 40 | "species Adelie\n", 41 | "island Torgersen\n", 42 | "bill_length_mm 39.1\n", 43 | "bill_depth_mm 18.7\n", 44 | "flipper_length_mm 181.0\n", 45 | "body_mass_g 3750.0\n", 46 | "sex Male\n", 47 | "Name: 0, dtype: object\n", 48 | "__________time taken___________\n", 49 | "CPU times: user 12.6 ms, sys: 1.29 ms, total: 13.9 ms\n", 50 | "Wall time: 16 ms\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "%%time\n", 56 | "# 🔴 iterrows: Iterate over DataFrame rows as (index, Series) pairs.\n", 57 | "index_series_pairs = [tup for tup in df_penguins.iterrows()]\n", 58 | "idx, series = index_series_pairs[0]\n", 59 | "print(idx)\n", 60 | "print('_______________________________')\n", 61 | "print(series)\n", 62 | "print('__________time taken___________')" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Pandas(Index=0, species='Adelie', island='Torgersen', bill_length_mm=39.1, bill_depth_mm=18.7, flipper_length_mm=181.0, body_mass_g=3750.0, sex='Male')\n", 75 | "__________time taken___________\n", 76 | "CPU times: user 1.36 ms, sys: 341 µs, total: 1.7 ms\n", 77 | "Wall time: 1.48 ms\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "%%time\n", 83 | "# 🟡 itertuples: Iterate over DataFrame rows as namedtuples.\n", 84 | "# Wondering what a namedtuple is❓ I have added a tweet👇 in the thread 🧵 , don't worry 🙌\n", 85 | "named_tuples = [tup for tup in df_penguins.itertuples()]\n", 86 | "print(named_tuples[0])\n", 87 | "print('__________time 
taken___________')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "#### ❇️ Concluding remarks 👇 \n", 95 | "##### Although it's an anti-pattern to iterate over dataframe rows in this manner\n", 96 | "##### (We will see better techniques in a future post, stay tuned!)\n", 97 | "##### 👉 But, given the choice, itertuples is way faster 🏎💨 than iterrows because iterrows has to typecast each\n", 98 | "##### row into a pandas Series, which is an overhead.\n", 99 | "#### ❇️ Cheers!! 🍻 " 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [] 108 | } 109 | ], 110 | "metadata": { 111 | "kernelspec": { 112 | "display_name": "env_twitter", 113 | "language": "python", 114 | "name": "env_twitter" 115 | }, 116 | "language_info": { 117 | "codemirror_mode": { 118 | "name": "ipython", 119 | "version": 3 120 | }, 121 | "file_extension": ".py", 122 | "mimetype": "text/x-python", 123 | "name": "python", 124 | "nbconvert_exporter": "python", 125 | "pygments_lexer": "ipython3", 126 | "version": "3.10.4" 127 | } 128 | }, 129 | "nbformat": 4, 130 | "nbformat_minor": 5 131 | } 132 | -------------------------------------------------------------------------------- /numpy/argsort_vs_argpartition.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### ❇️ argsort 🤜 🤛 argpartition" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Let's say we want the smallest 3 numbers in array 👇 \n", 26 | "# in any order\n", 27 | "arr = np.array([4, 5, 6, 7, 1, 2, 3])" 28 | ] 29 | }, 30 | { 31 | 
"cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "#### ❇️ Method 1: argsort" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "indices: [4 5 6 0 1 2 3]\n", 47 | "smallest 3 elements: [1 2 3]\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "# argsort, Returns the indices that would sort an array.\n", 53 | "idx = np.argsort(arr)\n", 54 | "print(f'indices: {idx}')\n", 55 | "print(f'smallest 3 elements: {arr[idx[:3]]}')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "#### ❇️ Method 2: argpartition" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "indices: [4 6 5 0 1 2 3]\n", 75 | "smallest 3 elements: [1 3 2]; ⬅️ notice the order\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "# It takes an array and the element index (kth) to partiotion by\n", 81 | "# The k-th element will be in its final sorted position and all smaller \n", 82 | "# elements will be moved before it and all larger elements behind it.\n", 83 | "# order of all the elements in each partition is undefined.\n", 84 | "idx = np.argpartition(a = arr, kth = 3)\n", 85 | "print(f'indices: {idx}')\n", 86 | "print(f'smallest 3 elements: {arr[idx[:3]]}; ⬅️ notice the order')" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "#### ❇️ Why use argpartition ⁉️\n", 94 | "##### ❇️ If the order in is not necessary then argpartition is faster 🏎💨 " 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 5, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "1.53 ms ± 24.6 µs per loop (mean ± std. dev. 
of 7 runs, 1,000 loops each)\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "%%timeit\n", 112 | "xs = np.random.normal(size=56000)\n", 113 | "np.argpartition(xs, 10)[:10]\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "4.76 ms ± 37.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "%%timeit\n", 131 | "xs = np.random.normal(size=56000)\n", 132 | "np.argsort(xs)[:10]" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "#### ❇️ Cheers!! 🍺 " 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [] 148 | } 149 | ], 150 | "metadata": { 151 | "kernelspec": { 152 | "display_name": "env_twitter", 153 | "language": "python", 154 | "name": "env_twitter" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 3 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython3", 166 | "version": "3.10.5" 167 | } 168 | }, 169 | "nbformat": 4, 170 | "nbformat_minor": 4 171 | } 172 | -------------------------------------------------------------------------------- /Python/decorators.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### `Decorators` clearly explained! 🚀 " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "Hello, Python! 
You are awesome!\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "# Functions are first class objects!\n", 25 | "\n", 26 | "def greet(name):\n", 27 | " return f'Hello, {name}!'\n", 28 | "\n", 29 | "def cheer(fun, name):\n", 30 | " return fun(name) + ' You are awesome!'\n", 31 | " \n", 32 | "print(cheer(greet, 'Python'))" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 5, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "Before function call\n", 45 | "Hello, World!\n", 46 | "After function call\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "def decorate(fun):\n", 52 | " def wrapper():\n", 53 | " print(\"Before function call\")\n", 54 | " fun()\n", 55 | " print(\"After function call\")\n", 56 | " return wrapper\n", 57 | "\n", 58 | "def greet():\n", 59 | " print(\"Hello, World!\")\n", 60 | "\n", 61 | "greet = decorate(greet)\n", 62 | "greet()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 11, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Before function call\n", 75 | "Hello, Akshay!\n", 76 | "After function call\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "def decorate(fun):\n", 82 | " def wrapper(arg):\n", 83 | " print(\"Before function call\")\n", 84 | " fun(arg)\n", 85 | " print(\"After function call\")\n", 86 | " return wrapper\n", 87 | "\n", 88 | "@decorate\n", 89 | "def greet(name):\n", 90 | " print(f\"Hello, {name}!\")\n", 91 | "\n", 92 | "greet('Akshay')" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 9, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stderr", 102 | "output_type": "stream", 103 | "text": [ 104 | "INFO:root:Executed fibonacci in 9.5367431640625e-07 seconds\n", 105 | "INFO:root:Executed fibonacci in 7.152557373046875e-07 seconds\n", 106 | "INFO:root:Executed fibonacci in 0.0016901493072509766 seconds\n" 107 | ] 
108 | }, 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "1\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "import time\n", 119 | "import logging\n", 120 | "\n", 121 | "logging.basicConfig(level=logging.INFO)\n", 122 | "\n", 123 | "def timer_decorator(func):\n", 124 | " def wrapper(*args, **kwargs):\n", 125 | " start_time = time.time()\n", 126 | " result = func(*args, **kwargs)\n", 127 | " end_time = time.time()\n", 128 | " execution_time = end_time - start_time\n", 129 | " logging.info(f\"Executed {func.__name__} in {execution_time} seconds\")\n", 130 | " return result\n", 131 | " return wrapper\n", 132 | "\n", 133 | "@timer_decorator\n", 134 | "def fibonacci(n):\n", 135 | " if n <= 1:\n", 136 | " return n\n", 137 | " else:\n", 138 | " return (fibonacci(n-1) + fibonacci(n-2))\n", 139 | "\n", 140 | "print(fibonacci(2))" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [] 149 | } 150 | ], 151 | "metadata": { 152 | "kernelspec": { 153 | "display_name": "env_twitter", 154 | "language": "python", 155 | "name": "env_twitter" 156 | }, 157 | "language_info": { 158 | "codemirror_mode": { 159 | "name": "ipython", 160 | "version": 3 161 | }, 162 | "file_extension": ".py", 163 | "mimetype": "text/x-python", 164 | "name": "python", 165 | "nbconvert_exporter": "python", 166 | "pygments_lexer": "ipython3", 167 | "version": "3.10.6" 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 4 172 | } 173 | -------------------------------------------------------------------------------- /random/matmul_operator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "##### ❇️ Python matmul operator @" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | 
"source": [ 16 | "import numpy as np\n", 17 | "import tensorflow as tf" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "arr1 = np.random.rand(3, 3)\n", 27 | "arr2 = np.random.rand(3, 3)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "##### ❇️ Matrix multiplication before Python 3.5" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/plain": [ 45 | "array([[0.57104228, 0.6249187 , 0.75554827],\n", 46 | " [0.84227088, 0.52005673, 0.96547005],\n", 47 | " [1.51418666, 1.04428351, 1.77612534]])" 48 | ] 49 | }, 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "np.matmul(arr1, arr2)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "" 71 | ] 72 | }, 73 | "execution_count": 4, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "tf.linalg.matmul(arr1, arr2)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "##### ❇️ Matrix multiplication after Python 3.5" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "array([[0.57104228, 0.6249187 , 0.75554827],\n", 98 | " [0.84227088, 0.52005673, 0.96547005],\n", 99 | " [1.51418666, 1.04428351, 1.77612534]])" 100 | ] 101 | }, 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "arr1@arr2" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "##### ❇️ Matmul operator can be defined for any class using 
__ __matmul__ __ dunder." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 6, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "class CustomInt:\n", 125 | " def __init__(self, val):\n", 126 | " self.val = val\n", 127 | " def __matmul__(self, input_int):\n", 128 | " return self.val**input_int.val" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 7, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "8" 140 | ] 141 | }, 142 | "execution_count": 7, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "int_1, int_2 = CustomInt(2), CustomInt(3)\n", 149 | "int_1@int_2" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "##### ❇️ Hope you enjoyed reading!! 📖 \n", 157 | "##### ❇️ follow → @akshay_pachaar " 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [] 166 | } 167 | ], 168 | "metadata": { 169 | "kernelspec": { 170 | "display_name": "env_3D", 171 | "language": "python", 172 | "name": "env_3d" 173 | }, 174 | "language_info": { 175 | "codemirror_mode": { 176 | "name": "ipython", 177 | "version": 3 178 | }, 179 | "file_extension": ".py", 180 | "mimetype": "text/x-python", 181 | "name": "python", 182 | "nbconvert_exporter": "python", 183 | "pygments_lexer": "ipython3", 184 | "version": "3.6.13" 185 | } 186 | }, 187 | "nbformat": 4, 188 | "nbformat_minor": 4 189 | } 190 | -------------------------------------------------------------------------------- /random/shapely_polygon_intersection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "##### ❇️ Finding intersection between polygons and it's area" 8 | ] 9 | }, 10 | { 11 | "cell_type": 
"code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from shapely.geometry import Polygon" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "Note: the polygon can be of any arbitrary shape
\n", 24 | "Here we have consider simpler cases for demo" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "Area1: 1.0\n" 37 | ] 38 | }, 39 | { 40 | "data": { 41 | "image/svg+xml": [ 42 | "" 43 | ], 44 | "text/plain": [ 45 | "" 46 | ] 47 | }, 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "polygon1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])\n", 55 | "print(f'Area1: {polygon1.area}')\n", 56 | "polygon1" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "Area2: 0.5\n" 69 | ] 70 | }, 71 | { 72 | "data": { 73 | "image/svg+xml": [ 74 | "" 75 | ], 76 | "text/plain": [ 77 | "" 78 | ] 79 | }, 80 | "execution_count": 3, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "polygon2 = Polygon([(0, 0), (1, 0), (1, 1), (0, 0)])\n", 87 | "print(f'Area2: {polygon2.area}')\n", 88 | "polygon2" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "Area of intersection: 0.5\n" 101 | ] 102 | }, 103 | { 104 | "data": { 105 | "image/svg+xml": [ 106 | "" 107 | ], 108 | "text/plain": [ 109 | "" 110 | ] 111 | }, 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "intersection = polygon1.intersection(polygon2)\n", 119 | "print(f'Area of intersection: {intersection.area}')\n", 120 | "intersection" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "##### ❇️ Hope you enjoyed reading!! 
📖 \n", 128 | "##### ❇️ follow → @akshay_pachaar " 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [] 137 | } 138 | ], 139 | "metadata": { 140 | "kernelspec": { 141 | "display_name": "env_poi", 142 | "language": "python", 143 | "name": "env_poi" 144 | }, 145 | "language_info": { 146 | "codemirror_mode": { 147 | "name": "ipython", 148 | "version": 3 149 | }, 150 | "file_extension": ".py", 151 | "mimetype": "text/x-python", 152 | "name": "python", 153 | "nbconvert_exporter": "python", 154 | "pygments_lexer": "ipython3", 155 | "version": "3.8.13" 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 4 160 | } 161 | -------------------------------------------------------------------------------- /PyTorch/torch_autograd.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### ❇️ Automatic differentiation in PyTorch" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Back propagation is widely used for training neural nets. During back propagation we adjust model
parameters (weights & biases) based on the gradient of the loss function w.r.t. the given parameter.
∂loss/∂w (gradient w.r.t. weight w); ∂loss/∂b (gradient w.r.t. bias b)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "##### 🔴 Let's consider the simplest neural net with 3 inputs and 2 outputs 👇 " 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "![](./resources/basic_neural_net.png)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "##### 🟡 Let's code ⬆️ this using PyTorch ⬇️ " 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "import torch\n", 45 | "\n", 46 | "x = torch.ones(3) # input tensor\n", 47 | "y = torch.zeros(2) # expected output\n", 48 | "# Notice the use of requires_grad = True ⬇️ \n", 49 | "w = torch.randn(3, 2, requires_grad=True) # weights \n", 50 | "b = torch.randn(2, requires_grad=True) # biases \n", 51 | "z = torch.matmul(x, w)+b # output\n", 52 | "loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "The above ⬆️ code represents the following ⬇️ computational graph
\n", 60 | "In this graph, w and b are parameters, which we need to optimize.
\n", 61 | "Thus, we need to be able to compute the gradients of the loss function with respect to those variables.
\n", 62 | "In order to do that, we set the requires_grad property of those tensors.
" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "![](./resources/computational_graph.png)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "##### 🟢 grad_fn:\n", 77 | "grad_fn is an object of class Function that is applied to tensors to construct computational graph ⬆️ .
This object knows how to compute the function in the forward direction, and also how to compute its
derivative during the backward propagation step.
grad_fn becomes a property of a tensor. Check this out 👇 " 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 6, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "grad_function for z = \n", 90 | "grad_function for loss = \n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "print(f\"grad_function for z = {z.grad_fn}\")\n", 96 | "print(f\"grad_function for loss = {loss.grad_fn}\")" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "##### 🔵 Computing Gradients\n", 104 | "To optimize the weights (w) and biases (b) of our neural network, we need to compute ∂loss/∂w (gradient of loss w.r.t. weight w) and
∂loss/∂b (gradient of loss w.r.t. bias b) under some fixed values of x and y.
\n", 105 | "This is how we do it 👇 " 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 3, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "tensor([[0.2980, 0.4528],\n", 118 | " [0.2980, 0.4528],\n", 119 | " [0.2980, 0.4528]])\n", 120 | "tensor([0.2980, 0.4528])\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "# loss.backward() Computes the gradient of current tensor w.r.t. graph leaves.\n", 126 | "# In the graph we see that the leaves are w and b (ones for which requires_grad = True)\n", 127 | "loss.backward()\n", 128 | "print(w.grad)\n", 129 | "print(b.grad)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [] 138 | } 139 | ], 140 | "metadata": { 141 | "kernelspec": { 142 | "display_name": "env_torch", 143 | "language": "python", 144 | "name": "env_torch" 145 | }, 146 | "language_info": { 147 | "codemirror_mode": { 148 | "name": "ipython", 149 | "version": 3 150 | }, 151 | "file_extension": ".py", 152 | "mimetype": "text/x-python", 153 | "name": "python", 154 | "nbconvert_exporter": "python", 155 | "pygments_lexer": "ipython3", 156 | "version": "3.6.13" 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 4 161 | } 162 | -------------------------------------------------------------------------------- /NLP/question_answering.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "924e7dcf-fa8c-491c-99d5-a95b49e83094", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import warnings\n", 11 | "warnings.filterwarnings('ignore')" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "id": "6854adc2-d7b1-4524-960d-f4f33140277e", 17 | "metadata": {}, 18 | "source": [ 19 | "### 🔘 Question Answering using `HuggingFace` 🤗 " 20 | ] 21 
| }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "id": "bec8941f-537e-4cbf-9750-bb283b21c5af", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "from transformers import pipeline" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "id": "d3e0c1d1-91a5-4e12-a309-262f1fb1377a", 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# Provide the text over which we will ask questions\n", 40 | "\n", 41 | "text = \"Leo Messi, is an Argentine professional footballer who \\\n", 42 | " plays as a forward for Paris Saint-Germain and captains \\\n", 43 | " the Argentina national team. Widely regarded as one of \\\n", 44 | " the greatest players of all time, Messi has won a record \\\n", 45 | " seven Ballon d'Or awards\"" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "id": "f2f1a75f-8863-4414-90ae-2f5eeac25527", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# Setup the pipeline for question answering\n", 56 | "reader = pipeline(task = \"question-answering\", \n", 57 | " model = \"distilbert-base-cased-distilled-squad\")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 5, 63 | "id": "b2a69df0-154c-48fa-88aa-2ef34757583e", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# Question that we want to ask\n", 68 | "question = \"Who is the GOAT of football?\"" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 6, 74 | "id": "5aa2c4d8-9ac3-4d0a-8bb9-907f70809b61", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "outputs = reader(question=question, context=text)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 7, 84 | "id": "3f45f111-0d66-4944-9b9b-4d029e00feeb", 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/html": [ 90 | "
\n", 91 | "\n", 104 | "\n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | "
scorestartendanswer
00.91952409Leo Messi
\n", 124 | "
" 125 | ], 126 | "text/plain": [ 127 | " score start end answer\n", 128 | "0 0.919524 0 9 Leo Messi" 129 | ] 130 | }, 131 | "execution_count": 7, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "# Put the results in a DataFrame\n", 138 | "\n", 139 | "import pandas as pd\n", 140 | "pd.DataFrame([outputs])" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "id": "0ce27d9c-65b7-4359-90ab-8db8a897fc6b", 146 | "metadata": {}, 147 | "source": [ 148 | "And we know who is the GOAT!! 😀" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "id": "8747eac2-842d-4b97-b83e-f10b2134db87", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [] 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "env_twitter", 163 | "language": "python", 164 | "name": "env_twitter" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 3 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython3", 176 | "version": "3.10.5" 177 | } 178 | }, 179 | "nbformat": 4, 180 | "nbformat_minor": 5 181 | } 182 | -------------------------------------------------------------------------------- /pandas/iris.csv: -------------------------------------------------------------------------------- 1 | SepalLength,SepalWidth,PetalLength,PetalWidth,Name 2 | 5.1,3.5,1.4,0.2,Iris-setosa 3 | 4.9,3.0,1.4,0.2,Iris-setosa 4 | 4.7,3.2,1.3,0.2,Iris-setosa 5 | 4.6,3.1,1.5,0.2,Iris-setosa 6 | 5.0,3.6,1.4,0.2,Iris-setosa 7 | 5.4,3.9,1.7,0.4,Iris-setosa 8 | 4.6,3.4,1.4,0.3,Iris-setosa 9 | 5.0,3.4,1.5,0.2,Iris-setosa 10 | 4.4,2.9,1.4,0.2,Iris-setosa 11 | 4.9,3.1,1.5,0.1,Iris-setosa 12 | 5.4,3.7,1.5,0.2,Iris-setosa 13 | 4.8,3.4,1.6,0.2,Iris-setosa 14 | 4.8,3.0,1.4,0.1,Iris-setosa 15 | 4.3,3.0,1.1,0.1,Iris-setosa 16 | 
5.8,4.0,1.2,0.2,Iris-setosa 17 | 5.7,4.4,1.5,0.4,Iris-setosa 18 | 5.4,3.9,1.3,0.4,Iris-setosa 19 | 5.1,3.5,1.4,0.3,Iris-setosa 20 | 5.7,3.8,1.7,0.3,Iris-setosa 21 | 5.1,3.8,1.5,0.3,Iris-setosa 22 | 5.4,3.4,1.7,0.2,Iris-setosa 23 | 5.1,3.7,1.5,0.4,Iris-setosa 24 | 4.6,3.6,1.0,0.2,Iris-setosa 25 | 5.1,3.3,1.7,0.5,Iris-setosa 26 | 4.8,3.4,1.9,0.2,Iris-setosa 27 | 5.0,3.0,1.6,0.2,Iris-setosa 28 | 5.0,3.4,1.6,0.4,Iris-setosa 29 | 5.2,3.5,1.5,0.2,Iris-setosa 30 | 5.2,3.4,1.4,0.2,Iris-setosa 31 | 4.7,3.2,1.6,0.2,Iris-setosa 32 | 4.8,3.1,1.6,0.2,Iris-setosa 33 | 5.4,3.4,1.5,0.4,Iris-setosa 34 | 5.2,4.1,1.5,0.1,Iris-setosa 35 | 5.5,4.2,1.4,0.2,Iris-setosa 36 | 4.9,3.1,1.5,0.1,Iris-setosa 37 | 5.0,3.2,1.2,0.2,Iris-setosa 38 | 5.5,3.5,1.3,0.2,Iris-setosa 39 | 4.9,3.1,1.5,0.1,Iris-setosa 40 | 4.4,3.0,1.3,0.2,Iris-setosa 41 | 5.1,3.4,1.5,0.2,Iris-setosa 42 | 5.0,3.5,1.3,0.3,Iris-setosa 43 | 4.5,2.3,1.3,0.3,Iris-setosa 44 | 4.4,3.2,1.3,0.2,Iris-setosa 45 | 5.0,3.5,1.6,0.6,Iris-setosa 46 | 5.1,3.8,1.9,0.4,Iris-setosa 47 | 4.8,3.0,1.4,0.3,Iris-setosa 48 | 5.1,3.8,1.6,0.2,Iris-setosa 49 | 4.6,3.2,1.4,0.2,Iris-setosa 50 | 5.3,3.7,1.5,0.2,Iris-setosa 51 | 5.0,3.3,1.4,0.2,Iris-setosa 52 | 7.0,3.2,4.7,1.4,Iris-versicolor 53 | 6.4,3.2,4.5,1.5,Iris-versicolor 54 | 6.9,3.1,4.9,1.5,Iris-versicolor 55 | 5.5,2.3,4.0,1.3,Iris-versicolor 56 | 6.5,2.8,4.6,1.5,Iris-versicolor 57 | 5.7,2.8,4.5,1.3,Iris-versicolor 58 | 6.3,3.3,4.7,1.6,Iris-versicolor 59 | 4.9,2.4,3.3,1.0,Iris-versicolor 60 | 6.6,2.9,4.6,1.3,Iris-versicolor 61 | 5.2,2.7,3.9,1.4,Iris-versicolor 62 | 5.0,2.0,3.5,1.0,Iris-versicolor 63 | 5.9,3.0,4.2,1.5,Iris-versicolor 64 | 6.0,2.2,4.0,1.0,Iris-versicolor 65 | 6.1,2.9,4.7,1.4,Iris-versicolor 66 | 5.6,2.9,3.6,1.3,Iris-versicolor 67 | 6.7,3.1,4.4,1.4,Iris-versicolor 68 | 5.6,3.0,4.5,1.5,Iris-versicolor 69 | 5.8,2.7,4.1,1.0,Iris-versicolor 70 | 6.2,2.2,4.5,1.5,Iris-versicolor 71 | 5.6,2.5,3.9,1.1,Iris-versicolor 72 | 5.9,3.2,4.8,1.8,Iris-versicolor 73 | 6.1,2.8,4.0,1.3,Iris-versicolor 74 
| 6.3,2.5,4.9,1.5,Iris-versicolor 75 | 6.1,2.8,4.7,1.2,Iris-versicolor 76 | 6.4,2.9,4.3,1.3,Iris-versicolor 77 | 6.6,3.0,4.4,1.4,Iris-versicolor 78 | 6.8,2.8,4.8,1.4,Iris-versicolor 79 | 6.7,3.0,5.0,1.7,Iris-versicolor 80 | 6.0,2.9,4.5,1.5,Iris-versicolor 81 | 5.7,2.6,3.5,1.0,Iris-versicolor 82 | 5.5,2.4,3.8,1.1,Iris-versicolor 83 | 5.5,2.4,3.7,1.0,Iris-versicolor 84 | 5.8,2.7,3.9,1.2,Iris-versicolor 85 | 6.0,2.7,5.1,1.6,Iris-versicolor 86 | 5.4,3.0,4.5,1.5,Iris-versicolor 87 | 6.0,3.4,4.5,1.6,Iris-versicolor 88 | 6.7,3.1,4.7,1.5,Iris-versicolor 89 | 6.3,2.3,4.4,1.3,Iris-versicolor 90 | 5.6,3.0,4.1,1.3,Iris-versicolor 91 | 5.5,2.5,4.0,1.3,Iris-versicolor 92 | 5.5,2.6,4.4,1.2,Iris-versicolor 93 | 6.1,3.0,4.6,1.4,Iris-versicolor 94 | 5.8,2.6,4.0,1.2,Iris-versicolor 95 | 5.0,2.3,3.3,1.0,Iris-versicolor 96 | 5.6,2.7,4.2,1.3,Iris-versicolor 97 | 5.7,3.0,4.2,1.2,Iris-versicolor 98 | 5.7,2.9,4.2,1.3,Iris-versicolor 99 | 6.2,2.9,4.3,1.3,Iris-versicolor 100 | 5.1,2.5,3.0,1.1,Iris-versicolor 101 | 5.7,2.8,4.1,1.3,Iris-versicolor 102 | 6.3,3.3,6.0,2.5,Iris-virginica 103 | 5.8,2.7,5.1,1.9,Iris-virginica 104 | 7.1,3.0,5.9,2.1,Iris-virginica 105 | 6.3,2.9,5.6,1.8,Iris-virginica 106 | 6.5,3.0,5.8,2.2,Iris-virginica 107 | 7.6,3.0,6.6,2.1,Iris-virginica 108 | 4.9,2.5,4.5,1.7,Iris-virginica 109 | 7.3,2.9,6.3,1.8,Iris-virginica 110 | 6.7,2.5,5.8,1.8,Iris-virginica 111 | 7.2,3.6,6.1,2.5,Iris-virginica 112 | 6.5,3.2,5.1,2.0,Iris-virginica 113 | 6.4,2.7,5.3,1.9,Iris-virginica 114 | 6.8,3.0,5.5,2.1,Iris-virginica 115 | 5.7,2.5,5.0,2.0,Iris-virginica 116 | 5.8,2.8,5.1,2.4,Iris-virginica 117 | 6.4,3.2,5.3,2.3,Iris-virginica 118 | 6.5,3.0,5.5,1.8,Iris-virginica 119 | 7.7,3.8,6.7,2.2,Iris-virginica 120 | 7.7,2.6,6.9,2.3,Iris-virginica 121 | 6.0,2.2,5.0,1.5,Iris-virginica 122 | 6.9,3.2,5.7,2.3,Iris-virginica 123 | 5.6,2.8,4.9,2.0,Iris-virginica 124 | 7.7,2.8,6.7,2.0,Iris-virginica 125 | 6.3,2.7,4.9,1.8,Iris-virginica 126 | 6.7,3.3,5.7,2.1,Iris-virginica 127 | 7.2,3.2,6.0,1.8,Iris-virginica 
128 | 6.2,2.8,4.8,1.8,Iris-virginica 129 | 6.1,3.0,4.9,1.8,Iris-virginica 130 | 6.4,2.8,5.6,2.1,Iris-virginica 131 | 7.2,3.0,5.8,1.6,Iris-virginica 132 | 7.4,2.8,6.1,1.9,Iris-virginica 133 | 7.9,3.8,6.4,2.0,Iris-virginica 134 | 6.4,2.8,5.6,2.2,Iris-virginica 135 | 6.3,2.8,5.1,1.5,Iris-virginica 136 | 6.1,2.6,5.6,1.4,Iris-virginica 137 | 7.7,3.0,6.1,2.3,Iris-virginica 138 | 6.3,3.4,5.6,2.4,Iris-virginica 139 | 6.4,3.1,5.5,1.8,Iris-virginica 140 | 6.0,3.0,4.8,1.8,Iris-virginica 141 | 6.9,3.1,5.4,2.1,Iris-virginica 142 | 6.7,3.1,5.6,2.4,Iris-virginica 143 | 6.9,3.1,5.1,2.3,Iris-virginica 144 | 5.8,2.7,5.1,1.9,Iris-virginica 145 | 6.8,3.2,5.9,2.3,Iris-virginica 146 | 6.7,3.3,5.7,2.5,Iris-virginica 147 | 6.7,3.0,5.2,2.3,Iris-virginica 148 | 6.3,2.5,5.0,1.9,Iris-virginica 149 | 6.5,3.0,5.2,2.0,Iris-virginica 150 | 6.2,3.4,5.4,2.3,Iris-virginica 151 | 5.9,3.0,5.1,1.8,Iris-virginica 152 | -------------------------------------------------------------------------------- /ml_from_scratch/KNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### `KNN` from scratch! 
🚀" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "\n", 18 | "class KNN:\n", 19 | " def __init__(self, k=3, task='classification'):\n", 20 | " self.k = k\n", 21 | " self.task = task\n", 22 | "\n", 23 | " def _euclidean_distance(self, a, b):\n", 24 | " # Calculate the Euclidean distance between two points\n", 25 | " return np.sqrt(np.sum((a - b)**2, axis=1))\n", 26 | "\n", 27 | " def fit(self, X, y):\n", 28 | " # Store training data and labels\n", 29 | " self.X_train = X\n", 30 | " self.y_train = y\n", 31 | "\n", 32 | " def predict(self, X):\n", 33 | " # Predict the class labels or target values for a set of data points\n", 34 | " y_pred = [self._predict_single(x) for x in X]\n", 35 | " return np.array(y_pred)\n", 36 | "\n", 37 | " def _predict_single(self, x):\n", 38 | " # Predict the class label or target value for a single data point\n", 39 | " distances = self._euclidean_distance(x, self.X_train)\n", 40 | " # Find K closest data points\n", 41 | " k_indices = np.argsort(distances)[:self.k]\n", 42 | " \n", 43 | " # Get nearest neighbours\n", 44 | " nn = [self.X_train[i].tolist() for i in k_indices] \n", 45 | " print('Nearest_neighbours: ', nn)\n", 46 | " \n", 47 | " # Get their labels\n", 48 | " k_nearest_labels = [self.y_train[i] for i in k_indices] \n", 49 | " print('Labels for Nearest_neighbours: ', k_nearest_labels)\n", 50 | " \n", 51 | " if self.task == 'classification':\n", 52 | " return self._majority_vote(k_nearest_labels)\n", 53 | " elif self.task == 'regression':\n", 54 | " return self._average(k_nearest_labels)\n", 55 | "\n", 56 | " def _majority_vote(self, labels):\n", 57 | " # Determine the majority class label from a list of labels\n", 58 | " return np.argmax(np.bincount(labels))\n", 59 | "\n", 60 | " def _average(self, values):\n", 61 | " # Calculate the average of a list of values\n", 62 | " return np.mean(values)" 63 | ] 64 | }, 65 
| { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "#### Let's test it for regression & Classification 🚀 " 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Nearest_neighbours: [[0, 0], [1, 1], [2, 2]]\n", 82 | "Labels for Nearest_neighbours: [0, 0, 1]\n", 83 | "Predicted labels: [0]\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "# Test the KNN implementation\n", 89 | "if __name__ == \"__main__\":\n", 90 | " X_train = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])\n", 91 | "\n", 92 | " # class labels 👇 \n", 93 | " y_train = np.array([0, 0, 1, 1])\n", 94 | " \n", 95 | " X_test = np.array([[0.5, 0.5]])\n", 96 | "\n", 97 | " knn = KNN(k=3, task='classification')\n", 98 | " knn.fit(X_train, y_train)\n", 99 | " y_pred = knn.predict(X_test)\n", 100 | "\n", 101 | " print(\"Predicted labels:\", y_pred)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 3, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "Nearest_neighbours: [[2, 2], [1, 1], [3, 3]]\n", 114 | "Labels for Nearest_neighbours: [1, 0, 1]\n", 115 | "Predicted labels: [0.66666667]\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "# Test the KNN implementation\n", 121 | "if __name__ == \"__main__\":\n", 122 | " X_train = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])\n", 123 | "\n", 124 | " # class labels 👇 \n", 125 | " y_train = np.array([0, 0, 1, 1])\n", 126 | " \n", 127 | " X_test = np.array([[2, 2]])\n", 128 | "\n", 129 | " knn = KNN(k=3, task='regression')\n", 130 | " knn.fit(X_train, y_train)\n", 131 | " y_pred = knn.predict(X_test)\n", 132 | "\n", 133 | " print(\"Predicted labels:\", y_pred)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [] 142 |
} 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "env_twitter", 147 | "language": "python", 148 | "name": "env_twitter" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.10.6" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 4 165 | } 166 | -------------------------------------------------------------------------------- /PyTorch_Lightning/autoencoders.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "2fd74713-1332-4a0b-a55d-3e0d1662b843", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "from torch import optim, nn, utils, Tensor\n", 12 | "from torchvision.datasets import MNIST\n", 13 | "from torchvision.transforms import ToTensor\n", 14 | "import lightning.pytorch as pl\n", 15 | "\n", 16 | "# define any number of nn.Modules (or use your current ones)\n", 17 | "encoder = nn.Sequential(nn.Linear(28 * 28, 128), nn.ReLU(), nn.Linear(128, 64))\n", 18 | "decoder = nn.Sequential(nn.Linear(64, 128), nn.ReLU(), nn.Linear(128, 28 * 28))\n", 19 | "\n", 20 | "\n", 21 | "# define the LightningModule\n", 22 | "class LitAutoEncoder(pl.LightningModule):\n", 23 | " def __init__(self, encoder, decoder):\n", 24 | " super().__init__()\n", 25 | " self.encoder = encoder\n", 26 | " self.decoder = decoder\n", 27 | "\n", 28 | " def training_step(self, batch, batch_idx):\n", 29 | " # training_step defines the train loop.\n", 30 | " # it is independent of forward\n", 31 | " x, y = batch\n", 32 | " x = x.view(x.size(0), -1)\n", 33 | " z = self.encoder(x)\n", 34 | " x_hat = self.decoder(z)\n", 35 | " loss = nn.functional.mse_loss(x_hat, x)\n", 36 | " # 
Logging to TensorBoard (if installed) by default\n", 37 | " self.log(\"train_loss\", loss)\n", 38 | " return loss\n", 39 | "\n", 40 | " def configure_optimizers(self):\n", 41 | " optimizer = optim.Adam(self.parameters(), lr=1e-3)\n", 42 | " return optimizer\n", 43 | "\n", 44 | "\n", 45 | "# init the autoencoder\n", 46 | "autoencoder = LitAutoEncoder(encoder, decoder)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "id": "d35dd91a-8295-4404-aa60-de81cf639c0e", 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# setup data\n", 57 | "dataset = MNIST(os.getcwd(), download=True, transform=ToTensor())\n", 58 | "train_loader = utils.data.DataLoader(dataset)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "id": "96e71aaf-dfa8-43b1-b8a0-bc030693518a", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# train the model (hint: here are some helpful Trainer arguments for rapid idea iteration)\n", 69 | "trainer = pl.Trainer(limit_train_batches=100, max_epochs=50)\n", 70 | "trainer.fit(model=autoencoder, train_dataloaders=train_loader)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "2bbc796d-5254-4e6b-ab55-7148ba2a85f8", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "import torch\n", 81 | "\n", 82 | "# load checkpoint\n", 83 | "checkpoint = \"./lightning_logs/version_7/checkpoints/epoch=49-step=5000.ckpt\"\n", 84 | "autoencoder = LitAutoEncoder.load_from_checkpoint(checkpoint, encoder=encoder, decoder=decoder)\n", 85 | "\n", 86 | "# choose your trained nn.Module\n", 87 | "encoder = autoencoder.encoder\n", 88 | "encoder.eval()\n", 89 | "\n", 90 | "# embed 4 fake images!\n", 91 | "fake_image_batch = torch.rand(4, 28 * 28, device=autoencoder.device)\n", 92 | "embeddings = encoder(fake_image_batch)\n", 93 | "print(\"⚡\" * 20, \"\\nPredictions (4 image embeddings):\\n\", embeddings, \"\\n\", \"⚡\" * 20)" 94 | ] 95 | }, 96 | { 
97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "355f7837-8e5b-4d1a-8ff6-96650d6ea4f8", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# Ensure the autoencoder is on the correct device\n", 104 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 105 | "autoencoder = autoencoder.to(device)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "id": "0eb2d3c7-4f97-450d-867c-568d60482e4f", 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "import matplotlib.pyplot as plt\n", 116 | "from torchvision.utils import make_grid\n", 117 | "\n", 118 | "def show_images(images, title=\"Images\"):\n", 119 | " \"\"\"Utility function to display a batch of images.\"\"\"\n", 120 | " grid_img = make_grid(images, nrow=4)\n", 121 | " plt.figure(figsize=(8, 8))\n", 122 | " plt.imshow(grid_img.permute(1, 2, 0))\n", 123 | " plt.title(title)\n", 124 | " plt.axis('off')\n", 125 | " plt.show()\n", 126 | "\n", 127 | "# Load a batch of images from the dataset\n", 128 | "images, _ = next(iter(train_loader))\n", 129 | "show_images(images, title=\"Original Images\")\n", 130 | "\n", 131 | "# Preprocess the images\n", 132 | "images = images.view(images.size(0), -1)\n", 133 | "\n", 134 | "# Generate image from embeddings\n", 135 | "embeddings = autoencoder.encoder(images)\n", 136 | "reconstructed_images =autoencoder.decoder(embeddings).view(-1, 1, 28, 28)\n", 137 | "show_images(reconstructed_images, title=\"Reconstructed Images\")" 138 | ] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "env_twitter", 144 | "language": "python", 145 | "name": "env_twitter" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 3 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython3", 157 | "version": 
"3.10.6" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 5 162 | } 163 | -------------------------------------------------------------------------------- /LLMs/llamaindex_101.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "079206aa-322f-422f-9efe-7a840ff5d4db", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stderr", 11 | "output_type": "stream", 12 | "text": [ 13 | ":241: RuntimeWarning: scipy._lib.messagestream.MessageStream size changed, may indicate binary incompatibility. Expected 56 from C header, got 64 from PyObject\n", 14 | "/Users/pachaar/opt/anaconda3/envs/env_twitter/lib/python3.10/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.8.20) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n", 15 | " warnings.warn(\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "import os\n", 21 | "import textwrap\n", 22 | "from dotenv import load_dotenv\n", 23 | "import re\n", 24 | "\n", 25 | "# Load environment variables\n", 26 | "load_dotenv()\n", 27 | "\n", 28 | "# Fetch and set API keys\n", 29 | "openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n", 30 | "active_loop_token = os.getenv(\"ACTIVELOOP_TOKEN\")\n", 31 | "dataset_path = os.getenv(\"DATASET_PATH\")" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "id": "5c3db9fb-1afc-44b3-9538-04cd6881afae", 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "2\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "from llama_index import download_loader\n", 50 | "\n", 51 | "WikipediaReader = download_loader(\"WikipediaReader\")\n", 52 | "\n", 53 | "loader = WikipediaReader()\n", 54 | "\n", 55 | "documents = loader.load_data(pages=['Delhi', 'Mumbai'])\n", 56 | "print(len(documents))" 57 | 
] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "id": "23f20252-46bb-41fd-b9c2-7c3a861268aa", 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "146\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "from llama_index.node_parser import SimpleNodeParser\n", 75 | "\n", 76 | "\n", 77 | "# Initialize the parser\n", 78 | "parser = SimpleNodeParser.from_defaults(chunk_size=512, chunk_overlap=20)\n", 79 | "\n", 80 | "# Parse documents into nodes\n", 81 | "nodes = parser.get_nodes_from_documents(documents)\n", 82 | "print(len(nodes))" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 5, 88 | "id": "cfd9dc04-d4a9-4d48-8ec0-13acd860fd5d", 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "Your Deep Lake dataset has been successfully created!\n" 96 | ] 97 | }, 98 | { 99 | "name": "stderr", 100 | "output_type": "stream", 101 | "text": [ 102 | "-" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "from llama_index.vector_stores import DeepLakeVectorStore\n", 108 | "\n", 109 | "my_activeloop_org_id = \"\"\n", 110 | "my_activeloop_dataset_name = \"LlamaIndex-101\"\n", 111 | "dataset_path = f\"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}\"\n", 112 | "\n", 113 | "# Create an index over the documents\n", 114 | "vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=False)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 9, 120 | "id": "e761aa7f-0e49-4904-b663-25ea6956bd73", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "from llama_index.storage.storage_context import StorageContext\n", 125 | "from llama_index import VectorStoreIndex\n", 126 | "\n", 127 | "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n", 128 | "\n", 129 | "index = VectorStoreIndex.from_documents(\n", 130 | " documents,
storage_context=storage_context\n", 131 | ")" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 8, 137 | "id": "9832ed1c-d83a-48af-9795-be02ce9ed9e5", 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | "Delhi has been historically significant as it has served as the capital of various empires and kingdoms throughout history. It has been a prominent political, cultural, and commercial center in India for centuries. Delhi's historical significance is rooted in its role as a seat of power, witnessing the rise and fall of different dynasties and playing a crucial part in shaping the country's history.\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "query_engine = index.as_query_engine()\n", 150 | "response = query_engine.query(\"What is historical significance of Delhi?\")\n", 151 | "print( response.response )" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "id": "a1643cfc-7bc1-44b8-921b-2c1806488ec5", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "env_twitter", 166 | "language": "python", 167 | "name": "env_twitter" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": "ipython", 172 | "version": 3 173 | }, 174 | "file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.10.6" 180 | } 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 5 184 | } 185 | -------------------------------------------------------------------------------- /random/pivot_table_JS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c50bc614-b608-415b-a0ae-03a7189b02fe", 6 | "metadata": {}, 7 | "source": [ 8 | 
"#### PivotTableJS 🚀 " 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "78fb524c-b199-4107-90e8-1a85cfe92b58", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "from pivottablejs import pivot_ui" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "id": "6b732b88-7eef-4a9d-bea8-6cf9953e38a8", 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | "
NamePartyProvinceAgeGender
0Liu, LaurinNDPQuebec22.0Female
1Mourani, MariaBloc QuebecoisQuebec43.0Female
\n", 75 | "
" 76 | ], 77 | "text/plain": [ 78 | " Name Party Province Age Gender\n", 79 | "0 Liu, Laurin NDP Quebec 22.0 Female\n", 80 | "1 Mourani, Maria Bloc Quebecois Quebec 43.0 Female" 81 | ] 82 | }, 83 | "execution_count": 2, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "df = pd.read_csv(\"mps.csv\")\n", 90 | "df.head(2)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 3, 96 | "id": "fe23d5a9-f422-4b36-b2e0-e539b8f65791", 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/html": [ 102 | "\n", 103 | " \n", 111 | " " 112 | ], 113 | "text/plain": [ 114 | "" 115 | ] 116 | }, 117 | "execution_count": 3, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "pivot_ui(df)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 5, 129 | "id": "ae919931-6888-4a3b-b0a8-f863a1ae62a3", 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/html": [ 135 | "\n", 136 | " \n", 144 | " " 145 | ], 146 | "text/plain": [ 147 | "" 148 | ] 149 | }, 150 | "execution_count": 5, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "\n", 157 | "\n", 158 | "pivot_ui(df, \n", 159 | " cols= [\"Party\"],\n", 160 | " rows= [\"Province\"],\n", 161 | " rendererName= \"Horizontal Stacked Bar Chart\",\n", 162 | " rowOrder= \"value_z_to_a\", \n", 163 | " colOrder= \"value_z_to_a\",\n", 164 | " rendererOptions= {\n", 165 | " \"c3\": { \"data\": {\"colors\": {\n", 166 | " \"Liberal\": '#dc3912', \"Conservative\": '#3366cc', \"NDP\": '#ff9900',\n", 167 | " \"Green\":'#109618', 'Bloc Quebecois': '#990099'\n", 168 | " }}}\n", 169 | " }\n", 170 | ")" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "id": "6129fd7e-d89d-4586-b346-bf6b8a9a6456", 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [] 180 | } 181 | ], 182 | "metadata": { 
183 | "kernelspec": { 184 | "display_name": "env_twitter", 185 | "language": "python", 186 | "name": "env_twitter" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.10.5" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 5 203 | } 204 | -------------------------------------------------------------------------------- /LLMs/langchain.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### The power of `Langchain 🦜` " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "#!pip install langchain\n", 17 | "#!pip install openai\n", 18 | "#!pip install google-api-python-client\n", 19 | "#!pip install wikipedia" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 19, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "import openai\n", 30 | "from langchain.chat_models import ChatOpenAI\n", 31 | "from langchain.agents import load_tools, initialize_agent\n", 32 | "from langchain.memory import ConversationBufferMemory\n", 33 | "from IPython.display import Markdown" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 21, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# Set environment variables\n", 43 | "os.environ['GOOGLE_API_KEY'] = '...'\n", 44 | "os.environ['OPENAI_API_KEY'] = '...'\n", 45 | "os.environ['GOOGLE_CSE_ID'] = '...'" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 22, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# A conversation buffer (memory) & import llm of 
choice\n", 55 | "memory = ConversationBufferMemory()\n", 56 | "llm = ChatOpenAI()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 23, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# Provide access to a list of tools that the agents will use\n", 66 | "tools = load_tools(['wikipedia', 'google-search', 'llm-math'], llm=llm)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 27, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# initialise the agents & make all the tools and llm available to it\n", 76 | "agent = initialize_agent(tools, llm, agent='zero-shot-react-description', verbose=True, memory=memory)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 31, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "# provide a prompt and you are done!\n", 86 | "agent.run(\"Find the number of IPL titles won by MS Dhoni & find it's cube root\")" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 29, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "\n", 99 | "\n", 100 | "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", 101 | "\u001b[32;1m\u001b[1;3mI need to find information about IPL titles won by Virat Kohli. Since this is a sports-related question, I should use Google Search.\n", 102 | "Action: Google Search\n", 103 | "Action Input: \"Virat Kohli IPL titles\"\u001b[0m\n", 104 | "Observation: \u001b[33;1m\u001b[1;3mVirat Kohli, the captain of the Royal Challengers Bangalore (RCB) team in the Indian Premier League (IPL), has not won the IPL title so far. Virat Kohli is an Indian international cricketer and the former captain of the Indian national cricket team who plays as a right-handed batsman for Royal ... Jun 27, 2021 ... As India lost the World Test Championship final to New Zealand by eight wickets, it marked the third instance that under Virat Kohli, ... 
May 22, 2022 ... For all his exploits for India, he has never won the IPL title since joining Bangalore before the inaugural contest in 2008, including as ... Apr 23, 2023 ... It's a welcome reminder that Kohli is still chasing an IPL title, which if successful would be a deserved triumph for the most important player ... Mar 30, 2023 ... Sanjay Manjrekar believes Virat Kohli and Royal Challengers ... their first IPL title in the upcoming season of the Indian Premier League. May 14, 2023 ... Talisman Virat Kohli might be cricket's biggest superstar, but his performances in ... and somehow are still chasing an elusive IPL title. May 10, 2023 ... Virat Kohli's dream of winning the Indian Premier League trophy is almost over after RCB's defeat to Mumbai Indians. Oct 12, 2021 ... It was to be Virat Kohli's last match as captain of Royal Challengers Bangalore. Or of any IPL team. RCB put up a sub par performance ... 3 days ago ... Virat Kohli has dominated the Indian Premier League (IPL) as well as ... 
reach the playoffs as the wait for a maiden IPL title continues.\u001b[0m\n", 105 | "Thought:\u001b[32;1m\u001b[1;3mBased on the observation, it seems that Virat Kohli has not won any IPL titles yet.\n", 106 | "Final Answer: Virat Kohli has not won any IPL titles.\u001b[0m\n", 107 | "\n", 108 | "\u001b[1m> Finished chain.\u001b[0m\n" 109 | ] 110 | }, 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "'Virat Kohli has not won any IPL titles.'" 115 | ] 116 | }, 117 | "execution_count": 29, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "agent.run(\"What is the number of IPL titles won by Virat Kohli?\")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 18, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "#!pip install google-api-python-client\n", 133 | "# !pip install wikipedia" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "env_twitter", 147 | "language": "python", 148 | "name": "env_twitter" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.10.6" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 4 165 | } 166 | -------------------------------------------------------------------------------- /azure_ML/deployment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "##### ❇️ Azure ML: Deploying your model as a Web Service" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | 
"metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import sklearn\n", 18 | "from sklearn.svm import SVC\n", 19 | "import pickle\n", 20 | "import joblib\n", 21 | "from azureml.core import Workspace\n", 22 | "from sklearn.model_selection import train_test_split" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "##### ❇️ Let's qickly train & save an irisclassifier model \n", 30 | "##### The model here is just a place holder you can train
and deploy model of your choice " 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# Download dataset\n", 40 | "dataset = pd.read_csv(\n", 41 | " \"https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data\"\n", 42 | ")\n", 43 | "dataset.columns = [\"Petal Length\", \"Petal Width\", \"Sepal Length\", \"Sepal Width\", \"Species\"]" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "dataset = dataset.replace(\n", 53 | " {\"Species\": {\"Iris-setosa\": 1, \"Iris-versicolor\": 2, \"Iris-virginica\": 3}}\n", 54 | ")\n", 55 | "dataset.head(5)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "# Train test split the data\n", 65 | "X = dataset.drop(['Species'], axis=1)\n", 66 | "y = dataset['Species']\n", 67 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "# Instantiate a classifier\n", 77 | "classifier = SVC(kernel = 'linear', random_state = 0)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# Train classifier\n", 87 | "classifier.fit(X_train, y_train)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "# Do prediction\n", 97 | "y_pred = classifier.predict(X_test)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "## Save as a pickle file\n", 107 | "filename= 'saved_model_v1.pkl'\n", 108 | 
"joblib.dump(classifier,open(filename, 'wb'))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "##### ❇️ Create a Workspace" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "from azureml.core import Workspace\n", 125 | "ws = Workspace.create(name='AzureML_Deployment_WS',\n", 126 | " subscription_id='2f##b8*****2',\n", 127 | " resource_group='AzureML_Deployment_RG',\n", 128 | " create_resource_group=True,\n", 129 | " location='eastus'\n", 130 | " )" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "##### ❇️ Register Model" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "from azureml.core.model import Model\n", 147 | "\n", 148 | "model = Model.register(ws, model_name=\"classifier\", model_path=\"saved_model_v1.pkl\")" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "##### ❇️ Setup Inference config" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "from azureml.core.model import InferenceConfig\n", 165 | "\n", 166 | "inference_config = InferenceConfig(\n", 167 | " conda_file='./env.yml',\n", 168 | " source_directory=\"./source_dir\",\n", 169 | " entry_script=\"./score.py\",\n", 170 | ")" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "##### ❇️ Setup Deployment config" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "from azureml.core.webservice import AciWebservice\n", 187 | "\n", 188 | "deployment_config = AciWebservice.deploy_configuration(\n", 189 | " 
cpu_cores=2, memory_gb=3, auth_enabled=True\n", 190 | ")" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "##### ❇️ Deploy the service" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "from azureml.core.model import Model\n", 207 | "\n", 208 | "service = Model.deploy(\n", 209 | " ws, # The instance of workspace created above\n", 210 | " \"myservice\",\n", 211 | " [Model(ws, 'bannerdetector')],\n", 212 | " inference_config,\n", 213 | " deployment_config,\n", 214 | " overwrite=True,\n", 215 | ")\n", 216 | "service.wait_for_deployment(show_output=True)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [] 225 | } 226 | ], 227 | "metadata": { 228 | "kernelspec": { 229 | "display_name": "env_poi", 230 | "language": "python", 231 | "name": "env_poi" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 3 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython3", 243 | "version": "3.8.13" 244 | } 245 | }, 246 | "nbformat": 4, 247 | "nbformat_minor": 4 248 | } 249 | -------------------------------------------------------------------------------- /pandas/df_apply.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1d3e4889-a4a0-4167-82cc-086698ca9a4e", 6 | "metadata": {}, 7 | "source": [ 8 | "##### ❇️ Pandas 🐼: df.apply(func, axis)\n", 9 | "Objects passed to the function are Series objects whose index is either
the DataFrame’s index (axis=0) or the DataFrame’s columns (axis=1)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "id": "b034ae9d-d565-4ca1-bfb9-67079b76786c", 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pandas as pd\n", 20 | "import numpy as np" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "id": "e69b9a5c-a4b5-45c8-a3b5-f04d73449df5", 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "
\n", 33 | "\n", 46 | "\n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | "
AB
014
125
\n", 67 | "
" 68 | ], 69 | "text/plain": [ 70 | " A B\n", 71 | "0 1 4\n", 72 | "1 2 5" 73 | ] 74 | }, 75 | "execution_count": 2, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "df = pd.DataFrame({'A': [1, 2], 'B': [4, 5]})\n", 82 | "df" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 3, 88 | "id": "fee22636-3ab1-404d-99b0-1041f83d2f9c", 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": [ 94 | "A 3\n", 95 | "B 9\n", 96 | "dtype: int64" 97 | ] 98 | }, 99 | "execution_count": 3, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "# When axis = 0, every dataframe column is passed as a series to func\n", 106 | "df.apply(func = np.sum, axis=0)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 4, 112 | "id": "a18e0851-9f47-4436-aeef-4acf9d07574c", 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "0 5\n", 119 | "1 7\n", 120 | "dtype: int64" 121 | ] 122 | }, 123 | "execution_count": 4, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "# When axis = 1, every dataframe row is passed as a series to func\n", 130 | "df.apply(func = np.sum, axis=1)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 5, 136 | "id": "4fb4dacb-5351-4095-9715-4440913de146", 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/html": [ 142 | "
\n", 143 | "\n", 156 | "\n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | "
ABA_plus_B
0145
1257
\n", 180 | "
" 181 | ], 182 | "text/plain": [ 183 | " A B A_plus_B\n", 184 | "0 1 4 5\n", 185 | "1 2 5 7" 186 | ] 187 | }, 188 | "execution_count": 5, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "# using a custom lambda function; axis = 1\n", 195 | "# Notice axis = 1, which means each row is passed as a series whose index \n", 196 | "# is data frame's column, that's why we are able to access values row['A'], row['B'] etc.\n", 197 | "A_plus_B = df.apply(func = lambda row: row['A'] + row['B'], axis=1)\n", 198 | "df['A_plus_B'] = A_plus_B\n", 199 | "df" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 6, 205 | "id": "ca087042-d8f4-4ea8-9eea-5c722804956d", 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/plain": [ 211 | "A 1.5\n", 212 | "B 4.5\n", 213 | "A_plus_B 6.0\n", 214 | "dtype: float64" 215 | ] 216 | }, 217 | "execution_count": 6, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "# calculating average of each column\n", 224 | "# Notice axis = 0, which means each column is passed as a series whose index \n", 225 | "# is data frame's index, that's why we are able to access values col[0], col[1] etc.\n", 226 | "df.apply(func = lambda col: (col[0] + col[1])/2, axis=0)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "id": "27ea53dc-39e9-40b5-9567-e2796f466bab", 232 | "metadata": {}, 233 | "source": [ 234 | "##### ❇️ Hope you enjoyed reading!! 
📖 \n", 235 | "##### ❇️ follow → @akshay_pachaar " 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "id": "b87a73e5-a4e5-4d01-8df5-b1fcc888b1d4", 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [] 245 | } 246 | ], 247 | "metadata": { 248 | "kernelspec": { 249 | "display_name": "env_twitter", 250 | "language": "python", 251 | "name": "env_twitter" 252 | }, 253 | "language_info": { 254 | "codemirror_mode": { 255 | "name": "ipython", 256 | "version": 3 257 | }, 258 | "file_extension": ".py", 259 | "mimetype": "text/x-python", 260 | "name": "python", 261 | "nbconvert_exporter": "python", 262 | "pygments_lexer": "ipython3", 263 | "version": "3.10.5" 264 | } 265 | }, 266 | "nbformat": 4, 267 | "nbformat_minor": 5 268 | } 269 | -------------------------------------------------------------------------------- /random/one_hot_encoding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "##### ❇️ OneHotEncoding" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "from sklearn.preprocessing import OneHotEncoder" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/html": [ 28 | "
\n", 29 | "\n", 42 | "\n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | "
SpeciesSex
Penguin1AdelieMale
Penguin2ChinstrapFemale
Penguin3GentooMale
\n", 68 | "
" 69 | ], 70 | "text/plain": [ 71 | " Species Sex\n", 72 | "Penguin1 Adelie Male\n", 73 | "Penguin2 Chinstrap Female\n", 74 | "Penguin3 Gentoo Male" 75 | ] 76 | }, 77 | "execution_count": 2, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "df = pd.DataFrame({'Species': ['Adelie', 'Chinstrap', 'Gentoo'], \n", 84 | " 'Sex': ['Male', 'Female', 'Male']},\n", 85 | " index=['Penguin1', 'Penguin2', 'Penguin3'])\n", 86 | "df" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 3, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "array([[1., 0., 0., 0., 1.],\n", 98 | " [0., 1., 0., 1., 0.],\n", 99 | " [0., 0., 1., 0., 1.]])" 100 | ] 101 | }, 102 | "execution_count": 3, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "# Define OneHotEncoder & apply transformation over df\n", 109 | "encoder = OneHotEncoder(sparse=False)\n", 110 | "encoded = encoder.fit_transform(df)\n", 111 | "encoded" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "array(['Species', 'Sex'], dtype=object)" 123 | ] 124 | }, 125 | "execution_count": 4, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "# input columns to encoder\n", 132 | "encoder.feature_names_in_" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 5, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "array(['Species_Adelie', 'Species_Chinstrap', 'Species_Gentoo',\n", 144 | " 'Sex_Female', 'Sex_Male'], dtype=object)" 145 | ] 146 | }, 147 | "execution_count": 5, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "# Output columns generated after OneHotEncoding\n", 154 | 
"encoder.get_feature_names_out()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 6, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/html": [ 165 | "
\n", 166 | "\n", 179 | "\n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | "
Species_AdelieSpecies_ChinstrapSpecies_GentooSex_FemaleSex_Male
Penguin11.00.00.00.01.0
Penguin20.01.00.01.00.0
Penguin30.00.01.00.01.0
\n", 217 | "
" 218 | ], 219 | "text/plain": [ 220 | " Species_Adelie Species_Chinstrap Species_Gentoo Sex_Female \\\n", 221 | "Penguin1 1.0 0.0 0.0 0.0 \n", 222 | "Penguin2 0.0 1.0 0.0 1.0 \n", 223 | "Penguin3 0.0 0.0 1.0 0.0 \n", 224 | "\n", 225 | " Sex_Male \n", 226 | "Penguin1 1.0 \n", 227 | "Penguin2 0.0 \n", 228 | "Penguin3 1.0 " 229 | ] 230 | }, 231 | "execution_count": 6, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "# Create a new DataFrame with categorical features OnHotEncoded\n", 238 | "df_encoded = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(), index=df.index)\n", 239 | "df_encoded" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "##### ❇️ Hope you enjoyed reading!! 📖 \n", 247 | "##### ❇️ follow → @akshay_pachaar " 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "env_mld", 261 | "language": "python", 262 | "name": "env_mld" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": "ipython3", 274 | "version": "3.8.5" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 4 279 | } 280 | -------------------------------------------------------------------------------- /random/tf_decision_forests.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "d81d63f4-a12f-4aa0-956e-a3f8702a1904", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import tensorflow_decision_forests as tfdf\n", 11 | "from sklearn.model_selection import 
train_test_split\n", 12 | "import pandas as pd" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "e6c8c1db-8fc9-497e-b5e1-875d8a43635e", 18 | "metadata": {}, 19 | "source": [ 20 | "##### ❇️ Setup" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "id": "db35588c-df20-4b42-95de-77b7f733a503", 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "Label classes: ['Adelie', 'Gentoo', 'Chinstrap']\n" 34 | ] 35 | }, 36 | { 37 | "data": { 38 | "text/html": [ 39 | "
\n", 40 | "\n", 53 | "\n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | "
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
00Torgersen39.118.7181.03750.0male2007
10Torgersen39.517.4186.03800.0female2007
\n", 92 | "
" 93 | ], 94 | "text/plain": [ 95 | " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", 96 | "0 0 Torgersen 39.1 18.7 181.0 \n", 97 | "1 0 Torgersen 39.5 17.4 186.0 \n", 98 | "\n", 99 | " body_mass_g sex year \n", 100 | "0 3750.0 male 2007 \n", 101 | "1 3800.0 female 2007 " 102 | ] 103 | }, 104 | "execution_count": 2, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "# Load and prepare data; Our task would be to predict specie of the penguin\n", 111 | "dataset_df = pd.read_csv('penguins.csv')\n", 112 | "label = \"species\"\n", 113 | "classes = dataset_df[label].unique().tolist()\n", 114 | "print(f\"Label classes: {classes}\")\n", 115 | "dataset_df[label] = dataset_df[label].map(classes.index)\n", 116 | "dataset_df.head(2)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 3, 122 | "id": "6cb0f048-989f-479b-8da4-7a9b4f3c87b8", 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# Split data into train/test\n", 127 | "train_ds_pd, test_ds_pd = train_test_split(dataset_df, test_size=0.3)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "id": "d280fe66-5228-4564-b1a4-082ee9bbfae8", 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "# Converting pandas dataframes to tensorflow datasets\n", 138 | "train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_ds_pd, label=label)\n", 139 | "test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(test_ds_pd, label=label)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "id": "e11b12bb-c619-4136-aacd-5a632fa5493d", 145 | "metadata": {}, 146 | "source": [ 147 | "##### ❇️ Training" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 7, 153 | "id": "2472e857-ef42-4adc-970c-f57cb397fc59", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# Load and train model\n", 158 | "model = tfdf.keras.RandomForestModel()\n", 159 | 
"model.fit(x=train_ds)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "c2be95d1-17ba-4cd6-a3e2-58aeef052dc0", 165 | "metadata": {}, 166 | "source": [ 167 | "##### ❇️ Evaluation" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 8, 173 | "id": "39765bdf-b0af-453a-82f2-c8632ae31455", 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "1/1 [==============================] - 0s 339ms/step - loss: 0.0000e+00 - accuracy: 0.9615\n", 181 | "loss: 0.0000\n", 182 | "accuracy: 0.9615\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "model.compile(metrics=[\"accuracy\"])\n", 188 | "evaluation = model.evaluate(test_ds, return_dict=True)\n", 189 | "for name, value in evaluation.items():\n", 190 | " print(f\"{name}: {value:.4f}\")" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "id": "13e9eb33-2782-4198-a3e1-ca0a3d3a6adc", 196 | "metadata": {}, 197 | "source": [ 198 | "##### ❇️ Save model; ready to be served using tf-serving" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 13, 204 | "id": "e22ac43b-4925-4c5f-9618-cd1f1f0f327a", 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "model.save(\"/path_to_save_model_directory\")" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "id": "2b1d3d79-9a8b-48a5-82b2-6ad40056e09f", 214 | "metadata": {}, 215 | "source": [ 216 | "##### ❇️ Hope you enjoyed reading!! 
📖 \n", 217 | "##### ❇️ follow → @akshay_pachaar " 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "id": "2511c8e5-391d-4ab4-be8d-3892093e133b", 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "5f4e802a-99e1-499c-9063-b25f2c6fdb68", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "id": "15e91cef-d4e9-40af-b61b-814b3a7f5c0f", 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "id": "1e2a41e7-594d-4d54-9a6f-47ead8adf871", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 5, 255 | "id": "1e815d2a-ae92-4e91-9856-d0a0afcb3508", 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "import warnings\n", 260 | "warnings.filterwarnings('ignore')" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "id": "78456c13-b03e-497e-9e3e-1b7b48565cdc", 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [] 270 | } 271 | ], 272 | "metadata": { 273 | "kernelspec": { 274 | "display_name": "Python 3", 275 | "language": "python", 276 | "name": "python3" 277 | }, 278 | "language_info": { 279 | "codemirror_mode": { 280 | "name": "ipython", 281 | "version": 3 282 | }, 283 | "file_extension": ".py", 284 | "mimetype": "text/x-python", 285 | "name": "python", 286 | "nbconvert_exporter": "python", 287 | "pygments_lexer": "ipython3", 288 | "version": "3.8.12" 289 | } 290 | }, 291 | "nbformat": 4, 292 | "nbformat_minor": 5 293 | } 294 | -------------------------------------------------------------------------------- /PyTorch/tensors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 
3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### ❇️ Tensors " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Tensors are data structures similar to arrays and matrices which are used to encode the inputs, outputs and parameters of models
\n", 15 | "Tensors are similar to NumPy’s ndarrays, except that tensors can run on GPUs or other hardware accelerators" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import torch\n", 25 | "import numpy as np" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "#### Initializing a Tensor" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# Directly from data\n", 42 | "data = [[1, 2],[3, 4]]\n", 43 | "x_data = torch.tensor(data)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# From a NumPy array\n", 53 | "np_array = np.array(data)\n", 54 | "x_np = torch.from_numpy(np_array)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "Random Tensor: \n", 67 | " tensor([[0.2710, 0.9181],\n", 68 | " [0.0190, 0.6447]]) \n", 69 | "\n", 70 | "Ones Tensor: \n", 71 | " tensor([[1., 1.],\n", 72 | " [1., 1.]]) \n", 73 | "\n", 74 | "Zeros Tensor: \n", 75 | " tensor([[0., 0.],\n", 76 | " [0., 0.]])\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "# With random or constant values:\n", 82 | "shape=(2, 2)\n", 83 | "rand_tensor = torch.rand(shape)\n", 84 | "ones_tensor = torch.ones(shape)\n", 85 | "zeros_tensor = torch.zeros(shape)\n", 86 | "\n", 87 | "print(f\"Random Tensor: \\n {rand_tensor} \\n\")\n", 88 | "print(f\"Ones Tensor: \\n {ones_tensor} \\n\")\n", 89 | "print(f\"Zeros Tensor: \\n {zeros_tensor}\")" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "#### Moving tensors to GPU" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "By 
default, tensors are created on the CPU. We need to explicitly move tensors to the GPU using the .to method" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# We move our tensor to the GPU if available\n", 113 | "tensor = torch.rand(2, 3)\n", 114 | "if torch.cuda.is_available():\n", 115 | " tensor = tensor.to(\"cuda\")" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "#### Attributes of a Tensor" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "Shape of tensor: torch.Size([2, 3])\n", 135 | "Datatype of tensor: torch.float32\n", 136 | "Tensor is stored on: cpu\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "tensor = torch.rand(2, 3)\n", 142 | "\n", 143 | "print(f\"Shape of tensor: {tensor.shape}\")\n", 144 | "print(f\"Datatype of tensor: {tensor.dtype}\")\n", 145 | "print(f\"Tensor is stored on: {tensor.device}\")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "#### Operations on Tensors\n" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 7, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "First row: tensor([1., 1., 1., 1.])\n", 165 | "First column: tensor([1., 1., 1., 1.])\n", 166 | "Last column: tensor([1., 1., 1., 1.])\n", 167 | "tensor([[1., 0., 1., 1.],\n", 168 | " [1., 0., 1., 1.],\n", 169 | " [1., 0., 1., 1.],\n", 170 | " [1., 0., 1., 1.]])\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "# Standard indexing and slicing just like NumPy\n", 176 | "tensor = torch.ones(4, 4)\n", 177 | "print(f\"First row: {tensor[0]}\")\n", 178 | "print(f\"First column: {tensor[:, 0]}\")\n", 179 | "print(f\"Last
column: {tensor[..., -1]}\")\n", 180 | "tensor[:,1] = 0\n", 181 | "print(tensor)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 8, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "# Arithmetic Ops\n", 191 | "# This computes the matrix multiplication between two tensors.\n", 192 | "y1 = tensor.matmul(tensor.T)\n", 193 | "\n", 194 | "# This computes the element-wise product.\n", 195 | "z1 = tensor * tensor" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 9, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "12.0 \n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "# Converting single value tensors to a Python \n", 213 | "# Numerical value\n", 214 | "agg = tensor.sum()\n", 215 | "agg_item = agg.item()\n", 216 | "print(agg_item, type(agg_item))" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 10, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "tensor([[1., 0., 1., 1.],\n", 229 | " [1., 0., 1., 1.],\n", 230 | " [1., 0., 1., 1.],\n", 231 | " [1., 0., 1., 1.]]) \n", 232 | "\n", 233 | "tensor([[6., 5., 6., 6.],\n", 234 | " [6., 5., 6., 6.],\n", 235 | " [6., 5., 6., 6.],\n", 236 | " [6., 5., 6., 6.]])\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | "# Inplace Ops\n", 242 | "print(f\"{tensor} \\n\")\n", 243 | "tensor.add_(5)\n", 244 | "print(tensor)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "#### Bridge with NumPy" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "Tensors on the CPU and NumPy arrays can share their underlying memory locations, and changing one will change the other.\n", 259 | "\n" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 11, 265 | "metadata": {}, 
266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "tensor: tensor([1., 1., 1., 1., 1.])\n", 272 | "numpy array: [1. 1. 1. 1. 1.]\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "t = torch.ones(5)\n", 278 | "print(f\"tensor: {t}\")\n", 279 | "n = t.numpy()\n", 280 | "print(f\"numpy array: {n}\")" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 12, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "name": "stdout", 290 | "output_type": "stream", 291 | "text": [ 292 | "tensor: tensor([2., 2., 2., 2., 2.])\n", 293 | "numpy array: [2. 2. 2. 2. 2.]\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "t.add_(1)\n", 299 | "print(f\"tensor: {t}\")\n", 300 | "print(f\"numpy array: {n}\")" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [] 309 | } 310 | ], 311 | "metadata": { 312 | "kernelspec": { 313 | "display_name": "env_torch", 314 | "language": "python", 315 | "name": "env_torch" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | "mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | "version": "3.6.13" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 4 332 | } 333 | -------------------------------------------------------------------------------- /random/python_for_sql.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c168d69e-4ec2-4a56-b203-8da87e406e7b", 6 | "metadata": {}, 7 | "source": [ 8 | "### ⚙️ SQL Queries using `Python` 🐍 \n", 9 | "#### Loading the results in a Pandas `DataFrame` 🐼 " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "id": 
"a43dfc56-ad0b-43c9-8002-05f311f73a5b", 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# !pip install psycopg2" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "id": "7561660d-9f24-4395-9444-16e20ad8c571", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import psycopg2\n", 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "id": "fb3f2af1-c019-4c33-b01e-ef14e5feb696", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# Setting up connection; database is publicly available\n", 41 | "conn = psycopg2.connect(host='hh-pgsql-public.ebi.ac.uk', \n", 42 | " dbname='pfmegrnargs',\n", 43 | " user='reader', \n", 44 | " password='NWDMCE5xdipIjRrp')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "id": "66c0be6a-032a-49f9-994d-cdb1178451c2", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# SQL Query\n", 55 | "query = \"SELECT * FROM rnc_database\"" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 5, 61 | "id": "8ab80cc8-8c60-4921-a653-f8294ae6ef2b", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# Executing the query\n", 66 | "cursor = conn.cursor()\n", 67 | "cursor.execute(query)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 6, 73 | "id": "aea4b0cf-c74c-468e-9b5a-ff6584367281", 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# Loading the results in a pandas DataFrame\n", 78 | "df = pd.DataFrame(cursor.fetchall(), \n", 79 | " columns=[desc[0] for desc in cursor.description])" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 7, 85 | "id": "7f897634-a95a-4e83-ace1-61abf3da922c", 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/html": [ 91 | "
\n", 92 | "\n", 105 | "\n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | "
idtimestampuserstampdescrcurrent_releasefull_descralivefor_releasedisplay_nameproject_idavg_lengthmin_lengthmax_lengthnum_sequencesnum_organisms
0512022-09-22 15:14:59.500143RNACENEXPRESSIONATLAS636Expression AtlasYNoneExpression AtlasNone953.072.032709.0110303
152017-05-17 00:00:00.000000RNACENVEGA98VEGANNoneVEGAPRJEB4568NaNNaNNaN00
2242017-05-02 00:00:00.000000RNACENFLYBASE614FlyBaseYNoneFlyBasePRJ_FLY765.018.021216.042101
3502022-08-16 15:52:33.990145RNACENPLNCDB606PLncDBYNonePLncDBNone6659.0199.0985945.093692680
\n", 201 | "
" 202 | ], 203 | "text/plain": [ 204 | " id timestamp userstamp descr current_release \\\n", 205 | "0 51 2022-09-22 15:14:59.500143 RNACEN EXPRESSIONATLAS 636 \n", 206 | "1 5 2017-05-17 00:00:00.000000 RNACEN VEGA 98 \n", 207 | "2 24 2017-05-02 00:00:00.000000 RNACEN FLYBASE 614 \n", 208 | "3 50 2022-08-16 15:52:33.990145 RNACEN PLNCDB 606 \n", 209 | "\n", 210 | " full_descr alive for_release display_name project_id \\\n", 211 | "0 Expression Atlas Y None Expression Atlas None \n", 212 | "1 VEGA N None VEGA PRJEB4568 \n", 213 | "2 FlyBase Y None FlyBase PRJ_FLY \n", 214 | "3 PLncDB Y None PLncDB None \n", 215 | "\n", 216 | " avg_length min_length max_length num_sequences num_organisms \n", 217 | "0 953.0 72.0 32709.0 11030 3 \n", 218 | "1 NaN NaN NaN 0 0 \n", 219 | "2 765.0 18.0 21216.0 4210 1 \n", 220 | "3 6659.0 199.0 985945.0 936926 80 " 221 | ] 222 | }, 223 | "execution_count": 7, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "df.head(4)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 8, 235 | "id": "b31d46e9-7790-482d-b435-8c210c8316e2", 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "# Close connection\n", 240 | "conn.close()" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "id": "977e0793-ebae-4d37-8ff1-5180f44b5a8c", 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [] 250 | } 251 | ], 252 | "metadata": { 253 | "kernelspec": { 254 | "display_name": "env_poi", 255 | "language": "python", 256 | "name": "env_poi" 257 | }, 258 | "language_info": { 259 | "codemirror_mode": { 260 | "name": "ipython", 261 | "version": 3 262 | }, 263 | "file_extension": ".py", 264 | "mimetype": "text/x-python", 265 | "name": "python", 266 | "nbconvert_exporter": "python", 267 | "pygments_lexer": "ipython3", 268 | "version": "3.8.13" 269 | } 270 | }, 271 | "nbformat": 4, 272 | "nbformat_minor": 5 273 | } 274 | 
-------------------------------------------------------------------------------- /numpy/numpy_indexing_slicing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "#### ❇️ Basic Indexing" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "0\n", 29 | "7\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "# A regular 1D array\n", 35 | "x = np.arange(10)\n", 36 | "print(x[0])\n", 37 | "print(x[-3])" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "[[0 1 2 3 4]\n", 50 | " [5 6 7 8 9]]\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "# Let's reshape x and make it a 2D array\n", 56 | "x.shape = (2, 5)\n", 57 | "print(x)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "8\n", 70 | "9\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "# No need to separate each dimension’s index into its own set of square brackets.\n", 76 | "# check this out 👇\n", 77 | "print(x[1, 3])\n", 78 | "print(x[1, -1])" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "array([0, 1, 2, 3, 4])" 90 | ] 91 | }, 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "# If number of indices passed is fewer than the dimension 
of array\n", 99 | "# A sub dimensional array is obtained 👇 \n", 100 | "x[0]" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "#### ❇️ Slicing and striding" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 6, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "array([1, 3, 5])" 119 | ] 120 | }, 121 | "execution_count": 6, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "# The basic slice syntax is i:j:k where i is the starting index,\n", 128 | "# j is the stopping index, and k is the step (k should be non-zero)\n", 129 | "# Consider 👇 \n", 130 | "x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n", 131 | "x[1:7:2]" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 7, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "[7 8 9]\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "# Negative i and j are interpreted as n + i and n + j where n is the \n", 149 | "# number of elements in the corresponding dimension.\n", 150 | "print(x[-3:10]) # i = -3; j = 10; k = 1 (if not given k defaults to 1)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 8, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "[7 6 5 4]\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "# Negative k makes stepping go towards smaller indices\n", 168 | "print(x[-3:3:-1]) # i = -3; j = 3; k = -1 " 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 9, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "[0 1 2 3 4]\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "# If i is not given it defaults to 0 for k > 0 and n - 1 for k < 0 
.\n", 186 | "print(x[:5])" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 10, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "[5 6 7 8 9]\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "# If j is not given it defaults to n for k > 0 and -n-1 for k < 0 . \n", 204 | "print(x[5:])" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 11, 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "name": "stdout", 214 | "output_type": "stream", 215 | "text": [ 216 | "[9 8 7 6 5 4 3 2 1 0]\n" 217 | ] 218 | } 219 | ], 220 | "source": [ 221 | "# Let's reverse the array\n", 222 | " # Since, k < 0; i not given it defaults to 10 - 1; j becomes -11\n", 223 | "print(x[::-1]) # ⬅️ is equivalent to x[10:-11:-1]; check next shell ⬇️ " 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 12, 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "data": { 233 | "text/plain": [ 234 | "array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])" 235 | ] 236 | }, 237 | "execution_count": 12, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "x[10:-11:-1]" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "#### ❇️ Integer array indexing" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 13, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "data": { 260 | "text/plain": [ 261 | "array([10, 9, 8, 7, 6, 5, 4, 3, 2])" 262 | ] 263 | }, 264 | "execution_count": 13, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "x = np.arange(10, 1, -1)\n", 271 | "x" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 14, 277 | "metadata": {}, 278 | "outputs": [ 279 | { 280 | "data": { 281 | "text/plain": [ 282 | "array([7, 7, 4, 2])" 283 | ] 284 | }, 285 | 
"execution_count": 14, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "# One can directly access the elements at indices\n", 292 | "# specified by integer array; Check this out 👇 \n", 293 | "x[np.array([3, 3, -3, 8])]" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "#### ❇️ Boolean array Indexing" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 15, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "array([False, True, True, False])" 312 | ] 313 | }, 314 | "execution_count": 15, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "# When boolean array is used, indices corresponding to True values\n", 321 | "# in boolean array are accessed from array x \n", 322 | "x = np.array([1., -1., -2., 3])\n", 323 | "\n", 324 | "# a boolean array 👇 \n", 325 | "x < 0 # ⬅️ True where elements in x < 0" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 16, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "data": { 335 | "text/plain": [ 336 | "array([-1., -2.])" 337 | ] 338 | }, 339 | "execution_count": 16, 340 | "metadata": {}, 341 | "output_type": "execute_result" 342 | } 343 | ], 344 | "source": [ 345 | "# accessing the elements based on boolean array\n", 346 | "x[x<0]" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 17, 352 | "metadata": {}, 353 | "outputs": [ 354 | { 355 | "data": { 356 | "text/plain": [ 357 | "array([ 1., 19., 18., 3.])" 358 | ] 359 | }, 360 | "execution_count": 17, 361 | "metadata": {}, 362 | "output_type": "execute_result" 363 | } 364 | ], 365 | "source": [ 366 | "# adding 20 to all elements < 0\n", 367 | "x[x < 0] += 20\n", 368 | "x" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | 
"source": [] 377 | } 378 | ], 379 | "metadata": { 380 | "kernelspec": { 381 | "display_name": "env_twitter", 382 | "language": "python", 383 | "name": "env_twitter" 384 | }, 385 | "language_info": { 386 | "codemirror_mode": { 387 | "name": "ipython", 388 | "version": 3 389 | }, 390 | "file_extension": ".py", 391 | "mimetype": "text/x-python", 392 | "name": "python", 393 | "nbconvert_exporter": "python", 394 | "pygments_lexer": "ipython3", 395 | "version": "3.10.5" 396 | } 397 | }, 398 | "nbformat": 4, 399 | "nbformat_minor": 4 400 | } 401 | -------------------------------------------------------------------------------- /LLMs/openai_function_calling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### The power of `OpenAI function calling` 🚀 " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 10, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import json\n", 18 | "import openai\n", 19 | "import requests\n", 20 | "from ast import literal_eval\n", 21 | "from IPython.display import JSON" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# Set environment variables\n", 31 | "os.environ['OPENAI_API_KEY'] = '...'\n", 32 | "os.environ['WEATHER_API_KEY'] = '...'" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "open_ai_url = \"https://api.openai.com/v1/chat/completions\"\n", 42 | "model = \"gpt-3.5-turbo-0613\"\n", 43 | "user_message = \"What is the weather like in Delhi?\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "application/json": { 54 | "choices": [ 55 | { 56 | "finish_reason": "function_call", 57 | 
"index": 0, 58 | "message": { 59 | "content": null, 60 | "function_call": { 61 | "arguments": "{\n \"location\": \"Delhi\"\n}", 62 | "name": "get_current_weather" 63 | }, 64 | "role": "assistant" 65 | } 66 | } 67 | ], 68 | "created": 1687324647, 69 | "id": "chatcmpl-7TkVbj3AUg1PJDGQOAqE3MNQQ3AP3", 70 | "model": "gpt-3.5-turbo-0613", 71 | "object": "chat.completion", 72 | "usage": { 73 | "completion_tokens": 17, 74 | "prompt_tokens": 82, 75 | "total_tokens": 99 76 | } 77 | }, 78 | "text/plain": [ 79 | "" 80 | ] 81 | }, 82 | "execution_count": 4, 83 | "metadata": { 84 | "application/json": { 85 | "expanded": false, 86 | "root": "root" 87 | } 88 | }, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "# A natural language request to the OpenAI API, asking about weather in delhi\n", 94 | "\n", 95 | "headers = {\n", 96 | " \"Content-Type\": \"application/json\",\n", 97 | " \"Authorization\": f\"Bearer {os.getenv('OPENAI_API_KEY')}\",\n", 98 | "}\n", 99 | "\n", 100 | "data = {\n", 101 | " \"model\": model,\n", 102 | " \"messages\":[\n", 103 | " {\n", 104 | " \"role\":\"user\",\n", 105 | " \"content\": user_message\n", 106 | " }\n", 107 | " ],\n", 108 | " \"functions\":[\n", 109 | " {\n", 110 | " \"name\":\"get_current_weather\",\n", 111 | " \"description\":\"Get the current weather in a given location\",\n", 112 | " \"parameters\":{\n", 113 | " \"type\":\"object\",\n", 114 | " \"properties\":{\n", 115 | " \"location\":{\n", 116 | " \"type\":\"string\",\n", 117 | " \"description\":\"The city and state, e.g. 
San Francisco, CA\"\n", 118 | " },\n", 119 | " \"unit\":{\n", 120 | " \"type\":\"string\",\n", 121 | " \"enum\":[\n", 122 | " \"celsius\",\n", 123 | " \"fahrenheit\"\n", 124 | " ]\n", 125 | " }\n", 126 | " },\n", 127 | " \"required\":[\n", 128 | " \"location\"\n", 129 | " ]\n", 130 | " }\n", 131 | " }\n", 132 | " ]\n", 133 | "}\n", 134 | "\n", 135 | "response = requests.post(open_ai_url, headers=headers, data=json.dumps(data)).json()\n", 136 | "\n", 137 | "JSON(response)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 12, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "application/json": { 148 | "base": "stations", 149 | "clouds": { 150 | "all": 75 151 | }, 152 | "cod": 200, 153 | "coord": { 154 | "lat": 28.6667, 155 | "lon": 77.2167 156 | }, 157 | "dt": 1687324475, 158 | "id": 1273294, 159 | "main": { 160 | "feels_like": 308.94, 161 | "humidity": 84, 162 | "pressure": 1004, 163 | "temp": 302.2, 164 | "temp_max": 302.2, 165 | "temp_min": 302.2 166 | }, 167 | "name": "Delhi", 168 | "sys": { 169 | "country": "IN", 170 | "id": 9165, 171 | "sunrise": 1687305220, 172 | "sunset": 1687355512, 173 | "type": 1 174 | }, 175 | "timezone": 19800, 176 | "visibility": 3500, 177 | "weather": [ 178 | { 179 | "description": "mist", 180 | "icon": "50d", 181 | "id": 701, 182 | "main": "Mist" 183 | } 184 | ], 185 | "wind": { 186 | "deg": 220, 187 | "speed": 1.54 188 | } 189 | }, 190 | "text/plain": [ 191 | "" 192 | ] 193 | }, 194 | "execution_count": 12, 195 | "metadata": { 196 | "application/json": { 197 | "expanded": false, 198 | "root": "root" 199 | } 200 | }, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "# Based on the response above we extract the necessary information required to call Weather API\n", 206 | "arguments = response['choices'][0]['message']['function_call']['arguments']\n", 207 | "city = literal_eval(arguments)['location']\n", 208 | "\n", 209 | "base_url = 
f\"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={os.getenv('WEATHER_API_KEY')}\"\n", 210 | "\n", 211 | "weather_api_response = requests.get(base_url)\n", 212 | "weather_api_response = weather_api_response.json()\n", 213 | "\n", 214 | "JSON(weather_api_response)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 15, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "{'id': 'chatcmpl-7TkYby42BwPDCBAnhKqpd0oAaZLPt', 'object': 'chat.completion', 'created': 1687324833, 'model': 'gpt-3.5-turbo-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'The current weather in Delhi is misty. The temperature is 302.2 Kelvin (approximately 29.05 degrees Celsius) with a humidity of 84%. The visibility is 3500 meters.'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 330, 'completion_tokens': 42, 'total_tokens': 372}}\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "# Converting the weather API response back to Natural Language\n", 232 | "\n", 233 | "headers = {\n", 234 | " \"Content-Type\": \"application/json\",\n", 235 | " \"Authorization\": f\"Bearer {os.getenv('OPENAI_API_KEY')}\",\n", 236 | "}\n", 237 | "\n", 238 | "data = {\n", 239 | " \"model\": model,\n", 240 | " \"messages\": [\n", 241 | " {\"role\": \"user\", \"content\": user_message},\n", 242 | " {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": arguments}},\n", 243 | " {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": str(weather_api_response)}\n", 244 | " ],\n", 245 | " \"functions\": [\n", 246 | " {\n", 247 | " \"name\": \"get_current_weather\",\n", 248 | " \"description\": \"Get the current weather in a given location\",\n", 249 | " \"parameters\": {\n", 250 | " \"type\": \"object\",\n", 251 | " \"properties\": {\n", 252 | " \"location\": {\n", 253 | " \"type\": \"string\",\n", 
254 | " \"description\": \"The city and state, e.g. San Francisco, CA\"\n", 255 | " },\n", 256 | " \"unit\": {\n", 257 | " \"type\": \"string\",\n", 258 | " \"enum\": [\"celsius\", \"fahrenheit\"]\n", 259 | " }\n", 260 | " },\n", 261 | " \"required\": [\"location\"]\n", 262 | " }\n", 263 | " }\n", 264 | " ]\n", 265 | "}\n", 266 | "\n", 267 | "response = requests.post(open_ai_url, headers=headers, data=json.dumps(data))\n", 268 | "\n", 269 | "print(response.json())" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 20, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "'The current weather in Delhi is misty. The temperature is 302.2 Kelvin (approximately 29.05 degrees Celsius) with a humidity of 84%. The visibility is 3500 meters.'" 281 | ] 282 | }, 283 | "execution_count": 20, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "response.json()['choices'][0]['message']['content']" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [] 298 | } 299 | ], 300 | "metadata": { 301 | "kernelspec": { 302 | "display_name": "env_twitter", 303 | "language": "python", 304 | "name": "env_twitter" 305 | }, 306 | "language_info": { 307 | "codemirror_mode": { 308 | "name": "ipython", 309 | "version": 3 310 | }, 311 | "file_extension": ".py", 312 | "mimetype": "text/x-python", 313 | "name": "python", 314 | "nbconvert_exporter": "python", 315 | "pygments_lexer": "ipython3", 316 | "version": "3.10.6" 317 | } 318 | }, 319 | "nbformat": 4, 320 | "nbformat_minor": 4 321 | } 322 | -------------------------------------------------------------------------------- /pandas/assigning_new_columns.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "##### ❇️ 
Adding new columns: 🐼 df.assign(**kwargs)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Returns a new dataframe with all original columns in addition to new ones.
\n", 15 | "kwargs:
\n", 16 | "- The column names are keywords. If the values are callable, they are computed on the DataFrame
\n", 17 | "and assigned to the new columns.
\n", 18 | "- If the values are not callable (e.g. a Series, scalar, or array),
\n", 19 | "they are simply assigned." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/html": [ 39 | "
\n", 40 | "\n", 53 | "\n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | "
temp_celcius
Delhi35.0
Mumbai25.0
\n", 71 | "
" 72 | ], 73 | "text/plain": [ 74 | " temp_celcius\n", 75 | "Delhi 35.0\n", 76 | "Mumbai 25.0" 77 | ] 78 | }, 79 | "execution_count": 2, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "df = pd.DataFrame({'temp_celcius': [35.0, 25.0]}, index=['Delhi', 'Mumbai'])\n", 86 | "df" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 3, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/html": [ 97 | "
\n", 98 | "\n", 111 | "\n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | "
temp_celciuswind_speed_kmph
Delhi35.026.0
Mumbai25.031.0
\n", 132 | "
" 133 | ], 134 | "text/plain": [ 135 | " temp_celcius wind_speed_kmph\n", 136 | "Delhi 35.0 26.0\n", 137 | "Mumbai 25.0 31.0" 138 | ] 139 | }, 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "# Let's add a new column wind_speed_kmph; where we take values from a list\n", 147 | "speed_values = [26.0, 31.0]\n", 148 | "df = df.assign(wind_speed_kmph=speed_values) # kmph: kilometers per hour\n", 149 | "df" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 4, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/html": [ 160 | "
\n", 161 | "\n", 174 | "\n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | "
temp_celciuswind_speed_kmphwind_speed_mph
Delhi35.026.016.146
Mumbai25.031.019.251
\n", 198 | "
" 199 | ], 200 | "text/plain": [ 201 | " temp_celcius wind_speed_kmph wind_speed_mph\n", 202 | "Delhi 35.0 26.0 16.146\n", 203 | "Mumbai 25.0 31.0 19.251" 204 | ] 205 | }, 206 | "execution_count": 4, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "# Using a callable to calculate values of new column based on existing column values\n", 213 | "df = df.assign(wind_speed_mph = lambda x: x['wind_speed_kmph']*0.621) # mph: miles per hour\n", 214 | "df" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 5, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/html": [ 225 | "
\n", 226 | "\n", 239 | "\n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | "
temp_celciuswind_speed_kmphwind_speed_mphtemp_fahrenheittemp_kelvin
Delhi35.026.016.14695.0308.15
Mumbai25.031.019.25177.0298.15
\n", 269 | "
" 270 | ], 271 | "text/plain": [ 272 | " temp_celcius wind_speed_kmph wind_speed_mph temp_fahrenheit \\\n", 273 | "Delhi 35.0 26.0 16.146 95.0 \n", 274 | "Mumbai 25.0 31.0 19.251 77.0 \n", 275 | "\n", 276 | " temp_kelvin \n", 277 | "Delhi 308.15 \n", 278 | "Mumbai 298.15 " 279 | ] 280 | }, 281 | "execution_count": 5, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "# You can create multiple columns within the same assign \n", 288 | "# where one of the columns depends on another one defined within the same assign 💥 \n", 289 | "df = df.assign(temp_fahrenheit=lambda x: x['temp_celcius'] * 9 / 5 + 32, temp_kelvin=lambda x: (x['temp_fahrenheit'] + 459.67) * 5 / 9)\n", 290 | "df" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "##### ❇️ Hope you enjoyed reading!! 📖 \n", 298 | "##### ❇️ follow → @akshay_pachaar " 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [] 307 | } 308 | ], 309 | "metadata": { 310 | "kernelspec": { 311 | "display_name": "env_twitter", 312 | "language": "python", 313 | "name": "env_twitter" 314 | }, 315 | "language_info": { 316 | "codemirror_mode": { 317 | "name": "ipython", 318 | "version": 3 319 | }, 320 | "file_extension": ".py", 321 | "mimetype": "text/x-python", 322 | "name": "python", 323 | "nbconvert_exporter": "python", 324 | "pygments_lexer": "ipython3", 325 | "version": "3.10.5" 326 | } 327 | }, 328 | "nbformat": 4, 329 | "nbformat_minor": 4 330 | } 331 | -------------------------------------------------------------------------------- /NLP/tokenization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### 🔴 Understanding `Tokenization` in NLP!" 
8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "1️⃣ Character Tokenization" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 49, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "['W', 'e', ' ', 'l', 'o', 'v', 'e', ' ', 'N', 'L', 'P', '!']\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "raw_text = \"We love NLP!\"\n", 32 | "tokens = list(raw_text)\n", 33 | "print(tokens)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 50, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "{' ': 0, '!': 1, 'L': 2, 'N': 3, 'P': 4, 'W': 5, 'e': 6, 'l': 7, 'o': 8, 'v': 9}\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "# Numerical encoding of individual character\n", 51 | "token2idx = {char: idx for idx, char in enumerate(sorted(set(tokens)))}\n", 52 | "print(token2idx)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 51, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "[5, 6, 0, 7, 8, 9, 6, 0, 3, 2, 4, 1]\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "# Using token2idx to map our tokenized text to integers\n", 70 | "integer_tokens = [token2idx[token] for token in tokens]\n", 71 | "print(integer_tokens)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 52, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "torch.Size([12, 10])" 83 | ] 84 | }, 85 | "execution_count": 52, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "# One-hot encoding the numbers\n", 92 | "import torch\n", 93 | "import torch.nn.functional as F\n", 94 | "\n", 95 | "integer_tokens = torch.tensor(integer_tokens)\n", 96 | "one_hot_encode_tokens = F.one_hot(integer_tokens, 
num_classes=len(token2idx))\n", 97 | "one_hot_encode_tokens.shape" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 53, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "Token = W\n", 110 | "Integer Encoded Token = 5\n", 111 | "One hot encoded Token = tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "print(f\"Token = {tokens[0]}\")\n", 117 | "print(f\"Integer Encoded Token = {integer_tokens[0]}\")\n", 118 | "print(f\"One hot encoded Token = {one_hot_encode_tokens[0]}\")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "2️⃣ Word tokenization" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 54, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "['We', 'love', 'NLP!']\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "# Splitting raw text based on whitespaces\n", 143 | "word_tokens = raw_text.split()\n", 144 | "print(word_tokens)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "3️⃣ Subword Tokenization" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 55, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "from transformers import AutoTokenizer" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 56, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "model_ckpt = 'distilbert-base-uncased'\n", 170 | "tokenizer = AutoTokenizer.from_pretrained(model_ckpt)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 57, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "from transformers import DistilBertTokenizer\n", 180 | "distilbert_tokenizer = DistilBertTokenizer.from_pretrained(model_ckpt)" 181 
| ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 58, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "name": "stdout", 190 | "output_type": "stream", 191 | "text": [ 192 | "{'input_ids': [101, 2057, 2293, 17953, 2361, 999, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "# Let's see the tokenizer in action now \n", 198 | "encoded_text = tokenizer(raw_text)\n", 199 | "print(encoded_text)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 59, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "['[CLS]', 'we', 'love', 'nl', '##p', '!', '[SEP]']\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "tokens = tokenizer.convert_ids_to_tokens(encoded_text.input_ids)\n", 217 | "print(tokens)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "4️⃣ Tokenizing entire Dataset" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 32, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "# !pip install datasets" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 60, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "from datasets import load_dataset" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 66, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stderr", 252 | "output_type": "stream", 253 | "text": [ 254 | "Found cached dataset emotion (/Users/pachaar/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705)\n" 255 | ] 256 | }, 257 | { 258 | "data": { 259 | "application/vnd.jupyter.widget-view+json": { 260 | "model_id": "ee4202f445ef41b7bd91cc51ccfe27dc", 261 | "version_major": 2, 262 | "version_minor": 0 263 | }, 264 | "text/plain": [ 265 | " 0%| | 0/3 [00:00" 66 | 
] 67 | }, 68 | "metadata": { 69 | "needs_background": "light" 70 | }, 71 | "output_type": "display_data" 72 | } 73 | ], 74 | "source": [ 75 | "plt.scatter(X[:,0], X[:,1])\n", 76 | "plt.show()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "id": "df3f68aa", 82 | "metadata": {}, 83 | "source": [ 84 | "🔘 Hope you enjoyed reading!! 📖
\n", 85 | "🔘 follow → `@akshay_pachaar` " 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "id": "694ef174", 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "env_twitter", 100 | "language": "python", 101 | "name": "env_twitter" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.10.5" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 5 118 | } 119 | -------------------------------------------------------------------------------- /pandas/df.loc_pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "##### ❇️ df.loc : Access a group of rows and columns by label(s) or a boolean array." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/html": [ 27 | "
\n", 28 | "\n", 41 | "\n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | "
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsex
0AdelieTorgersen39.118.7181.03750.0Male
1AdelieTorgersen39.517.4186.03800.0Female
2AdelieTorgersen40.318.0195.03250.0Female
3AdelieTorgersenNaNNaNNaNNaNNaN
\n", 97 | "
" 98 | ], 99 | "text/plain": [ 100 | " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", 101 | "0 Adelie Torgersen 39.1 18.7 181.0 \n", 102 | "1 Adelie Torgersen 39.5 17.4 186.0 \n", 103 | "2 Adelie Torgersen 40.3 18.0 195.0 \n", 104 | "3 Adelie Torgersen NaN NaN NaN \n", 105 | "\n", 106 | " body_mass_g sex \n", 107 | "0 3750.0 Male \n", 108 | "1 3800.0 Female \n", 109 | "2 3250.0 Female \n", 110 | "3 NaN NaN " 111 | ] 112 | }, 113 | "execution_count": 2, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "df = pd.read_csv('penguins.csv')\n", 120 | "df.head(4)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 3, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "species Adelie\n", 132 | "island Torgersen\n", 133 | "bill_length_mm 40.3\n", 134 | "bill_depth_mm 18.0\n", 135 | "flipper_length_mm 195.0\n", 136 | "body_mass_g 3250.0\n", 137 | "sex Female\n", 138 | "Name: 2, dtype: object" 139 | ] 140 | }, 141 | "execution_count": 3, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "# 🔴 using an index\n", 148 | "df.loc[2]" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 4, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/html": [ 159 | "
\n", 160 | "\n", 173 | "\n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsex
0AdelieTorgersen39.118.7181.03750.0Male
2AdelieTorgersen40.318.0195.03250.0Female
\n", 209 | "
" 210 | ], 211 | "text/plain": [ 212 | " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", 213 | "0 Adelie Torgersen 39.1 18.7 181.0 \n", 214 | "2 Adelie Torgersen 40.3 18.0 195.0 \n", 215 | "\n", 216 | " body_mass_g sex \n", 217 | "0 3750.0 Male \n", 218 | "2 3250.0 Female " 219 | ] 220 | }, 221 | "execution_count": 4, 222 | "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "# 🟡 Slicing along indices with step size\n", 228 | "# ⭕️ df.loc[start:stop:step]\n", 229 | "df.loc[0:3:2]" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 5, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "data": { 239 | "text/html": [ 240 | "
\n", 241 | "\n", 254 | "\n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | "
speciessex
1AdelieFemale
2AdelieFemale
\n", 275 | "
" 276 | ], 277 | "text/plain": [ 278 | " species sex\n", 279 | "1 Adelie Female\n", 280 | "2 Adelie Female" 281 | ] 282 | }, 283 | "execution_count": 5, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "# 🟢 Using a slice object along indices and list of column labels\n", 290 | "df.loc[1:2, ['species', 'sex']] # ⬅️ Notice slicing is inclusive of start and stop indices. " 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 6, 296 | "metadata": {}, 297 | "outputs": [ 298 | { 299 | "data": { 300 | "text/html": [ 301 | "
\n", 302 | "\n", 315 | "\n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | "
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsex
169ChinstrapDream58.017.8181.03700.0Female
196ChinstrapDream50.917.9196.03675.0Female
\n", 351 | "
" 352 | ], 353 | "text/plain": [ 354 | " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", 355 | "169 Chinstrap Dream 58.0 17.8 181.0 \n", 356 | "196 Chinstrap Dream 50.9 17.9 196.0 \n", 357 | "\n", 358 | " body_mass_g sex \n", 359 | "169 3700.0 Female \n", 360 | "196 3675.0 Female " 361 | ] 362 | }, 363 | "execution_count": 6, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "# 🟣 using a boolean 👇 array; \n", 370 | "df.loc[(df['bill_length_mm'] > 50.5) & (df['sex'] == 'Female')]" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "##### ❇️ Hope you enjoyed reading!! 📖 \n", 378 | "##### ❇️ follow → @akshay_pachaar " 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": {}, 385 | "outputs": [], 386 | "source": [] 387 | } 388 | ], 389 | "metadata": { 390 | "kernelspec": { 391 | "display_name": "env_twitter", 392 | "language": "python", 393 | "name": "env_twitter" 394 | }, 395 | "language_info": { 396 | "codemirror_mode": { 397 | "name": "ipython", 398 | "version": 3 399 | }, 400 | "file_extension": ".py", 401 | "mimetype": "text/x-python", 402 | "name": "python", 403 | "nbconvert_exporter": "python", 404 | "pygments_lexer": "ipython3", 405 | "version": "3.10.5" 406 | } 407 | }, 408 | "nbformat": 4, 409 | "nbformat_minor": 4 410 | } 411 | --------------------------------------------------------------------------------