├── Basic Python ├── Introduction to Python Basics.pdf ├── README.md └── intro.ipynb ├── Basic matplotlib ├── 02-Matplotlib Exercises.ipynb ├── matplotlib.ipynb ├── sample ├── samplefile.pdf └── samplefile.png ├── Basic operations in Pandas ├── Basic Operations with Dataframe.ipynb └── sample ├── CODE_OF_CONDUCT.md ├── Data Manipulation Using Pandas ├── Data_Manipulation_using_Pandas.ipynb ├── Datasets │ └── bigmart_data.csv └── sample file ├── Data Quest ├── Data_Quest.ipynb └── Datasets │ └── sample ├── Data visualization using Pandas ├── Pandas visualization.ipynb └── sample ├── Deep Learning & Neural Networks ├── MNIST_classifier.ipynb ├── Neural Network from scratch │ ├── 1 - OOP basics.ipynb │ ├── 2 - Operations.ipynb │ ├── 3 - contents.ipynb │ ├── 4 - session.ipynb │ └── README.md ├── README.md ├── Tensorflow Basics │ ├── 1 - basics.ipynb │ └── README.md └── sample.ipynb ├── Descriptive stats using Pandas ├── README.md └── Basic_Descriptive_Stats_using_Pandas.ipynb ├── Geographical Plotting ├── Agriculture USA.png ├── GDP World.png ├── Geographical plotting.ipynb ├── README.md └── sample ├── Hackathon ├── README.md ├── hackthon.ipynb ├── submission 3.csv ├── test.csv └── train.csv ├── LICENSE ├── Machine Learning ├── All models.ipynb ├── DBSCAN │ ├── DBSCAN.ipynb │ └── README.md ├── Decision Trees and Random Forests │ ├── Cancer_Dataset.ipynb │ ├── Decision_Tree_and_Random_Forest_classifiers.ipynb │ ├── README.md │ ├── RandomForestRegressor.ipynb │ └── kyphosis.csv ├── Fine Tuning Models │ ├── Encoding.ipynb │ ├── FMST.ipynb │ ├── Gridsearch.ipynb │ └── README.md ├── Hierarchical Clustering │ ├── Hierarchical Clustering dendogram.png │ ├── Hierarchical Clustering.ipynb │ └── README.md ├── K Means Clustering │ ├── Cluster Comparisons.png │ ├── Kmeans.ipynb │ ├── README.md │ └── University Clustering.ipynb ├── KNN Classification │ ├── 02-K Nearest Neighbors Project.ipynb │ ├── Diabetes_Classification.ipynb │ ├── Iris_KNN Classification.ipynb │ ├── KNNClassifier.ipynb │ └── README.md ├── Linear Regression │ ├── 02-Linear Regression Project.ipynb │ ├── Linear Regression.ipynb │ ├── README.md │ └── Simple Linear Regression.ipynb ├── Logistic Regression │ ├── Logistic_Regression.ipynb │ ├── Parkinson's_Disease.ipynb │ └── README.md ├── Principal Component Analysis │ ├── ML assign-1.ipynb │ ├── ML assign-2.ipynb │ ├── PCA.ipynb │ └── README.md ├── Python PPT │ ├── Group 2 - sklearn PPT.pdf │ ├── Python PPT.ipynb │ └── README.md ├── README.md ├── Recommender Systems │ ├── Collaborative Filtering.ipynb │ ├── README.md │ └── Recommender.ipynb ├── SGD Classifier │ ├── MNIST_SGDClassifier.ipynb │ └── README.md └── Support Vector Machine │ ├── README.md │ └── SVMClassifier.ipynb ├── Merging Data using Pandas ├── Merging Data using Pandas.ipynb └── sample ├── Natural Language Processing ├── Natural_Language.ipynb ├── Notebooks │ ├── 1 - Into to Python text basics.ipynb │ ├── 2 - Working with text files.ipynb │ ├── 3 - Working with PDF Files.ipynb │ ├── 4 - regex.ipynb │ ├── 5 - NLP Python basics.ipynb │ ├── 6 - Tokenization.ipynb │ └── readme.md └── README.md ├── Numpy ├── README.md ├── intro to numpy.pdf ├── numpy operations.ipynb └── numpy.ipynb ├── Plotly & Cufflinks ├── plotly_cufflinks.ipynb └── sample.txt ├── README.md ├── Seaborn ├── README.md ├── heatmap.pdf ├── sample.txt └── seaborn.ipynb ├── Streamlit ├── Certificate app │ ├── Procfile │ ├── README.md │ ├── certificates.py │ └── setup.sh ├── EDAapp.py ├── README.md ├── app1.py ├── app2.py ├── app3.py ├── app4.py ├── app5.py ├── app6.py 
└── simple.ipynb ├── Web Scraping ├── README.md └── web_scrap.ipynb └── _config.yml /Basic Python/Introduction to Python Basics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Basic Python/Introduction to Python Basics.pdf -------------------------------------------------------------------------------- /Basic Python/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the notebook file covering basic Python syntax and semantics 2 | -------------------------------------------------------------------------------- /Basic matplotlib/sample: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Basic matplotlib/samplefile.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Basic matplotlib/samplefile.pdf -------------------------------------------------------------------------------- /Basic matplotlib/samplefile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Basic matplotlib/samplefile.png -------------------------------------------------------------------------------- /Basic operations in Pandas/sample: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community.
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /Data Manipulation Using Pandas/sample file: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Data Quest/Datasets/sample: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Data visualization using Pandas/sample: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/MNIST_classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "MNIST classifier.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "colab": { 24 | "base_uri": "https://localhost:8080/" 25 | }, 26 | "id": "yoX0WXS4wzqG", 27 | "outputId": "962c7e74-72e6-4204-aafb-51a6e7152e1a" 28 | }, 29 | "outputs": [ 30 | { 31 | "output_type": "execute_result", 32 | "data": { 33 | "text/plain": [ 34 | "dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "execution_count": 1 39 | } 40 | ], 41 | "source": [ 42 | "from sklearn.datasets import fetch_openml\n", 43 | "mnist = fetch_openml('mnist_784', version = 1)\n", 44 | "mnist.keys()" 45 | ] 46 | }, 47 | { 48 | 
"cell_type": "code", 49 | "source": [ 50 | "print(mnist.DESCR)" 51 | ], 52 | "metadata": { 53 | "colab": { 54 | "base_uri": "https://localhost:8080/" 55 | }, 56 | "id": "TeRgr5nqe42A", 57 | "outputId": "3c26796a-17a4-490e-9afd-770f76268dc6" 58 | }, 59 | "execution_count": 3, 60 | "outputs": [ 61 | { 62 | "output_type": "stream", 63 | "name": "stdout", 64 | "text": [ 65 | "**Author**: Yann LeCun, Corinna Cortes, Christopher J.C. Burges \n", 66 | "**Source**: [MNIST Website](http://yann.lecun.com/exdb/mnist/) - Date unknown \n", 67 | "**Please cite**: \n", 68 | "\n", 69 | "The MNIST database of handwritten digits with 784 features, raw data available at: http://yann.lecun.com/exdb/mnist/. It can be split in a training set of the first 60,000 examples, and a test set of 10,000 examples \n", 70 | "\n", 71 | "It is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image. It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting. The original black and white (bilevel) images from NIST were size normalized to fit in a 20x20 pixel box while preserving their aspect ratio. The resulting images contain grey levels as a result of the anti-aliasing technique used by the normalization algorithm. the images were centered in a 28x28 image by computing the center of mass of the pixels, and translating the image so as to position this point at the center of the 28x28 field. \n", 72 | "\n", 73 | "With some classification methods (particularly template-based methods, such as SVM and K-nearest neighbors), the error rate improves when the digits are centered by bounding box rather than center of mass. If you do this kind of pre-processing, you should report it in your publications. The MNIST database was constructed from NIST's NIST originally designated SD-3 as their training set and SD-1 as their test set. However, SD-3 is much cleaner and easier to recognize than SD-1. The reason for this can be found on the fact that SD-3 was collected among Census Bureau employees, while SD-1 was collected among high-school students. Drawing sensible conclusions from learning experiments requires that the result be independent of the choice of training set and test among the complete set of samples. Therefore it was necessary to build a new database by mixing NIST's datasets. \n", 74 | "\n", 75 | "The MNIST training set is composed of 30,000 patterns from SD-3 and 30,000 patterns from SD-1. Our test set was composed of 5,000 patterns from SD-3 and 5,000 patterns from SD-1. The 60,000 pattern training set contained examples from approximately 250 writers. We made sure that the sets of writers of the training set and test set were disjoint. SD-1 contains 58,527 digit images written by 500 different writers. In contrast to SD-3, where blocks of data from each writer appeared in sequence, the data in SD-1 is scrambled. Writer identities for SD-1 is available and we used this information to unscramble the writers. We then split SD-1 in two: characters written by the first 250 writers went into our new training set. The remaining 250 writers were placed in our test set. Thus we had two sets with nearly 30,000 examples each. The new training set was completed with enough examples from SD-3, starting at pattern # 0, to make a full set of 60,000 training patterns. 
Similarly, the new test set was completed with SD-3 examples starting at pattern # 35,000 to make a full set with 60,000 test patterns. Only a subset of 10,000 test images (5,000 from SD-1 and 5,000 from SD-3) is available on this site. The full 60,000 sample training set is available.\n", 76 | "\n", 77 | "Downloaded from openml.org.\n" 78 | ] 79 | } 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "source": [ 85 | "X, y = mnist['data'], mnist['target']" 86 | ], 87 | "metadata": { 88 | "id": "_pFdLf9SgC_1" 89 | }, 90 | "execution_count": 11, 91 | "outputs": [] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "source": [ 96 | "X.shape" 97 | ], 98 | "metadata": { 99 | "colab": { 100 | "base_uri": "https://localhost:8080/" 101 | }, 102 | "id": "iovk5Nhdg8LV", 103 | "outputId": "9cbbcdd7-ea12-401b-e90f-5e0756a31590" 104 | }, 105 | "execution_count": 13, 106 | "outputs": [ 107 | { 108 | "output_type": "execute_result", 109 | "data": { 110 | "text/plain": [ 111 | "(70000, 784)" 112 | ] 113 | }, 114 | "metadata": {}, 115 | "execution_count": 13 116 | } 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "source": [ 122 | "y.shape" 123 | ], 124 | "metadata": { 125 | "colab": { 126 | "base_uri": "https://localhost:8080/" 127 | }, 128 | "id": "y7IeTAJhg-8s", 129 | "outputId": "fe10838b-a7b5-49fc-ee40-ee3d8f66dd24" 130 | }, 131 | "execution_count": 14, 132 | "outputs": [ 133 | { 134 | "output_type": "execute_result", 135 | "data": { 136 | "text/plain": [ 137 | "(70000,)" 138 | ] 139 | }, 140 | "metadata": {}, 141 | "execution_count": 14 142 | } 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "source": [ 148 | "import matplotlib.pyplot as plt\n", 149 | "import matplotlib as mpl\n", 150 | "import pandas as pd\n", 151 | "import numpy as np\n", 152 | "\n", 153 | "some_digit = X.iloc[0]" 154 | ], 155 | "metadata": { 156 | "id": "A5sjAmibhDcS" 157 | }, 158 | "execution_count": 20, 159 | "outputs": [] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "source": [ 164 | "some_digit_image = np.array(some_digit).reshape(28,28)" 165 | ], 166 | "metadata": { 167 | "id": "isS39TyJiqdU" 168 | }, 169 | "execution_count": 22, 170 | "outputs": [] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "source": [ 175 | "plt.imshow(some_digit_image, cmap = mpl.cm.binary, interpolation='nearest')\n", 176 | "plt.axis('off')\n", 177 | "plt.show()" 178 | ], 179 | "metadata": { 180 | "colab": { 181 | "base_uri": "https://localhost:8080/", 182 | "height": 248 183 | }, 184 | "id": "IpXDMjrQjBVO", 185 | "outputId": "d9d9c16b-ed4b-446c-b832-82a56ea698c5" 186 | }, 187 | "execution_count": 32, 188 | "outputs": [ 189 | { 190 | "output_type": "display_data", 191 | "data": { 192 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAGaElEQVR4nO3dPUiWfR/G8dveSyprs2gOXHqhcAh6hZqsNRqiJoPKRYnAoTGorWyLpqhFcmgpEmqIIByKXiAHIaKhFrGghiJ81ucBr991Z/Z4XPr5jB6cXSfVtxP6c2rb9PT0P0CeJfN9A8DMxAmhxAmhxAmhxAmhljXZ/Vcu/H1tM33RkxNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCLZvvG+B//fr1q9y/fPnyVz9/aGio4fb9+/fy2vHx8XK/ceNGuQ8MDDTc7t69W167atWqcr948WK5X7p0qdzngycnhBInhBInhBInhBInhBInhBInhHLOOYMPHz6U+48fP8r92bNn5f706dOG29TUVHnt8PBwuc+nLVu2lPv58+fLfWRkpOG2du3a8tpt27aV+759+8o9kScnhBInhBInhBInhBInhBInhGqbnp6u9nJsVS9evCj3gwcPlvvffm0r1dKlS8v91q1b5d7e3j7rz960aVO5b9iwody3bt0668/+P2ib6YuenBBKnBBKnBBKnBBKnBBKnBBKnBBqUZ5zTk5Olnt3d3e5T0xMzOXtzKlm997sPPDx48cNtxUrVpTXLtbz3zngnBNaiTghlDghlDghlDghlDghlDgh1KL81pgbN24s96tXr5b7/fv3y33Hjh3l3tfXV+6V7du3l/vo6Gi5N3un8s2bNw23a9euldcytzw5IZQ4IZQ4IZQ4IZQ4IZQ4IZQ4IdSifJ/zT339+rXcm/24ut7e3obbzZs3y2tv375d7idOnCh3InmfE1qJOCGUOCGUOCGUOCGUOCGUOCHUonyf80+tW7fuj65fv379rK9tdg56/Pjxcl+yxL/HrcKfFIQSJ4QSJ4QSJ4QSJ4QSJ4Tyytg8+PbtW8Otp6envPbJkyfl/uDBg3I/fPhwuTMvvDIGrUScEEqcEEqcEEqcEEqcEEqcEMo5Z5iJiYly37lzZ7l3dHSU+4EDB8p9165dDbezZ8+W17a1zXhcR3POOaGViBNCiRNCiRNCiRNCiRNCiRNCOedsMSMjI+V++vTpcm/24wsrly9fLveTJ0+We2dn56w/e4FzzgmtRJwQSpwQSpwQSpwQSpwQSpwQyjnnAvP69ety7+/vL/fR0dFZf/aZM2fKfXBwsNw3b948689ucc45oZWIE0KJE0KJE0KJE0KJE0KJE0I551xkpqamyv3+/fsNt1OnTpXXNvm79M+hQ4fK/dGjR+W+gDnnhFYiTgglTgglTgglTgglTgjlKIV/beXKleX+8+fPcl++fHm5P3z4sOG2f//+8toW5ygFWok4IZQ4IZQ4IZQ4IZQ4IZQ4IdSy+b4B5tarV6/KfXh4uNzHxsYabs3OMZvp6uoq97179/7Rr7/QeHJCKHFCKHFCKHFCKHFCKHFCKHFCKOecYcbHx8v9+vXr5X7v3r1y//Tp02/f07+1bFn916mzs7PclyzxrPhvfjcglDghlDghlDghlDghlDghlDghlHPOv6DZWeKdO3cabkNDQ+W179+/n80tzYndu3eX++DgYLkfPXp0Lm9nwfPkhFDihFDihFDihFDihFDihFCOUmbw+fPncn/79m25nzt3rtzfvXv32/c0V7q7u8v9woULDbdjx46V13rla2753YRQ4oRQ4oRQ4oRQ4oRQ4oRQ4oRQC/acc3JysuHW29tbXvvy5ctyn5iYmNU9zYU9e/aUe39/f7kfOXKk3FevXv3b98Tf4ckJocQJocQJocQJocQJocQJocQJoWLPOZ8/f17uV65cKfexsbGG28ePH2d1T3NlzZo1Dbe+vr7y2mbffrK9vX1W90QeT04IJU4IJU4IJU4IJU4IJU4IJU4IFXvOOTIy8kf7n+jq6ir3np6ecl+6dGm5DwwMNNw6OjrKa1k8PDkhlDghlDghlDghlDghlDghlDghVNv09HS1lyMwJ9pm+qInJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4Rq9iMAZ/yWfcDf58kJocQJocQJocQJocQJocQJof4DO14Dhyk10VwAAAAASUVORK5CYII=\n", 193 | "text/plain": [ 194 | "
" 195 | ] 196 | }, 197 | "metadata": { 198 | "needs_background": "light" 199 | } 200 | } 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "source": [ 206 | "y = y.astype(np.uint8)" 207 | ], 208 | "metadata": { 209 | "id": "lMIaDsX6oJb1" 210 | }, 211 | "execution_count": 53, 212 | "outputs": [] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "source": [ 217 | "X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]" 218 | ], 219 | "metadata": { 220 | "id": "OeAyLE4rk3LC" 221 | }, 222 | "execution_count": 54, 223 | "outputs": [] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "source": [ 228 | "# binary classifier\n", 229 | "# The predicted number is 5 or not\n", 230 | "\n", 231 | "y_train_5 = (y_train == 5)\n", 232 | "y_test_5 = (y_test == 5)" 233 | ], 234 | "metadata": { 235 | "id": "tyTRVlSAla7o" 236 | }, 237 | "execution_count": 55, 238 | "outputs": [] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "source": [ 243 | "from sklearn.svm import SVC\n", 244 | "model = SVC()\n", 245 | "model.fit(X_train, y_train)" 246 | ], 247 | "metadata": { 248 | "colab": { 249 | "base_uri": "https://localhost:8080/" 250 | }, 251 | "id": "c_gfuU2hmdNP", 252 | "outputId": "5a467eff-8f49-4506-ddf9-6ff4d70c214e" 253 | }, 254 | "execution_count": 97, 255 | "outputs": [ 256 | { 257 | "output_type": "execute_result", 258 | "data": { 259 | "text/plain": [ 260 | "SVC()" 261 | ] 262 | }, 263 | "metadata": {}, 264 | "execution_count": 97 265 | } 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "source": [ 271 | "pred = model.predict(X_test)\n", 272 | "print(pred)" 273 | ], 274 | "metadata": { 275 | "colab": { 276 | "base_uri": "https://localhost:8080/" 277 | }, 278 | "id": "oX014R6vnv24", 279 | "outputId": "acace53f-4ae6-46e3-c944-5f5ece12aa97" 280 | }, 281 | "execution_count": 98, 282 | "outputs": [ 283 | { 284 | "output_type": "stream", 285 | "name": "stdout", 286 | "text": [ 287 | "[7 2 1 ... 
4 5 6]\n" 288 | ] 289 | } 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "source": [ 295 | "model.classes_" 296 | ], 297 | "metadata": { 298 | "colab": { 299 | "base_uri": "https://localhost:8080/" 300 | }, 301 | "id": "p3hpnceLsuyB", 302 | "outputId": "4aebb33d-95c5-4a7f-de35-051ef03896bb" 303 | }, 304 | "execution_count": 94, 305 | "outputs": [ 306 | { 307 | "output_type": "execute_result", 308 | "data": { 309 | "text/plain": [ 310 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)" 311 | ] 312 | }, 313 | "metadata": {}, 314 | "execution_count": 94 315 | } 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "source": [ 321 | "from sklearn.metrics import confusion_matrix, classification_report\n", 322 | "print(confusion_matrix(y_test,pred))\n", 323 | "print(classification_report(y_test, pred))" 324 | ], 325 | "metadata": { 326 | "colab": { 327 | "base_uri": "https://localhost:8080/" 328 | }, 329 | "id": "6SlJ-KxmtSR_", 330 | "outputId": "ca0d196b-a77a-4125-851b-fcc4bd6749fb" 331 | }, 332 | "execution_count": 99, 333 | "outputs": [ 334 | { 335 | "output_type": "stream", 336 | "name": "stdout", 337 | "text": [ 338 | "[[ 973 0 1 0 0 2 1 1 2 0]\n", 339 | " [ 0 1126 3 1 0 1 1 1 2 0]\n", 340 | " [ 6 1 1006 2 1 0 2 7 6 1]\n", 341 | " [ 0 0 2 995 0 2 0 5 5 1]\n", 342 | " [ 0 0 5 0 961 0 3 0 2 11]\n", 343 | " [ 2 0 0 9 0 871 4 1 4 1]\n", 344 | " [ 6 2 0 0 2 3 944 0 1 0]\n", 345 | " [ 0 6 11 1 1 0 0 996 2 11]\n", 346 | " [ 3 0 2 6 3 2 2 3 950 3]\n", 347 | " [ 3 4 1 7 10 2 1 7 4 970]]\n", 348 | " precision recall f1-score support\n", 349 | "\n", 350 | " 0 0.98 0.99 0.99 980\n", 351 | " 1 0.99 0.99 0.99 1135\n", 352 | " 2 0.98 0.97 0.98 1032\n", 353 | " 3 0.97 0.99 0.98 1010\n", 354 | " 4 0.98 0.98 0.98 982\n", 355 | " 5 0.99 0.98 0.98 892\n", 356 | " 6 0.99 0.99 0.99 958\n", 357 | " 7 0.98 0.97 0.97 1028\n", 358 | " 8 0.97 0.98 0.97 974\n", 359 | " 9 0.97 0.96 0.97 1009\n", 360 | "\n", 361 | " accuracy 0.98 10000\n", 362 | " macro avg 0.98 0.98 0.98 10000\n", 363 | "weighted avg 0.98 0.98 0.98 10000\n", 364 | "\n" 365 | ] 366 | } 367 | ] 368 | } 369 | ] 370 | } -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/Neural Network from scratch/1 - OOP basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
Manual Neural Network
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "
Basics of object-oriented programming to understand the manual way of creating a neural network
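As a quick preview, the three ideas the cells below walk through (defining a class with `__init__`, inheriting from a parent class, and reaching the parent's constructor with `super()`) fit together like this; a minimal sketch with toy class names, not taken from the cells themselves:

```python
class Parent:
    def __init__(self):
        print("parent init")    # runs whenever a Parent is constructed

class Child(Parent):            # inherit by naming the parent class in parentheses
    def __init__(self):
        super().__init__()      # run the parent's constructor first
        print("child init")

c = Child()                     # prints "parent init", then "child init"
print(isinstance(c, Parent))    # True: a Child is also a Parent
```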
" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# to create a class we use the keyword class followed up by classname() in paranthesis\n", 24 | "\n", 25 | "class SimpleClass():\n", 26 | " def __init__(self): # initializes the class, acts as a constrcutor, calls when object is created\n", 27 | " print(\"hello\")" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "hello\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "# creating a instance of the class i.e, the object\n", 45 | "x = SimpleClass() # when object is created contents of the constructor is called automatically" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 5, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "class SimpleClass():\n", 55 | " def __init__(self): # initializes the class, acts as a constrcutor, calls when object is created\n", 56 | " print(\"hello\")\n", 57 | "\n", 58 | " # The keyword self represents the instance of a class and binds the attributes with the given arguments\n", 59 | " # creating a user defined method inside a class\n", 60 | " def yell(self):\n", 61 | " print(\"Yelling\")" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 6, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "hello\n", 74 | "Yelling\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "x = SimpleClass() # object creation, calls the constrcutor __init__()\n", 80 | "x.yell() # to access a method of inside a class" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 7, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# using the concept of inheritance\n", 90 | "class ExtendedClass(SimpleClass): # pass the parent class to be inherited inside the parantheis\n", 91 | " def __init__(self):\n", 92 | " print(\"Extend!!!\")" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 8, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "Extend!!!\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "y = ExtendedClass() # object creation for the inherited class" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 10, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "Yelling\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "y.yell() # contents of parent class can also be inherited using the object of ExtendedClass" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 11, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "# super keyword: it is used to give access to methods and properties of a parent class\n", 136 | "# as we see here that when instance of a parent class is inherited, we didn't get the hello as output, because it solely belongs to the parent class\n", 137 | "# So, to access all the contents of the parent class including the __init__(constructor) contents, we use super class\n", 138 | "\n", 139 | "class NewClass(SimpleClass): # inheriting from SimpleClass\n", 140 | " def __init__(self):\n", 141 | " super().__init__() # to access all the contents of the parent class\n", 142 | " print(\"Extend!!!\")" 143 | ] 144 | }, 145 
| { 146 | "cell_type": "code", 147 | "execution_count": 13, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | "hello\n", 155 | "Extend!!!\n", 156 | "Yelling\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "z = NewClass()\n", 162 | "z.yell()\n", 163 | "\n", 164 | "\n", 165 | "# now we see that even hello is priniting after using super keyword" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 14, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "# Example code:-\n", 175 | "\n", 176 | "class Example():\n", 177 | " def __init__(self,name):\n", 178 | " print(\"Hello \", name)\n", 179 | " def method1(self,USN):\n", 180 | " print(\"Your USN is \", USN)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 17, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "name": "stdout", 190 | "output_type": "stream", 191 | "text": [ 192 | "Hello Mithun\n", 193 | "Your USN is 19BTRCR006\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "obj = Example(\"Mithun\")\n", 199 | "obj.method1(\"19BTRCR006\")" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 18, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/plain": [ 210 | "__main__.Example" 211 | ] 212 | }, 213 | "execution_count": 18, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "type(obj)\n", 220 | "\n", 221 | "# from this, we understand that the obj belongss to the type of the class it is created in" 222 | ] 223 | } 224 | ], 225 | "metadata": { 226 | "interpreter": { 227 | "hash": "69eb92836b941e979072a76c7fcfffe5419cca933cedd02cfafbdfca1a93358c" 228 | }, 229 | "kernelspec": { 230 | "display_name": "Python 3.9.10 64-bit", 231 | "language": "python", 232 | "name": "python3" 233 | }, 234 | "language_info": { 235 | "codemirror_mode": { 236 | "name": "ipython", 237 | "version": 3 238 | }, 239 | "file_extension": ".py", 240 | "mimetype": "text/x-python", 241 | "name": "python", 242 | "nbconvert_exporter": "python", 243 | "pygments_lexer": "ipython3", 244 | "version": "3.9.10" 245 | }, 246 | "orig_nbformat": 4 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/Neural Network from scratch/2 - Operations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
Manual Neural Network
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "
We shall build an operation class
\n", 15 | "\n", 16 | "It has following contents:-\n", 17 | "\n", 18 | "* Input Node \n", 19 | "\n", 20 | "* Output Node \n", 21 | "\n", 22 | "* Global default graph variable\n", 23 | "\n", 24 | "* Compute (which will be overwritten by the extended class) " 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "class Operation(): # creating a class\n", 34 | " def __init__(self, input_nodes=[]): # intializing the class using __init__ method\n", 35 | " self.input_nodes = input_nodes\n", 36 | " self.output_node = [] # we shall appened the outputs layer\n", 37 | "\n", 38 | " for node in input_nodes:\n", 39 | " node.output_node.append(self)\n", 40 | "\n", 41 | " def compute(self):\n", 42 | " pass # does nothing, created here so that we can override in extended class" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 5, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# Let's do addition operation\n", 52 | "\n", 53 | "class add(Operation): # inheriting from Operation()\n", 54 | " def __init__(self,x,y):\n", 55 | " super().__init__([x,y])\n", 56 | " def compute(self,x_var,y_var):\n", 57 | " self.input_nodes = [x_var,y_var]\n", 58 | " return x_var+y_var" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 6, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# Let's do multiplication operation\n", 68 | "\n", 69 | "class multiply(Operation): # inheriting from Operation()\n", 70 | " def __init__(self,x,y):\n", 71 | " super().__init__([x,y])\n", 72 | " def compute(self,x_var,y_var):\n", 73 | " self.input_nodes = [x_var,y_var]\n", 74 | " return x_var*y_var" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 7, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# Let's do matrix multiplication operation\n", 84 | "\n", 85 | "class matmul(Operation): # inheriting from Operation()\n", 86 | " def __init__(self,x,y):\n", 87 | " super().__init__([x,y])\n", 88 | " def compute(self,x_var,y_var):\n", 89 | " self.input_nodes = [x_var,y_var]\n", 90 | " return x_var.dot(y_var)" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "interpreter": { 96 | "hash": "69eb92836b941e979072a76c7fcfffe5419cca933cedd02cfafbdfca1a93358c" 97 | }, 98 | "kernelspec": { 99 | "display_name": "Python 3.9.10 64-bit", 100 | "language": "python", 101 | "name": "python3" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.9.10" 114 | }, 115 | "orig_nbformat": 4 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 2 119 | } 120 | -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/Neural Network from scratch/3 - contents.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
Manual Neural Network
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "
Creating variables, placeholders and graphs
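The key mechanism in the cells below is a module-level global: `Graph.set_as_default()` publishes the graph, and every `Variable` and `Placeholder` then registers itself with that graph when it is constructed. A stripped-down sketch of the registration pattern (simplified names; assumes the same design as the cells that follow):

```python
class Graph:
    def __init__(self):
        self.variables = []        # nodes register themselves here

    def set_as_default(self):
        global _default_graph      # publish this graph for later constructors
        _default_graph = self

class Variable:
    def __init__(self, initial_value=None):
        self.value = initial_value
        _default_graph.variables.append(self)   # self-registration on creation

g = Graph()
g.set_as_default()
v = Variable(10)
print(len(g.variables))            # 1, because the Variable appended itself to g
```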
" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "* placeholders :- an empty node that needs a value to be provided to compute output\n", 22 | "\n", 23 | "* variables :- changable parameter of graph like weights for example\n", 24 | "\n", 25 | "* Graph :- a global variable connecting variables and placeholders to operations" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 4, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "class Operation(): # creating a class\n", 35 | " def __init__(self, input_nodes=[]): # intializing the class using __init__ method\n", 36 | " self.input_nodes = input_nodes\n", 37 | " self.output_node = [] # we shall appened the outputs layer\n", 38 | "\n", 39 | " for node in input_nodes:\n", 40 | " node.output_node.append(self)\n", 41 | "\n", 42 | " def compute(self):\n", 43 | " pass # does nothing, created here so that we can override in extended class" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# Let's do addition operation\n", 53 | "\n", 54 | "class add(Operation): # inheriting from Operation()\n", 55 | " def __init__(self,x,y):\n", 56 | " super().__init__([x,y])\n", 57 | " def compute(self,x_var,y_var):\n", 58 | " self.input_nodes = [x_var,y_var]\n", 59 | " return x_var+y_var" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 6, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# Let's do multiplication operation\n", 69 | "\n", 70 | "class multiply(Operation): # inheriting from Operation()\n", 71 | " def __init__(self,x,y):\n", 72 | " super().__init__([x,y])\n", 73 | " def compute(self,x_var,y_var):\n", 74 | " self.input_nodes = [x_var,y_var]\n", 75 | " return x_var*y_var" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 7, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# Let's do matrix multiplication operation\n", 85 | "\n", 86 | "class matmul(Operation): # inheriting from Operation()\n", 87 | " def __init__(self,x,y):\n", 88 | " super().__init__([x,y])\n", 89 | " def compute(self,x_var,y_var):\n", 90 | " self.input_nodes = [x_var,y_var]\n", 91 | " return x_var.dot(y_var)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 2, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "class Placeholder():\n", 101 | " def __init__(self):\n", 102 | " self.output_node = []\n", 103 | " _default_graph.placeholder.append(self)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 3, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "class Variable():\n", 113 | " def __init__(self, initial_value=None):\n", 114 | " self.value = initial_value\n", 115 | " self.output_node = []\n", 116 | " _default_graph.variable.append(self)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 8, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "class Graph():\n", 126 | " def __init__(self):\n", 127 | " self.operation = []\n", 128 | " self.placeholder = []\n", 129 | " self.variable = []\n", 130 | " \n", 131 | " def set_as_default(self):\n", 132 | " global _default_graph\n", 133 | " _default_graph = self" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "# z = Ax + b\n", 143 | "# A = 10, b= 1\n", 144 | "\n", 145 | "g= Graph()\n", 
146 | "g.set_as_default()\n", 147 | "A = Variable(10)\n", 148 | "b = Variable(1)\n", 149 | "x = Placeholder()\n", 150 | "y = multiply(A,x)\n", 151 | "z = add(y,b)" 152 | ] 153 | } 154 | ], 155 | "metadata": { 156 | "interpreter": { 157 | "hash": "69eb92836b941e979072a76c7fcfffe5419cca933cedd02cfafbdfca1a93358c" 158 | }, 159 | "kernelspec": { 160 | "display_name": "Python 3.9.10 64-bit", 161 | "language": "python", 162 | "name": "python3" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 3 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython3", 174 | "version": "3.9.10" 175 | }, 176 | "orig_nbformat": 4 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 2 180 | } 181 | -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/Neural Network from scratch/4 - session.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
Manual Neural Network
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "
Creating a session to execute all the nodes in a graph
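The heart of the `Session` below is a post-order traversal: every input is visited before the node that consumes it, so for `z = add(multiply(A, x), b)` the evaluation order is `A, x, multiply, b, add`, and each `compute` runs only after its operands already have outputs. A self-contained illustration of that ordering (toy `Node` class, not the notebook's):

```python
class Node:
    def __init__(self, name, inputs=()):
        self.name = name
        self.inputs = list(inputs)

def traverse_postorder(node):
    order = []
    def recurse(n):
        for input_node in n.inputs:   # visit operands first...
            recurse(input_node)
        order.append(n)               # ...then the node itself
    recurse(node)
    return order

A, x, b = Node("A"), Node("x"), Node("b")
y = Node("multiply", [A, x])
z = Node("add", [y, b])
print([n.name for n in traverse_postorder(z)])   # ['A', 'x', 'multiply', 'b', 'add']
```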
" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "* " 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 55, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "class Operation():\n", 31 | " \"\"\"\n", 32 | " An Operation is a node in a \"Graph\". TensorFlow will also use this concept of a Graph.\n", 33 | " \n", 34 | " This Operation class will be inherited by other classes that actually compute the specific\n", 35 | " operation, such as adding or matrix multiplication.\n", 36 | " \"\"\"\n", 37 | " \n", 38 | " def __init__(self, input_nodes = []):\n", 39 | " \"\"\"\n", 40 | " Intialize an Operation\n", 41 | " \"\"\"\n", 42 | " self.input_nodes = input_nodes # The list of input nodes\n", 43 | " self.output_nodes = [] # List of nodes consuming this node's output\n", 44 | " \n", 45 | " # For every node in the input, we append this operation (self) to the list of\n", 46 | " # the consumers of the input nodes\n", 47 | " for node in input_nodes:\n", 48 | " node.output_nodes.append(self)\n", 49 | " \n", 50 | " # There will be a global default graph (TensorFlow works this way)\n", 51 | " # We will then append this particular operation\n", 52 | " # Append this operation to the list of operations in the currently active default graph\n", 53 | " _default_graph.operations.append(self)\n", 54 | " \n", 55 | " def compute(self):\n", 56 | " \"\"\" \n", 57 | " This is a placeholder function. It will be overwritten by the actual specific operation\n", 58 | " that inherits from this class.\n", 59 | " \n", 60 | " \"\"\"\n", 61 | " \n", 62 | " pass" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 56, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# Let's do addition operation\n", 72 | "\n", 73 | "class add(Operation):\n", 74 | " \n", 75 | " def __init__(self, x, y):\n", 76 | " \n", 77 | " super().__init__([x, y])\n", 78 | "\n", 79 | " def compute(self, x_var, y_var):\n", 80 | " \n", 81 | " self.inputs = [x_var, y_var]\n", 82 | " return x_var + y_var" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 59, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "# Let's do multiplication operation\n", 92 | "\n", 93 | "class multiply(Operation):\n", 94 | " \n", 95 | " def __init__(self, x, y):\n", 96 | " \n", 97 | " super().__init__([x, y])\n", 98 | "\n", 99 | " def compute(self, x_var, y_var):\n", 100 | " \n", 101 | " self.inputs = [x_var, y_var]\n", 102 | " return x_var * y_var" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 60, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# Let's do matrix multiplication operation\n", 112 | "\n", 113 | "class matmul(Operation):\n", 114 | " \n", 115 | " def __init__(self, x, y):\n", 116 | " \n", 117 | " super().__init__([x, y])\n", 118 | "\n", 119 | " def compute(self, x_var, y_var):\n", 120 | " \n", 121 | " self.inputs = [x_var, y_var]\n", 122 | " return x_var.dot(y_var)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 68, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "class Placeholder():\n", 132 | " def __init__(self):\n", 133 | " self.output_nodes = []\n", 134 | " _default_graph.placeholders.append(self)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 65, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "class Variable():\n", 144 | " def __init__(self, initial_value=None):\n", 
145 | " self.value = initial_value\n", 146 | " self.output_nodes = []\n", 147 | " _default_graph.variables.append(self)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 69, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "class Graph():\n", 157 | " def __init__(self):\n", 158 | " self.operations = []\n", 159 | " self.placeholders = []\n", 160 | " self.variables = []\n", 161 | " \n", 162 | " def set_as_default(self):\n", 163 | " global _default_graph\n", 164 | " _default_graph = self" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 70, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "# z = Ax + b\n", 174 | "# A = 10, b= 1\n", 175 | "\n", 176 | "g= Graph()\n", 177 | "g.set_as_default()\n", 178 | "A = Variable(10)\n", 179 | "b = Variable(1)\n", 180 | "x = Placeholder()\n", 181 | "y = multiply(A,x)\n", 182 | "z = add(y,b)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 71, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "# session\n", 192 | "def traverse_postorder(operation):\n", 193 | " nodes_postorder = []\n", 194 | " def recurse(node):\n", 195 | " if isinstance(node, Operation):\n", 196 | " for input_node in node.input_nodes:\n", 197 | " recurse(input_node)\n", 198 | " nodes_postorder.append(node)\n", 199 | "\n", 200 | " recurse(operation)\n", 201 | " return nodes_postorder" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 72, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "import numpy as np\n", 211 | "class Session:\n", 212 | " \n", 213 | " def run(self, operation, feed_dict = {}):\n", 214 | " \"\"\" \n", 215 | " operation: The operation to compute\n", 216 | " feed_dict: Dictionary mapping placeholders to input values (the data) \n", 217 | " \"\"\"\n", 218 | " \n", 219 | " # Puts nodes in correct order\n", 220 | " nodes_postorder = traverse_postorder(operation)\n", 221 | " \n", 222 | " for node in nodes_postorder:\n", 223 | "\n", 224 | " if type(node) == Placeholder:\n", 225 | " \n", 226 | " node.output = feed_dict[node]\n", 227 | " \n", 228 | " elif type(node) == Variable:\n", 229 | " \n", 230 | " node.output = node.value\n", 231 | " \n", 232 | " else: # Operation\n", 233 | " \n", 234 | " node.inputs = [input_node.output for input_node in node.input_nodes]\n", 235 | "\n", 236 | " \n", 237 | " node.output = node.compute(*node.inputs)\n", 238 | " \n", 239 | " # Convert lists to numpy arrays\n", 240 | " if type(node.output) == list:\n", 241 | " node.output = np.array(node.output)\n", 242 | " \n", 243 | " # Return the requested node value\n", 244 | " return operation.output" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 73, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "sess = Session()" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 75, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/plain": [ 264 | "100" 265 | ] 266 | }, 267 | "execution_count": 75, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "result = sess.run(operation=z,feed_dict={x:10})\n", 274 | "result" 275 | ] 276 | } 277 | ], 278 | "metadata": { 279 | "interpreter": { 280 | "hash": "69eb92836b941e979072a76c7fcfffe5419cca933cedd02cfafbdfca1a93358c" 281 | }, 282 | "kernelspec": { 283 | "display_name": "Python 3.9.10 64-bit", 284 | "language": "python", 285 | "name": "python3" 286 
| }, 287 | "language_info": { 288 | "codemirror_mode": { 289 | "name": "ipython", 290 | "version": 3 291 | }, 292 | "file_extension": ".py", 293 | "mimetype": "text/x-python", 294 | "name": "python", 295 | "nbconvert_exporter": "python", 296 | "pygments_lexer": "ipython3", 297 | "version": "3.9.10" 298 | }, 299 | "orig_nbformat": 4 300 | }, 301 | "nbformat": 4, 302 | "nbformat_minor": 2 303 | } 304 | -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/Neural Network from scratch/README.md: -------------------------------------------------------------------------------- 1 | neural net from scratch 2 | -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/README.md: -------------------------------------------------------------------------------- 1 | This folder is to maintain notebooks and references for Deep Learning and Neural Networks 2 | -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/Tensorflow Basics/1 - basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
Introduction to TensorFlow
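The cells below disable eager execution so the old TF1-style `Session` API can be demonstrated. For contrast, the same two computations in TF 2.x's default eager mode need no session at all (a sketch; assumes a fresh interpreter where eager execution has not been disabled):

```python
import tensorflow as tf

hello = tf.constant("Hello")
world = tf.constant("World")
print((hello + world).numpy())   # b'HelloWorld', evaluated immediately in eager mode

a = tf.constant(10)
b = tf.constant(1)
print((a + b).numpy())           # 11
```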
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# importing tensorflow\n", 17 | "\n", 18 | "import tensorflow as tf" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 7, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "tf.compat.v1.disable_eager_execution()" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 8, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "Version of tensorflow: 2.7.0\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "# to check the version of tensorflow using\n", 45 | "print(\"Version of tensorflow: \", tf.__version__)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "Tensor - simply means n-dimensional array" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 2, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# playing with constants\n", 62 | "# constants are the basic tensors available\n", 63 | "hello = tf.constant(\"Hello\") \n", 64 | "world = tf.constant(\"World\")" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "tensorflow.python.framework.ops.EagerTensor" 76 | ] 77 | }, 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "type(hello) # we see that this doesn't print the contents of the tensor" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 7, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "b'Hello'" 96 | ] 97 | }, 98 | "execution_count": 7, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "hello.numpy()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 4, 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "name": "stdout", 114 | "output_type": "stream", 115 | "text": [ 116 | "HelloWorld\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "# concatenation\n", 122 | "tf.print(hello+world) # prints the contents of the constants/tensors" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 12, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "" 134 | ] 135 | }, 136 | "execution_count": 12, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "# for addition of 2 numbers\n", 143 | "\n", 144 | "a = tf.constant(10)\n", 145 | "b = tf.constant(1)\n", 146 | "\n", 147 | "tf.print(a+b)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 13, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "b'HelloWorld'\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "with tf.compat.v1.Session() as sess: \n", 165 | " print(sess.run(hello+world))" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 14, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "b'HelloWorld'\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "# So, to compute/print the tensors we need to run it under a Session\n", 183 | "\n", 184 | "with tf.compat.v1.Session() as sess:\n", 185 
| " hello = tf.constant(\"Hello\") \n", 186 | " world = tf.constant(\"World\") \n", 187 | " print(sess.run(hello+world))" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "
Different tensors available to create
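One practical note for the random tensors created below: `tf.random.normal` and `tf.random.uniform` return different values on every call unless the generator is seeded. A sketch of seeding for reproducibility (assumes TF 2.x with eager execution enabled, i.e. a fresh interpreter):

```python
import tensorflow as tf

tf.random.set_seed(42)                            # fix the global seed
r1 = tf.random.normal((3, 3), mean=1, stddev=1.5)

tf.random.set_seed(42)                            # same seed and op order again
r2 = tf.random.normal((3, 3), mean=1, stddev=1.5)

print(bool(tf.reduce_all(tf.equal(r1, r2))))      # True: identical values
```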
" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 15, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "# creating constants\n", 204 | "\n", 205 | "const = tf.constant(10)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 16, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "" 217 | ] 218 | }, 219 | "execution_count": 16, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "# creating matrix filled with scalar values\n", 226 | "\n", 227 | "fill_mat = tf.fill((3,3),10) # creates a tensor filled with scalar values of dimensions specified\n", 228 | "tf.print(fill_mat)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 17, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "data": { 238 | "text/plain": [ 239 | "" 240 | ] 241 | }, 242 | "execution_count": 17, 243 | "metadata": {}, 244 | "output_type": "execute_result" 245 | } 246 | ], 247 | "source": [ 248 | "# Creating a null matrix\n", 249 | "\n", 250 | "null_mat = tf.zeros((3,3))\n", 251 | "tf.print(null_mat)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 18, 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/plain": [ 262 | "" 263 | ] 264 | }, 265 | "execution_count": 18, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "# creating a ones matrix\n", 272 | "ones_mat = tf.ones((3,3))\n", 273 | "tf.print(ones_mat)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 19, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/plain": [ 284 | "" 285 | ] 286 | }, 287 | "execution_count": 19, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "# creating a tensor filled with random values of normal distribution\n", 294 | "randn = tf.random.normal((3,3),mean=1,stddev=1.5) # we specify the mean and standard deviation the values to be generated from\n", 295 | "tf.print(randn)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 20, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "data": { 305 | "text/plain": [ 306 | "" 307 | ] 308 | }, 309 | "execution_count": 20, 310 | "metadata": {}, 311 | "output_type": "execute_result" 312 | } 313 | ], 314 | "source": [ 315 | "# creating a tensor filled with random values of uniform distribution\n", 316 | "randu = tf.random.uniform((3,3),minval=0,maxval=1) # we specify the range between min and max value for random number to be generated\n", 317 | "tf.print(randu)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 21, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "sess = tf.compat.v1.InteractiveSession()" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 22, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "name": "stdout", 336 | "output_type": "stream", 337 | "text": [ 338 | "10\n", 339 | "\n", 340 | "\n", 341 | "[[10 10 10]\n", 342 | " [10 10 10]\n", 343 | " [10 10 10]]\n", 344 | "\n", 345 | "\n", 346 | "[[0. 0. 0.]\n", 347 | " [0. 0. 0.]\n", 348 | " [0. 0. 0.]]\n", 349 | "\n", 350 | "\n", 351 | "[[1. 1. 1.]\n", 352 | " [1. 1. 1.]\n", 353 | " [1. 1. 
1.]]\n", 354 | "\n", 355 | "\n", 356 | "[[ 2.459766 0.12824059 -0.6721699 ]\n", 357 | " [ 4.1958275 1.6642165 0.45715904]\n", 358 | " [ 3.0148897 1.808645 0.9952977 ]]\n", 359 | "\n", 360 | "\n", 361 | "[[0.25765288 0.33422637 0.27154422]\n", 362 | " [0.71380854 0.9065243 0.57832813]\n", 363 | " [0.02766907 0.19157505 0.94844306]]\n", 364 | "\n", 365 | "\n" 366 | ] 367 | } 368 | ], 369 | "source": [ 370 | "my_ops = [const, fill_mat, null_mat, ones_mat, randn, randu]\n", 371 | "for op in my_ops:\n", 372 | " print(sess.run(op))\n", 373 | " print(\"\\n\")" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 23, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "# matrix multiplication\n", 383 | "\n", 384 | "a = tf.constant([[1,2],\n", 385 | " [3,4]])\n", 386 | " \n", 387 | "b = tf.constant([[10],[100]])" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 26, 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "name": "stdout", 397 | "output_type": "stream", 398 | "text": [ 399 | "(2, 2)\n", 400 | "(2, 1)\n" 401 | ] 402 | } 403 | ], 404 | "source": [ 405 | "print(a.get_shape())\n", 406 | "print(b.get_shape())" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 27, 412 | "metadata": {}, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "array([[210],\n", 418 | " [430]])" 419 | ] 420 | }, 421 | "execution_count": 27, 422 | "metadata": {}, 423 | "output_type": "execute_result" 424 | } 425 | ], 426 | "source": [ 427 | "result = tf.matmul(a,b)\n", 428 | "sess.run(result)" 429 | ] 430 | } 431 | ], 432 | "metadata": { 433 | "interpreter": { 434 | "hash": "69eb92836b941e979072a76c7fcfffe5419cca933cedd02cfafbdfca1a93358c" 435 | }, 436 | "kernelspec": { 437 | "display_name": "Python 3.9.10 64-bit", 438 | "language": "python", 439 | "name": "python3" 440 | }, 441 | "language_info": { 442 | "codemirror_mode": { 443 | "name": "ipython", 444 | "version": 3 445 | }, 446 | "file_extension": ".py", 447 | "mimetype": "text/x-python", 448 | "name": "python", 449 | "nbconvert_exporter": "python", 450 | "pygments_lexer": "ipython3", 451 | "version": "3.9.10" 452 | }, 453 | "orig_nbformat": 4 454 | }, 455 | "nbformat": 4, 456 | "nbformat_minor": 2 457 | } 458 | -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/Tensorflow Basics/README.md: -------------------------------------------------------------------------------- 1 | to understand basic syntax of TF 2 | -------------------------------------------------------------------------------- /Deep Learning & Neural Networks/sample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import tensorflow as tf" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from tensorflow import keras" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "'2.7.0'" 39 | ] 40 | }, 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | 
], 46 | "source": [ 47 | "tf.__version__" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "'2.7.0'" 59 | ] 60 | }, 61 | "execution_count": 4, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "keras.__version__" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 5, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "" 79 | ] 80 | }, 81 | "execution_count": 5, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "hello = tf.constant(\"Hello World\")\n", 88 | "hello " 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 6, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz\n", 101 | "32768/29515 [=================================] - 0s 1us/step\n", 102 | "40960/29515 [=========================================] - 0s 1us/step\n", 103 | "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz\n", 104 | "26427392/26421880 [==============================] - 4s 0us/step\n", 105 | "26435584/26421880 [==============================] - 4s 0us/step\n", 106 | "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz\n", 107 | "16384/5148 [===============================================================================================] - 0s 0s/step\n", 108 | "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz\n", 109 | "4423680/4422102 [==============================] - 1s 0us/step\n", 110 | "4431872/4422102 [==============================] - 1s 0us/step\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "fashion_mnist = keras.datasets.fashion_mnist\n", 116 | "(X_train_full, y_train_full),(X_test, y_test) = fashion_mnist.load_data()" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 7, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "(60000, 28, 28)" 128 | ] 129 | }, 130 | "execution_count": 7, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "X_train_full.shape" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 8, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "dtype('uint8')" 148 | ] 149 | }, 150 | "execution_count": 8, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "X_train_full.dtype" 157 | ] 158 | } 159 | ], 160 | "metadata": { 161 | "interpreter": { 162 | "hash": "0989d4cb382ec003e6ad9ee0079fe5a34620af18f47069c43c62ee5030c1ec77" 163 | }, 164 | "kernelspec": { 165 | "display_name": "Python 3.7.9 64-bit ('myenv': conda)", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.8.12" 179 | }, 180 | "orig_nbformat": 2 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 2 184 | } 185 | 
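A quick aside on the two styles mixed above: the Tensorflow Basics notebook runs ops through a session (`sess.run(...)`, TF 1.x style), while sample.ipynb uses TF 2.x, where eager execution is the default. Below is a minimal eager-mode sketch of the same matrix ops — this assumes TensorFlow 2.x is installed; it is an illustration, not part of the original notebooks:

```python
import tensorflow as tf

# Same constants as in the basics notebook
a = tf.constant([[1, 2],
                 [3, 4]])
b = tf.constant([[10], [100]])

# Eager mode: no Session needed, ops evaluate immediately
print(a.shape)  # (2, 2)
print(b.shape)  # (2, 1)

result = tf.matmul(a, b)  # [[1*10 + 2*100], [3*10 + 4*100]] = [[210], [430]]
print(result.numpy())
```

This produces the same `[[210], [430]]` result that `sess.run(result)` returns in the session-based notebook.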
-------------------------------------------------------------------------------- /Descriptive stats using Pandas/ README.md: -------------------------------------------------------------------------------- 1 | All descriptive statistics tools are used in this Notebook 2 | -------------------------------------------------------------------------------- /Geographical Plotting/Agriculture USA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Geographical Plotting/Agriculture USA.png -------------------------------------------------------------------------------- /Geographical Plotting/GDP World.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Geographical Plotting/GDP World.png -------------------------------------------------------------------------------- /Geographical Plotting/README.md: -------------------------------------------------------------------------------- 1 | Plotly doesn't render the plots directly on the GitHub page; to see the plots, copy the code and run it in your own editor. 2 | 3 | So I have uploaded the output plots as .png files in the same repository -------------------------------------------------------------------------------- /Geographical Plotting/sample: -------------------------------------------------------------------------------- 1 | This is just a sample file 2 | -------------------------------------------------------------------------------- /Hackathon/README.md: -------------------------------------------------------------------------------- 1 | Hackathon Link: Click here 2 | 3 |

Overview

4 | Wipro Limited (NYSE: WIT, BSE: 507685, NSE: WIPRO) is a leading global information technology, consulting and business process services company. We harness the power of cognitive computing, hyper-automation, robotics, cloud, analytics and emerging technologies to help our clients adapt to the digital world and make them successful. A company recognized globally for its comprehensive portfolio of services, strong commitment to sustainability and good corporate citizenship, we have over 220,000 dedicated employees serving clients across six continents. Together, we discover ideas and connect the dots to build a better and a bold new future. 5 | 6 | Along with being a global leader in artificial intelligence services according to the latest reports from analysts such as Forrester, IDC and Everest Group, Wipro was rated the second-best organization for data scientists to work for in India in 2021 by Analytics India Magazine. The company has also committed to reaching net-zero greenhouse gas emissions by 2040. 7 | 8 | Though a little late in the day, the world is waking up to the deleterious effect of fossil fuels on our environment. As the doomsday clock ticks away, human beings are turning to renewable energy to avert a possible apocalypse. Fortunately, the sun is a wellspring of clean energy. Taking the cue, Wipro, in association with MachineHack, has designed a forecasting challenge to optimise solar power generation using ML models. 9 | 10 | A solar power generation company wants to optimize solar power production and needs a prediction model for the Clearsky Global Horizontal Irradiance (GHI). The data covers ten years at 30-minute intervals, with the following data points: 11 | 12 | ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Temperature', 'Clearsky DHI', 'Clearsky DNI', 'Clearsky GHI', 'Cloud Type', 'Dew Point', 'Fill Flag', 'Relative Humidity', 'Solar Zenith Angle', 'Surface Albedo', 'Pressure', 'Precipitable Water', 'Wind Direction', 'Wind Speed'] 13 | 14 |
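To make the task concrete, here is a minimal baseline sketch for this challenge. It assumes the train.csv/test.csv files in this folder, that every non-target column can be used as a numeric feature, and a plain random forest; it is a starting point, not the submitted solution (the Evaluation section below describes how submissions are scored):

```python
# Baseline sketch (assumptions: train.csv/test.csv as in this folder,
# all non-target columns usable as numeric features).
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

train = pd.read_csv("train.csv")
targets = ["Clearsky DHI", "Clearsky DNI", "Clearsky GHI"]
features = [c for c in train.columns if c not in targets]

# Hold out 20% of the rows to estimate the MSE used on the leaderboard
X_tr, X_val, y_tr, y_val = train_test_split(
    train[features], train[targets], test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_tr, y_tr)  # multi-output regression: all 3 targets at once
print("Validation MSE:", mean_squared_error(y_val, model.predict(X_val)))

# Submission: 17,520 rows, one column per target
test = pd.read_csv("test.csv")
submission = pd.DataFrame(model.predict(test[features]), columns=targets)
submission.to_csv("submission.csv", index=False)
```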

Evaluation

15 | What is the metric in this competition? How is the leaderboard calculated? 16 | The submission will be evaluated using the Mean Squared Error. One can use sklearn.metrics.mean_squared_error to calculate the same. 17 | This hackathon supports private and public leaderboards. 18 | The public leaderboard is evaluated on 30% of the test data. 19 | The private leaderboard will be made available at the end of the hackathon and will be evaluated on 100% of the test data. 20 | The Final Score represents the score achieved based on the best score on the public leaderboard. 21 | How to generate a valid submission file 22 | Sklearn models support the predict() method to generate the predicted values. 23 | The participant should submit a .csv file with exactly 17,520 rows and 3 columns ['Clearsky DHI', 'Clearsky DNI', 'Clearsky GHI']. The submission will return an Invalid Score if you have extra rows or columns. 24 | The file should have exactly 3 columns. 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Mithun G 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /Machine Learning/DBSCAN/README.md: -------------------------------------------------------------------------------- 1 | dbscan repo 2 | 3 | ![DBSCAN](https://user-images.githubusercontent.com/60751635/118082047-20d4ee00-b3da-11eb-970f-44d315fb15d9.png) 4 | -------------------------------------------------------------------------------- /Machine Learning/Decision Trees and Random Forests/README.md: -------------------------------------------------------------------------------- 1 | Repo for Decision trees and Random forest based notebooks 2 | -------------------------------------------------------------------------------- /Machine Learning/Decision Trees and Random Forests/kyphosis.csv: -------------------------------------------------------------------------------- 1 | "Kyphosis","Age","Number","Start" 2 | "absent",71,3,5 3 | "absent",158,3,14 4 | "present",128,4,5 5 | "absent",2,5,1 6 | "absent",1,4,15 7 | "absent",1,2,16 8 | "absent",61,2,17 9 | "absent",37,3,16 10 | "absent",113,2,16 11 | "present",59,6,12 12 | "present",82,5,14 13 | "absent",148,3,16 14 | "absent",18,5,2 15 | "absent",1,4,12 16 | "absent",168,3,18 17 | "absent",1,3,16 18 | "absent",78,6,15 19 | "absent",175,5,13 20 | "absent",80,5,16 21 | "absent",27,4,9 22 | "absent",22,2,16 23 | "present",105,6,5 24 | "present",96,3,12 25 | "absent",131,2,3 26 | "present",15,7,2 27 | "absent",9,5,13 28 | "absent",8,3,6 29 | "absent",100,3,14 30 | "absent",4,3,16 31 | "absent",151,2,16 32 | "absent",31,3,16 33 | "absent",125,2,11 34 | "absent",130,5,13 35 | "absent",112,3,16 36 | "absent",140,5,11 37 | "absent",93,3,16 38 | "absent",1,3,9 39 | "present",52,5,6 40 | "absent",20,6,9 41 | "present",91,5,12 42 | "present",73,5,1 43 | "absent",35,3,13 44 | "absent",143,9,3 45 | "absent",61,4,1 46 | "absent",97,3,16 47 | "present",139,3,10 48 | "absent",136,4,15 49 | "absent",131,5,13 50 | "present",121,3,3 51 | "absent",177,2,14 52 | "absent",68,5,10 53 | "absent",9,2,17 54 | "present",139,10,6 55 | "absent",2,2,17 56 | "absent",140,4,15 57 | "absent",72,5,15 58 | "absent",2,3,13 59 | "present",120,5,8 60 | "absent",51,7,9 61 | "absent",102,3,13 62 | "present",130,4,1 63 | "present",114,7,8 64 | "absent",81,4,1 65 | "absent",118,3,16 66 | "absent",118,4,16 67 | "absent",17,4,10 68 | "absent",195,2,17 69 | "absent",159,4,13 70 | "absent",18,4,11 71 | "absent",15,5,16 72 | "absent",158,5,14 73 | "absent",127,4,12 74 | "absent",87,4,16 75 | "absent",206,4,10 76 | "absent",11,3,15 77 | "absent",178,4,15 78 | "present",157,3,13 79 | "absent",26,7,13 80 | "absent",120,2,13 81 | "present",42,7,6 82 | "absent",36,4,13 83 | -------------------------------------------------------------------------------- /Machine Learning/Fine Tuning Models/README.md: -------------------------------------------------------------------------------- 1 | The notebooks is to show how to do feature engineering and fine tune your model performance 2 | -------------------------------------------------------------------------------- /Machine Learning/Hierarchical Clustering/Hierarchical Clustering dendogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Machine Learning/Hierarchical Clustering/Hierarchical Clustering dendogram.png -------------------------------------------------------------------------------- /Machine Learning/Hierarchical 
Clustering/README.md: -------------------------------------------------------------------------------- 1 | Repo for hierarchical Clustering 2 | -------------------------------------------------------------------------------- /Machine Learning/K Means Clustering/Cluster Comparisons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Machine Learning/K Means Clustering/Cluster Comparisons.png -------------------------------------------------------------------------------- /Machine Learning/K Means Clustering/README.md: -------------------------------------------------------------------------------- 1 | Repo for K means clustering 2 | -------------------------------------------------------------------------------- /Machine Learning/KNN Classification/README.md: -------------------------------------------------------------------------------- 1 | Notebook file of K Nearest Neighbour 2 | -------------------------------------------------------------------------------- /Machine Learning/Linear Regression/README.md: -------------------------------------------------------------------------------- 1 | This folder is for Linear Regression Algorithm 2 | -------------------------------------------------------------------------------- /Machine Learning/Logistic Regression/README.md: -------------------------------------------------------------------------------- 1 | A repo for notebooks of Logistic regression algorithms 2 | -------------------------------------------------------------------------------- /Machine Learning/Principal Component Analysis/README.md: -------------------------------------------------------------------------------- 1 | PRINCIPAL COMPONENT ANALYSIS 2 | 3 | ![PCA Plot](https://user-images.githubusercontent.com/60751635/118375973-e0948c00-b5e2-11eb-95a6-fc9708ccd96f.png) 4 | 5 | ![Components](https://user-images.githubusercontent.com/60751635/118375999-015ce180-b5e3-11eb-9296-937cbdde03d6.png) 6 | -------------------------------------------------------------------------------- /Machine Learning/Python PPT/Group 2 - sklearn PPT.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Machine Learning/Python PPT/Group 2 - sklearn PPT.pdf -------------------------------------------------------------------------------- /Machine Learning/Python PPT/README.md: -------------------------------------------------------------------------------- 1 |

UNDERSTANDING sklearn

2 | 3 | Python Presentation done by:- 4 | * Naveen Kumar M R 5 | * Mithun G 6 | * Myil Vaughanan V L 7 | * Pranav Polvarapu 8 | -------------------------------------------------------------------------------- /Machine Learning/README.md: -------------------------------------------------------------------------------- 1 | GIF 2 |

3 | 4 | This folder for machine learning 5 | 6 | Topics completed:- 7 | * Linear Regression 8 | * KNN Classification 9 | * Decision Trees 10 | * Random Forests 11 | * Logistic Regression 12 | * Support Vector Machine 13 | * K means clustering (partition based clustering) 14 | * Agglomerative clustering (Hierarchical clustering) 15 | * DBSCAN (desnity based clustering) 16 | * Principal Component Analysis (Data dimensionality reduction technique) 17 | * Basic Recommender systems 18 | * Fine tuning ML models 19 | -------------------------------------------------------------------------------- /Machine Learning/Recommender Systems/README.md: -------------------------------------------------------------------------------- 1 | This is a repo for Recommender Systems 2 | 3 | **Cosine Similarity:- (Used in Collaborative Filtering)** 4 | 5 | 6 | ![CF](https://user-images.githubusercontent.com/60751635/119685937-74781a80-be63-11eb-8fa9-193f28eae378.png) 7 | 8 | 9 | 10 | **The final recommendation generated for the movie *Forest Gump* using CF method** 11 | 12 | 13 | ![Ouput](https://user-images.githubusercontent.com/60751635/119686089-9a052400-be63-11eb-9a02-32b3d673dac7.png) 14 | -------------------------------------------------------------------------------- /Machine Learning/SGD Classifier/MNIST_SGDClassifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "MNIST SGDClassifier.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "YVS-HLuflyaO" 22 | }, 23 | "source": [ 24 | " import numpy as np\n", 25 | " import pandas as pd\n", 26 | " import seaborn as sns\n", 27 | " import matplotlib.pyplot as plt\n", 28 | " %matplotlib inline" 29 | ], 30 | "execution_count": 1, 31 | "outputs": [] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "metadata": { 36 | "id": "PVoSq20WnStq" 37 | }, 38 | "source": [ 39 | "from sklearn.datasets import fetch_openml\n", 40 | "mnist = fetch_openml('mnist_784', version=1)" 41 | ], 42 | "execution_count": 3, 43 | "outputs": [] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "metadata": { 48 | "colab": { 49 | "base_uri": "https://localhost:8080/" 50 | }, 51 | "id": "5PtWYCGdnu9f", 52 | "outputId": "a8b5cba9-edf7-4048-f872-115e87235ba3" 53 | }, 54 | "source": [ 55 | "mnist.keys() # Dictionary keys include these many things" 56 | ], 57 | "execution_count": 16, 58 | "outputs": [ 59 | { 60 | "output_type": "execute_result", 61 | "data": { 62 | "text/plain": [ 63 | "dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'DESCR', 'details', 'categories', 'url'])" 64 | ] 65 | }, 66 | "metadata": { 67 | "tags": [] 68 | }, 69 | "execution_count": 16 70 | } 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "metadata": { 76 | "colab": { 77 | "base_uri": "https://localhost:8080/", 78 | "height": 137 79 | }, 80 | "id": "UfwIwOdNoUbZ", 81 | "outputId": "12723ee3-db33-4e07-cc9b-27bb283f8fb4" 82 | }, 83 | "source": [ 84 | "mnist.DESCR # gives the description of the dataset" 85 | ], 86 | "execution_count": 17, 87 | "outputs": [ 88 | { 89 | "output_type": "execute_result", 90 | "data": { 91 | "application/vnd.google.colaboratory.intrinsic+json": { 92 | "type": "string" 93 | }, 94 | "text/plain": [ 95 | "\"**Author**: Yann LeCun, Corinna Cortes, 
Christopher J.C. Burges \\n**Source**: [MNIST Website](http://yann.lecun.com/exdb/mnist/) - Date unknown \\n**Please cite**: \\n\\nThe MNIST database of handwritten digits with 784 features, raw data available at: http://yann.lecun.com/exdb/mnist/. It can be split in a training set of the first 60,000 examples, and a test set of 10,000 examples \\n\\nIt is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image. It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting. The original black and white (bilevel) images from NIST were size normalized to fit in a 20x20 pixel box while preserving their aspect ratio. The resulting images contain grey levels as a result of the anti-aliasing technique used by the normalization algorithm. the images were centered in a 28x28 image by computing the center of mass of the pixels, and translating the image so as to position this point at the center of the 28x28 field. \\n\\nWith some classification methods (particularly template-based methods, such as SVM and K-nearest neighbors), the error rate improves when the digits are centered by bounding box rather than center of mass. If you do this kind of pre-processing, you should report it in your publications. The MNIST database was constructed from NIST's NIST originally designated SD-3 as their training set and SD-1 as their test set. However, SD-3 is much cleaner and easier to recognize than SD-1. The reason for this can be found on the fact that SD-3 was collected among Census Bureau employees, while SD-1 was collected among high-school students. Drawing sensible conclusions from learning experiments requires that the result be independent of the choice of training set and test among the complete set of samples. Therefore it was necessary to build a new database by mixing NIST's datasets. \\n\\nThe MNIST training set is composed of 30,000 patterns from SD-3 and 30,000 patterns from SD-1. Our test set was composed of 5,000 patterns from SD-3 and 5,000 patterns from SD-1. The 60,000 pattern training set contained examples from approximately 250 writers. We made sure that the sets of writers of the training set and test set were disjoint. SD-1 contains 58,527 digit images written by 500 different writers. In contrast to SD-3, where blocks of data from each writer appeared in sequence, the data in SD-1 is scrambled. Writer identities for SD-1 is available and we used this information to unscramble the writers. We then split SD-1 in two: characters written by the first 250 writers went into our new training set. The remaining 250 writers were placed in our test set. Thus we had two sets with nearly 30,000 examples each. The new training set was completed with enough examples from SD-3, starting at pattern # 0, to make a full set of 60,000 training patterns. Similarly, the new test set was completed with SD-3 examples starting at pattern # 35,000 to make a full set with 60,000 test patterns. Only a subset of 10,000 test images (5,000 from SD-1 and 5,000 from SD-3) is available on this site. 
The full 60,000 sample training set is available.\\n\\nDownloaded from openml.org.\"" 96 | ] 97 | }, 98 | "metadata": { 99 | "tags": [] 100 | }, 101 | "execution_count": 17 102 | } 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "metadata": { 108 | "id": "AIkIEgP5odkC" 109 | }, 110 | "source": [ 111 | "X,y = mnist[\"data\"], mnist[\"target\"]" 112 | ], 113 | "execution_count": 18, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "colab": { 120 | "base_uri": "https://localhost:8080/" 121 | }, 122 | "id": "KFPkv66Won7j", 123 | "outputId": "557e9558-97f7-4a86-e364-8317e83627b9" 124 | }, 125 | "source": [ 126 | "X.shape # we have 70000 images and 784 features" 127 | ], 128 | "execution_count": 20, 129 | "outputs": [ 130 | { 131 | "output_type": "execute_result", 132 | "data": { 133 | "text/plain": [ 134 | "(70000, 784)" 135 | ] 136 | }, 137 | "metadata": { 138 | "tags": [] 139 | }, 140 | "execution_count": 20 141 | } 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "metadata": { 147 | "colab": { 148 | "base_uri": "https://localhost:8080/" 149 | }, 150 | "id": "owa-AbxZovCr", 151 | "outputId": "8f60180d-2c49-4869-c944-b9727e122ef4" 152 | }, 153 | "source": [ 154 | "y.shape" 155 | ], 156 | "execution_count": 21, 157 | "outputs": [ 158 | { 159 | "output_type": "execute_result", 160 | "data": { 161 | "text/plain": [ 162 | "(70000,)" 163 | ] 164 | }, 165 | "metadata": { 166 | "tags": [] 167 | }, 168 | "execution_count": 21 169 | } 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": { 175 | "id": "Ke8XVYYVo6t1" 176 | }, 177 | "source": [ 178 | "* There are 70000 images and each image has 784 features\n", 179 | "* This is because each image is 28*28 image and each feature simply represents one pixel's intensity, from 0 to 255(w/b image)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "metadata": { 185 | "id": "A2uwlrIZo22n" 186 | }, 187 | "source": [ 188 | "import matplotlib as mpl " 189 | ], 190 | "execution_count": 22, 191 | "outputs": [] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "metadata": { 196 | "id": "TDm-DJvFpa_P" 197 | }, 198 | "source": [ 199 | "some_digit = X[0]\n", 200 | "some_digit_image = some_digit.reshape(28,28)" 201 | ], 202 | "execution_count": 23, 203 | "outputs": [] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "metadata": { 208 | "colab": { 209 | "base_uri": "https://localhost:8080/", 210 | "height": 248 211 | }, 212 | "id": "sBJT9Adhpnet", 213 | "outputId": "e52d7a74-9920-4370-ec1b-e5a995cd7a5e" 214 | }, 215 | "source": [ 216 | "plt.imshow(some_digit_image, cmap=mpl.cm.binary, interpolation='nearest')\n", 217 | "plt.axis('off')\n", 218 | "plt.show() # this looks like 5" 219 | ], 220 | "execution_count": 24, 221 | "outputs": [ 222 | { 223 | "output_type": "display_data", 224 | "data": { 225 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAGaElEQVR4nO3dPUiWfR/G8dveSyprs2gOXHqhcAh6hZqsNRqiJoPKRYnAoTGorWyLpqhFcmgpEmqIIByKXiAHIaKhFrGghiJ81ucBr991Z/Z4XPr5jB6cXSfVtxP6c2rb9PT0P0CeJfN9A8DMxAmhxAmhxAmhxAmhljXZ/Vcu/H1tM33RkxNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCiRNCLZvvG+B//fr1q9y/fPnyVz9/aGio4fb9+/fy2vHx8XK/ceNGuQ8MDDTc7t69W167atWqcr948WK5X7p0qdzngycnhBInhBInhBInhBInhBInhBInhHLOOYMPHz6U+48fP8r92bNn5f706dOG29TUVHnt8PBwuc+nLVu2lPv58+fLfWRkpOG2du3a8tpt27aV+759+8o9kScnhBInhBInhBInhBInhBInhGqbnp6u9nJsVS9evCj3gwcPlvvffm0r1dKlS8v91q1b5d7e3j7rz960aVO5b9iwody3bt0668/+P2ib6YuenBBKnBBKnBBKnBBKnBBKnBBKnBBqUZ5zTk5Olnt3d3e5T0xMzOXtzKlm997sPPDx48cNtxUrVpTXLtbz3zngnBNaiTghlDghlDghlDghlDghlDgh1KL81pgbN24s96tXr5b7/fv3y33Hjh3l3tfXV+6V7du3l/vo6Gi5N3un8s2bNw23a9euldcytzw5IZQ4IZQ4IZQ4IZQ4IZQ4IZQ4IdSifJ/zT339+rXcm/24ut7e3obbzZs3y2tv375d7idOnCh3InmfE1qJOCGUOCGUOCGUOCGUOCGUOCHUonyf80+tW7fuj65fv379rK9tdg56/Pjxcl+yxL/HrcKfFIQSJ4QSJ4QSJ4QSJ4QSJ4Tyytg8+PbtW8Otp6envPbJkyfl/uDBg3I/fPhwuTMvvDIGrUScEEqcEEqcEEqcEEqcEEqcEMo5Z5iJiYly37lzZ7l3dHSU+4EDB8p9165dDbezZ8+W17a1zXhcR3POOaGViBNCiRNCiRNCiRNCiRNCiRNCOedsMSMjI+V++vTpcm/24wsrly9fLveTJ0+We2dn56w/e4FzzgmtRJwQSpwQSpwQSpwQSpwQSpwQyjnnAvP69ety7+/vL/fR0dFZf/aZM2fKfXBwsNw3b948689ucc45oZWIE0KJE0KJE0KJE0KJE0KJE0I551xkpqamyv3+/fsNt1OnTpXXNvm79M+hQ4fK/dGjR+W+gDnnhFYiTgglTgglTgglTgglTgjlKIV/beXKleX+8+fPcl++fHm5P3z4sOG2f//+8toW5ygFWok4IZQ4IZQ4IZQ4IZQ4IZQ4IdSy+b4B5tarV6/KfXh4uNzHxsYabs3OMZvp6uoq97179/7Rr7/QeHJCKHFCKHFCKHFCKHFCKHFCKHFCKOecYcbHx8v9+vXr5X7v3r1y//Tp02/f07+1bFn916mzs7PclyzxrPhvfjcglDghlDghlDghlDghlDghlDghlHPOv6DZWeKdO3cabkNDQ+W179+/n80tzYndu3eX++DgYLkfPXp0Lm9nwfPkhFDihFDihFDihFDihFDihFCOUmbw+fPncn/79m25nzt3rtzfvXv32/c0V7q7u8v9woULDbdjx46V13rla2753YRQ4oRQ4oRQ4oRQ4oRQ4oRQ4oRQC/acc3JysuHW29tbXvvy5ctyn5iYmNU9zYU9e/aUe39/f7kfOXKk3FevXv3b98Tf4ckJocQJocQJocQJocQJocQJocQJoWLPOZ8/f17uV65cKfexsbGG28ePH2d1T3NlzZo1Dbe+vr7y2mbffrK9vX1W90QeT04IJU4IJU4IJU4IJU4IJU4IJU4IFXvOOTIy8kf7n+jq6ir3np6ecl+6dGm5DwwMNNw6OjrKa1k8PDkhlDghlDghlDghlDghlDghlDghVNv09HS1lyMwJ9pm+qInJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4QSJ4Rq9iMAZ/yWfcDf58kJocQJocQJocQJocQJocQJof4DO14Dhyk10VwAAAAASUVORK5CYII=\n", 226 | "text/plain": [ 227 | "
<Figure size 432x288 with 1 Axes>
" 228 | ] 229 | }, 230 | "metadata": { 231 | "tags": [], 232 | "needs_background": "light" 233 | } 234 | } 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "metadata": { 240 | "colab": { 241 | "base_uri": "https://localhost:8080/", 242 | "height": 35 243 | }, 244 | "id": "KXmDw3nHqQ9n", 245 | "outputId": "e80d4e5a-fbb6-4490-84e3-f491e99e956e" 246 | }, 247 | "source": [ 248 | "# let's see what the label tells us\n", 249 | "y[0]\n", 250 | "# It's 5" 251 | ], 252 | "execution_count": 25, 253 | "outputs": [ 254 | { 255 | "output_type": "execute_result", 256 | "data": { 257 | "application/vnd.google.colaboratory.intrinsic+json": { 258 | "type": "string" 259 | }, 260 | "text/plain": [ 261 | "'5'" 262 | ] 263 | }, 264 | "metadata": { 265 | "tags": [] 266 | }, 267 | "execution_count": 25 268 | } 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "metadata": { 274 | "id": "VIV0jniYqcn5" 275 | }, 276 | "source": [ 277 | "type(y[0])\n", 278 | "# The label is a string, we needed it to be numerical\n", 279 | "y= y.astype(np.uint8)" 280 | ], 281 | "execution_count": 28, 282 | "outputs": [] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "metadata": { 287 | "colab": { 288 | "base_uri": "https://localhost:8080/" 289 | }, 290 | "id": "ZGtfNVA9qv9L", 291 | "outputId": "6ba518e1-e980-4c17-c284-8012610e7786" 292 | }, 293 | "source": [ 294 | "type(y[0])" 295 | ], 296 | "execution_count": 30, 297 | "outputs": [ 298 | { 299 | "output_type": "execute_result", 300 | "data": { 301 | "text/plain": [ 302 | "numpy.uint8" 303 | ] 304 | }, 305 | "metadata": { 306 | "tags": [] 307 | }, 308 | "execution_count": 30 309 | } 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "metadata": { 315 | "id": "mTkKAo5qqzWT" 316 | }, 317 | "source": [ 318 | "# The MNIST dataset is actually already split into a training set, the first 60000 observations and the test set being last 10000 images\n", 319 | "X_train, X_test, y_train, y_test= X[:60000], X[60000:], y[:60000], y[60000:]" 320 | ], 321 | "execution_count": 32, 322 | "outputs": [] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "metadata": { 327 | "id": "4YJ7CSrMrTf0" 328 | }, 329 | "source": [ 330 | "# Training a binary classifier\n", 331 | "# Now let's try only to identify one digit - the number 5\n", 332 | "# The model we build should be capable of distingusihing between just two categories: 5 and not 5\n", 333 | "\n", 334 | "y_train_5 = (y_train == 5)\n", 335 | "y_test_5 = (y_test == 5)" 336 | ], 337 | "execution_count": 33, 338 | "outputs": [] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": { 343 | "id": "eQ-7CQoXsUMb" 344 | }, 345 | "source": [ 346 | "* Now let's choose a classifier algorithm\n", 347 | "* Since n>30000, we can choose Gradient Descent based classifiers\n", 348 | "* Here we choose Stochastic Gradient Descent(SGD Classifier)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "metadata": { 354 | "id": "9DZxkt4sr-Kn" 355 | }, 356 | "source": [ 357 | "from sklearn.linear_model import SGDClassifier\n", 358 | "model = SGDClassifier(random_state = 42)" 359 | ], 360 | "execution_count": 34, 361 | "outputs": [] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "metadata": { 366 | "colab": { 367 | "base_uri": "https://localhost:8080/" 368 | }, 369 | "id": "u9qSInLfs6b_", 370 | "outputId": "5983beed-f98c-45e4-bf96-75036b35eeb8" 371 | }, 372 | "source": [ 373 | "model.fit(X_train, y_train_5)" 374 | ], 375 | "execution_count": 35, 376 | "outputs": [ 377 | { 378 | "output_type": "execute_result", 379 | "data": { 380 | 
"text/plain": [ 381 | "SGDClassifier(alpha=0.0001, average=False, class_weight=None,\n", 382 | " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n", 383 | " l1_ratio=0.15, learning_rate='optimal', loss='hinge',\n", 384 | " max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',\n", 385 | " power_t=0.5, random_state=42, shuffle=True, tol=0.001,\n", 386 | " validation_fraction=0.1, verbose=0, warm_start=False)" 387 | ] 388 | }, 389 | "metadata": { 390 | "tags": [] 391 | }, 392 | "execution_count": 35 393 | } 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "metadata": { 399 | "colab": { 400 | "base_uri": "https://localhost:8080/" 401 | }, 402 | "id": "JCKNOfnBtGsX", 403 | "outputId": "70a695f9-2e40-4f7b-8a57-37d11d7b4407" 404 | }, 405 | "source": [ 406 | "# Now let's predict\n", 407 | "predict = model.predict([some_digit])\n", 408 | "predict" 409 | ], 410 | "execution_count": 36, 411 | "outputs": [ 412 | { 413 | "output_type": "execute_result", 414 | "data": { 415 | "text/plain": [ 416 | "array([ True])" 417 | ] 418 | }, 419 | "metadata": { 420 | "tags": [] 421 | }, 422 | "execution_count": 36 423 | } 424 | ] 425 | } 426 | ] 427 | } -------------------------------------------------------------------------------- /Machine Learning/SGD Classifier/README.md: -------------------------------------------------------------------------------- 1 | MNIST data set is used 2 | -------------------------------------------------------------------------------- /Machine Learning/Support Vector Machine/README.md: -------------------------------------------------------------------------------- 1 | A folder of SVM classifier notebooks 2 | -------------------------------------------------------------------------------- /Merging Data using Pandas/Merging Data using Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "language_info": { 4 | "codemirror_mode": { 5 | "name": "ipython", 6 | "version": 3 7 | }, 8 | "file_extension": ".py", 9 | "mimetype": "text/x-python", 10 | "name": "python", 11 | "nbconvert_exporter": "python", 12 | "pygments_lexer": "ipython3", 13 | "version": "3.7.9-final" 14 | }, 15 | "orig_nbformat": 2, 16 | "kernelspec": { 17 | "name": "python3", 18 | "display_name": "Python 3" 19 | } 20 | }, 21 | "nbformat": 4, 22 | "nbformat_minor": 2, 23 | "cells": [ 24 | { 25 | "source": [ 26 | "Merging Data using Pandas" 27 | ], 28 | "cell_type": "markdown", 29 | "metadata": {} 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import numpy as np \n", 38 | "import pandas as pd" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "output_type": "execute_result", 48 | "data": { 49 | "text/plain": [ 50 | " A B\n", 51 | "0 1 4\n", 52 | "1 2 5\n", 53 | "2 3 6" 54 | ], 55 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 56 | }, 57 | "metadata": {}, 58 | "execution_count": 2 59 | } 60 | ], 61 | "source": [ 62 | "dict1={'A': [1,2,3], 'B': [4,5,6]}\n", 63 | "dummy1= pd.DataFrame(dict1, index= [0,1,2])\n", 64 | "dummy1" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "output_type": "execute_result", 74 | "data": { 75 | "text/plain": [ 76 | " A B\n", 77 | "3 7 10\n", 78 | "4 8 11\n", 79 | "5 9 12" 80 | ], 81 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 82 | }, 83 | "metadata": {}, 84 | "execution_count": 3 85 | } 86 | ], 87 | "source": [ 88 | "dict2={'A': [7,8,9], 'B': [10,11,12]}\n", 89 | "dummy2= pd.DataFrame(dict2, index= [3,4,5])\n", 90 | "dummy2" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 4, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "output_type": "execute_result", 100 | "data": { 101 | "text/plain": [ 102 | " A B\n", 103 | "6 7 10\n", 104 | "7 8 11\n", 105 | "8 9 12" 106 | ], 107 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 108 | }, 109 | "metadata": {}, 110 | "execution_count": 4 111 | } 112 | ], 113 | "source": [ 114 | "dict3={'A': [13,14,15], 'B': [16,17,18]}\n", 115 | "dummy3= pd.DataFrame(dict2, index= [6,7,8])\n", 116 | "dummy3" 117 | ] 118 | }, 119 | { 120 | "source": [ 121 | "Concatenating row wise" 122 | ], 123 | "cell_type": "markdown", 124 | "metadata": {} 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 32, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "output_type": "execute_result", 133 | "data": { 134 | "text/plain": [ 135 | " A B\n", 136 | "x 0 1 4\n", 137 | " 1 2 5\n", 138 | " 2 3 6\n", 139 | "y 3 7 10\n", 140 | " 4 8 11\n", 141 | " 5 9 12\n", 142 | "z 6 7 10\n", 143 | " 7 8 11\n", 144 | " 8 9 12" 145 | ], 146 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 147 | }, 148 | "metadata": {}, 149 | "execution_count": 32 150 | } 151 | ], 152 | "source": [ 153 | "con_cat= pd.concat([dummy1, dummy2, dummy3], keys= ['x','y','z'])\n", 154 | "con_cat" 155 | ] 156 | }, 157 | { 158 | "source": [ 159 | "Concatenating column wise" 160 | ], 161 | "cell_type": "markdown", 162 | "metadata": {} 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 33, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "output_type": "execute_result", 171 | "data": { 172 | "text/plain": [ 173 | " A B A B A B\n", 174 | "0 1.0 4.0 NaN NaN NaN NaN\n", 175 | "1 2.0 5.0 NaN NaN NaN NaN\n", 176 | "2 3.0 6.0 NaN NaN NaN NaN\n", 177 | "3 NaN NaN 7.0 10.0 NaN NaN\n", 178 | "4 NaN NaN 8.0 11.0 NaN NaN\n", 179 | "5 NaN NaN 9.0 12.0 NaN NaN\n", 180 | "6 NaN NaN NaN NaN 7.0 10.0\n", 181 | "7 NaN NaN NaN NaN 8.0 11.0\n", 182 | "8 NaN NaN NaN NaN 9.0 12.0" 183 | ], 184 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 185 | }, 186 | "metadata": {}, 187 | "execution_count": 33 188 | } 189 | ], 190 | "source": [ 191 | "con_cat_col= pd.concat([dummy1, dummy2, dummy3], axis=1) # By default does the outer join, as there is no data loss in this outer join\n", 192 | "con_cat_col" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 34, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "output_type": "execute_result", 202 | "data": { 203 | "text/plain": [ 204 | " A B\n", 205 | "0 7 10\n", 206 | "1 8 11\n", 207 | "7 9 12" 208 | ], 209 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 210 | }, 211 | "metadata": {}, 212 | "execution_count": 34 213 | } 214 | ], 215 | "source": [ 216 | "# Creating new dummy variable\n", 217 | "dict4={'A': [13,14,15], 'B': [16,17,18]}\n", 218 | "dummy4= pd.DataFrame(dict2, index= [0,1,7])\n", 219 | "dummy4" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 8, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "output_type": "execute_result", 229 | "data": { 230 | "text/plain": [ 231 | " A B A B\n", 232 | "0 1 4 7 10\n", 233 | "1 2 5 8 11" 234 | ], 235 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 236 | }, 237 | "metadata": {}, 238 | "execution_count": 8 239 | } 240 | ], 241 | "source": [ 242 | "con_cat_col= pd.concat([dummy1,dummy4], axis=1, join='inner') # inner join takes the intersection between the data frames passed\n", 243 | "con_cat_col # here only row indices 0 and 1 are common between dummy1 and dummy4" 244 | ] 245 | }, 246 | { 247 | "source": [ 248 | "merge()" 249 | ], 250 | "cell_type": "markdown", 251 | "metadata": {} 252 | }, 253 | { 254 | "source": [ 255 | "Creating new Data Frames" 256 | ], 257 | "cell_type": "markdown", 258 | "metadata": {} 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 37, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],\n", 267 | " 'value': [1, 2, 6, 5]})\n", 268 | "df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'],\n", 269 | " 'value': [5, 6, 7, 8]}, index= [4,5,6,7])" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 38, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "output_type": "execute_result", 279 | "data": { 280 | "text/plain": [ 281 | " lkey value\n", 282 | "0 foo 1\n", 283 | "1 bar 2\n", 284 | "2 baz 6\n", 285 | "3 foo 5" 286 | ], 287 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 288 | }, 289 | "metadata": {}, 290 | "execution_count": 38 291 | } 292 | ], 293 | "source": [ 294 | "df1" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 39, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "output_type": "execute_result", 304 | "data": { 305 | "text/plain": [ 306 | " rkey value\n", 307 | "4 foo 5\n", 308 | "5 bar 6\n", 309 | "6 baz 7\n", 310 | "7 foo 8" 311 | ], 312 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 313 | }, 314 | "metadata": {}, 315 | "execution_count": 39 316 | } 317 | ], 318 | "source": [ 319 | "df2" 320 | ] 321 | }, 322 | { 323 | "source": [ 324 | "how='outer'" 325 | ], 326 | "cell_type": "markdown", 327 | "metadata": {} 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 40, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "output_type": "execute_result", 336 | "data": { 337 | "text/plain": [ 338 | " lkey value rkey\n", 339 | "0 foo 1 NaN\n", 340 | "1 bar 2 NaN\n", 341 | "2 baz 6 bar\n", 342 | "3 foo 5 foo\n", 343 | "4 NaN 7 baz\n", 344 | "5 NaN 8 foo" 345 | ], 346 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 347 | }, 348 | "metadata": {}, 349 | "execution_count": 40 350 | } 351 | ], 352 | "source": [ 353 | "merged= pd.merge(df1,df2, on='value', how='outer')\n", 354 | "merged" 355 | ] 356 | }, 357 | { 358 | "source": [ 359 | "how='inner'" 360 | ], 361 | "cell_type": "markdown", 362 | "metadata": {} 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 41, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "output_type": "execute_result", 371 | "data": { 372 | "text/plain": [ 373 | " lkey value rkey\n", 374 | "0 baz 6 bar\n", 375 | "1 foo 5 foo" 376 | ], 377 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 378 | }, 379 | "metadata": {}, 380 | "execution_count": 41 381 | } 382 | ], 383 | "source": [ 384 | "merged=pd.merge(df1,df2, on='value', how='inner')\n", 385 | "merged" 386 | ] 387 | }, 388 | { 389 | "source": [ 390 | "how='right'" 391 | ], 392 | "cell_type": "markdown", 393 | "metadata": {} 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 42, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "output_type": "execute_result", 402 | "data": { 403 | "text/plain": [ 404 | " lkey value rkey\n", 405 | "0 foo 5 foo\n", 406 | "1 baz 6 bar\n", 407 | "2 NaN 7 baz\n", 408 | "3 NaN 8 foo" 409 | ], 410 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 411 | }, 412 | "metadata": {}, 413 | "execution_count": 42 414 | } 415 | ], 416 | "source": [ 417 | "merged= pd.merge(df1,df2, on='value', how='right')\n", 418 | "merged" 419 | ] 420 | }, 421 | { 422 | "source": [ 423 | "how='left'" 424 | ], 425 | "cell_type": "markdown", 426 | "metadata": {} 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 43, 431 | "metadata": {}, 432 | "outputs": [ 433 | { 434 | "output_type": "execute_result", 435 | "data": { 436 | "text/plain": [ 437 | " lkey value rkey\n", 438 | "0 foo 1 NaN\n", 439 | "1 bar 2 NaN\n", 440 | "2 baz 6 bar\n", 441 | "3 foo 5 foo" 442 | ], 443 | "text/html": "
[HTML DataFrame render stripped during extraction; identical values shown in the text/plain output above]
" 444 | }, 445 | "metadata": {}, 446 | "execution_count": 43 447 | } 448 | ], 449 | "source": [ 450 | "merged= pd.merge(df1,df2, on='value', how='left')\n", 451 | "merged" 452 | ] 453 | } 454 | ] 455 | } -------------------------------------------------------------------------------- /Merging Data using Pandas/sample: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Natural Language Processing/Natural_Language.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "language_info": { 4 | "codemirror_mode": { 5 | "name": "ipython", 6 | "version": 3 7 | }, 8 | "file_extension": ".py", 9 | "mimetype": "text/x-python", 10 | "name": "python", 11 | "nbconvert_exporter": "python", 12 | "pygments_lexer": "ipython3", 13 | "version": "3.7.9-final" 14 | }, 15 | "orig_nbformat": 2, 16 | "kernelspec": { 17 | "name": "python3", 18 | "display_name": "Python 3.7.9 64-bit ('myenv': conda)", 19 | "metadata": { 20 | "interpreter": { 21 | "hash": "0989d4cb382ec003e6ad9ee0079fe5a34620af18f47069c43c62ee5030c1ec77" 22 | } 23 | } 24 | } 25 | }, 26 | "nbformat": 4, 27 | "nbformat_minor": 2, 28 | "cells": [ 29 | { 30 | "source": [ 31 | "# NATURAL LANGAUGE TOOL KIT LIBRARY" 32 | ], 33 | "cell_type": "markdown", 34 | "metadata": {} 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "name": "stdout", 44 | "text": [ 45 | "Paragraphs are the building blocks of papers. Many students define paragraphs in terms of length: a paragraph is a group of at least five sentences, a paragraph is half a page long, etc. In reality, though, the unity and coherence of ideas among sentences is what constitutes a paragraph. A paragraph is defined as “a group of sentences or a single sentence that forms a unit” (Lunsford and Connors 116). Length and appearance do not determine whether a section in a paper is a paragraph. For instance, in some styles of writing, particularly journalistic styles, a paragraph can be just one sentence long. Ultimately, a paragraph is a sentence or group of sentences that support one main idea. In this handout, we will refer to this as the “controlling idea,” because it controls what happens in the rest of the paragraph.\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "text= \"Paragraphs are the building blocks of papers. Many students define paragraphs in terms of length: a paragraph is a group of at least five sentences, a paragraph is half a page long, etc. In reality, though, the unity and coherence of ideas among sentences is what constitutes a paragraph. A paragraph is defined as “a group of sentences or a single sentence that forms a unit” (Lunsford and Connors 116). Length and appearance do not determine whether a section in a paper is a paragraph. For instance, in some styles of writing, particularly journalistic styles, a paragraph can be just one sentence long. Ultimately, a paragraph is a sentence or group of sentences that support one main idea. 
In this handout, we will refer to this as the “controlling idea,” because it controls what happens in the rest of the paragraph.\"\n", 51 | "print(text)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 7, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "output_type": "stream", 61 | "name": "stderr", 62 | "text": [ 63 | "[nltk_data] Downloading package punkt to\n", 64 | "[nltk_data] C:\\Users\\LENOVO\\AppData\\Roaming\\nltk_data...\n", 65 | "[nltk_data] Unzipping tokenizers\\punkt.zip.\n" 66 | ] 67 | }, 68 | { 69 | "output_type": "execute_result", 70 | "data": { 71 | "text/plain": [ 72 | "True" 73 | ] 74 | }, 75 | "metadata": {}, 76 | "execution_count": 7 77 | } 78 | ], 79 | "source": [ 80 | "import nltk\n", 81 | "nltk.download('punkt')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 10, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "output_type": "stream", 91 | "name": "stdout", 92 | "text": [ 93 | "['Paragraphs are the building blocks of papers.', 'Many students define paragraphs in terms of length: a paragraph is a group of at least five sentences, a paragraph is half a page long, etc.', 'In reality, though, the unity and coherence of ideas among sentences is what constitutes a paragraph.', 'A paragraph is defined as “a group of sentences or a single sentence that forms a unit” (Lunsford and Connors 116).', 'Length and appearance do not determine whether a section in a paper is a paragraph.', 'For instance, in some styles of writing, particularly journalistic styles, a paragraph can be just one sentence long.', 'Ultimately, a paragraph is a sentence or group of sentences that support one main idea.', 'In this handout, we will refer to this as the “controlling idea,” because it controls what happens in the rest of the paragraph.']\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "# Sentence tokenizer\n", 99 | "from nltk.tokenize import sent_tokenize\n", 100 | "sent_tk = sent_tokenize(text)\n", 101 | "print(sent_tk)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 13, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "output_type": "stream", 111 | "name": "stdout", 112 | "text": [ 113 | "['Paragraphs', 'are', 'the', 'building', 'blocks', 'of', 'papers', '.', 'Many', 'students', 'define', 'paragraphs', 'in', 'terms', 'of', 'length', ':', 'a', 'paragraph', 'is', 'a', 'group', 'of', 'at', 'least', 'five', 'sentences', ',', 'a', 'paragraph', 'is', 'half', 'a', 'page', 'long', ',', 'etc', '.', 'In', 'reality', ',', 'though', ',', 'the', 'unity', 'and', 'coherence', 'of', 'ideas', 'among', 'sentences', 'is', 'what', 'constitutes', 'a', 'paragraph', '.', 'A', 'paragraph', 'is', 'defined', 'as', '“', 'a', 'group', 'of', 'sentences', 'or', 'a', 'single', 'sentence', 'that', 'forms', 'a', 'unit', '”', '(', 'Lunsford', 'and', 'Connors', '116', ')', '.', 'Length', 'and', 'appearance', 'do', 'not', 'determine', 'whether', 'a', 'section', 'in', 'a', 'paper', 'is', 'a', 'paragraph', '.', 'For', 'instance', ',', 'in', 'some', 'styles', 'of', 'writing', ',', 'particularly', 'journalistic', 'styles', ',', 'a', 'paragraph', 'can', 'be', 'just', 'one', 'sentence', 'long', '.', 'Ultimately', ',', 'a', 'paragraph', 'is', 'a', 'sentence', 'or', 'group', 'of', 'sentences', 'that', 'support', 'one', 'main', 'idea', '.', 'In', 'this', 'handout', ',', 'we', 'will', 'refer', 'to', 'this', 'as', 'the', '“', 'controlling', 'idea', ',', '”', 'because', 'it', 'controls', 'what', 'happens', 'in', 'the', 'rest', 'of', 'the', 
'paragraph', '.']\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "# Word tokenizer\n", 119 | "from nltk.tokenize import word_tokenize\n", 120 | "word_tk= word_tokenize(text)\n", 121 | "print(word_tk)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 15, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "output_type": "stream", 131 | "name": "stderr", 132 | "text": [ 133 | "[nltk_data] Downloading package stopwords to\n", 134 | "[nltk_data] C:\\Users\\LENOVO\\AppData\\Roaming\\nltk_data...\n", 135 | "[nltk_data] Unzipping corpora\\stopwords.zip.\n" 136 | ] 137 | }, 138 | { 139 | "output_type": "execute_result", 140 | "data": { 141 | "text/plain": [ 142 | "True" 143 | ] 144 | }, 145 | "metadata": {}, 146 | "execution_count": 15 147 | } 148 | ], 149 | "source": [ 150 | "nltk.download('stopwords')" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 16, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "output_type": "stream", 160 | "name": "stdout", 161 | "text": [ 162 | "Stop words in english are: {'i', 'himself', 'haven', 'further', 'not', 'be', 'up', 'had', 'don', 'didn', 'both', \"shan't\", 'can', 'an', 'yourselves', \"should've\", 'with', 't', 'we', \"that'll\", 'did', 'what', 'it', \"haven't\", 'my', \"you've\", 'to', 'are', 'our', 'which', 's', \"hasn't\", 'before', 'until', 'whom', 'in', 'd', 'yours', 'should', 'o', 'these', 'on', 'own', 'll', 'because', 'other', 'yourself', 'does', 'he', \"aren't\", 'or', 'now', \"needn't\", \"shouldn't\", 'won', 'm', 'off', 'a', 'of', 'has', \"you'd\", 'into', 'through', 'me', 'so', 'am', 'ain', 'wasn', 'will', 'just', 'were', 'again', \"you're\", 'against', 're', 'while', 'do', 'they', 'than', 'her', \"mustn't\", 'doing', 'their', 'only', \"couldn't\", \"won't\", \"hadn't\", 'the', 'out', 'was', 'if', \"isn't\", 'shouldn', 'this', 'such', 'between', 'shan', 'is', 'over', 'no', \"doesn't\", 'she', 'them', 'hadn', 'couldn', 'doesn', \"you'll\", 'above', 'weren', 'you', 'but', 'your', \"mightn't\", \"didn't\", 'from', 'all', 'have', 'at', 'how', \"don't\", 'y', 'that', 'and', 'during', 'when', 'same', \"wasn't\", 'nor', 'here', 'mightn', 'myself', 'having', 'hers', 'his', \"wouldn't\", 'where', \"it's\", 'who', 'there', 'most', 'down', 'him', 'each', 'few', 'below', 'been', 'isn', 'any', 'after', 'being', 'theirs', 'once', 'about', 'its', 'by', 'under', 'ours', 'hasn', 'mustn', 'aren', 'ma', 'as', 'itself', 'those', \"weren't\", 'themselves', \"she's\", 'ourselves', 'very', 'needn', 've', 'for', 'more', 'wouldn', 'why', 'some', 'herself', 'then', 'too'}\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "# Removing stop words\n", 168 | "from nltk.corpus import stopwords\n", 169 | "sw= set(stopwords.words('english'))\n", 170 | "print(\"Stop words in english are:\", sw)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 20, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "output_type": "stream", 180 | "name": "stdout", 181 | "text": [ 182 | "['Paragraphs', 'building', 'blocks', 'papers', '.', 'Many', 'students', 'define', 'paragraphs', 'terms', 'length', ':', 'paragraph', 'group', 'least', 'five', 'sentences', ',', 'paragraph', 'half', 'page', 'long', ',', 'etc', '.', 'In', 'reality', ',', 'though', ',', 'unity', 'coherence', 'ideas', 'among', 'sentences', 'constitutes', 'paragraph', '.', 'A', 'paragraph', 'defined', '“', 'group', 'sentences', 'single', 'sentence', 'forms', 'unit', '”', '(', 'Lunsford', 'Connors', '116', ')', '.', 'Length', 
'appearance', 'determine', 'whether', 'section', 'paper', 'paragraph', '.', 'For', 'instance', ',', 'styles', 'writing', ',', 'particularly', 'journalistic', 'styles', ',', 'paragraph', 'one', 'sentence', 'long', '.', 'Ultimately', ',', 'paragraph', 'sentence', 'group', 'sentences', 'support', 'one', 'main', 'idea', '.', 'In', 'handout', ',', 'refer', '“', 'controlling', 'idea', ',', '”', 'controls', 'happens', 'rest', 'paragraph', '.']\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "filter= [w for w in word_tk if not w in sw]\n", 188 | "print(filter)" 189 | ] 190 | } 191 | ] 192 | } -------------------------------------------------------------------------------- /Natural Language Processing/Notebooks/1 - Into to Python text basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "Format method in the print method" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [ 16 | { 17 | "name": "stdout", 18 | "output_type": "stream", 19 | "text": [ 20 | "Name is Jose\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "person = \"Jose\"\n", 26 | "print(\"Name is {}\".format(person))" 27 | ] 28 | }, 29 | { 30 | "attachments": {}, 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "f-string literals" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "Name is Jose\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "print(f\"Name is {person}\")" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "The number in the key a is 123\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "# we can perform operations inside of the f-string as well\n", 69 | "dict = {'a':123, 'b':456} # can be perforned the same on the list as well\n", 70 | "print(f\"The number in the key a is {dict['a']}\")" 71 | ] 72 | }, 73 | { 74 | "attachments": {}, 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Alignment and Padding" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "[('Author', 'Topic', 'Pages'),\n", 90 | " ('Twain', 'Rafting', 601),\n", 91 | " ('Feynman', 'Physics', 95),\n", 92 | " ('Hamilton', 'Mythology', 144)]" 93 | ] 94 | }, 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "library = [('Author', 'Topic', 'Pages'), ('Twain', 'Rafting', 601), ('Feynman', 'Physics', 95), ('Hamilton', 'Mythology', 144)]\n", 102 | "library" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 7, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "Author is Author\n", 115 | "Author is Twain\n", 116 | "Author is Feynman\n", 117 | "Author is Hamilton\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "for author,book in library:\n", 123 | " print(f\"Author is {book[0]}\")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 8, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 
| "output_type": "stream", 134 | "text": [ 135 | "Author Topic Pages\n", 136 | "Twain Rafting 601\n", 137 | "Feynman Physics 95\n", 138 | "Hamilton Mythology 144\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "for author, topic, pages in library: # tuple unpacking\n", 144 | " print(f\"{author} {topic} {pages}\")" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 16, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "Author Topic Pages \n", 157 | "Twain Rafting 601\n", 158 | "Feynman Physics 95\n", 159 | "Hamilton Mythology 144\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "# padding using the minimum width \n", 165 | "for author, topic, pages in library: # tuple unpacking\n", 166 | " print(f\"{author:{10}} {topic:{25}} {pages:{10}}\")\n", 167 | "\n", 168 | "# Here, the 10 represents 10 spaces as the minimum width for the author" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 17, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "Author Topic Pages\n", 181 | "Twain Rafting 601\n", 182 | "Feynman Physics 95\n", 183 | "Hamilton Mythology 144\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "# Pages seems to be a bit off, hence we add > to fix that automatically\n", 189 | "for author, topic, pages in library: # tuple unpacking\n", 190 | " print(f\"{author:{10}} {topic:{25}} {pages:>{10}}\")" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 20, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "Author Topic .....Pages\n", 203 | "Twain Rafting .......601\n", 204 | "Feynman Physics ........95\n", 205 | "Hamilton Mythology .......144\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "for author, topic, pages in library: # tuple unpacking\n", 211 | " print(f\"{author:{10}} {topic:{25}} {pages:.>{10}}\")" 212 | ] 213 | }, 214 | { 215 | "attachments": {}, 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "Date formatting" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 21, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | "2019-02-28 00:00:00\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "from datetime import datetime\n", 237 | "today = datetime(year=2019, month=2, day=28)\n", 238 | "print(f\"{today}\")" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 26, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "February 28 2019\n" 251 | ] 252 | } 253 | ], 254 | "source": [ 255 | "print(f\"{today:%B %d %Y}\")\n", 256 | "\n", 257 | "# %B - strf month format\n", 258 | "# %d - strf date format\n", 259 | "# %Y - strf year format" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [] 268 | } 269 | ], 270 | "metadata": { 271 | "kernelspec": { 272 | "display_name": "Python 3", 273 | "language": "python", 274 | "name": "python3" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 3 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": 
"python", 285 | "pygments_lexer": "ipython3", 286 | "version": "3.10.9" 287 | }, 288 | "orig_nbformat": 4 289 | }, 290 | "nbformat": 4, 291 | "nbformat_minor": 2 292 | } 293 | -------------------------------------------------------------------------------- /Natural Language Processing/Notebooks/2 - Working with text files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Writing test.txt\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "%%writefile test.txt\n", 18 | "Here, immediately below the magic command we can \n", 19 | "write the contents of the text file. Here %% is the magic command\n", 20 | "avaiable specific to jupyer notebook, writefile is the command used \n", 21 | "to create a new text file followed by the text file name represented as test" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# to open the file\n", 31 | "myfile = open('test.txt')\n", 32 | "\n", 33 | "# By default, it opens in read mode" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 4, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": [ 44 | "'Here, immediately below the magic command we can \\nwrite the contents of the text file. Here %% is the magic command\\navaiable specific to jupyer notebook, writefile is the command used \\nto create a new text file followed by the text file name represented as test\\n'" 45 | ] 46 | }, 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": [ 53 | "# to read the file\n", 54 | "myfile.read()\n", 55 | "\n", 56 | "# Here, when the read method is being called, the cursor reads from the beginning of the text to last line of the text" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "''" 68 | ] 69 | }, 70 | "execution_count": 5, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "# Since, the cursor has reached the end of the line, there is nothing left in the text to be read\n", 77 | "myfile.read()\n", 78 | "\n", 79 | "# From this, it is clear that we just can't read the file name again and again" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 6, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "'Here, immediately below the magic command we can \\nwrite the contents of the text file. 
Here %% is the magic command\\navaiable specific to jupyer notebook, writefile is the command used \\nto create a new text file followed by the text file name represented as test\\n'" 91 | ] 92 | }, 93 | "execution_count": 6, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "# To fix this issue, we can seek back the cursor to the index position that is the beginning and read the file from the beginning again\n", 100 | "myfile.seek(0) # sets the cursor back to the index position\n", 101 | "myfile.read() # since curosr was set to the beginning, we can read the contents of the file again" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 7, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "Here, immediately below the magic command we can \n", 114 | "write the contents of the text file. Here %% is the magic command\n", 115 | "avaiable specific to jupyer notebook, writefile is the command used \n", 116 | "to create a new text file followed by the text file name represented as test\n", 117 | "\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "myfile.seek(0)\n", 123 | "content = myfile.read()\n", 124 | "print(content)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 8, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "# alwys close a file when done\n", 134 | "myfile.close()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 9, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# readline can print each of the line separetely in a python list\n", 144 | "\n", 145 | "myfile = open('test.txt')" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 10, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "['Here, immediately below the magic command we can \\n',\n", 157 | " 'write the contents of the text file. Here %% is the magic command\\n',\n", 158 | " 'avaiable specific to jupyer notebook, writefile is the command used \\n',\n", 159 | " 'to create a new text file followed by the text file name represented as test\\n']" 160 | ] 161 | }, 162 | "execution_count": 10, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "myfile.readlines() # we can say that \\n is used as a delimiter to save each lines separately" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 11, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "myfile.seek(0)\n", 178 | "mylines = myfile.readlines()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 13, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "['Here, immediately below the magic command we can \\n', 'write the contents of the text file. 
Here %% is the magic command\\n', 'avaiable specific to jupyer notebook, writefile is the command used \\n', 'to create a new text file followed by the text file name represented as test\\n']\n" 191 | ] 192 | } 193 | ], 194 | "source": [ 195 | "print(mylines) # we can see that it has list of items whenever a \\n is encountered" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 16, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "# Writing to a file, by default the open() opens only in read mode\n", 205 | "\n", 206 | "myfile = open('test.txt', mode='w+') # w+ - reads and writes" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 17, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "''" 218 | ] 219 | }, 220 | "execution_count": 17, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "myfile.read() # when w+ is used we are overriding the original text hence it is blank" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 18, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "18" 238 | ] 239 | }, 240 | "execution_count": 18, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "myfile.write(\"This is a new text\")" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 19, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/plain": [ 257 | "'This is a new text'" 258 | ] 259 | }, 260 | "execution_count": 19, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "myfile.seek(0)\n", 267 | "myfile.read() # It has overwritten the previous text which was written" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 20, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "myfile.close()" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 21, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "# append to a file - keeps the old content and new contents after that\n", 286 | "myfile = open('test.txt', mode='a+')" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 22, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "25" 298 | ] 299 | }, 300 | "execution_count": 22, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "myfile.write(\"This is the appended line\")" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 23, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "myfile.close()" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 26, 321 | "metadata": {}, 322 | "outputs": [ 323 | { 324 | "data": { 325 | "text/plain": [ 326 | "'This is a new textThis is the appended line'" 327 | ] 328 | }, 329 | "execution_count": 26, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "newfile = open('test.txt')\n", 336 | "newfile.read()" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 27, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "# anotehr method to close a file, called as context managers\n", 346 | "with open('test.txt','r') as newfile:\n", 347 | " myvar = newfile.readlines()\n", 348 
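The same read/overwrite/append cycle can be written entirely with context managers, so each handle is closed automatically even if an error occurs; a sketch assuming it is fine to overwrite `test.txt`.

```python
# Context-manager version of the w+ / a+ / read cycle shown above.
with open("test.txt", "w+") as f:   # w+ truncates, then allows read + write
    f.write("This is a new text")
    f.seek(0)                        # rewind the cursor before reading back
    print(f.read())

with open("test.txt", "a+") as f:   # a+ appends, keeping the old content
    f.write("This is the appended line")

with open("test.txt") as f:         # default mode is 'r'
    print(f.readlines())             # one list item per '\n'-terminated line
```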
| "\n", 349 | "# this automatically closes the file when the above block of code is ran" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [] 358 | } 359 | ], 360 | "metadata": { 361 | "kernelspec": { 362 | "display_name": "Python 3", 363 | "language": "python", 364 | "name": "python3" 365 | }, 366 | "language_info": { 367 | "codemirror_mode": { 368 | "name": "ipython", 369 | "version": 3 370 | }, 371 | "file_extension": ".py", 372 | "mimetype": "text/x-python", 373 | "name": "python", 374 | "nbconvert_exporter": "python", 375 | "pygments_lexer": "ipython3", 376 | "version": "3.10.9" 377 | }, 378 | "orig_nbformat": 4 379 | }, 380 | "nbformat": 4, 381 | "nbformat_minor": 2 382 | } 383 | -------------------------------------------------------------------------------- /Natural Language Processing/Notebooks/3 - Working with PDF Files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "PyPDF2 library can be used to read in the text data from a PDF file\n", 9 | "\n", 10 | "PyPDF2 librry is made to extract text from PDF files directly created from a word processor" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import PyPDF2" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 7, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# reading a PDF\n", 29 | "myfile = open('C:\\\\Users\\\\gmi\\\\Documents\\\\NLP using Python Course\\\\Notebook Files\\\\00-Python-Text-Basics\\\\US_Declaration.pdf', mode='rb')\n", 30 | "\n", 31 | "# rb- reading in binary methdod, this is needed because this is no longer a text file rather a PDF file" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 9, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# converting it into a PDF file reader object\n", 41 | "pdf_reader = PyPDF2.PdfReader(myfile)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 12, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "5" 53 | ] 54 | }, 55 | "execution_count": 12, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "len(pdf_reader.pages) # to display the number of pages in the PDF document" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 14, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# To extract the texts from the first page of the PDF\n", 71 | "page_one = pdf_reader.pages[0] # 0 --> first page" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 18, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "Declaration of Independence\n", 84 | "IN CONGRESS, July 4, 1776. \n", 85 | "The unanimous Declaration of the thirteen united States of America, \n", 86 | "When in the Course of human events, it becomes necessary for one people to dissolve thepolitical bands which have connected them with another, and to assume among the powers of theearth, the separate and equal station to which the Laws of Nature and of Nature's God entitlethem, a decent respect to the opinions of mankind requires that they should declare the causeswhich impel them to the separation. 
We hold these truths to be self-evident, that all men are created equal, that they are endowed bytheir Creator with certain unalienable Rights, that among these are Life, Liberty and the pursuit\n", 87 | "of Happiness.— \u0014That to secure these rights, Governments are instituted among Men, derivingtheir just powers from the consent of the governed,— \u0014That whenever any Form of Government\n", 88 | "becomes destructive of these ends, it is the Right of the People to alter or to abolish it, and to\n", 89 | "institute new Government, laying its foundation on such principles and organizing its powers in\n", 90 | "such form, as to them shall seem most likely to effect their Safety and Happiness. Prudence,indeed, will dictate that Governments long established should not be changed for light andtransient causes; and accordingly all experience hath shewn, that mankind are more disposed to\n", 91 | "suffer, while evils are sufferable, than to right themselves by abolishing the forms to which theyare accustomed. But when a long train of abuses and usurpations, pursuing invariably the same\n", 92 | "Object evinces a design to reduce them under absolute Despotism, it is their right, it is their duty,\n", 93 | "to throw off such Government, and to provide new Guards for their future securit y.— \u0014Such has\n", 94 | "been the patient sufferance of these Colonies; and such is now the necessity which constrainsthem to alter their former Systems of Government. The history of the present King of GreatBritain is a history of repeated injuries and usurpations, all having in direct object the\n", 95 | "establishment of an absolute Tyranny over these States. To prove this, let Facts be submitted to a\n", 96 | "candid world. \n", 97 | "He has refused his Assent to Laws, the most wholesome and necessary for the\n", 98 | "public good.He has forbidden his Governors to pass Laws of immediate and pressingimportance, unless suspended in their operation till his Assent should be obtained;and when so suspended, he has utterly neglected to attend to them.He has refused to pass other Laws for the accommodation of large districts of\n", 99 | "people, unless those people would relinquish the right of Representation in theLegislature, a right inestimable to them and formidable to tyrants only. 
He has called together legislative bodies at places unusual, uncomfortable, and distantfrom the depository of their public Records, for the sole purpose of fatiguing them into\n", 100 | "compliance with his measures.\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "print(page_one.extract_text())" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": "Python 3", 119 | "language": "python", 120 | "name": "python3" 121 | }, 122 | "language_info": { 123 | "codemirror_mode": { 124 | "name": "ipython", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "python", 130 | "nbconvert_exporter": "python", 131 | "pygments_lexer": "ipython3", 132 | "version": "3.10.9" 133 | }, 134 | "orig_nbformat": 4 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 2 138 | } 139 | -------------------------------------------------------------------------------- /Natural Language Processing/Notebooks/4 - regex.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "text = \"The phone numberof the agent is 408-555-1234. Call Soon\"" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import re # regular expression library that is built into python\n", 19 | "pattern = \"phone\"" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "" 31 | ] 32 | }, 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "re.search(pattern,text)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 5, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "(4, 9)" 51 | ] 52 | }, 53 | "execution_count": 5, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "my_match = re.search(pattern,text)\n", 60 | "my_match.span() # return the index where the pattern is present in the text" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 6, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "4" 72 | ] 73 | }, 74 | "execution_count": 6, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "my_match.start() # returns the starting index of the pattern in the text" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 7, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "text = \"my phone is a new phone\"\n", 90 | "match = re.search(pattern, text)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 8, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "(3, 8)" 102 | ] 103 | }, 104 | "execution_count": 8, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "match.span() \n", 111 | "# Even though the phone is repeated twice in the text, it returns only the first instance span" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 9, 117 | "metadata": 
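A hedged sketch generalizing the page-extraction steps above to every page of a PDF; the relative filename is illustrative and PyPDF2 3.x (with `PdfReader` / `extract_text`) is assumed.

```python
# Open in binary mode, wrap in a PdfReader, and extract text page by page.
import PyPDF2

with open("US_Declaration.pdf", "rb") as myfile:   # 'rb' - PDFs are binary
    pdf_reader = PyPDF2.PdfReader(myfile)
    print(f"{len(pdf_reader.pages)} pages")
    for page in pdf_reader.pages:
        text = page.extract_text()                 # plain text of one page
        print(text[:80])                           # preview the first chars
```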
{}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "['phone', 'phone']" 123 | ] 124 | }, 125 | "execution_count": 9, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "# to find all the instnces of the pattern in a text\n", 132 | "all_matches = re.findall(pattern, text)\n", 133 | "all_matches" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 10, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "2" 145 | ] 146 | }, 147 | "execution_count": 10, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "len(all_matches) # returns the number of instances the pattern is present in the text" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 12, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "(3, 8)\n", 166 | "(18, 23)\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "for match in re.finditer(pattern, text):\n", 172 | " print(match.span())\n", 173 | "\n", 174 | "# using the finditer() method we can find the span of all of the instances" 175 | ] 176 | }, 177 | { 178 | "attachments": {}, 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "re.search() ---> returns only first instance of the matching pattern in a text\n", 183 | "\n", 184 | "re.findall() ---> returns all of the instances of the matching patterns in a text" 185 | ] 186 | }, 187 | { 188 | "attachments": {}, 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "* \\d ---> A digit\n", 193 | "* \\w ---> Alphanumeric\n", 194 | "* \\s ---> White space\n", 195 | "* \\D ---> A non digit\n", 196 | "* \\W ---> Non-alphanumeric\n", 197 | "* \\S ---> Non-whitespace" 198 | ] 199 | }, 200 | { 201 | "attachments": {}, 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "
Quantifiers
\n", 206 | "\n", 207 | "* \\+ ---> Occurs one or more times\n", 208 | "* {3} --> Occurs exactly three times\n", 209 | "* {2,4} ---> Occurs 2 to 4 times\n", 210 | "* \\* ---> Occurs zero or more times\n", 211 | "* ? ---> Once or none" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 15, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "'My telephone number is 777-555-1234'" 223 | ] 224 | }, 225 | "execution_count": 15, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "text = \"My telephone number is 777-555-1234\"\n", 232 | "text" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 16, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "# To extract the phone number from the text\n", 242 | "pattern = r'\\d{3}-\\d{3}-\\d{4}'" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 17, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "phone_no = re.search(pattern, text)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 19, 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "name": "stdout", 261 | "output_type": "stream", 262 | "text": [ 263 | "The matched phone number is 777-555-1234\n" 264 | ] 265 | } 266 | ], 267 | "source": [ 268 | "print(f\"The matched phone number is {phone_no.group()}\")" 269 | ] 270 | }, 271 | { 272 | "attachments": {}, 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "Paranthesis can be used to form groups" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 20, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "The matched phone number is 777-555-1234\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "# To extract the phone number from the text\n", 294 | "pattern = r'(\\d{3})-(\\d{3})-(\\d{4})'\n", 295 | "phone_no = re.search(pattern, text)\n", 296 | "print(f\"The matched phone number is {phone_no.group()}\")" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 21, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "777\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "print(phone_no.group(1)) # returns only the contents inside the 1st ()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 24, 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "name": "stdout", 323 | "output_type": "stream", 324 | "text": [ 325 | "\n" 326 | ] 327 | }, 328 | { 329 | "data": { 330 | "text/plain": [ 331 | "" 332 | ] 333 | }, 334 | "execution_count": 24, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "print(re.search(r\"man|woman\", \"This man was here\"))\n", 341 | "re.search(r\"man|woman\", \"This woman was here\")\n" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 25, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/plain": [ 352 | "['cat', 'hat', 'sat']" 353 | ] 354 | }, 355 | "execution_count": 25, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "re.findall(r\".at\", \"The cat in the hat sat\") # wildcard operator" 362 | ] 363 | }, 364 | { 365 | "attachments": {}, 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": 
[ 369 | "* ^ ---> Starts with\n", 370 | "* $ ---> ends with" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 27, 376 | "metadata": {}, 377 | "outputs": [ 378 | { 379 | "name": "stdout", 380 | "output_type": "stream", 381 | "text": [ 382 | "['1']\n" 383 | ] 384 | }, 385 | { 386 | "data": { 387 | "text/plain": [ 388 | "['2']" 389 | ] 390 | }, 391 | "execution_count": 27, 392 | "metadata": {}, 393 | "output_type": "execute_result" 394 | } 395 | ], 396 | "source": [ 397 | "print(re.findall(r\"^\\d\", \"1 is the first number\"))\n", 398 | "re.findall(r\"\\d$\", \"This ends with a number 2\")" 399 | ] 400 | } 401 | ], 402 | "metadata": { 403 | "kernelspec": { 404 | "display_name": "Python 3", 405 | "language": "python", 406 | "name": "python3" 407 | }, 408 | "language_info": { 409 | "codemirror_mode": { 410 | "name": "ipython", 411 | "version": 3 412 | }, 413 | "file_extension": ".py", 414 | "mimetype": "text/x-python", 415 | "name": "python", 416 | "nbconvert_exporter": "python", 417 | "pygments_lexer": "ipython3", 418 | "version": "3.10.9" 419 | }, 420 | "orig_nbformat": 4 421 | }, 422 | "nbformat": 4, 423 | "nbformat_minor": 2 424 | } 425 | -------------------------------------------------------------------------------- /Natural Language Processing/Notebooks/5 - NLP Python basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Spacy library is being used\n", 10 | "import spacy" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 11, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# Load the language core which was downloaded\n", 20 | "nlp = spacy.load('en_core_web_sm') # sm --> small version" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 12, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# creating a document object\n", 30 | "doc = nlp(u'Tesla is looking at buying U.S. startup for $6 million')" 31 | ] 32 | }, 33 | { 34 | "attachments": {}, 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "* So, the above code parses the entire string into separate components for us.\n", 39 | "* After parsing, each component is called as tokens" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 13, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "Tesla PROPN\n", 52 | "is AUX\n", 53 | "looking VERB\n", 54 | "at ADP\n", 55 | "buying VERB\n", 56 | "U.S. 
PROPN\n", 57 | "startup NOUN\n", 58 | "for ADP\n", 59 | "$ SYM\n", 60 | "6 NUM\n", 61 | "million NUM\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "for token in doc:\n", 67 | " print(token.text, token.pos_) # grabs the raw text from the string\n", 68 | " # returns the part of speech" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 14, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "[('tok2vec', ),\n", 80 | " ('tagger', ),\n", 81 | " ('parser', ),\n", 82 | " ('attribute_ruler',\n", 83 | " ),\n", 84 | " ('lemmatizer', ),\n", 85 | " ('ner', )]" 86 | ] 87 | }, 88 | "execution_count": 14, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "nlp.pipeline" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 9, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']" 106 | ] 107 | }, 108 | "execution_count": 9, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "nlp.pipe_names" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 15, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "# TOKENIZATION - Splitting up into all the components called tokens\n", 124 | "\n", 125 | "doc2 = nlp(u\"Tesla isn't looking into startups anymore\")" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 17, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "spacy.tokens.doc.Doc" 137 | ] 138 | }, 139 | "execution_count": 17, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "type(doc2)\n", 146 | "\n", 147 | "# This is a document object type" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 20, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "Tesla PROPN nsubj\n", 160 | "is AUX aux\n", 161 | "n't PART neg\n", 162 | "looking VERB ROOT\n", 163 | "into ADP prep\n", 164 | "startups NOUN pobj\n", 165 | "anymore ADV advmod\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "for token in doc2:\n", 171 | " print(token.text, token.pos_, token.dep_)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 29, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "Tesla\n", 184 | "PROPN\n", 185 | "nsubj\n", 186 | "NNP\n", 187 | "look\n" 188 | ] 189 | } 190 | ], 191 | "source": [ 192 | "print(doc2[0]) # indexing can be used to grab the tokens\n", 193 | "print(doc2[0].pos_) # attributers can be used with the indexes as well\n", 194 | "print(doc2[0].dep_) # syntactic dependency attribute\n", 195 | "print(doc2[0].tag_) # gives detailed POS \n", 196 | "print(doc2[3].lemma_) # gives the root word form" 197 | ] 198 | }, 199 | { 200 | "attachments": {}, 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "SPANS - a slice of the document object in the form of doc[start:stop]" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 30, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "new_var = doc2[0:4]" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 31, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | 
"data": { 223 | "text/plain": [ 224 | "Tesla isn't looking" 225 | ] 226 | }, 227 | "execution_count": 31, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "new_var" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 32, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/plain": [ 244 | "spacy.tokens.span.Span" 245 | ] 246 | }, 247 | "execution_count": 32, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "type(new_var)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 33, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/plain": [ 264 | "spacy.tokens.doc.Doc" 265 | ] 266 | }, 267 | "execution_count": 33, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "type(doc2)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 34, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "# to grab the individual sentences from the string\n", 283 | "\n", 284 | "doc3 = nlp(u\"This is the first sentence. This is the second sentence. This is the third sentence.\")" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 35, 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | "This is the first sentence.\n", 297 | "This is the second sentence.\n", 298 | "This is the third sentence.\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "for sentences in doc3.sents:\n", 304 | " print(sentences)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "language": "python", 319 | "name": "python3" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.10.9" 332 | }, 333 | "orig_nbformat": 4 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 2 337 | } 338 | -------------------------------------------------------------------------------- /Natural Language Processing/Notebooks/readme.md: -------------------------------------------------------------------------------- 1 | This folder contains the notebook files realted to Natural Language Processing 2 | -------------------------------------------------------------------------------- /Natural Language Processing/README.md: -------------------------------------------------------------------------------- 1 | Using Natural Language toolkit library for Natural language processing 2 | -------------------------------------------------------------------------------- /Numpy/README.md: -------------------------------------------------------------------------------- 1 | This repo contains the basic operations and data types which can be performed on numpy ndarray 2 | -------------------------------------------------------------------------------- /Numpy/intro to numpy.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Numpy/intro to numpy.pdf -------------------------------------------------------------------------------- /Numpy/numpy operations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "42536ed7", 6 | "metadata": {}, 7 | "source": [ 8 | "
Operations performed in numpy" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "9f121b62", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import pandas as pd" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "id": "2e88b648", 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])" 32 | ] 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "a = np.arange(12)\n", 41 | "a" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "id": "aba327f4", 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "1\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "print(np.ndim(a))" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "id": "fc8a4426", 65 | "metadata": {}, 66 | "source": [ 67 | "
reshape():- it changes the arrangement of items by changing the order" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "id": "24b36310", 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "array([[ 0, 1, 2, 3, 4, 5],\n", 80 | " [ 6, 7, 8, 9, 10, 11]])" 81 | ] 82 | }, 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "b = a.reshape(2,6)\n", 90 | "b" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "id": "618ca18c", 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "2\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "print(b.ndim)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "d98b1986", 114 | "metadata": {}, 115 | "source": [ 116 | "
flatten()/ ravel():- it used for flattening, for converting n-dim to 1-d" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 6, 122 | "id": "be5cc863", 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])" 129 | ] 130 | }, 131 | "execution_count": 6, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "x1 = b.flatten()\n", 138 | "x1" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 7, 144 | "id": "01bfd553", 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])" 151 | ] 152 | }, 153 | "execution_count": 7, 154 | "metadata": {}, 155 | "output_type": "execute_result" 156 | } 157 | ], 158 | "source": [ 159 | "x2 = b.ravel()\n", 160 | "x2" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 8, 166 | "id": "e8f35dd9", 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "array([[100, 1, 2, 3, 4, 5],\n", 173 | " [ 6, 7, 8, 9, 10, 11]])" 174 | ] 175 | }, 176 | "execution_count": 8, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "# now let's replace 1st element 0 with 100 in b\n", 183 | "b[0][0] = 100\n", 184 | "b" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 9, 190 | "id": "d4ce5217", 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "[ 0 1 2 3 4 5 6 7 8 9 10 11]\n", 198 | "[100 1 2 3 4 5 6 7 8 9 10 11]\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "print(x1) # flatten() is creating a copy\n", 204 | "print(x2) # ravel() is creating a view" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 10, 210 | "id": "e3b12e07", 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "False\n", 218 | "True\n" 219 | ] 220 | } 221 | ], 222 | "source": [ 223 | "print(np.may_share_memory(x1,b)) # flatten() is creating a copy\n", 224 | "print(np.may_share_memory(x2,b)) # ravel() is creating a view" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 11, 230 | "id": "11204368", 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "name": "stdout", 235 | "output_type": "stream", 236 | "text": [ 237 | "[[100 1 2 3 4 5]\n", 238 | " [ 6 7 8 9 10 11]] \n", 239 | "\n", 240 | "[100 6 1 7 2 8 3 9 4 10 5 11]\n", 241 | "[100 1 2 3 4 5 6 7 8 9 10 11]\n" 242 | ] 243 | } 244 | ], 245 | "source": [ 246 | "# flatten method takes a parameter called order, by default it's row major\n", 247 | "print(b, \"\\n\")\n", 248 | "print(b.flatten(order=\"f\"))\n", 249 | "print(b.flatten(order=\"c\"))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "id": "7d91b7b7", 255 | "metadata": {}, 256 | "source": [ 257 | "
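The copy-versus-view behaviour of `flatten()` and `ravel()` demonstrated above, condensed into one runnable sketch (see the stacking section that follows).

```python
# flatten() returns an independent copy; ravel() returns a view when it can.
import numpy as np

b = np.arange(12).reshape(2, 6)
x1 = b.flatten()   # copy
x2 = b.ravel()     # view into b's buffer (b is C-contiguous here)

b[0, 0] = 100
print(x1[0])                       # 0   -> the copy is unaffected
print(x2[0])                       # 100 -> the view sees the change
print(np.may_share_memory(x1, b))  # False
print(np.may_share_memory(x2, b))  # True
```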
Stacking:- several arrays can be stacked together along different axes" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "id": "b14abaa2", 263 | "metadata": {}, 264 | "source": [ 265 | "np.vstack():- to stack arrays along vertical axis\n", 266 | "\n", 267 | "np.hstack():- to stack arrays along horinzontal axis" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 12, 273 | "id": "0022e423", 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "array([[1, 2],\n", 280 | " [3, 4],\n", 281 | " [5, 6],\n", 282 | " [7, 8]])" 283 | ] 284 | }, 285 | "execution_count": 12, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "x1 = np.array([[1,2],[3,4]])\n", 292 | "x2 = np.array([[5,6],[7,8]])\n", 293 | "np.vstack((x1,x2))" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 13, 299 | "id": "41b660e7", 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "array([[1, 2, 5, 6],\n", 306 | " [3, 4, 7, 8]])" 307 | ] 308 | }, 309 | "execution_count": 13, 310 | "metadata": {}, 311 | "output_type": "execute_result" 312 | } 313 | ], 314 | "source": [ 315 | "np.hstack((x1,x2))" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "id": "f49b4970", 321 | "metadata": {}, 322 | "source": [ 323 | "
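A quick sketch matching the `vstack`/`hstack` cells above — the same two 2×2 arrays, stacked along each axis.

```python
import numpy as np

x1 = np.array([[1, 2], [3, 4]])
x2 = np.array([[5, 6], [7, 8]])
print(np.vstack((x1, x2)))   # stacks rows    -> shape (4, 2)
print(np.hstack((x1, x2)))   # stacks columns -> shape (2, 4)
```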
Matrix operations" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 27, 329 | "id": "2d25c02f", 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "name": "stdout", 334 | "output_type": "stream", 335 | "text": [ 336 | "[[1 2]\n", 337 | " [3 4]\n", 338 | " [5 6]] \n", 339 | "\n", 340 | "[5 6]\n" 341 | ] 342 | } 343 | ], 344 | "source": [ 345 | "x1 = np.array([[1,2],[3,4],[5,6]])\n", 346 | "x2 = np.array([5,6])\n", 347 | "print(x1,\"\\n\")\n", 348 | "print(x2)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 28, 354 | "id": "be1157cd", 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "name": "stdout", 359 | "output_type": "stream", 360 | "text": [ 361 | "[[ 6 8]\n", 362 | " [ 8 10]\n", 363 | " [10 12]] \n", 364 | "\n", 365 | "[[ 6 8]\n", 366 | " [ 8 10]\n", 367 | " [10 12]]\n" 368 | ] 369 | } 370 | ], 371 | "source": [ 372 | "# addition of matrices\n", 373 | "print(np.add(x1,x2), \"\\n\")\n", 374 | "print(x1+x2)" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 29, 380 | "id": "27b2774e", 381 | "metadata": {}, 382 | "outputs": [ 383 | { 384 | "name": "stdout", 385 | "output_type": "stream", 386 | "text": [ 387 | "[[-4 -4]\n", 388 | " [-2 -2]\n", 389 | " [ 0 0]] \n", 390 | "\n", 391 | "[[-4 -4]\n", 392 | " [-2 -2]\n", 393 | " [ 0 0]]\n" 394 | ] 395 | } 396 | ], 397 | "source": [ 398 | "# subtraction of matrices\n", 399 | "print(np.subtract(x1,x2), \"\\n\")\n", 400 | "print(x1-x2)" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 30, 406 | "id": "62120f82", 407 | "metadata": {}, 408 | "outputs": [ 409 | { 410 | "name": "stdout", 411 | "output_type": "stream", 412 | "text": [ 413 | "[[ 5 12]\n", 414 | " [15 24]\n", 415 | " [25 36]]\n" 416 | ] 417 | } 418 | ], 419 | "source": [ 420 | "# element wise multiplication\n", 421 | "print(np.multiply(x1,x2))" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 31, 427 | "id": "c29f004e", 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "[17 39 61]\n" 435 | ] 436 | } 437 | ], 438 | "source": [ 439 | "# matrix multiplication\n", 440 | "print(np.dot(x1,x2))" 441 | ] 442 | } 443 | ], 444 | "metadata": { 445 | "kernelspec": { 446 | "display_name": "Python 3", 447 | "language": "python", 448 | "name": "python3" 449 | }, 450 | "language_info": { 451 | "codemirror_mode": { 452 | "name": "ipython", 453 | "version": 3 454 | }, 455 | "file_extension": ".py", 456 | "mimetype": "text/x-python", 457 | "name": "python", 458 | "nbconvert_exporter": "python", 459 | "pygments_lexer": "ipython3", 460 | "version": "3.8.12" 461 | } 462 | }, 463 | "nbformat": 4, 464 | "nbformat_minor": 5 465 | } 466 | -------------------------------------------------------------------------------- /Numpy/numpy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
numpy library" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# importing the numpy module\r\n", 17 | "import numpy as np" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "Creating arrays" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 17, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "The array: [1 2 3 4 5]\n", 37 | "Dimension of the array: 1\n", 38 | "Shape of the array: (5,)\n", 39 | "Legth of the array: 5\n", 40 | "\n", 41 | "\n", 42 | "The array: \n", 43 | " [[1 2 3]\n", 44 | " [4 5 6]]\n", 45 | "Dimension of the array: 2\n", 46 | "Shape of the array: (2, 3)\n", 47 | "Legth of the array: 2\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "# 1 dimensional\r\n", 53 | "a = np.array([1,2,3,4,5])\r\n", 54 | "print(\"The array: \", a)\r\n", 55 | "print(\"Dimension of the array: \", a.ndim) \r\n", 56 | "print(\"Shape of the array: \", a.shape)\r\n", 57 | "print(\"Legth of the array: \", len(a))\r\n", 58 | "print(\"\\n\")\r\n", 59 | "\r\n", 60 | "# 2 dimensional\r\n", 61 | "b = np.array(([1,2,3],[4,5,6]))\r\n", 62 | "print(\"The array: \\n\", b)\r\n", 63 | "print(\"Dimension of the array: \", b.ndim) \r\n", 64 | "print(\"Shape of the array: \", b.shape)\r\n", 65 | "print(\"Legth of the array: \", len(b))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Generating Sequence of array " 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 21, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "[0 1 2 3 4 5 6 7 8 9]\n", 85 | "[10 20 30 40]\n", 86 | "\n", 87 | "\n", 88 | "[0. 0.2 0.4 0.6 0.8 1. ]\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "# Evenly spaced\r\n", 94 | "a = np.arange(10)\r\n", 95 | "print(a)\r\n", 96 | "print(np.arange(10,50,10))\r\n", 97 | "print(\"\\n\")\r\n", 98 | "\r\n", 99 | "# By number of points\r\n", 100 | "print(np.linspace(0,1,6)) # dividing 6 partitions from 0 to 1" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Common arrays " 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 30, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "[[1. 1. 1.]\n", 120 | " [1. 1. 1.]\n", 121 | " [1. 1. 1.]]\n", 122 | "\n", 123 | "\n", 124 | "[[0. 0. 0.]\n", 125 | " [0. 0. 0.]\n", 126 | " [0. 0. 0.]]\n", 127 | "\n", 128 | "\n", 129 | "[[1. 0. 0.]\n", 130 | " [0. 1. 0.]\n", 131 | " [0. 0. 
1.]]\n", 132 | "\n", 133 | "\n", 134 | "[[1 0 0]\n", 135 | " [0 2 0]\n", 136 | " [0 0 3]]\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "print(np.ones((3,3)))\r\n", 142 | "print(\"\\n\")\r\n", 143 | "\r\n", 144 | "print(np.zeros((3,3)))\r\n", 145 | "print(\"\\n\")\r\n", 146 | "\r\n", 147 | "print(np.eye(3))\r\n", 148 | "print(\"\\n\")\r\n", 149 | "\r\n", 150 | "print(np.diag(np.array([1,2,3])))" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "Generating random numbers in numpy " 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 37, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "Random numbers generated are: [0.32782768 0.70724533 0.53552084 0.1775902 ]\n" 170 | ] 171 | }, 172 | { 173 | "data": { 174 | "text/plain": [ 175 | "dtype('float64')" 176 | ] 177 | }, 178 | "execution_count": 37, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "r = np.random.rand(4) # generates random floating point numbers between 0 to 1\r\n", 185 | "print(\"Random numbers generated are: \", r)\r\n", 186 | "r.dtype" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "Datatypes in numpy " 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 50, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "name": "stdout", 203 | "output_type": "stream", 204 | "text": [ 205 | "[1. 2. 3. 4. 5.]\n", 206 | "float64\n", 207 | "[1 2 3 4 5]\n", 208 | "int64\n", 209 | "[1.+1.j 2.+5.j]\n", 210 | "complex128\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "a = np.array([1,2,3,4,5], dtype='float64')\r\n", 216 | "print(a)\r\n", 217 | "print(a.dtype)\r\n", 218 | "\r\n", 219 | "b = np.array([1,2,3,4,5], dtype='int64')\r\n", 220 | "print(b)\r\n", 221 | "print(b.dtype)\r\n", 222 | "\r\n", 223 | "com = np.array([1+ 1j, 2+5j])\r\n", 224 | "print(com)\r\n", 225 | "print(com.dtype)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "Indexing and Slicing" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 66, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "[0 1 2 3 4 5 6 7 8 9]\n", 245 | "5\n", 246 | "[0 1 2 3 4]\n", 247 | "9\n", 248 | "[9 8 7 6 5 4 3 2 1 0]\n", 249 | "[2 5 8]\n" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "num = np.arange(10)\r\n", 255 | "print(num)\r\n", 256 | "print(num[5])\r\n", 257 | "print(num[:5])\r\n", 258 | "print(num[-1])\r\n", 259 | "print(num[: : -1]) # for reversing the sequence\r\n", 260 | "print(num[2:9:3])" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 64, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "[[1 0 0 0]\n", 273 | " [0 2 0 0]\n", 274 | " [0 0 3 0]\n", 275 | " [0 0 0 4]]\n", 276 | "3\n" 277 | ] 278 | }, 279 | { 280 | "data": { 281 | "text/plain": [ 282 | "array([[ 1, 0, 0, 0],\n", 283 | " [ 0, 2, 0, 0],\n", 284 | " [ 0, 99, 3, 0],\n", 285 | " [ 0, 0, 0, 4]])" 286 | ] 287 | }, 288 | "execution_count": 64, 289 | "metadata": {}, 290 | "output_type": "execute_result" 291 | } 292 | ], 293 | "source": [ 294 | "# for mutli dimensional ndarrays\r\n", 295 | "mat = np.diag(np.array([1,2,3,4]))\r\n", 296 | "print(mat)\r\n", 297 | "\r\n", 298 | 
"print(mat[2,2])\r\n", 299 | "\r\n", 300 | "mat[2,1] = 99 # updating the matrices\r\n", 301 | "mat" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "* A slicing operation craetes a view on the original array, which is just a way of accessing array data \r\n", 309 | "\r\n", 310 | "* Thus the original array is not copied into the memory it's just a view\r\n", 311 | "\r\n", 312 | "* we can use np.share_memory() to check whether if two arrays share the same memory or not" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 8, 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | "a: [0 1 2 3 4 5 6 7 8 9]\n", 325 | "b: [0 2 4 6 8]\n", 326 | "a: [12 1 2 3 4 5 6 7 8 9]\n", 327 | "b: [12 2 4 6 8]\n" 328 | ] 329 | } 330 | ], 331 | "source": [ 332 | "a = np.arange(10)\r\n", 333 | "print(\"a: \", a)\r\n", 334 | "\r\n", 335 | "b = a[::2] # view\r\n", 336 | "print(\"b: \", b)\r\n", 337 | "\r\n", 338 | "np.may_share_memory(a,b)\r\n", 339 | "b[0] = 12\r\n", 340 | "\r\n", 341 | "print(\"a: \", a)\r\n", 342 | "print(\"b: \", b) # when modifying the view(b) even the original array(a) also has been modified automatically" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 12, 348 | "metadata": {}, 349 | "outputs": [ 350 | { 351 | "name": "stdout", 352 | "output_type": "stream", 353 | "text": [ 354 | "a: [0 1 2 3 4 5 6 7 8 9]\n", 355 | "b: [0 2 4 6 8]\n", 356 | "a: [0 1 2 3 4 5 6 7 8 9]\n", 357 | "b: [12 2 4 6 8]\n" 358 | ] 359 | } 360 | ], 361 | "source": [ 362 | "a = np.arange(10)\r\n", 363 | "print(\"a: \", a)\r\n", 364 | "\r\n", 365 | "b = a[::2].copy() # forcing a copy to avoid data redundancy while doing array manipulations\r\n", 366 | "print(\"b: \", b)\r\n", 367 | "\r\n", 368 | "b[0] = 12\r\n", 369 | "print(\"a: \", a)\r\n", 370 | "print(\"b: \", b) " 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "
Numerical operations on ndarray" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 22, 383 | "metadata": {}, 384 | "outputs": [ 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "a: [0 1 2 3 4 5]\n", 390 | "a+1: [1 2 3 4 5 6]\n", 391 | "2**a: [ 0 1 4 9 16 25]\n", 392 | "Array multiplicaiton: \n", 393 | " [[1. 1. 1.]\n", 394 | " [1. 1. 1.]\n", 395 | " [1. 1. 1.]]\n", 396 | "Matrix multiplication: \n", 397 | " [[3. 3. 3.]\n", 398 | " [3. 3. 3.]\n", 399 | " [3. 3. 3.]]\n" 400 | ] 401 | } 402 | ], 403 | "source": [ 404 | "# Element-wise operations\r\n", 405 | "a = np.array([0,1,2,3,4,5])\r\n", 406 | "print(\"a: \", a)\r\n", 407 | "print(\"a+1: \", a+1)\r\n", 408 | "print(\"2**a:\", a**2)\r\n", 409 | "\r\n", 410 | "# array multiplication\r\n", 411 | "c = np.ones((3,3))\r\n", 412 | "print(\"Array multiplicaiton: \\n\", c*c)\r\n", 413 | "\r\n", 414 | "# matrix muliplication\r\n", 415 | "print(\"Matrix multiplication: \\n\", c.dot(c))" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "
Array shape manipulations" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 42, 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "a:\n", 435 | " [[1 2 3]\n", 436 | " [4 5 6]]\n", 437 | "Flattened a: [1 2 3 4 5 6]\n" 438 | ] 439 | } 440 | ], 441 | "source": [ 442 | "# Flattening\r\n", 443 | "a = np.array([[1,2,3],[4,5,6]])\r\n", 444 | "print(\"a:\\n\", a)\r\n", 445 | "\r\n", 446 | "print(\"Flattened a: \", a.ravel()) # flattens 2-D to 1-D" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 43, 452 | "metadata": {}, 453 | "outputs": [ 454 | { 455 | "name": "stdout", 456 | "output_type": "stream", 457 | "text": [ 458 | "Reshaped a:\n", 459 | " [[1 2]\n", 460 | " [3 4]\n", 461 | " [5 6]]\n" 462 | ] 463 | } 464 | ], 465 | "source": [ 466 | "# Reshaping\r\n", 467 | "print(\"Reshaped a:\\n\", a.reshape(3,2)) # the reverse of flattening: back to 3 rows and 2 columns" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 47, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "name": "stdout", 477 | "output_type": "stream", 478 | "text": [ 479 | "a: [0 1 2 3]\n", 480 | "resized a: \n", 481 | " [0 1 2 3 0 0 0 0]\n" 482 | ] 483 | } 484 | ], 485 | "source": [ 486 | "# Resizing\r\n", 487 | "a = np.arange(4)\r\n", 488 | "print(\"a: \", a)\r\n", 489 | "a.resize((8,)) # grows the array in place; the new slots are filled with zeros\r\n", 490 | "print(\"resized a: \\n\", a)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 53, 496 | "metadata": {}, 497 | "outputs": [ 498 | { 499 | "name": "stdout", 500 | "output_type": "stream", 501 | "text": [ 502 | "a:\n", 503 | " [[ 3 2 1]\n", 504 | " [10 5 6]]\n", 505 | "Sorted a: \n", 506 | " [[ 1 2 3]\n", 507 | " [ 5 6 10]]\n" 508 | ] 509 | } 510 | ], 511 | "source": [ 512 | "# Sorting data\r\n", 513 | "a = np.array([[3,2,1],[10,5,6]])\r\n", 514 | "print(\"a:\\n\", a)\r\n", 515 | "\r\n", 516 | "a.sort() # sorts each row in place, along the last axis\r\n", 517 | "print(\"Sorted a: \\n\",a)" 518 | ] 519 | } 520 | ], 521 | "metadata": { 522 | "interpreter": { 523 | "hash": "0989d4cb382ec003e6ad9ee0079fe5a34620af18f47069c43c62ee5030c1ec77" 524 | }, 525 | "kernelspec": { 526 | "display_name": "Python 3.7.9 64-bit ('myenv': conda)", 527 | "name": "python3" 528 | }, 529 | "language_info": { 530 | "codemirror_mode": { 531 | "name": "ipython", 532 | "version": 3 533 | }, 534 | "file_extension": ".py", 535 | "mimetype": "text/x-python", 536 | "name": "python", 537 | "nbconvert_exporter": "python", 538 | "pygments_lexer": "ipython3", 539 | "version": "3.7.9" 540 | }, 541 | "orig_nbformat": 4 542 | }, 543 | "nbformat": 4, 544 | "nbformat_minor": 2 545 | } -------------------------------------------------------------------------------- /Plotly & Cufflinks/sample.txt: -------------------------------------------------------------------------------- 1 | This is a sample file 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python-Notebooks 2 | 3 | 4 |
5 | 6 | All Jupyter and Colab Notebooks are present here 7 |
8 | 9 | 10 | 11 | 12 | 13 | LINK:- https://mithun162001.github.io/Python-Notebooks/ 14 | 15 | 16 | Completed:- Numpy, Pandas, Matplotlib, Seaborn, Plotly & Cufflinks, Machine Learning 17 | 18 | Up next:- Deep Learning and Neural Networks, Computer Vision, Natural Language Processing 19 | 20 | TO REFER:-
21 | 1. Basic Python 22 | 2. Numpy Operations
23 | 3. Basic Operations in Pandas
24 | 4. Basic Descriptive Statistics Using Pandas
25 | 5. Data Manipulation in Pandas
26 | 6. Merging data using Pandas
27 | 7. Basic matplotlib
28 | 8. Seaborn Library
29 | 9. Streamlit Library
30 | 10. Plotly and Cufflinks
31 | 11. Geographical Plotting
32 | 12. Natural Language Processing
33 | 13. Machine Learning *
34 | 14. Deep Learning and Neural Networks *
35 | -------------------------------------------------------------------------------- /Seaborn/README.md: -------------------------------------------------------------------------------- 1 | # SEABORN 2 | 3 | Using the seaborn library I have covered: 4 | 5 | 1. Distribution Plots 6 | 7 | 2. Categorical Plots 8 | 9 | 3. Matrix Plots 10 | 11 | 4. Grid Plots 12 | 13 | 5. Regression Plots 14 | 15 | 6. Styles and Colors 16 | -------------------------------------------------------------------------------- /Seaborn/heatmap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mithun162001/Python-Notebooks/792c8c2a5606c0002ff13ced296d005e1034254b/Seaborn/heatmap.pdf -------------------------------------------------------------------------------- /Seaborn/sample.txt: -------------------------------------------------------------------------------- 1 | This is just a sample file created 2 | -------------------------------------------------------------------------------- /Streamlit/Certificate app/Procfile: -------------------------------------------------------------------------------- 1 | web: sh setup.sh && streamlit run certificates.py -------------------------------------------------------------------------------- /Streamlit/Certificate app/README.md: -------------------------------------------------------------------------------- 1 | Certificate app using Streamlit 2 | -------------------------------------------------------------------------------- /Streamlit/Certificate app/certificates.py: -------------------------------------------------------------------------------- 1 | # importing libraries 2 | import numpy as np 3 | import pandas as pd 4 | import streamlit as st 5 | import streamlit.components.v1 as components 6 | from PIL import Image 7 | 8 | st.title("My Certificates") 9 | st.subheader("By Mithun G") 10 | st.subheader("All certificates and badges received are displayed here") 11 | st.markdown('
Have completed the courses and received certificates from various platforms
',unsafe_allow_html=True) 12 | text_to_marked = """
Certifications and badges received are:
13 |
    14 |
  • Coursera - Google Crash Course on Python 15 |
  • Coursera - Google Data Analytics 16 |
  • IBM - Machine Learning with Python 17 |
  • LinkedIn Learning - Data Science Essentials Part - 1 18 |
  • LinkedIn Learning - Statistics Foundations(4 parts) 19 |
  • Kaggle - Micro Courses 20 |
""" 21 | st.markdown(text_to_marked,unsafe_allow_html=True) 22 | st.text("---------------------------------------------------------------------------------------") 23 | st.text("Certificate Image:") 24 | st.sidebar.header("Select the Certificate platform to view") 25 | selected = st.sidebar.selectbox('Platforms:', ['None','Coursera','IBM','LinkedIn Learning','Kaggle']) 26 | 27 | if 'None' in selected: 28 | st.markdown("
To view the certificates, select one from the slider
",unsafe_allow_html=True) 29 | 30 | if 'Coursera' in selected: 31 | new_select = st.sidebar.selectbox('Course:',['Google Crash Course on Python','Google Data Analytics']) 32 | if 'Google Crash Course on Python' in new_select: 33 | st.markdown("GitHub Link",unsafe_allow_html=True) 34 | st.markdown("Credentials",unsafe_allow_html=True) 35 | google = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Coursera\\google-image.png") 36 | st.image(google,caption="Crash Course on Python") 37 | if 'Google Data Analytics' in new_select: 38 | st.markdown("GitHub Link",unsafe_allow_html=True) 39 | st.markdown("Credentials",unsafe_allow_html=True) 40 | google_1 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Google Data Analytics\\data analytics image\\0001.jpg") 41 | google_2 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Google Data Analytics\\data analytics image\\0002.jpg") 42 | st.image(google_1,caption="Foundations: Data, Data, Everywhere") 43 | st.image(google_2,caption="Ask Questions to make Data Driven decisions") 44 | 45 | if 'IBM' in selected: 46 | st.markdown("GitHub Link",unsafe_allow_html=True) 47 | st.markdown("Credentials",unsafe_allow_html=True) 48 | ibm = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\IBM ML\\Machine Learning with Python\\ML-image.png") 49 | st.image(ibm,caption="Machine Learning with Python - IBM") 50 | 51 | if 'LinkedIn Learning' in selected: 52 | new_select_2 = st.sidebar.selectbox('Course:',['Data Science Essentials - Part 1','Statistics Foundations']) 53 | if 'Data Science Essentials - Part 1' in new_select_2: 54 | st.markdown("GitHub Link",unsafe_allow_html=True) 55 | st.markdown("Credentials",unsafe_allow_html=True) 56 | ll1 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Linkedin Learning\\Data Science Essential Training Part 1\\essential-image.png") 57 | st.image(ll1,caption="Data Science Essentials - Part 1") 58 | if 'Statistics Foundations' in new_select_2: 59 | st.markdown("GitHub Link",unsafe_allow_html=True) 60 | st.markdown("Credentials",unsafe_allow_html=True) 61 | ll2 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Linkedin Learning\\Statistics Foundations\\1.jpg") 62 | ll3 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Linkedin Learning\\Statistics Foundations\\2.jpg") 63 | ll4 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Linkedin Learning\\Statistics Foundations\\3.jpg") 64 | st.image(ll2,caption="Statistics Foundations:1") 65 | st.image(ll3,caption="Statistics Foundations:2") 66 | st.image(ll4,caption="Statistics Foundations:3") 67 | 68 | if 'Kaggle' in selected: 69 | st.markdown("GitHub Link",unsafe_allow_html=True) 70 | st.markdown("Credentials",unsafe_allow_html=True) 71 | kaggle_1 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Kaggle\\Kaggle Python Certficate.png") 72 | kaggle_2 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Kaggle\\Kaggle Pandas Certficate.png") 73 | kaggle_3 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Kaggle\\Mithun G - Data Visualization.png") 74 | kaggle_4 = Image.open("C:\\Users\\mithun\\OneDrive\\Desktop\\My-Certificates\\Kaggle\\Kaggle - Intro to Machine Learning.png") 75 | 76 | st.image(kaggle_1, caption="Python") 77 | st.image(kaggle_2, caption='Pandas') 78 | st.image(kaggle_3, caption='Data visualization') 79 | st.image(kaggle_4, caption='Intro to Machine Learning') 80 | 81 | 
-------------------------------------------------------------------------------- /Streamlit/Certificate app/setup.sh: -------------------------------------------------------------------------------- 1 | mkdir -p ~/.streamlit/ 2 | echo "\ 3 | [server]\n\ 4 | headless = true\n\ 5 | port = $PORT\n\ 6 | enableCORS = false\n\ 7 | \n\ 8 | " > ~/.streamlit/config.toml -------------------------------------------------------------------------------- /Streamlit/EDAapp.py: -------------------------------------------------------------------------------- 1 | # importing the libraries 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | import plotly.express as px 6 | import streamlit as st 7 | 8 | # Title and Markdown 9 | st.title("AN EXAMPLE EDA APP") 10 | st.markdown('''
This is an example of how to do EDA in a Streamlit app
''',unsafe_allow_html=True) 11 | 12 | # File upload 13 | file_up = st.file_uploader("Upload a file", type='csv') 14 | 15 | # Check whether the file upload was successful; if so, read the file 16 | if file_up is not None: 17 | st.success("File uploaded successfully") 18 | df = pd.read_csv(file_up) 19 | obj = [] 20 | int_float = [] 21 | for i in df.columns: 22 | clas = df[i].dtypes 23 | if clas == 'object': 24 | obj.append(i) 25 | else: 26 | int_float.append(i) 27 | 28 | # Remove null values: fill categorical columns with the mode, numeric columns with the mean 29 | with st.form(key='my_form'): 30 | with st.sidebar: 31 | st.sidebar.header("To remove NULL values, press the button below") 32 | submit_button = st.form_submit_button(label="Remove NULL") 33 | 34 | if submit_button: 35 | for i in df.columns: 36 | clas = df[i].dtypes 37 | if clas == 'object': 38 | df[i].fillna(df[i].mode()[0], inplace = True) 39 | else: 40 | df[i].fillna(df[i].mean(), inplace = True) 41 | 42 | # finding the number of null values in each column 43 | ls = [] 44 | for i in df.columns: 45 | dd = sum(pd.isnull(df[i])) 46 | ls.append(dd) 47 | 48 | # if the number of null values is zero, display a message; otherwise plot the null counts per column 49 | if max(ls) == 0: 50 | st.write("Total no. of NULL values: ", str(max(ls))) 51 | else: 52 | st.write("Bar plot to know the number of NULL values in each column") 53 | st.write("Total number of null values: ", str(max(ls))) 54 | fig = px.bar(x=df.columns, y=ls,labels={'x':"Column Names",'y':"No. of Null values"}) 55 | st.plotly_chart(fig) 56 | 57 | # Frequency Plot 58 | st.sidebar.header("Select variable") 59 | selected = st.sidebar.selectbox('Object variables',obj) 60 | st.write("Bar Plot to know the frequency of each category") 61 | frequency = df[selected].value_counts() 62 | 63 | fig2 = px.bar(frequency, x=frequency.index,y=selected,labels={'x':selected, 'y':'count'}) 64 | st.plotly_chart(fig2) 65 | 66 | # Correlation chart 67 | st.sidebar.header("Select variable") 68 | selected2 = st.sidebar.multiselect("Variables",int_float) 69 | st.write("Scatter plot for correlation") 70 | if len(selected2) == 2: 71 | fig3 = px.scatter(df,x=selected2[0], y=selected2[1]) 72 | st.plotly_chart(fig3) 73 | else: 74 | st.write("Select exactly 2 variables") -------------------------------------------------------------------------------- /Streamlit/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the Python files for Streamlit 2 | -------------------------------------------------------------------------------- /Streamlit/app1.py: -------------------------------------------------------------------------------- 1 | # importing the streamlit library 2 | import streamlit as st 3 | 4 | # to print hello world in streamlit 5 | st.text("Hello World") 6 | 7 | # trying to change the font size and font style using markdown 8 | original_title = '
Hello World
' 9 | st.markdown(original_title, unsafe_allow_html=True) 10 | 11 | original_title = '
Hello World
' 12 | st.markdown(original_title, unsafe_allow_html=True) 13 | 14 | # trying other things in markdown 15 | my_markdown = '''
Hello World
16 |
    17 |
  • Python 18 |
  • Java 19 |
''' 20 | st.markdown(my_markdown, unsafe_allow_html=True) -------------------------------------------------------------------------------- /Streamlit/app2.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | # Creating a title 4 | st.title("This is a title") # displays the title 5 | 6 | # Creating headers 7 | st.header("This is a header") # displays text in header formatting 8 | 9 | # Creating markdown 10 | st.markdown("This is a markdown") 11 | 12 | # Creating a text 13 | st.text("This is a text") 14 | 15 | # Creating a success message 16 | st.success("Success") 17 | 18 | # Creating a warning message 19 | st.warning("Warning") 20 | 21 | # Creating an error message 22 | st.error("This is an error") -------------------------------------------------------------------------------- /Streamlit/app3.py: -------------------------------------------------------------------------------- 1 | # READING AND DISPLAYING CONTENTS OF A FILE 2 | import streamlit as st 3 | import pandas as pd 4 | 5 | st.title("Reading and displaying the contents of the file") 6 | st.text('''pd.read_csv("C:\\Users\\mithun\\Downloads\\death .csv") ''') 7 | df = pd.read_csv("C:\\Users\\mithun\\Downloads\\death .csv") 8 | st.write(df) # this writes arguments into the app 9 | 10 | st.text('''df.describe()''') 11 | st.write(df.describe()) -------------------------------------------------------------------------------- /Streamlit/app4.py: -------------------------------------------------------------------------------- 1 | # CREATING AND DISPLAYING GRAPHS 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | import matplotlib.pyplot as plt 6 | import plotly.express as px 7 | import streamlit as st 8 | 9 | df = pd.read_csv("C:\\Users\\mithun\\Downloads\\breast-cancer_csv.csv") 10 | 11 | plt.figure(figsize=(1,1)) # note: displot creates its own figure, so this one is unused 12 | graph1 = sns.displot(df['Class']) 13 | st.pyplot(graph1.fig) # displot returns a seaborn FacetGrid, so pass its underlying matplotlib figure 14 | 15 | graph2 = px.line(df,x='tumor-size',y='age') 16 | st.plotly_chart(graph2) -------------------------------------------------------------------------------- /Streamlit/app5.py: -------------------------------------------------------------------------------- 1 | # ADDING A LOGO OR IMAGE TO YOUR APP 2 | import streamlit as st 3 | import pandas as pd 4 | from PIL import Image 5 | 6 | st.header("ADDING A LOGO OR IMAGE TO YOUR APP") 7 | 8 | image1 = Image.open("C:\\Users\\mithun\\OneDrive\\Pictures\\1_kgmImYxdhX01Yfk5qzkX-A.jpeg") 9 | image2 = Image.open("C:\\Users\\mithun\\OneDrive\\Pictures\\Saved Pictures\\Logo.png") 10 | st.image(image1, width=500) 11 | st.image(image2, width=500) 12 | 13 | # creating a button for uploading the files 14 | file_up = st.file_uploader("Upload a file", type='csv') # Display a file uploader widget. By default, uploaded files are limited to 200MB 15 | st.write(pd.read_csv(file_up).head()) # read the uploaded CSV before displaying it
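st.file_uploader returns None until the user actually picks a file, so reading it unconditionally, as line 15 above does, fails on the first run. A minimal sketch of the usual guard (a hedged example with illustrative names, not code from the repo):

import pandas as pd
import streamlit as st

file_up = st.file_uploader("Upload a file", type="csv")
if file_up is not None:
    df = pd.read_csv(file_up)  # the UploadedFile object is file-like, so read_csv accepts it directly
    st.write(df.head())
else:
    st.info("Please upload a CSV file to continue")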
-------------------------------------------------------------------------------- /Streamlit/app6.py: -------------------------------------------------------------------------------- 1 | # SUBMIT BUTTON, SELECT BOX AND SLIDERS 2 | 3 | import pandas as pd 4 | import streamlit as st 5 | 6 | # select box/ drop down menu 7 | specialization = st.selectbox("Select the Specialization", ["Data Science","Artificial Intelligence","Software Engineering"]) 8 | st.write("You have chosen the specialization: ", specialization) 9 | 10 | with st.form(key='my_form'): 11 | text_input = st.text_input(label="Enter your name") 12 | submit_button = st.form_submit_button(label='Submit') 13 | 14 | with st.form(key='new_form'): 15 | st.selectbox("Select the algorithm",['Logistic','Linear','SVM'], key=1) 16 | st.multiselect("Select the algorithm",['Logistic','Linear','SVM'], key=2) # widget keys must be unique within the app 17 | st.slider(label="Select the knowledge level", min_value=0,max_value=10, key=3) 18 | submit_button = st.form_submit_button(label='Submit') 19 | st.success("Submitted successfully") 20 | -------------------------------------------------------------------------------- /Streamlit/simple.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
A simple streamlit program
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# importing the streamlit library\n", 17 | "\n", 18 | "import streamlit as st " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stderr", 28 | "output_type": "stream", 29 | "text": [ 30 | "2022-01-14 11:47:16.988 \n", 31 | " \u001b[33m\u001b[1mWarning:\u001b[0m to view this Streamlit app on a browser, run it with the following\n", 32 | " command:\n", 33 | "\n", 34 | " streamlit run C:\\Users\\mithun\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python39\\site-packages\\ipykernel_launcher.py [ARGUMENTS]\n" 35 | ] 36 | }, 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)" 41 | ] 42 | }, 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "# to print hello world in streamlit\n", 50 | "\n", 51 | "st.text(\"Hello World\")\n", 52 | "\n", 53 | "# trying to change the font size and font style using markdown\n", 54 | "original_title = '
Hello World
'\n", 55 | "st.markdown(original_title, unsafe_allow_html=True)\n", 56 | "\n", 57 | "original_title = '
Hello World
'\n", 58 | "st.markdown(original_title, unsafe_allow_html=True)\n" 59 | ] 60 | } 61 | ], 62 | "metadata": { 63 | "interpreter": { 64 | "hash": "13eb0af71620a846e484681143862ad0a4deab6cadfc353264e5c14897c11035" 65 | }, 66 | "kernelspec": { 67 | "display_name": "Python 3.9.9 64-bit", 68 | "language": "python", 69 | "name": "python3" 70 | }, 71 | "language_info": { 72 | "codemirror_mode": { 73 | "name": "ipython", 74 | "version": 3 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python", 80 | "pygments_lexer": "ipython3", 81 | "version": "3.9.9" 82 | }, 83 | "orig_nbformat": 4 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 2 87 | } 88 | -------------------------------------------------------------------------------- /Web Scraping/README.md: -------------------------------------------------------------------------------- 1 | # Web scraping done using BeautifulSoup 2 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-architect --------------------------------------------------------------------------------
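The Web Scraping README above names only BeautifulSoup; here is a minimal sketch of that kind of scraper, assuming the requests library is also available (the URL and the tag choice are illustrative, not from the repo):

import requests
from bs4 import BeautifulSoup

html = requests.get("https://example.com", timeout=10).text  # placeholder URL
soup = BeautifulSoup(html, "html.parser")

# collect the text and target of every link on the page
links = [(a.get_text(strip=True), a.get("href")) for a in soup.find_all("a")]
print(links)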