├── sklearn ├── texts │ ├── 10000.txt │ ├── the-office-us-01.txt │ └── the-office-uk-01.txt ├── Vectorizers.ipynb ├── Polynomial features.ipynb └── Scalers.ipynb ├── README.md ├── problems ├── .gitignore ├── split_point.ipynb ├── Find The Duplicates.ipynb ├── Python Question for Magazino Engineers.ipynb ├── Island Count.ipynb ├── 5. Two Sum.ipynb ├── lcs.ipynb ├── Smallest Substring of All Characters.ipynb ├── single_number.ipynb ├── Bracket Match.ipynb ├── Busiest Time in The Mall.ipynb ├── Root of Number.ipynb ├── Sales Path.ipynb ├── Array of Array Products.ipynb ├── Delta encoding.ipynb ├── tree.ipynb └── Untitled.ipynb ├── image └── similarity │ ├── tom1.jpg │ ├── tom2.jpg │ ├── cc_fake.jpg │ └── cc_real.jpg ├── Pipfile ├── Python ├── find the duplicates.ipynb ├── asterisks.ipynb ├── context_managers.ipynb └── Python.ipynb ├── structures ├── same-letters.ipynb ├── substrings-matches.ipynb ├── langford-sequence.ipynb ├── largest-sum-contiguous-subarray.ipynb ├── string-permutation.ipynb ├── palyndromes.ipynb ├── binary-indexed-tree.ipynb └── binary-search.ipynb ├── NumPy └── argpartition.ipynb ├── Stats ├── PCA.ipynb ├── whiskies.txt └── Variance-covariance.ipynb ├── .gitignore ├── so └── Untitled.ipynb └── keras └── using-sequences.ipynb /sklearn/texts/10000.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # notebooks 2 | I like Jupyter Notebooks 3 | -------------------------------------------------------------------------------- /problems/.gitignore: -------------------------------------------------------------------------------- 1 | # jupygit file "extension" 2 | *-jupygit___.ipynb -------------------------------------------------------------------------------- /image/similarity/tom1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thatcsharpguy/notebooks/master/image/similarity/tom1.jpg -------------------------------------------------------------------------------- /image/similarity/tom2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thatcsharpguy/notebooks/master/image/similarity/tom2.jpg -------------------------------------------------------------------------------- /image/similarity/cc_fake.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thatcsharpguy/notebooks/master/image/similarity/cc_fake.jpg -------------------------------------------------------------------------------- /image/similarity/cc_real.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thatcsharpguy/notebooks/master/image/similarity/cc_real.jpg -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | numpy = "*" 8 | scipy = "*" 9 | scikit-learn = "*" 10 | scikit-image = "*" 11 | tensorflow = "*" 12 | "h5py" = "*" 13 | keras = "*" 14 | matplotlib = "*" 15 | jupyter = "*" 16 | pandas = "*" 17 | opencv-contrib-python = "*" 18 | imutils = "*" 19 | 20 | [dev-packages] 21 | jupygit = "*" 22 | 23 | [requires] 24 | python_version = "3.6" 25 | -------------------------------------------------------------------------------- /Python/find the duplicates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.6.5" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 2 32 | } 33 | -------------------------------------------------------------------------------- /structures/same-letters.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "{'act': ['cat', 'tac', 'act'], 'opt': ['pot', 'top'], 'emow': ['meow']}\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "arr = [ \"cat\", \"tac\" , \"pot\", \"top\", \"meow\", \"act\" ]\n", 18 | "\n", 19 | "\n", 20 | "table = {}\n", 21 | "for a in arr:\n", 22 | " r = ''.join(sorted(a))\n", 23 | " l = table.get(r, list())\n", 24 | " l.append(a)\n", 25 | " table[r] = l\n", 26 | "print(table)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 3", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.6.4" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 2 58 | } 59 | -------------------------------------------------------------------------------- /structures/substrings-matches.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 36, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Received catodocat\n", 13 | "cat\n", 14 | "Received odocat\n", 15 | "odo\n", 16 | "Received cat\n", 17 | "cat\n", 18 | "Received \n" 19 | ] 20 | }, 21 | { 22 | "data": { 23 | "text/plain": [ 24 | "True" 25 | ] 26 | }, 27 | "execution_count": 36, 28 | "metadata": {}, 29 | "output_type": "execute_result" 30 | } 31 | ], 32 | "source": [ 33 | "valid = set(['cat', 'dog', 'bag','odo'])\n", 34 | "\n", 35 | "s = 'catodocat'\n", 36 | "\n", 37 | "def match(string):\n", 38 | " print(\"Received\", string)\n", 39 | " if not string:\n", 40 | " return True\n", 41 | " \n", 42 | " pointer = 1\n", 43 | " valid_ = False\n", 44 | " while pointer < len(string)+1:\n", 45 | " if string[:pointer] in valid:\n", 46 | " print(string[:pointer])\n", 47 | " valid_ = match(string[pointer:])\n", 48 | " pointer += 1\n", 49 | " if string in valid:\n", 50 | " return True\n", 51 | " if valid_:\n", 52 | " return True\n", 53 | " return False\n", 54 | "\n", 55 | "match(s)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [] 64 | } 65 | ], 66 | "metadata": { 67 | "kernelspec": { 68 | "display_name": "Python 3", 69 | "language": "python", 70 | "name": "python3" 71 | }, 72 | "language_info": { 73 | "codemirror_mode": { 74 | "name": "ipython", 75 | "version": 3 76 | }, 77 | "file_extension": ".py", 78 | "mimetype": "text/x-python", 79 | "name": "python", 80 | "nbconvert_exporter": "python", 81 | "pygments_lexer": "ipython3", 82 | "version": "3.6.4" 83 | } 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 2 87 | } 88 | -------------------------------------------------------------------------------- /Python/asterisks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 44, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "def positional(*args,**kwargs):\n", 10 | " print(args)\n", 11 | " print(kwargs)\n", 12 | " print()" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 45, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "('arg1', 'arg2')\n", 25 | "{'d1': 'b', 'd2': 'c'}\n", 26 | "\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "positional('arg1', 'arg2', d1='b', d2='c')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 46, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "d = {'c':'b'}" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 47, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "({'c': 'b'},)\n", 53 | "{}\n", 54 | "\n", 55 | "('c',)\n", 56 | "{}\n", 57 | "\n", 58 | "()\n", 59 | "{'c': 'b'}\n", 60 | "\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "positional(d)\n", 66 | "positional(*d)\n", 67 | "positional(**d)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [] 76 | } 77 | ], 78 | "metadata": { 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | }, 84 | "language_info": { 85 | "codemirror_mode": { 86 | "name": "ipython", 87 | "version": 3 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython3", 94 | "version": "3.6.5" 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 2 99 | } 100 | -------------------------------------------------------------------------------- /problems/split_point.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "[1, 3, 3]\n", 13 | "[3, 3, -3]\n", 14 | "[2, 6]\n", 15 | "6\n", 16 | "[4, 4, 4, 5, 5, 5]\n", 17 | "[5, 5, 5, 5, 1, 1]\n", 18 | "[1, 1, 1, 4, 4]\n", 19 | "4\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "min_value = -1_000_000_001\n", 25 | "\n", 26 | "def find_split(A):\n", 27 | " ltr = []\n", 28 | " rtl = []\n", 29 | " \n", 30 | " max_value = min_value\n", 31 | " for i in A:\n", 32 | " max_value = max(max_value, i)\n", 33 | " ltr.append(max_value)\n", 34 | " \n", 35 | " max_value = min_value\n", 36 | " for i in A[::-1]:\n", 37 | " max_value = max(max_value, i)\n", 38 | " rtl.append(max_value)\n", 39 | " rtl = rtl[::-1]\n", 40 | " \n", 41 | " print(ltr); print(rtl)\n", 42 | " \n", 43 | " values = []\n", 44 | " max_diff = min_value\n", 45 | " for s in range(0, len(A)-1):\n", 46 | " max_diff = max(abs(ltr[s] - rtl[s+1]), max_diff)\n", 47 | " values.append(abs(ltr[s] - rtl[s+1]))\n", 48 | " print(values)\n", 49 | " \n", 50 | " return max_diff\n", 51 | "\n", 52 | "print(find_split([1, 3, -3]))\n", 53 | "print(find_split([4, 3, 2, 5, 1, 1]))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 3", 67 | "language": "python", 68 | "name": "python3" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 3 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython3", 80 | "version": "3.6.4" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 2 85 | } 86 | -------------------------------------------------------------------------------- /NumPy/argpartition.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 67, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "[ 1 14 7 12 9 3 13 8 6 2 11 5 4 10]\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import numpy as np\n", 18 | "array = np.array([1,14,7,12,9,3,13,8,6,2,11,5,4,10])\n", 19 | "print(array)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 69, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "Values: 1 2 3 4 5 6 7 8 9 10 11 12 13 14\n", 32 | "Index: 0 9 5 12 11 8 2 7 4 13 10 3 6 1\n", 33 | "k: 0 1 2 3 4 5 6 7 8 9 10 11 12 13\n", 34 | "\n", 35 | "Values: 1 2 7 12 9 3 13 8 6 14 11 5 4 10\n", 36 | "Index: 0 9 2 3 4 5 6 7 8 1 10 11 12 13\n", 37 | "k: 0 1 2 3 4 5 6 7 8 9 10 11 12 13\n", 38 | "\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "def display_array(indexes, array):\n", 44 | " print(\"Values:\", ''.join([\"%4d\" % d for d in array[indexes]]))\n", 45 | " print(\"Index: \", ''.join([\"%4d\" % d for d in indexes]))\n", 46 | " print(\"k: \", ''.join([\"%4d\" % d for d in range(len(indexes))]))\n", 47 | " print()\n", 48 | " \n", 49 | "display_array(np.argsort(array), array)\n", 50 | "display_array(np.argpartition(array, 1), array)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [] 59 | } 60 | ], 61 | "metadata": { 62 | "kernelspec": { 63 | "display_name": "Python 3", 64 | "language": "python", 65 | "name": "python3" 66 | }, 67 | "language_info": { 68 | "codemirror_mode": { 69 | "name": "ipython", 70 | "version": 3 71 | }, 72 | "file_extension": ".py", 73 | "mimetype": "text/x-python", 74 | "name": "python", 75 | "nbconvert_exporter": "python", 76 | "pygments_lexer": "ipython3", 77 | "version": "3.6.5" 78 | } 79 | }, 80 | "nbformat": 4, 81 | "nbformat_minor": 2 82 | } 83 | -------------------------------------------------------------------------------- /problems/Find The Duplicates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Given two sorted arrays arr1 and arr2 of passport numbers, implement a function findDuplicates that returns an array of all passport numbers that are both in arr1 and arr2. Note that the output array should be sorted in an ascending order.\n", 8 | "\n", 9 | "Let N and M be the lengths of arr1 and arr2, respectively. Solve for two cases and analyze the time & space complexities of your solutions: M ≈ N - the array lengths are approximately the same M ≫ N - arr2 is much bigger than arr1." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "arr1 = [1, 2, 3, 5, 6, 7]\n", 19 | "arr2 = [3, 6, 7, 8, 20]" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 8, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "26\n" 32 | ] 33 | }, 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "[3, 6, 7]" 38 | ] 39 | }, 40 | "execution_count": 8, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "\n", 47 | "def find_duplicates(arr1, arr2):\n", 48 | " iteration = 0\n", 49 | " ix = 0\n", 50 | " dup = []\n", 51 | " for num1 in arr1: # A\n", 52 | " for x in range(ix, len(arr2)):\n", 53 | " num2 = arr2[x]\n", 54 | " if num1 == num2:\n", 55 | " ix = x+1\n", 56 | " dup.append(num1)\n", 57 | " iteration += 1\n", 58 | " print(iteration)\n", 59 | " return dup\n", 60 | "\n", 61 | "find_duplicates(arr1, arr2)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.6.5" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 2 93 | } 94 | -------------------------------------------------------------------------------- /Python/context_managers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 33, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import contextlib\n", 10 | "\n", 11 | "class Mgr(contextlib.ContextDecorator):\n", 12 | " \n", 13 | " def __enter__(self):\n", 14 | " print(\"A\")\n", 15 | " \n", 16 | " def __exit__(*args):\n", 17 | " print(\"Bye\", args)\n", 18 | " \n", 19 | " def __str__(self):\n", 20 | " return \"Manager\"" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 34, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "A\n", 33 | "None\n", 34 | "Bye (<__main__.Mgr object at 0x103eac898>, None, None, None)\n", 35 | "A\n", 36 | "Manager\n", 37 | "Bye (<__main__.Mgr object at 0x103eac5f8>, None, None, None)\n", 38 | "A\n", 39 | "Manager\n", 40 | "Bye (<__main__.Mgr object at 0x103eac5f8>, None, None, None)\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "man = Mgr()\n", 46 | "\n", 47 | "with Mgr() as a:\n", 48 | " print(a)\n", 49 | " \n", 50 | "with man:\n", 51 | " print(man)\n", 52 | " \n", 53 | "with man:\n", 54 | " print(man)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 25, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "A\n", 67 | "X\n", 68 | "Bye None None None\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "@Mgr()\n", 74 | "def a():\n", 75 | " print(\"X\")\n", 76 | " \n", 77 | "a()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "Python 3", 91 | "language": "python", 92 | "name": "python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.6.5" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 2 109 | } 110 | -------------------------------------------------------------------------------- /problems/Python Question for Magazino Engineers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "[]\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "class Node(object):\n", 18 | " \n", 19 | " def __init__(self, name, children=None):\n", 20 | " self.name = name\n", 21 | " self.children = children or []\n", 22 | "\n", 23 | " def __repr__(self):\n", 24 | " return ''.format(self.name)\n", 25 | " \n", 26 | " def extract_nodes_containing_string(self, needle):\n", 27 | " result = []\n", 28 | " nodes = [self]\n", 29 | " while nodes:\n", 30 | " current = nodes.pop()\n", 31 | " if self.__in_name(current.name, needle):\n", 32 | " result.append(current)\n", 33 | " nodes.extend(current.children)\n", 34 | " return result\n", 35 | " \n", 36 | " def __in_name(self, name, needle):\n", 37 | " aa = name.lower()\n", 38 | " bb = needle.lower()\n", 39 | " return bb in aa\n", 40 | " \n", 41 | "# Example:\n", 42 | " \n", 43 | "def create_tree():\n", 44 | " return Node('root', [\n", 45 | " Node(\"MaGaZiNo\", [\n", 46 | " Node(\"I\"),\n", 47 | " Node(\"Love\"),\n", 48 | " Node(\"magazino\")\n", 49 | " ]),\n", 50 | " Node(\"Hello\", [\n", 51 | " Node(\"Hello\", [\n", 52 | " Node(\"Hello\", [\n", 53 | " Node(\"World\")\n", 54 | " ])\n", 55 | " ])\n", 56 | " ])\n", 57 | " ])\n", 58 | "\n", 59 | "\n", 60 | "root = create_tree()\n", 61 | "print(root.extract_nodes_containing_string('root'))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.6.4" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 2 93 | } 94 | -------------------------------------------------------------------------------- /structures/langford-sequence.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 78, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "[1, 3, 1, 6, 7, 3, 8, 5, 2, 4, 6, 2, 7, 5, 4, 8]\n", 13 | "(1, 3, 6, 7, 8, 5, 2, 4)\n" 14 | ] 15 | }, 16 | { 17 | "data": { 18 | "text/plain": [ 19 | "True" 20 | ] 21 | }, 22 | "execution_count": 78, 23 | "metadata": {}, 24 | "output_type": "execute_result" 25 | } 26 | ], 27 | "source": [ 28 | "from itertools import permutations as permutations\n", 29 | "\n", 30 | "n = 8\n", 31 | "\n", 32 | "def cleanArray(n):\n", 33 | " return [False for i in range(n * 2)], [0 for i in range(n*2)]\n", 34 | "\n", 35 | "def langford(n): \n", 36 | " count = 0\n", 37 | " used, arr = cleanArray(n)\n", 38 | " for p in permutations([i+1 for i in range(n)]):\n", 39 | " used,arr = cleanArray(n)\n", 40 | " count = 0\n", 41 | " for i in range(n):\n", 42 | " n_ = p[i]\n", 43 | " pos=0\n", 44 | " while used[pos] and pos <= n*2:\n", 45 | " pos+=1\n", 46 | " if pos + n_ + 1 >= n * 2:\n", 47 | " used,arr = cleanArray(n)\n", 48 | " count = 0\n", 49 | " break\n", 50 | " if used[pos] or used[pos + n_ + 1]:\n", 51 | " used,arr = cleanArray(n)\n", 52 | " count = 0\n", 53 | " break\n", 54 | "\n", 55 | " count += 1\n", 56 | " used[pos] = True\n", 57 | " used[pos + n_ + 1] = True\n", 58 | " arr[pos] = n_\n", 59 | " arr[pos + n_ + 1] = n_\n", 60 | " if count == n:\n", 61 | " print(arr)\n", 62 | " print(p)\n", 63 | " return True\n", 64 | " return False\n", 65 | "langford(n)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.6.4" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /problems/Island Count.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Given a 2D array binaryMatrix of 0s and 1s, implement a function getNumberOfIslands that returns the number of islands of 1s in binaryMatrix.\n", 8 | "\n", 9 | "An island is defined as a group of adjacent values that are all 1s. A cell in binaryMatrix is considered adjacent to another cell if they are next to each either on the same row or column. Note that two values of 1 are not part of the same island if they’re sharing only a mutual “corner” (i.e. they are diagonally neighbors).\n", 10 | "\n", 11 | "Explain and code the most efficient solution possible and analyze its time and space complexities." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "6\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "def get_number_of_islands(binaryMatrix):\n", 29 | " islands = 0\n", 30 | " rows = len(binaryMatrix)\n", 31 | " cols = len(binaryMatrix[0])\n", 32 | " for i in range(rows):\n", 33 | " for j in range(cols):\n", 34 | " if binaryMatrix[i][j] == 1:\n", 35 | " islands += 1\n", 36 | " queue = list()\n", 37 | " queue.append((i, j))\n", 38 | " while queue:\n", 39 | " visit = queue.pop()\n", 40 | " x = visit[0]\n", 41 | " y = visit[1]\n", 42 | " if 0 <= x < rows and 0 <= y < cols and binaryMatrix[x][y] == 1: \n", 43 | " binaryMatrix[x][y] = 0\n", 44 | " queue.append((x-1, y))\n", 45 | " queue.append((x, y-1))\n", 46 | " queue.append((x+1, y))\n", 47 | " queue.append((x, y+1))\n", 48 | " \n", 49 | " return islands\n", 50 | " \n", 51 | " \n", 52 | "binaryMatrix = [ [0, 1, 0, 1, 0],\n", 53 | " [0, 0, 1, 1, 1],\n", 54 | " [1, 0, 0, 1, 0],\n", 55 | " [0, 1, 1, 0, 0],\n", 56 | " [1, 0, 1, 0, 1] ]\n", 57 | "\n", 58 | "print(get_number_of_islands(binaryMatrix))" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [] 67 | } 68 | ], 69 | "metadata": { 70 | "kernelspec": { 71 | "display_name": "Python 3", 72 | "language": "python", 73 | "name": "python3" 74 | }, 75 | "language_info": { 76 | "codemirror_mode": { 77 | "name": "ipython", 78 | "version": 3 79 | }, 80 | "file_extension": ".py", 81 | "mimetype": "text/x-python", 82 | "name": "python", 83 | "nbconvert_exporter": "python", 84 | "pygments_lexer": "ipython3", 85 | "version": "3.6.4" 86 | } 87 | }, 88 | "nbformat": 4, 89 | "nbformat_minor": 2 90 | } 91 | -------------------------------------------------------------------------------- /problems/5. Two Sum.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Write a function that, when passed a list and a target sum, returns two distinct zero-based indices of any two of the numbers, whose sum is equal to the target sum. If there are no two numbers, the function should return None.\n", 8 | "\n", 9 | "For example, find_two_sum([3, 1, 5, 7, 5, 9], 10) should return a single tuple containing any of the following pairs of indices:\n", 10 | "\n", 11 | " - 0 and 3 (or 3 and 0) as 3 + 7 = 10\n", 12 | " - 1 and 5 (or 5 and 1) as 1 + 9 = 10\n", 13 | " - 2 and 4 (or 4 and 2) as 5 + 5 = 10" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "$O(N)$: Traverse the whole array keeping track of the needed numbers to get to our target value\n", 21 | "\n", 22 | "\n", 23 | "| Seen | We need |\n", 24 | "|----------|--------------------|\n", 25 | "| 3 | 10 - 3 = ***7*:0** |\n", 26 | "| 1 | 10 - 1 = ***9*:1** |\n", 27 | "| 5 | 10 - 5 = ***5*:2** |\n", 28 | "| 7 | 10 - 7 = ***3*:3** |\n", 29 | "| 5 | 10 - 5 = ***5*:4** |\n", 30 | "| 9 | 10 - 9 = ***1*:5** |\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "(0, 3)\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "class TwoSum:\n", 48 | "\n", 49 | " @staticmethod\n", 50 | " def find_two_sum(numbers, target_sum):\n", 51 | " \"\"\"\n", 52 | " :param numbers: (list of ints) The list of numbers.\n", 53 | " :param target_sum: (int) The required target sum.\n", 54 | " :returns: (a tuple of 2 ints) The indices of the two elements whose sum is equal to target_sum\n", 55 | " \"\"\"\n", 56 | " nv = {}\n", 57 | " for i in range(len(numbers)):\n", 58 | " number = numbers[i]\n", 59 | " to_target = target_sum - number\n", 60 | " if number in nv:\n", 61 | " return(nv[number], i)\n", 62 | " nv[to_target] = i\n", 63 | " return None \n", 64 | "\n", 65 | "print(TwoSum.find_two_sum([3, 1, 5, 7, 5, 9], 10))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.6.4" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /structures/largest-sum-contiguous-subarray.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 28, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "7 [4, -1, -2, 1, 5] [2, 6]\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "arr = [-2, -3, 4, -1, -2, 1, 5, -3]\n", 18 | "\n", 19 | "curr = 0\n", 20 | "global_max = 0\n", 21 | "indexes = [0,0]\n", 22 | "\n", 23 | "\n", 24 | "\n", 25 | "for i in range(0,len(arr)):\n", 26 | " m = 0\n", 27 | " for j in range(i, len(arr)):\n", 28 | " m += arr[j]\n", 29 | " if global_max < m:\n", 30 | " indexes = [i, j]\n", 31 | " global_max = m\n", 32 | "print(global_max, arr[indexes[0]:indexes[1]+1], indexes)\n", 33 | " " 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 64, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Position 1 current max -3 global max 0 current -3\n", 46 | "Position 2 current max 4 global max 0 current 4\n", 47 | "Position 3 current max 3 global max 4 current -1\n", 48 | "Position 4 current max 1 global max 4 current -2\n", 49 | "Position 5 current max 2 global max 4 current 1\n", 50 | "Position 6 current max 7 global max 4 current 5\n", 51 | "Position 7 current max 4 global max 7 current -3\n", 52 | "7 [4, -1, -2, 1, 5] [2, 6]\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "arr = [-2, -3, 4, -1, -2, 1, 5, -3]\n", 58 | "\n", 59 | "current_max = 0\n", 60 | "global_max = 0\n", 61 | "indexes = [0,0]\n", 62 | "\n", 63 | "current_max = arr[0]\n", 64 | "for i in range(1,len(arr)):\n", 65 | " current = arr[i]\n", 66 | " current_max = max(current, current_max + current)\n", 67 | " print(\"Position\", i, \"current max\", current_max, \"global max\", global_max, \"current\", current)\n", 68 | " if current_max == current:\n", 69 | " indexes[0] = i\n", 70 | " if global_max < current_max:\n", 71 | " global_max = current_max\n", 72 | " indexes[1] = i\n", 73 | "print(global_max, arr[indexes[0]:indexes[1]+1], indexes)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [] 82 | } 83 | ], 84 | "metadata": { 85 | "kernelspec": { 86 | "display_name": "Python 3", 87 | "language": "python", 88 | "name": "python3" 89 | }, 90 | "language_info": { 91 | "codemirror_mode": { 92 | "name": "ipython", 93 | "version": 3 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python", 99 | "pygments_lexer": "ipython3", 100 | "version": "3.6.4" 101 | } 102 | }, 103 | "nbformat": 4, 104 | "nbformat_minor": 2 105 | } 106 | -------------------------------------------------------------------------------- /sklearn/Vectorizers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 10 | "from nltk.tokenize import RegexpTokenizer" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 15, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "tokenizer = RegexpTokenizer(r'\\w+')\n", 20 | "\n", 21 | "def transform(string):\n", 22 | " tokens_n = []\n", 23 | " tokens = tokenizer.tokenize(string)\n", 24 | " for t in tokens:\n", 25 | " tokens_n.append(t.lower())\n", 26 | " return tokens_n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 30, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "the_office = []\n", 36 | "for file in [\"texts/the-office-us-01.txt\", \"texts/the-office-uk-01.txt\"]:\n", 37 | " with open(file, \"r\") as r:\n", 38 | " the_office.append(r.read())" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 36, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "tfidf_vectorizer = TfidfVectorizer(tokenizer=transform)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 37, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "the_office_matrix = tfidf_vectorizer.fit_transform(the_office)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 38, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "(2, 1197)\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "print(the_office_matrix.shape)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 39, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "from sklearn.metrics.pairwise import cosine_similarity" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 40, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "array([[1. , 0.94438663],\n", 94 | " [0.94438663, 1. ]])" 95 | ] 96 | }, 97 | "execution_count": 40, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "cosine_similarity(the_office_matrix)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.6.5" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 2 135 | } 136 | -------------------------------------------------------------------------------- /problems/lcs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 36, 13 | "metadata": { 14 | "scrolled": false 15 | }, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | " A b c d e f g\n", 22 | " A 0 0 0 0 0 0 0 0\n", 23 | " b 0 1 0 0 0 0 0 0\n", 24 | " c 0 0 2 0 0 0 0 0\n", 25 | " d 0 0 0 3 0 0 0 0\n", 26 | " S 0 0 0 0 4 0 0 0\n", 27 | " e 0 0 0 0 0 0 0 0\n", 28 | " f 0 0 0 0 0 1 0 0\n", 29 | " g 0 0 0 0 0 0 2 0\n", 30 | " 0 0 0 0 0 0 0 3\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "import numpy as np\n", 36 | "\n", 37 | "x, y = \"AbcdSefg\", \"Abcdefg\"\n", 38 | "\n", 39 | "max_lcs = 0\n", 40 | "m = 0\n", 41 | "lcs_table = np.zeros((len(x)+1, len(y)+1))\n", 42 | "for i in range(len(x)+1):\n", 43 | " for j in range(len(y)+1):\n", 44 | " if i == 0 or j == 0:\n", 45 | " lcs_table[i,j] = 0\n", 46 | " elif x[i-1] == y[j-1]:\n", 47 | " lcs_table[i,j] = lcs_table[i-1,j-1]+1\n", 48 | " else:\n", 49 | " lcs_table[i,j] = 0\n", 50 | " \n", 51 | "def p(x, y, t):\n", 52 | " print(\" \"* 3, end='')\n", 53 | " for c in y:\n", 54 | " print(\"%3c\" % c, end='')\n", 55 | " print()\n", 56 | " for i in range(len(x)+1):\n", 57 | " print(\"%3c\" %( x[i] if i < len(x) else ' '), end='')\n", 58 | " for j in range(len(y)+1):\n", 59 | " print(\"%3d\" % t[i,j], end='')\n", 60 | " print()\n", 61 | " \n", 62 | "p(x, y, lcs_table)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 1, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "ename": "SyntaxError", 72 | "evalue": "invalid character in identifier (, line 1)", 73 | "output_type": "error", 74 | "traceback": [ 75 | "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m wo´fninowfoiwoife\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid character in identifier\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "wo´fninowfoiwoife" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [] 89 | } 90 | ], 91 | "metadata": { 92 | "kernelspec": { 93 | "display_name": "Python 3", 94 | "language": "python", 95 | "name": "python3" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | "version": 3 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython3", 107 | "version": "3.6.5" 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 2 112 | } 113 | -------------------------------------------------------------------------------- /structures/string-permutation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 44, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "0 0 4\n", 13 | "1 1 4\n", 14 | "2 2 4\n", 15 | "3 3 4\n", 16 | "ABCD\n", 17 | "3 2 4\n", 18 | "3 3 4\n", 19 | "ABDC\n", 20 | "2 1 4\n", 21 | "2 2 4\n", 22 | "3 3 4\n", 23 | "ACBD\n", 24 | "3 2 4\n", 25 | "3 3 4\n", 26 | "ACDB\n", 27 | "3 1 4\n", 28 | "2 2 4\n", 29 | "3 3 4\n", 30 | "ADCB\n", 31 | "3 2 4\n", 32 | "3 3 4\n", 33 | "ADBC\n", 34 | "1 0 4\n", 35 | "1 1 4\n", 36 | "2 2 4\n", 37 | "3 3 4\n", 38 | "BACD\n", 39 | "3 2 4\n", 40 | "3 3 4\n", 41 | "BADC\n", 42 | "2 1 4\n", 43 | "2 2 4\n", 44 | "3 3 4\n", 45 | "BCAD\n", 46 | "3 2 4\n", 47 | "3 3 4\n", 48 | "BCDA\n", 49 | "3 1 4\n", 50 | "2 2 4\n", 51 | "3 3 4\n", 52 | "BDCA\n", 53 | "3 2 4\n", 54 | "3 3 4\n", 55 | "BDAC\n", 56 | "2 0 4\n", 57 | "1 1 4\n", 58 | "2 2 4\n", 59 | "3 3 4\n", 60 | "CBAD\n", 61 | "3 2 4\n", 62 | "3 3 4\n", 63 | "CBDA\n", 64 | "2 1 4\n", 65 | "2 2 4\n", 66 | "3 3 4\n", 67 | "CABD\n", 68 | "3 2 4\n", 69 | "3 3 4\n", 70 | "CADB\n", 71 | "3 1 4\n", 72 | "2 2 4\n", 73 | "3 3 4\n", 74 | "CDAB\n", 75 | "3 2 4\n", 76 | "3 3 4\n", 77 | "CDBA\n", 78 | "3 0 4\n", 79 | "1 1 4\n", 80 | "2 2 4\n", 81 | "3 3 4\n", 82 | "DBCA\n", 83 | "3 2 4\n", 84 | "3 3 4\n", 85 | "DBAC\n", 86 | "2 1 4\n", 87 | "2 2 4\n", 88 | "3 3 4\n", 89 | "DCBA\n", 90 | "3 2 4\n", 91 | "3 3 4\n", 92 | "DCAB\n", 93 | "3 1 4\n", 94 | "2 2 4\n", 95 | "3 3 4\n", 96 | "DACB\n", 97 | "3 2 4\n", 98 | "3 3 4\n", 99 | "DABC\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "def sp(a, l, r):\n", 105 | " if l==r:\n", 106 | " print(''.join(a))\n", 107 | " else:\n", 108 | " for i in range(l,r):\n", 109 | " print(i, l, r)\n", 110 | " a[l], a[i] = a[i], a[l]\n", 111 | " sp(a, l+1, r)\n", 112 | " a[l], a[i] = a[i], a[l] # backtrack\n", 113 | "sp(list(\"ABCD\"), 0, len(\"ABCD\"))" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.6.4" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 2 152 | } 153 | -------------------------------------------------------------------------------- /structures/palyndromes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 42, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "9 geekskeeg\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "string = \"forgeeksskeegfor\"\n", 18 | "\n", 19 | "def isP(string):\n", 20 | " i = 0\n", 21 | " j = len(string)-1\n", 22 | " while(i<=j):\n", 23 | " if string[i] != string[j]:\n", 24 | " return False\n", 25 | " i+=1\n", 26 | " j-=1\n", 27 | " return True\n", 28 | "\n", 29 | "\n", 30 | "longest = 0\n", 31 | "indexes = (0,0)\n", 32 | "\n", 33 | "for i in range(len(string)+1):\n", 34 | " for j in range(2, len(string)+1):\n", 35 | " if i + j > len(string): continue\n", 36 | " if isP(string[i:j+i]):\n", 37 | " if longest < j:\n", 38 | " longest = j\n", 39 | " indexes = (i, j + i)\n", 40 | "print(longest, string[indexes[0]: indexes[1]])" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 46, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "9 geekskeeg\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "string = \"forgeekskeegfor\"\n", 58 | "\n", 59 | "longest = 0\n", 60 | "indexes = (0,0)\n", 61 | "\n", 62 | "# even:\n", 63 | "for i in range(len(string)):\n", 64 | " j = i+1\n", 65 | " \n", 66 | " counter = 0\n", 67 | " while (i-counter) >= 0 and (j + counter) < len(string):\n", 68 | " if string[i-counter] == string[j+counter]:\n", 69 | " if longest < counter * 2 + 2:\n", 70 | " longest = counter * 2 + 2\n", 71 | " indexes = (i-counter, j+counter+1)\n", 72 | " counter += 1\n", 73 | " else:\n", 74 | " break\n", 75 | "\n", 76 | "# odd:\n", 77 | "for i in range(len(string)):\n", 78 | " j = i\n", 79 | " \n", 80 | " counter = 0\n", 81 | " while (i-counter) >= 0 and (j + counter) < len(string):\n", 82 | " if string[i-counter] == string[j+counter]:\n", 83 | " if longest < counter * 2 + 1:\n", 84 | " longest = counter * 2 + 1\n", 85 | " indexes = (i-counter, j+counter+1)\n", 86 | " counter += 1\n", 87 | " else:\n", 88 | " break\n", 89 | "\n", 90 | "\n", 91 | "print(longest, string[indexes[0]: indexes[1]])\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Python 3", 105 | "language": "python", 106 | "name": "python3" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.6.4" 119 | } 120 | }, 121 | "nbformat": 4, 122 | "nbformat_minor": 2 123 | } 124 | -------------------------------------------------------------------------------- /problems/Smallest Substring of All Characters.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Smallest Substring of All Characters\n", 8 | "Given an array of unique characters arr and a string str, Implement a function getShortestUniqueSubstring that finds the smallest substring of str containing all the characters in arr. Return \"\" (empty string) if such a substring doesn’t exist.\n", 9 | "\n", 10 | "Come up with an asymptotically optimal solution and analyze the time and space complexities.\n", 11 | "\n", 12 | "Example:\n", 13 | "\n", 14 | "input: arr = ['x','y','z'], str = \"xyyzyzyx\"\n", 15 | "\n", 16 | "output: \"zyx\"\n", 17 | "Constraints:\n", 18 | "\n", 19 | "[time limit] 5000ms\n", 20 | "\n", 21 | "[input] array.character arr\n", 22 | "\n", 23 | "1 ≤ arr.length ≤ 30\n", 24 | "[input] string str\n", 25 | "\n", 26 | "1 ≤ str.length ≤ 500\n", 27 | "[output] string" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 29, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "'AXCSB'" 39 | ] 40 | }, 41 | "execution_count": 29, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "def get_shortest_unique_substring(arr, string):\n", 48 | " i, j = 0, 0\n", 49 | " min_i, min_j = 0, len(string) + 1\n", 50 | " counter = 0\n", 51 | " characters = { k:0 for k in arr }\n", 52 | " while i < len(string) and j < len(string):\n", 53 | " curr = string[j]\n", 54 | " if curr in characters:\n", 55 | " characters[curr] += 1\n", 56 | " if characters[curr] == 1:\n", 57 | " counter += 1\n", 58 | " if counter == len(arr):\n", 59 | " while counter == len(arr):\n", 60 | " old = min_j - min_i\n", 61 | " new = j - i\n", 62 | " if new < old:\n", 63 | " min_i, min_j = i, j\n", 64 | " curr = string[i]\n", 65 | " if curr in characters:\n", 66 | " characters[curr] -= 1\n", 67 | " if characters[curr] == 0:\n", 68 | " counter -= 1\n", 69 | " i+=1\n", 70 | " j += 1\n", 71 | " if min_j == len(string) + 1:\n", 72 | " return \"\"\n", 73 | " return string[min_i:min_j+1]\n", 74 | "\n", 75 | "\n", 76 | "arr = [\"A\", \"B\"]\n", 77 | "\n", 78 | "string = \"AXCSB\"\n", 79 | "\n", 80 | "get_shortest_unique_substring(arr, string)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [] 89 | } 90 | ], 91 | "metadata": { 92 | "kernelspec": { 93 | "display_name": "Python 3", 94 | "language": "python", 95 | "name": "python3" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | "version": 3 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython3", 107 | "version": "3.6.4" 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 2 112 | } 113 | -------------------------------------------------------------------------------- /problems/single_number.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "In a party everyone is in couple except one. People who are in couple have same numbers. Find out the person who is not in couple.\n", 8 | "\n", 9 | "Input:\n", 10 | "The first line contains an integer 'T' denoting the total number of test cases. In each test cases, the first line contains an integer 'N' denoting the size of array. The second line contains N space-separated integers A1, A2, ..., AN denoting the elements of the array. (N is always odd)\n", 11 | "\n", 12 | "\n", 13 | "Output:\n", 14 | "In each seperate line print number of the person not in couple.\n", 15 | "\n", 16 | "\n", 17 | "Constraints:\n", 18 | "1<=T<=30\n", 19 | "1<=N<=500\n", 20 | "1<=A[i]<=500\n", 21 | "N%2==1" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 12, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "001 0001 001\n", 34 | "003 0011 002\n", 35 | "000 0000 003\n", 36 | "002 0010 002\n", 37 | "006 0110 004\n", 38 | "007 0111 001\n", 39 | "003 0011 004\n", 40 | "000 0000 003\n", 41 | "006 0110 006\n", 42 | "Res 6\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "arr = [1, 2, 3, 2,4, 1,4,3,6]\n", 48 | "\n", 49 | "p = 0\n", 50 | "\n", 51 | "\n", 52 | "\n", 53 | "for i in arr:\n", 54 | " p = p ^ i\n", 55 | " print(\"{0:03d} {1:04b} {2:03d}\".format(p, p, i))\n", 56 | "print(\"Res\", p)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 50, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "3 0\n", 69 | "0 3\n", 70 | "0 1\n", 71 | "2 0\n", 72 | "Result 2\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "arr = [3,3,2,3]\n", 78 | "\n", 79 | "ones = 0\n", 80 | "twos = 0\n", 81 | "\n", 82 | "def b(n, message = None):\n", 83 | " if message:\n", 84 | " print(message, end=' ')\n", 85 | " print(\"{0:04b}\".format(n))\n", 86 | "\n", 87 | "for ix, i in enumerate(arr):\n", 88 | " twos = twos | (ones & i)\n", 89 | " ones = ones ^ i\n", 90 | " common_bit_mask = ~(ones & twos);\n", 91 | " ones = ones & common_bit_mask\n", 92 | " twos = twos & common_bit_mask\n", 93 | " print(ones, twos)\n", 94 | "print(\"Result\", ones)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.6.4" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 2 133 | } 134 | -------------------------------------------------------------------------------- /problems/Bracket Match.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "A string of brackets is considered correctly matched if every opening bracket in the string can be paired up with a later closing bracket, and vice versa. For instance, “(())()” is correctly matched, whereas “)(“ and “((” aren’t. For instance, “((” could become correctly matched by adding two closing brackets at the end, so you’d return 2.\n", 8 | "\n", 9 | "Given a string that consists of brackets, write a function bracketMatch that takes a bracket string as an input and returns the minimum number of brackets you’d need to add to the input in order to make it correctly matched.\n", 10 | "\n", 11 | "Explain the correctness of your code, and analyze its time and space complexities." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 7, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "test = \"(()\"" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 8, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "1" 32 | ] 33 | }, 34 | "execution_count": 8, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "def bracket_match_stack(text):\n", 41 | " if not text: return 0\n", 42 | " brackets = [text[0]]\n", 43 | " for c in text[1:]:\n", 44 | " if brackets:\n", 45 | " top = brackets[-1]\n", 46 | " if top == '(' and c == ')':\n", 47 | " brackets.pop()\n", 48 | " else:\n", 49 | " brackets.append(c)\n", 50 | " else:\n", 51 | " brackets.append(c)\n", 52 | " \n", 53 | " return len(brackets)\n", 54 | "\n", 55 | "bracket_match_stack(test)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "Time: $O(N)$ \n", 63 | "Space: $O(N)$" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 9, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "1" 75 | ] 76 | }, 77 | "execution_count": 9, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "def bracket_match(text):\n", 84 | " openings = 0\n", 85 | " closings = 0\n", 86 | " for c in text:\n", 87 | " if c == '(':\n", 88 | " openings += 1\n", 89 | " elif c == ')':\n", 90 | " if openings > 0:\n", 91 | " openings -= 1\n", 92 | " else:\n", 93 | " closings+=1\n", 94 | " return openings + closings\n", 95 | "\n", 96 | "bracket_match(test)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Time: $O(N)$ \n", 104 | "Space: $O(1)$" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [] 113 | } 114 | ], 115 | "metadata": { 116 | "kernelspec": { 117 | "display_name": "Python 3", 118 | "language": "python", 119 | "name": "python3" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.6.5" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | -------------------------------------------------------------------------------- /sklearn/Polynomial features.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Size of train (2, 2)\n", 13 | "[[2 3]\n", 14 | " [2 4]]\n", 15 | "Size of train (2, 6)\n", 16 | "[[ 1. 2. 3. 4. 6. 9.]\n", 17 | " [ 1. 2. 4. 4. 8. 16.]]\n" 18 | ] 19 | }, 20 | { 21 | "data": { 22 | "text/html": [ 23 | "
\n", 24 | "\n", 37 | "\n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | "
1x0x1x0^2x0 x1x1^2
01.02.03.04.06.09.0
11.02.04.04.08.016.0
\n", 70 | "
" 71 | ], 72 | "text/plain": [ 73 | " 1 x0 x1 x0^2 x0 x1 x1^2\n", 74 | "0 1.0 2.0 3.0 4.0 6.0 9.0\n", 75 | "1 1.0 2.0 4.0 4.0 8.0 16.0" 76 | ] 77 | }, 78 | "execution_count": 4, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "import pandas as pd\n", 85 | "import numpy as np\n", 86 | "from sklearn.preprocessing import PolynomialFeatures\n", 87 | "pf = PolynomialFeatures(degree=2, include_bias=True)\n", 88 | "\n", 89 | "test = np.array([\n", 90 | " [2, 3],\n", 91 | " [2, 4]\n", 92 | "])\n", 93 | "\n", 94 | "pf.fit(test)\n", 95 | "print(\"Size of train\", test.shape)\n", 96 | "print(test)\n", 97 | "x = pf.transform(test)\n", 98 | "print(\"Size of train\", x.shape)\n", 99 | "print(x)\n", 100 | "\n", 101 | "df = pd.DataFrame(x)\n", 102 | "df.columns = pf.get_feature_names()\n", 103 | "df.head()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": true 111 | }, 112 | "outputs": [], 113 | "source": [] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": "Python 3", 119 | "language": "python", 120 | "name": "python3" 121 | }, 122 | "language_info": { 123 | "codemirror_mode": { 124 | "name": "ipython", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "python", 130 | "nbconvert_exporter": "python", 131 | "pygments_lexer": "ipython3", 132 | "version": "3.6.1" 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 2 137 | } 138 | -------------------------------------------------------------------------------- /problems/Busiest Time in The Mall.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "The Westfield Mall management is trying to figure out what the busiest moment at the mall was last year. You’re given data extracted from the mall’s door detectors. Each data point is represented as an integer array whose size is 3. The values at indices 0, 1 and 2 are the timestamp, the count of visitors, and whether the visitors entered or exited the mall (0 for exit and 1 for entrance), respectively. Here’s an example of a data point: [ 1440084737, 4, 0 ].\n", 8 | "\n", 9 | "Note that time is given in a Unix format called Epoch, which is a nonnegative integer holding the number of seconds that have elapsed since 00:00:00 UTC, Thursday, 1 January 1970.\n", 10 | "\n", 11 | "Given an array, data, of data points, write a function findBusiestPeriod that returns the time at which the mall reached its busiest moment last year. The return value is the timestamp, e.g. 1480640292. Note that if there is more than one period with the same visitor peak, return the earliest one.\n", 12 | "\n", 13 | "Assume that the array data is sorted in an ascending order by the timestamp. Explain your solution and analyze its time and space complexities." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 3, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def find_busiest_period(data):\n", 23 | " people = 0\n", 24 | " max_point = -1\n", 25 | " max_people = 0\n", 26 | " for i, datapoint in enumerate(data): \n", 27 | " people_at = datapoint[1] \n", 28 | " in_out = 1 if datapoint[2] == 1 else -1 \n", 29 | " people += in_out * people_at \n", 30 | " \n", 31 | " if i < len(data)-1 and data[i][0] == data[i+1][0]:\n", 32 | " continue\n", 33 | " \n", 34 | " if people > max_people:\n", 35 | " max_point = i\n", 36 | " max_people = people\n", 37 | " \n", 38 | " return data[max_point][0]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 7, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "1487799426\n", 51 | "1487799427\n", 52 | "1487799425\n", 53 | "1487800378\n", 54 | "1487901211\n", 55 | "1487801478\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "print(find_busiest_period([[1487799426,21,1]]))\n", 61 | "print(find_busiest_period([[1487799425,21,0],[1487799427,22,1],[1487901318,7,0]]))\n", 62 | "print(find_busiest_period([[1487799425,21,1],[1487799425,4,0],[1487901318,7,0]]))\n", 63 | "print(find_busiest_period([[1487799425,14,1],[1487799425,4,0],[1487799425,2,0],[1487800378,10,1],[1487801478,18,0],[1487801478,18,1],[1487901013,1,0],[1487901211,7,1],[1487901211,7,0]]))\n", 64 | "print(find_busiest_period([[1487799425,14,1],[1487799425,4,1],[1487799425,2,1],[1487800378,10,1],[1487801478,18,1],[1487901013,1,1],[1487901211,7,1],[1487901211,7,1]]))\n", 65 | "print(find_busiest_period([[1487799425,14,1],[1487799425,4,0],[1487799425,2,0],[1487800378,10,1],[1487801478,18,0],[1487801478,19,1],[1487801478,1,0],[1487801478,1,1],[1487901013,1,0],[1487901211,7,1],[1487901211,8,0]]))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.6.4" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /problems/Root of Number.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Many times, we need to re-implement basic functions without using any standard library functions already implemented. For example, when designing a chip that requires very little memory space.\n", 8 | "\n", 9 | "In this question we’ll implement a function root that calculates the n’th root of a number. The function takes a nonnegative number x and a positive integer n, and returns the positive n’th root of x within an error of 0.001 (i.e. suppose the real root is y, then the error is: |y-root(x,n)| and must satisfy |y-root(x,n)| < 0.001).\n", 10 | "\n", 11 | "Don’t be intimidated by the question. While there are many algorithms to calculate roots that require prior knowledge in numerical analysis (some of them are mentioned here), there is also an elementary method which doesn’t require more than guessing-and-checking. Try to think more in terms of the latter.\n", 12 | "\n", 13 | "Make sure your algorithm is efficient, and analyze its time and space complexities." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 37, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/plain": [ 24 | "3.000091552734375" 25 | ] 26 | }, 27 | "execution_count": 37, 28 | "metadata": {}, 29 | "output_type": "execute_result" 30 | } 31 | ], 32 | "source": [ 33 | "import math \n", 34 | "\n", 35 | "def fast_power(base, power):\n", 36 | " \"\"\"\n", 37 | " Returns the result of a^b i.e. a**b\n", 38 | " We assume that a >= 1 and b >= 0\n", 39 | "\n", 40 | " Remember two things!\n", 41 | " - Divide power by 2 and multiply base to itself (if the power is even)\n", 42 | " - Decrement power by 1 to make it even and then follow the first step\n", 43 | " \"\"\"\n", 44 | "\n", 45 | " result = 1\n", 46 | " while power > 0:\n", 47 | " # If power is even\n", 48 | " if power % 2 == 0:\n", 49 | " # Divide the power by 2\n", 50 | " power = power / 2\n", 51 | " # Multiply base to itself\n", 52 | " base = base * base\n", 53 | " else:\n", 54 | " # Decrement the power by 1 and make it even\n", 55 | " power = power - 1\n", 56 | " # Take care of the extra value that we took out\n", 57 | " # We will store it directly in result\n", 58 | " result = result * base\n", 59 | "\n", 60 | " # Now power is even, so we can follow our previous procedure\n", 61 | " power = power / 2\n", 62 | " base = base * base\n", 63 | "\n", 64 | " return result\n", 65 | "\n", 66 | "def root(x, n):\n", 67 | " if x == 0:\n", 68 | " return 0\n", 69 | " lower = 0\n", 70 | " upper = max(1,x)\n", 71 | " middle = (upper + lower) / 2\n", 72 | " res = fast_power(middle, n)\n", 73 | " while abs(res - x) >= 0.001:\n", 74 | " \n", 75 | " if res > x:\n", 76 | " upper = middle\n", 77 | " else:\n", 78 | " lower = middle\n", 79 | " \n", 80 | " middle = (upper + lower) / 2\n", 81 | " res = fast_power(middle, n)\n", 82 | " return middle\n", 83 | "\n", 84 | "root(9,2)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [] 93 | } 94 | ], 95 | "metadata": { 96 | "kernelspec": { 97 | "display_name": "Python 3", 98 | "language": "python", 99 | "name": "python3" 100 | }, 101 | "language_info": { 102 | "codemirror_mode": { 103 | "name": "ipython", 104 | "version": 3 105 | }, 106 | "file_extension": ".py", 107 | "mimetype": "text/x-python", 108 | "name": "python", 109 | "nbconvert_exporter": "python", 110 | "pygments_lexer": "ipython3", 111 | "version": "3.6.5" 112 | } 113 | }, 114 | "nbformat": 4, 115 | "nbformat_minor": 2 116 | } 117 | -------------------------------------------------------------------------------- /structures/binary-indexed-tree.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 71, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "16\n", 13 | "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", 14 | "[0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]\n", 15 | "[0, 1, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2]\n", 16 | "[0, 1, 2, 1, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3]\n", 17 | "[0, 1, 2, 1, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4]\n", 18 | "[0, 1, 2, 1, 4, 1, 1, 0, 5, 0, 0, 0, 0, 0, 0, 0, 5]\n", 19 | "[0, 1, 2, 1, 4, 1, 2, 0, 6, 0, 0, 0, 0, 0, 0, 0, 6]\n", 20 | "[0, 1, 2, 1, 4, 1, 2, 1, 7, 0, 0, 0, 0, 0, 0, 0, 7]\n", 21 | "[0, 1, 2, 1, 4, 1, 2, 1, 8, 0, 0, 0, 0, 0, 0, 0, 8]\n", 22 | "[0, 1, 2, 1, 4, 1, 2, 1, 8, 1, 1, 0, 1, 0, 0, 0, 9]\n", 23 | "[0, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 0, 2, 0, 0, 0, 10]\n", 24 | "[0, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1, 3, 0, 0, 0, 11]\n", 25 | "[0, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1, 4, 0, 0, 0, 12]\n", 26 | "[0, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1, 4, 1, 1, 0, 13]\n", 27 | "[0, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1, 4, 1, 2, 0, 14]\n", 28 | "[0, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1, 4, 1, 2, 1, 15]\n", 29 | "[0, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1, 4, 1, 2, 1, 24]\n", 30 | "Visiting 15\n", 31 | "Visiting 14\n", 32 | "Visiting 12\n", 33 | "Visiting 8\n", 34 | "15\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "import numpy as np\n", 40 | "input_array = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9]\n", 41 | "numbers = len(input_array)\n", 42 | "print(numbers)\n", 43 | "BIT = [0 for i in range(numbers +1)]\n", 44 | "\n", 45 | "def lsb(i, debug=False):\n", 46 | " result = i & -i\n", 47 | " if debug:\n", 48 | " print(np.binary_repr(i, 32))\n", 49 | " print(np.binary_repr(-i, 32))\n", 50 | " print(np.binary_repr(result, 32))\n", 51 | " return i & -i\n", 52 | "\n", 53 | "\n", 54 | "\n", 55 | "def update(ix, val, debug=False):\n", 56 | " nv = val;\n", 57 | " if debug:\n", 58 | " print(ix)\n", 59 | " while ix < len(BIT):\n", 60 | " BIT[ix] += val;\n", 61 | " if debug:\n", 62 | " print(\"Visiting %d\" % ix)\n", 63 | " ix += (ix & -ix)\n", 64 | "\n", 65 | "def query(ix, debug=False):\n", 66 | " r = 0;\n", 67 | " while(ix):\n", 68 | " r += BIT[ix];\n", 69 | " if debug:\n", 70 | " print(\"Visiting %d\" % ix)\n", 71 | " ix -= (ix & -ix);\n", 72 | " return r;\n", 73 | "\n", 74 | "print(BIT)\n", 75 | "for n in range(len(input_array)):\n", 76 | " update(n+1, input_array[n])\n", 77 | " print(BIT)\n", 78 | "print(query(15, True))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 75, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "0 0\n", 91 | "1 1\n", 92 | "2 2\n", 93 | "3 1\n", 94 | "4 4\n", 95 | "5 1\n", 96 | "6 2\n", 97 | "7 1\n", 98 | "8 8\n", 99 | "9 1\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "for i in range(10):\n", 105 | " print(i, i & -i)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": "Python 3", 119 | "language": "python", 120 | "name": "python3" 121 | }, 122 | "language_info": { 123 | "codemirror_mode": { 124 | "name": "ipython", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "python", 130 | "nbconvert_exporter": "python", 131 | "pygments_lexer": "ipython3", 132 | "version": "3.6.4" 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 2 137 | } 138 | -------------------------------------------------------------------------------- /Stats/PCA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 53, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from sklearn.decomposition import PCA" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 54, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "whisky_df = pd.read_csv(\"whiskies.txt\")\n", 21 | "whisky = np.array(whisky_df.iloc[:,2:-3])" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 62, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "[[ 0.16320503 -0.02121166 0.17845477 ... -0.10475142 -0.1445124\n", 34 | " -0.4093115 ]\n", 35 | " [-1.41145428 0.67902208 1.8251002 ... 0.73061183 1.38592715\n", 36 | " -1.59799985]\n", 37 | " [ 0.28512372 -0.04715145 -0.52452208 ... 1.25305133 0.63396921\n", 38 | " 1.699413 ]\n", 39 | " ...\n", 40 | " [ 1.05053776 -0.4629256 1.80632271 ... 0.08397269 0.38390231\n", 41 | " 0.90579692]\n", 42 | " [ 1.5352916 0.47402265 -0.90367406 ... -1.90586662 -0.15338599\n", 43 | " 0.62095332]\n", 44 | " [ 0.25119059 -0.40462952 0.51675947 ... -0.14230442 1.17489829\n", 45 | " -0.17718568]]\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "def pca(data, d):\n", 51 | " # compute covariance\n", 52 | " cov = np.cov(data,rowvar=False)\n", 53 | " # compute eigenvectors and eigenvalues, and sort\n", 54 | " eigvals, eigvecs = np.linalg.eig(cov)\n", 55 | " eig_order = np.argsort(eigvals)\n", 56 | " components = []\n", 57 | " for i in range(d):\n", 58 | " # project the data onto each eigenvector in turn\n", 59 | " components.append(data @ eigvecs[eig_order[-i-1]])\n", 60 | " return np.stack(components).T\n", 61 | "\n", 62 | "centered_whisky = whisky - np.mean(whisky, axis=0)\n", 63 | "\n", 64 | "manually_analized = pca(centered_whisky, whisky.shape[1])\n", 65 | "print(manually_analized)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 65, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "[[-0.50338406 1.12202234 0.16120015 ... 0.01261079 0.65608202\n", 78 | " -0.07072755]\n", 79 | " [-1.47888827 3.00485075 -1.51709115 ... -0.78827475 -0.31893694\n", 80 | " 0.10285343]\n", 81 | " [-1.25311288 -0.65372066 0.28471962 ... -0.42175779 1.56642677\n", 82 | " 0.12767015]\n", 83 | " ...\n", 84 | " [-1.56110197 -0.73101402 -0.00333905 ... -0.52040133 0.40121693\n", 85 | " -0.08609251]\n", 86 | " [ 0.51934491 -0.2688109 2.55831635 ... 0.88801623 -0.22713249\n", 87 | " 0.04944175]\n", 88 | " [-0.60153006 -0.41868198 0.31623622 ... 0.29414666 -0.89810854\n", 89 | " 0.87448074]]\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "package_pca = PCA()\n", 95 | "package_analized = package_pca.fit_transform(whisky)\n", 96 | "print(package_analized)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "\n", 106 | "\n", 107 | "def pca(data, d):\n", 108 | " # compute covariance\n", 109 | " cov = np.cov(data,rowvar=False)\n", 110 | " # compute eigenvectors and eigenvalues, and sort\n", 111 | " eigvals, eigv = np.linalg.eig(cov)\n", 112 | " eig_order = np.argsort(np.abs(eigvals))\n", 113 | " components = []\n", 114 | " for i in range(d):\n", 115 | " # project the data onto each eigenvector in turn\n", 116 | " components.append(data @ eigvals[eig_order[-i-1]])\n", 117 | " return np.stack(components).T" 118 | ] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.6.1" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python,pycharm,jupyternotebook 3 | 4 | ### JupyterNotebook ### 5 | .ipynb_checkpoints 6 | */.ipynb_checkpoints/* 7 | 8 | # Remove previous ipynb_checkpoints 9 | # git rm -r .ipynb_checkpoints/ 10 | # 11 | ### PyCharm ### 12 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 13 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 14 | 15 | # User-specific stuff: 16 | .idea/**/workspace.xml 17 | .idea/**/tasks.xml 18 | .idea/dictionaries 19 | 20 | # Sensitive or high-churn files: 21 | .idea/**/dataSources/ 22 | .idea/**/dataSources.ids 23 | .idea/**/dataSources.xml 24 | .idea/**/dataSources.local.xml 25 | .idea/**/sqlDataSources.xml 26 | .idea/**/dynamic.xml 27 | .idea/**/uiDesigner.xml 28 | 29 | # Gradle: 30 | .idea/**/gradle.xml 31 | .idea/**/libraries 32 | 33 | # CMake 34 | cmake-build-debug/ 35 | 36 | # Mongo Explorer plugin: 37 | .idea/**/mongoSettings.xml 38 | 39 | ## File-based project format: 40 | *.iws 41 | 42 | ## Plugin-specific files: 43 | 44 | # IntelliJ 45 | /out/ 46 | 47 | # mpeltonen/sbt-idea plugin 48 | .idea_modules/ 49 | 50 | # JIRA plugin 51 | atlassian-ide-plugin.xml 52 | 53 | # Cursive Clojure plugin 54 | .idea/replstate.xml 55 | 56 | # Ruby plugin and RubyMine 57 | /.rakeTasks 58 | 59 | # Crashlytics plugin (for Android Studio and IntelliJ) 60 | com_crashlytics_export_strings.xml 61 | crashlytics.properties 62 | crashlytics-build.properties 63 | fabric.properties 64 | 65 | ### PyCharm Patch ### 66 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 67 | 68 | # *.iml 69 | # modules.xml 70 | # .idea/misc.xml 71 | # *.ipr 72 | 73 | # Sonarlint plugin 74 | .idea/sonarlint 75 | 76 | ### Python ### 77 | # Byte-compiled / optimized / DLL files 78 | __pycache__/ 79 | *.py[cod] 80 | *$py.class 81 | 82 | # C extensions 83 | *.so 84 | 85 | # Distribution / packaging 86 | .Python 87 | build/ 88 | develop-eggs/ 89 | dist/ 90 | downloads/ 91 | eggs/ 92 | .eggs/ 93 | lib/ 94 | lib64/ 95 | parts/ 96 | sdist/ 97 | var/ 98 | wheels/ 99 | *.egg-info/ 100 | .installed.cfg 101 | *.egg 102 | 103 | # PyInstaller 104 | # Usually these files are written by a python script from a template 105 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 106 | *.manifest 107 | *.spec 108 | 109 | # Installer logs 110 | pip-log.txt 111 | pip-delete-this-directory.txt 112 | 113 | # Unit test / coverage reports 114 | htmlcov/ 115 | .tox/ 116 | .coverage 117 | .coverage.* 118 | .cache 119 | nosetests.xml 120 | coverage.xml 121 | *.cover 122 | .hypothesis/ 123 | 124 | # Translations 125 | *.mo 126 | *.pot 127 | 128 | # Django stuff: 129 | *.log 130 | local_settings.py 131 | 132 | # Flask stuff: 133 | instance/ 134 | .webassets-cache 135 | 136 | # Scrapy stuff: 137 | .scrapy 138 | 139 | # Sphinx documentation 140 | docs/_build/ 141 | 142 | # PyBuilder 143 | target/ 144 | 145 | # Jupyter Notebook 146 | 147 | # pyenv 148 | .python-version 149 | 150 | # celery beat schedule file 151 | celerybeat-schedule 152 | 153 | # SageMath parsed files 154 | *.sage.py 155 | 156 | # Environments 157 | .env 158 | .venv 159 | env/ 160 | venv/ 161 | ENV/ 162 | env.bak/ 163 | venv.bak/ 164 | 165 | # Spyder project settings 166 | .spyderproject 167 | .spyproject 168 | 169 | # Rope project settings 170 | .ropeproject 171 | 172 | # mkdocs documentation 173 | /site 174 | 175 | # mypy 176 | .mypy_cache/ 177 | ### macOS ### 178 | *.DS_Store 179 | .AppleDouble 180 | .LSOverride 181 | 182 | # Icon must end with two \r 183 | Icon 184 | 185 | # Thumbnails 186 | ._* 187 | 188 | # Files that might appear in the root of a volume 189 | .DocumentRevisions-V100 190 | .fseventsd 191 | .Spotlight-V100 192 | .TemporaryItems 193 | .Trashes 194 | .VolumeIcon.icns 195 | .com.apple.timemachine.donotpresent 196 | 197 | # Directories potentially created on remote AFP share 198 | .AppleDB 199 | .AppleDesktop 200 | Network Trash Folder 201 | Temporary Items 202 | .apdisk 203 | 204 | ### Windows ### 205 | # Windows thumbnail cache files 206 | Thumbs.db 207 | ehthumbs.db 208 | ehthumbs_vista.db 209 | 210 | # Folder config file 211 | Desktop.ini 212 | 213 | # Recycle Bin used on file shares 214 | $RECYCLE.BIN/ 215 | 216 | # Windows Installer files 217 | *.cab 218 | *.msi 219 | *.msm 220 | *.msp 221 | 222 | # Windows shortcuts 223 | *.lnk 224 | 225 | # End of https://www.gitignore.io/api/macos,windows 226 | 227 | # Captions files 228 | youtube-captions/* 229 | casey-neistat-analisys/* 230 | mt-scraper/* 231 | partiallyd-scrape/* 232 | tloz-scrape/* 233 | /.metadata/ 234 | 235 | # jupygit file "extension" 236 | *-jupygit___.ipynb -------------------------------------------------------------------------------- /problems/Sales Path.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "The car manufacturer Honda holds their distribution system in the form of a tree (not necessarily binary). The root is the company itself, and every node in the tree represents a car distributor that receives cars from the parent node and ships them to its children nodes. The leaf nodes are car dealerships that sell cars direct to consumers. In addition, every node holds an integer that is the cost of shipping a car to it.\n", 8 | "\n", 9 | "Take for example the tree below:\n", 10 | "\n", 11 | "alt\n", 12 | "\n", 13 | "A path from Honda’s factory to a car dealership, which is a path from the root to a leaf in the tree, is called a Sales Path. The cost of a Sales Path is the sum of the costs for every node in the path. For example, in the tree above one Sales Path is 0→3→0→10, and its cost is 13 (0+3+0+10).\n", 14 | "\n", 15 | "Honda wishes to find the minimal Sales Path cost in its distribution tree. Given a node rootNode, write a function getCheapestCost that calculates the minimal Sales Path cost in the tree.\n", 16 | "\n", 17 | "Implement your function in the most efficient manner and analyze its time and space complexities.\n", 18 | "\n", 19 | "For example:\n", 20 | "\n", 21 | "Given the rootNode of the tree in diagram above\n", 22 | "\n", 23 | "Your function would return:\n", 24 | "\n", 25 | "7 since it’s the minimal Sales Path cost (there are actually two Sales Paths in the tree whose cost is 7: 0→6→1 and 0→3→2→1→1)\n", 26 | "\n", 27 | "Constraints:\n", 28 | "\n", 29 | "[time limit] 5000ms\n", 30 | "\n", 31 | "[input] Node rootNode\n", 32 | "\n", 33 | "0 ≤ rootNode.cost ≤ 100000\n", 34 | "[output] integer" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "7\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "def get_cheapest_cost(rootNode):\n", 52 | " return path(rootNode, rootNode.cost)\n", 53 | "\n", 54 | "def path(node, value):\n", 55 | " if len(node.children) == 0: return value\n", 56 | " \n", 57 | " children = []\n", 58 | " children.extend(node.children)\n", 59 | " min_cost = 100000000000\n", 60 | " for c in children:\n", 61 | " min_cost = min(min_cost, path(c, value + c.cost))\n", 62 | " return min_cost\n", 63 | "\n", 64 | "########################################## \n", 65 | "# Use the helper code below to implement #\n", 66 | "# and test your function above #\n", 67 | "##########################################\n", 68 | "\n", 69 | "# A node \n", 70 | "class Node:\n", 71 | "\n", 72 | " # Constructor to create a new node\n", 73 | " def __init__(self, cost):\n", 74 | " self.cost = cost\n", 75 | " self.children = []\n", 76 | " self.parent = None\n", 77 | " \n", 78 | "\n", 79 | "root = Node(0)\n", 80 | "five = Node(5)\n", 81 | "three = Node(3)\n", 82 | "six = Node(6)\n", 83 | "\n", 84 | "root.children.extend([five, three, six])\n", 85 | "\n", 86 | "four = Node(4)\n", 87 | "five.children.append(four)\n", 88 | "\n", 89 | "two = Node(2)\n", 90 | "zero = Node(0)\n", 91 | "three.children.extend([two, zero])\n", 92 | "\n", 93 | "one = Node(1)\n", 94 | "two.children.append(one)\n", 95 | "one1 = Node(1)\n", 96 | "one.children.append(one1)\n", 97 | "\n", 98 | "\n", 99 | "ten = Node(10)\n", 100 | "zero.children.append(ten)\n", 101 | "\n", 102 | "one2 = Node(1)\n", 103 | "five2 = Node(5)\n", 104 | "\n", 105 | "six.children.extend([one2, five2])\n", 106 | "\n", 107 | "print(get_cheapest_cost(root))" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.6.4" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 2 139 | } 140 | -------------------------------------------------------------------------------- /so/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 16, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "LINKS\n", 13 | "www.abc.com\n", 14 | "www.eee.com\n", 15 | "matching\n", 16 | "\n", 17 | "COUNTRY\n", 18 | "Brexit - UK\n", 19 | "USA UK Relations\n", 20 | "matching\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "import re\n", 26 | "\n", 27 | "keycountryfile = \"keycountry.txt\"\n", 28 | "countryfile = \"country.txt\"\n", 29 | "\n", 30 | "with open('links.txt', 'r') as links:\n", 31 | " links_data = [line.strip() for line in links.readlines()]\n", 32 | "\n", 33 | "with open('keylink.txt', 'r') as keys:\n", 34 | " keys_links = set([line.strip() for line in keys.readlines()])\n", 35 | "\n", 36 | "\n", 37 | "matching_links = []\n", 38 | "for url in links_data:\n", 39 | " if url in keys_links:\n", 40 | " matching_links.append(url)\n", 41 | " \n", 42 | "print('LINKS')\n", 43 | "if matching_links:\n", 44 | " print('\\n'.join(matching_links))\n", 45 | " print(\"matching\")\n", 46 | "else:\n", 47 | " print(\"Not matching\") \n", 48 | "\n", 49 | "print()\n", 50 | "\n", 51 | "with open(keycountryfile , \"r\") as f:\n", 52 | " country_keys = set(key.lower() for key in \n", 53 | " re.findall(r'\\w+', f.readline()))\n", 54 | "\n", 55 | "matching_lines = []\n", 56 | "with open(countryfile) as f:\n", 57 | " for line in f:\n", 58 | " words = set(word.lower() for word in re.findall(r'\\w+', line))\n", 59 | " if country_keys & words:\n", 60 | " matching_lines.append(line.strip())\n", 61 | " print(\"COUNTRY\")\n", 62 | " if matching_lines:\n", 63 | " print('\\n'.join(matching_lines))\n", 64 | " print(\"matching\")\n", 65 | " else: \n", 66 | " print(\"Not matching\")" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 17, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "LINKS\n", 79 | "www.abc.com\n", 80 | "matching\n", 81 | "www.eee.com\n", 82 | "matching\n", 83 | "Not matching\n", 84 | "COUNTRY\n", 85 | "Brexit - UK\n", 86 | "matching\n", 87 | "USA UK Relations\n", 88 | "matching\n", 89 | "Not matching\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "import re\n", 95 | "\n", 96 | "keycountryfile = \"keycountry.txt\"\n", 97 | "countryfile = \"country.txt\"\n", 98 | "\n", 99 | "links = open('links.txt', 'r')\n", 100 | "links_data = links.read()\n", 101 | "links.close()\n", 102 | "\n", 103 | "keys = open('keylink.txt', 'r')\n", 104 | "keys_data = keys.read()\n", 105 | "keys.close()\n", 106 | "\n", 107 | "keys_split = keys_data.splitlines()\n", 108 | "\n", 109 | "print('LINKS')\n", 110 | "for url in keys_split:\n", 111 | " if url in links_data:\n", 112 | " print(url)\n", 113 | " print(\"matching\")\n", 114 | "else:\n", 115 | " print(\"Not matching\") \n", 116 | "\n", 117 | "keys = set(key.lower() for key in \n", 118 | " re.findall(r'\\w+', open(keycountryfile , \"r\").readline()))\n", 119 | "\n", 120 | "print(\"COUNTRY\")\n", 121 | "with open(countryfile) as f:\n", 122 | " for line in f:\n", 123 | " words = set(word.lower() for word in re.findall(r'\\w+', line))\n", 124 | " if keys & words:\n", 125 | " print(line, end='')\n", 126 | " print(\"matching\")\n", 127 | " else:\n", 128 | " print(\"Not matching\")" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [] 137 | } 138 | ], 139 | "metadata": { 140 | "kernelspec": { 141 | "display_name": "Python 3", 142 | "language": "python", 143 | "name": "python3" 144 | }, 145 | "language_info": { 146 | "codemirror_mode": { 147 | "name": "ipython", 148 | "version": 3 149 | }, 150 | "file_extension": ".py", 151 | "mimetype": "text/x-python", 152 | "name": "python", 153 | "nbconvert_exporter": "python", 154 | "pygments_lexer": "ipython3", 155 | "version": "3.6.5" 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 2 160 | } 161 | -------------------------------------------------------------------------------- /problems/Array of Array Products.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Given an array of integers arr, you’re asked to calculate for each index i the product of all integers except the integer at that index (i.e. except arr[i]). Implement a function arrayOfArrayProducts that takes an array of integers and returns an array of the products.\n", 8 | "\n", 9 | "Solve without using division and analyze your solution’s time and space complexities." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 10, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "ename": "IndexError", 19 | "evalue": "list index out of range", 20 | "output_type": "error", 21 | "traceback": [ 22 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 23 | "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", 24 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray_of_array_products\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m7\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray_of_array_products\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m982\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray_of_array_products\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 25 | "\u001b[0;32m\u001b[0m in \u001b[0;36marray_of_array_products\u001b[0;34m(arr)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mleft\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0ma1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mright\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ma2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 26 | "\u001b[0;31mIndexError\u001b[0m: list index out of range" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "def array_of_array_products(arr):\n", 32 | " \n", 33 | " if len(arr) < 2: return []\n", 34 | " \n", 35 | " a1 = []\n", 36 | " m = 1\n", 37 | " for a_ in arr:\n", 38 | " m *= a_\n", 39 | " a1.append(m)\n", 40 | " \n", 41 | " a2 = []\n", 42 | " m = 1\n", 43 | " for a_ in arr[::-1]:\n", 44 | " m *= a_\n", 45 | " a2.append(m)\n", 46 | " a2 = a2[::-1]\n", 47 | " \n", 48 | " result = []\n", 49 | " for i in range(len(arr)):\n", 50 | " left = 1 if i == 0 else a1[i-1]\n", 51 | " right = 1 if i == len(arr)-1 else a2[i+1]\n", 52 | " result.append(left*right)\n", 53 | " \n", 54 | " return result\n", 55 | "\n", 56 | "\n", 57 | "print(array_of_array_products([2, 7, 3, 4]))\n", 58 | "print(array_of_array_products([2,3,0,982,10]))\n", 59 | "print(array_of_array_products([2]))\n", 60 | "print(array_of_array_products([2,2]))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [] 69 | } 70 | ], 71 | "metadata": { 72 | "kernelspec": { 73 | "display_name": "Python 3", 74 | "language": "python", 75 | "name": "python3" 76 | }, 77 | "language_info": { 78 | "codemirror_mode": { 79 | "name": "ipython", 80 | "version": 3 81 | }, 82 | "file_extension": ".py", 83 | "mimetype": "text/x-python", 84 | "name": "python", 85 | "nbconvert_exporter": "python", 86 | "pygments_lexer": "ipython3", 87 | "version": "3.6.5" 88 | } 89 | }, 90 | "nbformat": 4, 91 | "nbformat_minor": 2 92 | } 93 | -------------------------------------------------------------------------------- /Stats/whiskies.txt: -------------------------------------------------------------------------------- 1 | RowID,Distillery,Body,Sweetness,Smoky,Medicinal,Tobacco,Honey,Spicy,Winey,Nutty,Malty,Fruity,Floral,Postcode, Latitude, Longitude 2 | 01,Aberfeldy,2,2,2,0,0,2,1,2,2,2,2,2, PH15 2EB, 286580,749680 3 | 02,Aberlour,3,3,1,0,0,4,3,2,2,3,3,2, AB38 9PJ, 326340,842570 4 | 03,AnCnoc,1,3,2,0,0,2,0,0,2,2,3,2, AB5 5LI, 352960,839320 5 | 04,Ardbeg,4,1,4,4,0,0,2,0,1,2,1,0, PA42 7EB, 141560,646220 6 | 05,Ardmore,2,2,2,0,0,1,1,1,2,3,1,1, AB54 4NH, 355350,829140 7 | 06,ArranIsleOf,2,3,1,1,0,1,1,1,0,1,1,2, KA27 8HJ, 194050,649950 8 | 07,Auchentoshan,0,2,0,0,0,1,1,0,2,2,3,3, G81 4SJ, 247670,672610 9 | 08,Auchroisk,2,3,1,0,0,2,1,2,2,2,2,1, AB55 3XS, 340754,848623 10 | 09,Aultmore,2,2,1,0,0,1,0,0,2,2,2,2, AB55 3QY, 340754,848623 11 | 10,Balblair,2,3,2,1,0,0,2,0,2,1,2,1, IV19 1LB, 270820,885770 12 | 11,Balmenach,4,3,2,0,0,2,1,3,3,0,1,2, PH26 3PF, 307750,827170 13 | 12,Balvenie,3,2,1,0,0,3,2,1,0,2,2,2, AB55 4DH, 332680,840840 14 | 13,BenNevis,4,2,2,0,0,2,2,0,2,2,2,2, PH33 6TJ, 212600,775710 15 | 14,Benriach,2,2,1,0,0,2,2,0,0,2,3,2, IV30 8SJ, 323450,858380 16 | 15,Benrinnes,3,2,2,0,0,3,1,1,2,3,2,2, AB38 9NN, 325800,839920 17 | 16,Benromach,2,2,2,0,0,2,2,1,2,2,2,2, IV36 3EB, 303330,859350 18 | 17,Bladnoch,1,2,1,0,0,0,1,1,0,2,2,3, DG8 9AB, 242260,554260 19 | 18,BlairAthol,2,2,2,0,0,1,2,2,2,2,2,2, PH16 5LY, 294860,757580 20 | 19,Bowmore,2,2,3,1,0,2,2,1,1,1,1,2, PA43 7GS, 131330,659720 21 | 20,Bruichladdich,1,1,2,2,0,2,2,1,2,2,2,2, PA49 7UN, 126680,661400 22 | 21,Bunnahabhain,1,2,1,1,0,1,1,1,1,2,2,3, PA46 7RR, 142210,673170 23 | 22,Caol Ila,3,1,4,2,1,0,2,0,2,1,1,1, PA46 7RL, 142920,670040 24 | 23,Cardhu,1,3,1,0,0,1,1,0,2,2,2,2, AB38 7RY, 318790,843090 25 | 24,Clynelish,3,2,3,3,1,0,2,0,1,1,2,0, KW9 6LB, 290250,904230 26 | 25,Craigallechie,2,2,2,0,1,2,2,1,2,2,1,4, AB38 9ST, 328920,844920 27 | 26,Cragganmore,2,3,2,1,0,0,1,0,2,2,2,2, AB37 9AB, 316600,836370 28 | 27,Dailuaine,4,2,2,0,0,1,2,2,2,2,2,1, AB38 7RE, 323520,841010 29 | 28,Dalmore,3,2,2,1,0,1,2,2,1,2,3,1, IV17 0UT, 266610,868730 30 | 29,Dalwhinnie,2,2,2,0,0,2,1,0,1,2,2,2, PH19 1AB, 263670,785270 31 | 30,Deanston,2,2,1,0,0,2,1,1,1,3,2,1, FK16 6AG, 271570,701570 32 | 31,Dufftown,2,3,1,1,0,0,0,0,1,2,2,2, AB55 4BR, 332360,839200 33 | 32,Edradour,2,3,1,0,0,2,1,1,4,2,2,2, PH16 5JP, 295960,757940 34 | 33,GlenDeveronMacduff,2,3,1,1,1,1,1,2,0,2,0,1, AB4 3JT, 372120,860400 35 | 34,GlenElgin,2,3,1,0,0,2,1,1,1,1,2,3, IV30 3SL, 322640,861040 36 | 35,GlenGarioch,2,1,3,0,0,0,3,1,0,2,2,2, AB51 0ES, 381020,827590 37 | 36,GlenGrant,1,2,0,0,0,1,0,1,2,1,2,1, AB38 7BS, 327610,849570 38 | 37,GlenKeith,2,3,1,0,0,1,2,1,2,1,2,1, AB55 3BU, 340754,848623 39 | 38,GlenMoray,1,2,1,0,0,1,2,1,2,2,2,4, IV30 1YE, 319820,862320 40 | 39,GlenOrd,3,2,1,0,0,1,2,1,1,2,2,2, IV6 7UJ, 251810,850860 41 | 40,GlenScotia,2,2,2,2,0,1,0,1,2,2,1,1, PA28 6DS, 172090,621010 42 | 41,GlenSpey,1,3,1,0,0,0,1,1,1,2,0,2, AB38 7AU, 327760,849140 43 | 42,Glenallachie,1,3,1,0,0,1,1,0,1,2,2,2, AB38 9LR, 326490,841240 44 | 43,Glendronach,4,2,2,0,0,2,1,4,2,2,2,0, AB54 6DA, 361200,844930 45 | 44,Glendullan,3,2,1,0,0,2,1,2,1,2,3,2, AB55 4DJ, 333000,840300 46 | 45,Glenfarclas,2,4,1,0,0,1,2,3,2,3,2,2, AB37 9BD, 320950,838160 47 | 46,Glenfiddich,1,3,1,0,0,0,0,0,0,2,2,2, AB55 4DH, 332680,840840 48 | 47,Glengoyne,1,2,0,0,0,1,1,1,2,2,3,2, G63 9LB, 252810,682750 49 | 48,Glenkinchie,1,2,1,0,0,1,2,0,0,2,2,2, EH34 5ET, 344380,666690 50 | 49,Glenlivet,2,3,1,0,0,2,2,2,1,2,2,3, AB37 9DB, 319560,828780 51 | 50,Glenlossie,1,2,1,0,0,1,2,0,1,2,2,2, IV30 3SS, 322640,861040 52 | 51,Glenmorangie,2,2,1,1,0,1,2,0,2,1,2,2, IV19 1PZ, 276750,883450 53 | 52,Glenrothes,2,3,1,0,0,1,1,2,1,2,2,0, AB38 7AA, 327650,849170 54 | 53,Glenturret,2,3,1,0,0,2,2,2,2,2,1,2, PH7 4HA, 285630,723580 55 | 54,Highland Park,2,2,3,1,0,2,1,1,1,2,1,1, KW15 1SU, 345340,1009260 56 | 55,Inchgower,1,3,1,1,0,2,2,0,1,2,1,2, AB56 5AB, 342610,863970 57 | 56,Isle of Jura,2,1,2,2,0,1,1,0,2,1,1,1, PA60 7XT, 152660,667040 58 | 57,Knochando,2,3,1,0,0,2,2,1,2,1,2,2, AB38 7RT, 319470,841570 59 | 58,Lagavulin,4,1,4,4,1,0,1,2,1,1,1,0, PA42 7DZ, 140430,645730 60 | 59,Laphroig,4,2,4,4,1,0,0,1,1,1,0,0, PA42 7DU, 138680,645160 61 | 60,Linkwood,2,3,1,0,0,1,1,2,0,1,3,2, IV30 3RD, 322640,861040 62 | 61,Loch Lomond,1,1,1,1,0,1,1,0,1,2,1,2, G83 0TL, 239370,680920 63 | 62,Longmorn,3,2,1,0,0,1,1,1,3,3,2,3, IV30 3SJ, 322640,861040 64 | 63,Macallan,4,3,1,0,0,2,1,4,2,2,3,1, AB38 9RX, 327710,844480 65 | 64,Mannochmore,2,1,1,0,0,1,1,1,2,1,2,2, IV30 3SS, 322640,861040 66 | 65,Miltonduff,2,4,1,0,0,1,0,0,2,1,1,2, IV30 3TQ, 322640,861040 67 | 66,Mortlach,3,2,2,0,0,2,3,3,2,1,2,2, AB55 4AQ, 332950,839850 68 | 67,Oban,2,2,2,2,0,0,2,0,2,2,2,0, PA34 5NH, 185940,730190 69 | 68,OldFettercairn,1,2,2,0,1,2,2,1,2,3,1,1, AB30 1YE, 370860,772900 70 | 69,OldPulteney,2,1,2,2,1,0,1,1,2,2,2,2, KW1 5BA, 336730,950130 71 | 70,RoyalBrackla,2,3,2,1,1,1,2,1,0,2,3,2, IV12 5QY, 286040,851320 72 | 71,RoyalLochnagar,3,2,2,0,0,2,2,2,2,2,3,1, AB35 5TB, 326140,794370 73 | 72,Scapa,2,2,1,1,0,2,1,1,2,2,2,2, KW15 1SE, 342850,1008930 74 | 73,Speyburn,2,4,1,0,0,2,1,0,0,2,1,2, AB38 7AG, 326930,851430 75 | 74,Speyside,2,2,1,0,0,1,0,1,2,2,2,2, PH21 1NS, 278740,800600 76 | 75,Springbank,2,2,2,2,0,2,2,1,2,1,0,1, PA28 6EJ, 172280,620910 77 | 76,Strathisla,2,2,1,0,0,2,2,2,3,3,3,2, AB55 3BS, 340754,848623 78 | 77,Strathmill,2,3,1,0,0,0,2,0,2,1,3,2, AB55 5DQ,342650,850500 79 | 78,Talisker,4,2,3,3,0,1,3,0,1,2,2,0, IV47 8SR, 137950,831770 80 | 79,Tamdhu,1,2,1,0,0,2,0,1,1,2,2,2, AB38 7RP, 319210,841760 81 | 80,Tamnavulin,1,3,2,0,0,0,2,0,2,1,2,3, AB37 9JA, 321180,826110 82 | 81,Teaninich,2,2,2,1,0,0,2,0,0,0,2,2, IV17 0XB, 265360,869120 83 | 82,Tobermory,1,1,1,0,0,1,0,0,1,2,2,2, PA75 6NR, 150450,755070 84 | 83,Tomatin,2,3,2,0,0,2,2,1,1,2,0,1, IV13 7YT, 279120,829630 85 | 84,Tomintoul,0,3,1,0,0,2,2,1,1,2,1,2, AB37 9AQ, 315100,825560 86 | 85,Tormore,2,2,1,0,0,1,0,1,2,1,0,0, PH26 3LR, 315180,834960 87 | 86,Tullibardine,2,3,0,0,1,0,2,1,1,2,2,1, PH4 1QG, 289690,708850 88 | -------------------------------------------------------------------------------- /structures/binary-search.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Binary search" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Simple binary search" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 74, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "Found: 70\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "def binary_search(arr, s):\n", 32 | "\n", 33 | " l = 0\n", 34 | " r = len(arr) - 1\n", 35 | "\n", 36 | " while l <= r:\n", 37 | " mid = int((l + r) / 2);\n", 38 | " current_val = arr[mid]\n", 39 | "# print(\"L: %3d R: %3d M: %3d\" %(l, r, mid))\n", 40 | " if current_val == s:\n", 41 | " return mid\n", 42 | " if current_val > s:\n", 43 | " r = mid - 1\n", 44 | " if current_val < s:\n", 45 | " l = mid + 1\n", 46 | "\n", 47 | " current_val = arr[mid]\n", 48 | " return -1\n", 49 | "\n", 50 | "arr = [2, 3, 4, 10, 40, 43, 50, 51, 52, 53, 60, 70]\n", 51 | "found = binary_search(arr, 70)\n", 52 | "if found != -1:\n", 53 | " print(\"Found:\", arr[found])\n", 54 | "else:\n", 55 | " print(\"Not found\")" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Root finding" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 119, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "0.0000 13.5000 (8128549501381546231610196145209344.0000) 27.0000\n", 75 | "0.0000 6.7500 (7570301649516025773817856.0000) 13.5000\n", 76 | "0.0000 3.3750 (7050392822843069.0000) 6.7500\n", 77 | "0.0000 1.6875 (6566189.9958) 3.3750\n", 78 | "0.0000 0.8438 (0.0061) 1.6875\n", 79 | "0.8438 1.2656 (1172.6039) 1.6875\n", 80 | "0.8438 1.0547 (4.9399) 1.2656\n", 81 | "1.0547 1.1602 (86.1975) 1.2656\n", 82 | "1.0547 1.1074 (21.3498) 1.1602\n", 83 | "1.1074 1.1338 (43.2482) 1.1602\n", 84 | "1.1074 1.1206 (30.4496) 1.1338\n", 85 | "1.1074 1.1140 (25.5103) 1.1206\n", 86 | "1.1140 1.1173 (27.8744) 1.1206\n", 87 | "1.1140 1.1157 (26.6670) 1.1173\n", 88 | "1.1157 1.1165 (27.2643) 1.1173\n", 89 | "1.1157 1.1161 (26.9641) 1.1165\n", 90 | "1.1161 1.1163 (27.1138) 1.1165\n", 91 | "1.1161 1.1162 (27.0388) 1.1163\n", 92 | "1.1161 1.1161 (27.0014) 1.1162\n" 93 | ] 94 | }, 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "1.1161251068115234" 99 | ] 100 | }, 101 | "execution_count": 119, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "def root(x, n):\n", 108 | " l = 0\n", 109 | " r = x\n", 110 | " while l <= r:\n", 111 | " mid = (l + r) / 2\n", 112 | " p = pow(mid,n)\n", 113 | " print(\"%.4f %.4f (%.4f) %.4f\" % (l, mid, p, r))\n", 114 | " if abs(p - x) < 0.01:\n", 115 | " return mid\n", 116 | " if p < x: \n", 117 | " l = mid \n", 118 | " else:\n", 119 | " r = mid \n", 120 | " return 0\n", 121 | "root(27,30)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## Array Index & Element Equality" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 151, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "[-8, -7, 0, 0, 4]\n", 141 | "[-8, -8, -2, -3, 0]\n" 142 | ] 143 | }, 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "4" 148 | ] 149 | }, 150 | "execution_count": 151, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "def index_equals_value_search(arr):\n", 157 | " start = 0\n", 158 | " end = len(arr) - 1\n", 159 | " while start <= end:\n", 160 | " i = (start + end) // 2\n", 161 | " # Check that's the first element or \n", 162 | " # the previous is not one of those\n", 163 | " # values that we are looking for\n", 164 | " if arr[i] - i == 0 and (i == 0 or (arr[i-1] - (i-1) < 0)):\n", 165 | " return i\n", 166 | " if arr[i] - i < 0:\n", 167 | " start = i + 1\n", 168 | " else:\n", 169 | " end = i - 1\n", 170 | " return -1\n", 171 | "\n", 172 | "arr = [-13,-9,-6,3,4,5,10,11,14]\n", 173 | "arr = [-8,0,2,5]\n", 174 | "arr = [-8,-7,0,0,4]\n", 175 | "dif = [arr[i] - i for i in range(len(arr))]\n", 176 | "print(arr)\n", 177 | "print(dif)\n", 178 | "index_equals_value_search(arr)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [] 187 | } 188 | ], 189 | "metadata": { 190 | "kernelspec": { 191 | "display_name": "Python 3", 192 | "language": "python", 193 | "name": "python3" 194 | }, 195 | "language_info": { 196 | "codemirror_mode": { 197 | "name": "ipython", 198 | "version": 3 199 | }, 200 | "file_extension": ".py", 201 | "mimetype": "text/x-python", 202 | "name": "python", 203 | "nbconvert_exporter": "python", 204 | "pygments_lexer": "ipython3", 205 | "version": "3.6.4" 206 | } 207 | }, 208 | "nbformat": 4, 209 | "nbformat_minor": 2 210 | } 211 | -------------------------------------------------------------------------------- /problems/Delta encoding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "arr = [25626,\n", 10 | "25757,\n", 11 | "24367,\n", 12 | "24267,\n", 13 | "16,\n", 14 | "100,\n", 15 | "2,\n", 16 | "7277]" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 9, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "[25626, -128, 131, -128, -1390, -100, -128, -24251, 84, -98, -128, 7275]\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "res = [arr[0]]\n", 34 | "for i in range(1,len(arr)):\n", 35 | " dif = arr[i] -arr[i-1]\n", 36 | " if not -127 <= dif <= 127:\n", 37 | " res.append(-128)\n", 38 | " res.append(dif)\n", 39 | "print(res)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 93, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "1481122000 1481122020\n", 52 | "1481122020 1481122040\n", 53 | "1481122020 1481122040\n", 54 | "1481122040 1481122045\n", 55 | "1481122040 \n", 56 | "1481122045 \n", 57 | "3\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "\n", 63 | "\n", 64 | "\n", 65 | "customer = 1\n", 66 | "\n", 67 | "calls = [\n", 68 | " [1481122000, 1481122020],\n", 69 | " [1481122020, 1481122040],\n", 70 | " [1481122020, 1481122040],\n", 71 | " [1481122040, 1481122045]\n", 72 | "]\n", 73 | "\n", 74 | "\n", 75 | "times = []\n", 76 | "for call in calls:\n", 77 | " start = str(call[0])+\"A\"\n", 78 | " end = str(call[1])+\"Z\"\n", 79 | " times.append(start)\n", 80 | " times.append(end)\n", 81 | "times = sorted(times)\n", 82 | "\n", 83 | "prev_time = \"\"\n", 84 | "max_overlapping = 0\n", 85 | "overlap = 0\n", 86 | "i = 0\n", 87 | "while i < len(times):\n", 88 | " current = times[i]\n", 89 | " if current[-1] == \"A\":\n", 90 | " overlap += 1\n", 91 | " else:\n", 92 | " overlap -= 1\n", 93 | " i += 1\n", 94 | "\n", 95 | " next_time = times[i+1][:-1] if i < len(times)-1 else \"\"\n", 96 | " if current[:-1] != next_time:\n", 97 | " print(current[:-1], next_time)\n", 98 | " max_overlapping = max(max_overlapping, overlap)\n", 99 | "print(max_overlapping)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 17, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "['hello', 'world']" 111 | ] 112 | }, 113 | "execution_count": 17, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "\n", 120 | "def tokenize(string):\n", 121 | " words = []\n", 122 | " partial_word = \"\"\n", 123 | " for s in string:\n", 124 | " if ord('a') <= ord(s.lower()) <= ord('z'):\n", 125 | " partial_word += s.lower()\n", 126 | " elif s == \" \" and partial_word:\n", 127 | " words.append(partial_word)\n", 128 | " partial_word = \"\"\n", 129 | " if partial_word:\n", 130 | " words.append(partial_word)\n", 131 | " return words\n", 132 | "\n", 133 | "tokenize(\"hello . world \")" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "# Complete the function below.\n", 143 | "\n", 144 | "def tokenize(string):\n", 145 | " words = []\n", 146 | " partial_word = \"\"\n", 147 | " for s in string:\n", 148 | " if ord('a') <= ord(s.lower()) <= ord('z'):\n", 149 | " partial_word += s.lower()\n", 150 | " elif s == \" \" and partial_word:\n", 151 | " words.append(partial_word)\n", 152 | " partial_word = \"\"\n", 153 | " if partial_word:\n", 154 | " words.append(partial_word)\n", 155 | " return words\n", 156 | "\n", 157 | "def sort_hotels(keywords, hotel_ids, reviews):\n", 158 | " kw = set(tokenize(keywords))\n", 159 | " kw_occurrences = {}\n", 160 | " \n", 161 | " max_kw_ocurrences = 0\n", 162 | " \n", 163 | " for i in range(len(hotel_ids)):\n", 164 | " rev = reviews[i]\n", 165 | " id_ = hotel_ids[i]\n", 166 | " tokenized_review = tokenize(rev)\n", 167 | " for token in tokenized_review:\n", 168 | " if token in kw:\n", 169 | " kw_occurrences[id_] = kw_occurrences.get(id_,0) +1 \n", 170 | " max_kw_ocurrences = max(max_kw_ocurrences, kw_occurrences[id_])\n", 171 | " print(max_kw_ocurrences)\n", 172 | " # I was thinking of storing hotel_ids in an array indexed by the number of interesting\n", 173 | " # keywords\n", 174 | " pre_sorted = [(kw_occurrences[hotel_id], hotel_id) for hotel_id in kw_occurrences]\n", 175 | " sorted_ = sorted(pre_sorted, reverse=True) # Time expensive\n", 176 | " return [i[1] for i in sorted_]\n", 177 | "\n", 178 | "\n" 179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 3", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.6.4" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 2 203 | } 204 | -------------------------------------------------------------------------------- /problems/tree.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 58, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Enter your code here. Read input from STDIN. Print output to STDOUT\n", 10 | "\n", 11 | "class Node:\n", 12 | " def __init__(self, value):\n", 13 | " self.left:Node = None\n", 14 | " self.right:Node = None\n", 15 | " self.parent:Node = None\n", 16 | " self.value = value\n", 17 | " pass\n", 18 | " \n", 19 | " def set_node(self, node):\n", 20 | " if node.parent:\n", 21 | " return \"E4\"\n", 22 | " if not self.left:\n", 23 | " node.parent = self\n", 24 | " self.left = node\n", 25 | " elif not self.right:\n", 26 | " node.parent = self\n", 27 | " self.right = node\n", 28 | " else:\n", 29 | " return \"E3\"\n", 30 | " return None\n", 31 | " \n", 32 | " def has_cycle(self):\n", 33 | " seen_nodes = set()\n", 34 | " nodes = [self]\n", 35 | " while nodes:\n", 36 | " current = nodes.pop()\n", 37 | " if current:\n", 38 | " if current.value in seen_nodes:\n", 39 | " return True\n", 40 | " seen_nodes.add(current.value)\n", 41 | " nodes.append(current.left)\n", 42 | " nodes.append(current.right)\n", 43 | " return False\n", 44 | " \n", 45 | " def __str__(self):\n", 46 | " return self.value\n", 47 | " \n", 48 | " @staticmethod\n", 49 | " def s_expr(parent):\n", 50 | " if not parent: return \"\"\n", 51 | " smaller:Node = None\n", 52 | " larger: Node = None\n", 53 | " if parent.left and parent.right:\n", 54 | " smaller = parent.left if parent.left.value < parent.right.value else parent.right\n", 55 | " larger = parent.right if parent.left.value < parent.right.value else parent.left\n", 56 | " else:\n", 57 | " smaller = parent.left or parent.right\n", 58 | " return \"(\" + parent.value + Node.s_expr(smaller) + Node.s_expr(larger) + \")\"" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 66, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "E2\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "# Enter your code here. Read input from STDIN. Print output to STDOUT\n", 76 | "\n", 77 | "class Node:\n", 78 | " def __init__(self, value):\n", 79 | " self.left = None\n", 80 | " self.right = None\n", 81 | " self.parent = None\n", 82 | " self.value = value\n", 83 | " pass\n", 84 | " \n", 85 | " def set_node(self, node):\n", 86 | " if node.parent:\n", 87 | " return \"E4\"\n", 88 | " if not self.left:\n", 89 | " node.parent = self\n", 90 | " self.left = node\n", 91 | " elif not self.right:\n", 92 | " node.parent = self\n", 93 | " self.right = node\n", 94 | " else:\n", 95 | " return \"E3\"\n", 96 | " return None\n", 97 | " \n", 98 | " def has_cycle(self):\n", 99 | " seen_nodes = set()\n", 100 | " nodes = [self]\n", 101 | " while nodes:\n", 102 | " current = nodes.pop()\n", 103 | " if current:\n", 104 | " if current.value in seen_nodes:\n", 105 | " return True\n", 106 | " seen_nodes.add(current.value)\n", 107 | " nodes.append(current.left)\n", 108 | " nodes.append(current.right)\n", 109 | " return False\n", 110 | " \n", 111 | " def __str__(self):\n", 112 | " return self.value\n", 113 | " \n", 114 | " @staticmethod\n", 115 | " def s_expr(parent):\n", 116 | " if not parent: return \"\"\n", 117 | " smaller = None\n", 118 | " larger = None\n", 119 | " if parent.left and parent.right:\n", 120 | " smaller = parent.left if parent.left.value < parent.right.value else parent.right\n", 121 | " larger = parent.right if parent.left.value < parent.right.value else parent.left\n", 122 | " else:\n", 123 | " smaller = parent.left or parent.right\n", 124 | " return \"(\" + parent.value + Node.s_expr(smaller) + Node.s_expr(larger) + \")\"\n", 125 | " \n", 126 | "definition = \"(J,P) (J,O) (J,N) (J,P)\"\n", 127 | "\n", 128 | "edges = definition.split(\" \")\n", 129 | " \n", 130 | "node_dict = {}\n", 131 | "edges_seen = set()\n", 132 | "errors = []\n", 133 | "for edge in edges:\n", 134 | " if edge in edges_seen:\n", 135 | " errors.append(\"E2\")\n", 136 | " break\n", 137 | " edges_seen.add(edge)\n", 138 | " try:\n", 139 | " root = edge[1]\n", 140 | " child = edge[3]\n", 141 | " if not (\"A\" <= root <= \"Z\" and \"A\" <= child <= \"Z\"):\n", 142 | " errors.append(\"E1\")\n", 143 | " break\n", 144 | " except:\n", 145 | " errors.append(\"E1\")\n", 146 | " break\n", 147 | " \n", 148 | " root_node = node_dict.get(root, Node(root))\n", 149 | " child_node = node_dict.get(child, Node(child))\n", 150 | " result = root_node.set_node(child_node)\n", 151 | " if result:\n", 152 | " errors.append(result)\n", 153 | " \n", 154 | " node_dict[root] = root_node\n", 155 | " node_dict[child] = child_node\n", 156 | "\n", 157 | "roots = 0\n", 158 | "root = None\n", 159 | "for value in node_dict:\n", 160 | " if node_dict[value].parent == None:\n", 161 | " roots += 1\n", 162 | " root = value\n", 163 | " if roots > 1:\n", 164 | " errors.append(\"E5\")\n", 165 | " break\n", 166 | " \n", 167 | "if roots == 0:\n", 168 | " errors.append(\"E4\")\n", 169 | "elif node_dict[root].has_cycle():\n", 170 | " errors.append(\"E4\")\n", 171 | "\n", 172 | "if errors:\n", 173 | " print(sorted(errors)[0])\n", 174 | "else:\n", 175 | " A = node_dict[root]\n", 176 | " print(Node.s_expr(A))" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [] 192 | } 193 | ], 194 | "metadata": { 195 | "kernelspec": { 196 | "display_name": "Python 3", 197 | "language": "python", 198 | "name": "python3" 199 | }, 200 | "language_info": { 201 | "codemirror_mode": { 202 | "name": "ipython", 203 | "version": 3 204 | }, 205 | "file_extension": ".py", 206 | "mimetype": "text/x-python", 207 | "name": "python", 208 | "nbconvert_exporter": "python", 209 | "pygments_lexer": "ipython3", 210 | "version": "3.6.4" 211 | } 212 | }, 213 | "nbformat": 4, 214 | "nbformat_minor": 2 215 | } 216 | -------------------------------------------------------------------------------- /Stats/Variance-covariance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Variance\n", 17 | "(from [http://www.mathsisfun.com/data/standard-deviation.html](http://www.mathsisfun.com/data/standard-deviation.html))\n", 18 | "\n", 19 | " > The average of the squared differences from the Mean.\n", 20 | " \n", 21 | "To calculate the variance follow these steps:\n", 22 | "\n", 23 | " 1. Work out the Mean (the simple average of the numbers)\n", 24 | " 2. Then for each number: subtract the Mean and square the result (the squared difference).\n", 25 | " 3. Then work out the average of those squared differences. ([Why Square?](http://www.mathsisfun.com/data/standard-deviation.html#WhySquare))" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 50, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "Mean 394.0\n", 38 | "Variance 21704.000000 (21704.000000)\n", 39 | "Std 147.322775 (147.322775)\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "dogs = np.array([600, 470, 170, 430, 300])\n", 45 | "\n", 46 | "dogs_mean = np.mean(dogs)\n", 47 | "print(\"Mean\", dogs_mean)\n", 48 | "\n", 49 | "dogs_diff = dogs - dogs_mean\n", 50 | "dogs_variance = np.sum(dogs_diff ** 2) / len(dogs)\n", 51 | "print(\"Variance %f (%f)\" % (dogs_variance, np.var(dogs)))\n", 52 | "\n", 53 | "dogs_std = np.sqrt(dogs_variance)\n", 54 | "print(\"Std %f (%f)\" % (dogs_std, np.std(dogs)))" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "### But ... there is a small change with Sample Data (degrees of freedom)\n", 62 | "Our example has been for a Population (the 5 dogs are the only dogs we are interested in). But if the data is a Sample (a selection taken from a bigger Population), then the calculation changes!\n", 63 | "\n", 64 | "When you have $N$ data values that are:\n", 65 | "\n", 66 | " - **The Population**: divide by $N$ when calculating *Variance* (like we did)\n", 67 | " - **A Sample**: divide by $N-1$ when calculating *Variance*" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Covariance\n", 75 | "(from [https://www.itl.nist.gov/div898/handbook/pmc/section5/pmc541.htm](https://www.itl.nist.gov/div898/handbook/pmc/section5/pmc541.htm))\n", 76 | "\n", 77 | "#### Sample data matrix\n", 78 | "\n", 79 | "Consider the following matrix:\n", 80 | "\n", 81 | "$${\\bf X} = \\left[ \\begin{array}{ccc} \n", 82 | "4.0 & 2.0 & 0.60 \\\\\n", 83 | "4.2 & 2.1 & 0.59 \\\\\n", 84 | "3.9 & 2.0 & 0.58 \\\\\n", 85 | "4.3 & 2.1 & 0.62 \\\\\n", 86 | "4.1 & 2.2 & 0.63 \n", 87 | "\\end{array} \\right]$$" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 55, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "X = np.array([ \n", 97 | "[4.0, 2.0, 0.60],\n", 98 | "[4.2, 2.1, 0.59],\n", 99 | "[3.9, 2.0, 0.58],\n", 100 | "[4.3, 2.1, 0.62],\n", 101 | "[4.1, 2.2, 0.63]])" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "The set of 5 observations, measuring 3 variables, can be described by its mean vector and variance-covariance matrix. The three variables, from left to right are length, width, and height of a certain object, for example. Each row vector $X_i$ is another observation of the three variables (or components).\n", 109 | "\n", 110 | "#### Definition of mean vector and variance-covariance matrix\n", 111 | "\n", 112 | "The mean vector consists of the means of each variable and the variance-covariance matrix consists of the *variances* of the variables along the main diagonal and the *covariances* between each pair of variables in the other matrix positions. \n", 113 | "\n", 114 | "The formula for computing the covariance of the variables $X$ and $Y$ is:\n", 115 | "\n", 116 | "$$\\mbox{COV} = \\frac{\\sum_{i=1}^n (X_i - \\bar{x})(Y_i - \\bar{y})}{n-1} \\, ,$$\n", 117 | "\n", 118 | "with $\\bar{x}$ and $\\bar{y}$ denoting the means of $X$ and $Y$, respectively." 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "#### Mean vector and variance-covariance matrix for sample data matrix" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 56, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "[4.1 2.08 0.604]\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "X_mean = np.mean(X, axis=0)\n", 143 | "print(X_mean)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "$${\\bf \\bar{x}} = \\left[ \\begin{array}{ccc} \n", 151 | "4.10 & 2.08 & 0.604\n", 152 | "\\end{array} \\right]$$" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 104, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "[[0.025 0.0075 0.00175]\n", 165 | " [0.0075 0.007 0.00135]\n", 166 | " [0.00175 0.00135 0.00043]]\n", 167 | "\n", 168 | "[[0.025 0.0075 0.00175]\n", 169 | " [0.0075 0.007 0.00135]\n", 170 | " [0.00175 0.00135 0.00043]]\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "X_0 = X[:,0]\n", 176 | "X_1 = X[:,1]\n", 177 | "X_2 = X[:,2]\n", 178 | "\n", 179 | "def calc_variance(x_1, x_2):\n", 180 | " diff_1 = x_1 - np.mean(x_1)\n", 181 | " diff_2 = x_2 - np.mean(x_2)\n", 182 | " return np.sum(diff_1 * diff_2) / (len(x_1)-1)\n", 183 | "\n", 184 | "# \"Manual\"\n", 185 | "cov = []\n", 186 | "for i in range(X.shape[1]):\n", 187 | " row = []\n", 188 | " for j in range(X.shape[1]):\n", 189 | " row.append(calc_variance(X[:,i], X[:,j]))\n", 190 | " cov.append(row)\n", 191 | " \n", 192 | "print(np.array(cov))\n", 193 | "print()\n", 194 | "print(np.cov(X, rowvar=False))" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "$${\\bf S} = \\left[ \\begin{array}{ccc} \n", 202 | "0.025 & 0.0075 & 0.00175 \\\\\n", 203 | "0.0075 & 0.0070 & 0.00135 \\\\\n", 204 | "0.00175 & 0.00135 & 0.00043 \n", 205 | "\\end{array} \\right]$$" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [] 214 | } 215 | ], 216 | "metadata": { 217 | "kernelspec": { 218 | "display_name": "Python 3", 219 | "language": "python", 220 | "name": "python3" 221 | }, 222 | "language_info": { 223 | "codemirror_mode": { 224 | "name": "ipython", 225 | "version": 3 226 | }, 227 | "file_extension": ".py", 228 | "mimetype": "text/x-python", 229 | "name": "python", 230 | "nbconvert_exporter": "python", 231 | "pygments_lexer": "ipython3", 232 | "version": "3.6.1" 233 | } 234 | }, 235 | "nbformat": 4, 236 | "nbformat_minor": 2 237 | } 238 | -------------------------------------------------------------------------------- /keras/using-sequences.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "From [https://machinelearningmastery.com/reproducible-results-neural-networks-keras/](https://machinelearningmastery.com/reproducible-results-neural-networks-keras/)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "Using TensorFlow backend.\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "from pandas import DataFrame, concat\n", 25 | "from keras.models import Sequential\n", 26 | "from keras.layers import Dense, Activation\n", 27 | "\n", 28 | "import numpy as np" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/plain": [ 39 | "(array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),\n", 40 | " array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]))" 41 | ] 42 | }, 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "# create sequence\n", 50 | "length = 11\n", 51 | "sequence = [i/ 10.0 for i in range(length)]\n", 52 | "# create X/y pairs\n", 53 | "df = DataFrame(sequence)\n", 54 | "df = concat([df.shift(1), df], axis=1)\n", 55 | "df.dropna(inplace=True)\n", 56 | "# convert to MLPfriendly format\n", 57 | "values = df.values\n", 58 | "X, y = values[:,0], values[:,1]\n", 59 | "X, y" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "from sklearn.metrics import mean_squared_error\n", 69 | "\n", 70 | "# design network\n", 71 | "model = Sequential()\n", 72 | "model.add(Dense(10, input_dim=1))\n", 73 | "model.add(Dense(1))\n", 74 | "model.compile(loss='mean_squared_error', optimizer='adam')\n", 75 | "# fit network\n", 76 | "#model.fit(X, y, epochs=1000, batch_size=len(X), verbose=0)\n", 77 | "# forecast\n", 78 | "#yhat = model.predict(X, verbose=0)\n", 79 | "#print(mean_squared_error(y, yhat[:,0]))\n", 80 | "#yhat" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Using `keras.utils.Sequence`\n", 88 | "\n", 89 | "From [https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly](https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 4, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "{'validation': ['1.0', '1.1', '1.2', '1.3', '1.4', '1.5'], 'train': ['0.0', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']}\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "partition = {}\n", 107 | "partition['validation'] = [f'{(l/10.0):0.2}' for l in range(length - 1, length + 5)]\n", 108 | "partition['train'] = [f'{l:0.2}' for l in X]\n", 109 | "print(partition)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 5, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "{'1.0': 1.1, '1.1': 1.2000000000000002, '1.2': 1.3, '1.3': 1.4000000000000001, '1.4': 1.5, '1.5': 1.6, '0.0': 0.1, '0.1': 0.2, '0.2': 0.30000000000000004, '0.3': 0.4, '0.4': 0.5, '0.5': 0.6, '0.6': 0.7, '0.7': 0.7999999999999999, '0.8': 0.9, '0.9': 1.0}\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "labels = { l:float(l)+0.1 for l in partition['validation'] + partition['train']}\n", 127 | "print(labels)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "from keras.utils import Sequence\n", 144 | "import numpy as np\n", 145 | "\n", 146 | "\n", 147 | "class DataGenerator(Sequence):\n", 148 | "\n", 149 | " def __init__(self, data_ids, labels, shuffle=True):\n", 150 | " \"\"\"Initialization\"\"\"\n", 151 | " self.labels = labels\n", 152 | " self.batch_size = 5\n", 153 | " self.data_ids = data_ids\n", 154 | " self.shuffle = shuffle\n", 155 | " self.on_epoch_end()\n", 156 | "\n", 157 | " def on_epoch_end(self):\n", 158 | " \"\"\"Updates indexes after each epoch\"\"\"\n", 159 | " self.indexes = np.arange(len(self.data_ids))\n", 160 | " if self.shuffle:\n", 161 | " np.random.shuffle(self.indexes)\n", 162 | "\n", 163 | " def __data_generation(self, temporary_list_id):\n", 164 | " \"\"\"Generates data containing batch_size sample\"\"\"\n", 165 | " # Initialization\n", 166 | " X = np.empty((self.batch_size))\n", 167 | " y = np.empty((self.batch_size))\n", 168 | "\n", 169 | " # Generate data\n", 170 | " for i, ID in enumerate(temporary_list_id):\n", 171 | " # Store sample\n", 172 | " X[i,] = self.__get_datapoint(ID)\n", 173 | "\n", 174 | " # Store class\n", 175 | " y[i] = self.labels[ID]\n", 176 | "\n", 177 | " return X, y\n", 178 | "\n", 179 | " def __get_datapoint(self, identifier):\n", 180 | " \"\"\"Perform complicated logic here\"\"\"\n", 181 | " return float(identifier)\n", 182 | "\n", 183 | " def __len__(self):\n", 184 | " 'Denotes the number of batches per epoch'\n", 185 | " number_of_batches = int(np.floor(len(self.data_ids) / self.batch_size))\n", 186 | " return number_of_batches\n", 187 | "\n", 188 | " def __getitem__(self, index):\n", 189 | " # Generate indexes of the batch\n", 190 | " indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]\n", 191 | "\n", 192 | " # Find list of IDs\n", 193 | " temp_ids = [self.data_ids[k] for k in indexes]\n", 194 | "\n", 195 | " # Generate data\n", 196 | " X, y = self.__data_generation(temp_ids)\n", 197 | " return X, y\n" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 7, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "training_generator = DataGenerator(partition['train'], labels)\n", 207 | "validation_generator = DataGenerator(partition['validation'], labels)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 9, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "name": "stdout", 217 | "output_type": "stream", 218 | "text": [ 219 | "7.960299093427971e-16\n" 220 | ] 221 | }, 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "array([[0.1 ],\n", 226 | " [0.19999999],\n", 227 | " [0.3 ],\n", 228 | " [0.40000004],\n", 229 | " [0.5 ],\n", 230 | " [0.59999996],\n", 231 | " [0.70000005],\n", 232 | " [0.79999995],\n", 233 | " [0.9 ],\n", 234 | " [1. ]], dtype=float32)" 235 | ] 236 | }, 237 | "execution_count": 9, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "model.fit_generator(generator=training_generator,\n", 244 | " validation_data=validation_generator, epochs=1000, verbose=0)\n", 245 | "\n", 246 | "yhat = model.predict(X, verbose=0)\n", 247 | "print(mean_squared_error(y, yhat[:,0]))\n", 248 | "yhat" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [] 257 | } 258 | ], 259 | "metadata": { 260 | "kernelspec": { 261 | "display_name": "Python 3", 262 | "language": "python", 263 | "name": "python3" 264 | }, 265 | "language_info": { 266 | "codemirror_mode": { 267 | "name": "ipython", 268 | "version": 3 269 | }, 270 | "file_extension": ".py", 271 | "mimetype": "text/x-python", 272 | "name": "python", 273 | "nbconvert_exporter": "python", 274 | "pygments_lexer": "ipython3", 275 | "version": "3.6.5" 276 | } 277 | }, 278 | "nbformat": 4, 279 | "nbformat_minor": 2 280 | } 281 | -------------------------------------------------------------------------------- /Python/Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python.\n", 8 | "\n", 9 | "Como muchos de ustedes ya sabrán, Python es un lenguaje de programación, que como muy pocos seguramente saben, tomó su nombre no de una serpiente, si no de un programa de comedia británico, pero en fin. Python fue publicado en 1991 por Guido van Rossum, inicialmente fue pensado como un simple lenguaje de scripting pero en la actualidad se ha infiltrado en el desarrollo web, la ciencia de datos, machine learning y ramas afines.\n", 10 | "\n", 11 | "## Filosofía. \n", 12 | "\n", 13 | "La filosofía detrás de Python podría estar resumida en un documento que fue creado en 1999 llamado , ocho años después de su creación. Pueden consultar el documento en este enlace: pero les voy a decir algunos de estos principios que sí, suenan muy filosóficos: \n", 14 | "\n", 15 | " - Beautiful is better than ugly\n", 16 | " - Explicit is better than implicit\n", 17 | " - Simple is better than complex\n", 18 | " - Readability counts\n", 19 | " - There should be one—and preferably only one—obvious way to do it.\n", 20 | " - If the implementation is hard to explain, it's a bad idea.\n", 21 | "\n", 22 | "Lo cierto es que mientras que estos principios suenan bonitos, el escribir software todavía recae en los humanos, así que estos principios no se aplican muchas veces. Y, por ejemplo, puedes encontrar que en Python es normal que encuentres más de una manera de hacer las cosas. \n", 23 | "\n", 24 | "## Características\n", 25 | "\n", 26 | "**Es dinámicamente tipado**: Porque podemos hacer algo como esto: " 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": { 33 | "collapsed": true 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "a = 1\n", 38 | "b = 'C'\n", 39 | "c = [0.1, 0.5]" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Es decir, no es necesario especificar el tipo de dato de una variable antes de declararla. Y no existe un compilador, ni el intérprete, que esté comprobando esto antes de que el programa se esté ejecutando. \n", 47 | "\n", 48 | "También permite algo como esto:" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "a = 1\n", 60 | "a = 'C'\n", 61 | "a = [0.1, 0.5]" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Es decir, cambiar por completo el tipo de dato de una variable sin que nadie diga nada. Y créanme, esto puede ser motivo de muchas confusiones, pero una vez que te acostumbras, puede llegar a ser una herramienta muy útil. \n", 69 | "\n", 70 | "Sin embargo, también es considerado un lenguaje **fuertemente tipado** (cabe recalcar que puede existir esta combinación: dinámico y fuertemente tipado a la vez). Es considerado fuertemente tipado porque el lenguaje define un conjunto de reglas (de comportamientos) bajo las cuales los tipos de dato se pueden mezclar entre ellos, y romper esas reglas generará una excepción. Toma por ejemplo el siguiente código:" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "ename": "TypeError", 80 | "evalue": "must be str, not int", 81 | "output_type": "error", 82 | "traceback": [ 83 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 84 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", 85 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0ma3\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"a\"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 86 | "\u001b[1;31mTypeError\u001b[0m: must be str, not int" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "a3 = \"a\" + 3 " 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "por increíble que parezca, esto nos generaría un error puesto que los tipos de dato int y string no definen una forma de mezclarse, si quieres concatenar las cadenas tendrías que primero convertir el entero a cadena.\n", 99 | "\n", 100 | "**No existen los corchetes (o llaves)**: sino que los bloques de código se definen usando indentaciones (tabs o espacios, lo que importa es que seas consistente en el método de indentación que usas), es decir un bloque `if` se define de la siguiente manera: " 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 4, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "b es C\n" 113 | ] 114 | } 115 | ], 116 | "source": [ 117 | "if b == 'C':\n", 118 | " print(\"b es C\")\n", 119 | "elif b == 'A':\n", 120 | " print(\"b es A\")" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "O un código un poco más elaborado se vería así: " 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "def del_none(d):\n", 139 | " for key, value in list(d.items()):\n", 140 | " if value is None:\n", 141 | " del d[key]\n", 142 | " elif isinstance(value, str):\n", 143 | " d[key] = d[key].strip()\n", 144 | " elif isinstance(value, dict):\n", 145 | " del_none(value)\n", 146 | " return d" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "Ah, seguramente lo notaste, pero Python tampoco requiere que uses un `;` para terminar cada instrucción, la idea es que exista una instrucción por cada línea.\n", 154 | "\n", 155 | "Como tal vez pudiste ver, es también un **lenguaje de alto nivel**: La idea es abstraer (esconder) la mayor cantidad de detalles de implementación. Es un lenguaje de alto nivel y en ocasiones es muy sencillo leer programas escritos en este lenguaje, y a mi parecer, en muchos casos como si estuvieras leyendo un libro escrito en inglés. \n", 156 | "\n", 157 | "Python es también **multiparadigma**, puedes organizar tu código en clases, o utilizarlo como un lenguaje funcional, o puedes simplemente crear un programa que se ejecute proceduralmente... o una combinación de todo esto. \n", 158 | "\n", 159 | "**Altamente extensible**: tiene soporte para descargar módulos o bibliotecas de repositorios de paquetes que permiten que añadirle funcionalidad a tus programas, así que es normal que cuando descargues un proyecto tengas que descargar los paquetes asociados con instrucciones como las siguientes: \n", 160 | "\n", 161 | "```\n", 162 | "pip install package-name\n", 163 | "easy_install package-name\n", 164 | "``` \n", 165 | "\n", 166 | "Cuenta con una **consola interactiva** o REPL\n", 167 | "\n", 168 | "Es **multiplataforma** y no está fuertemente ligado a un sistema operativo ni a un entorno de desarrollo.\n", 169 | " \n", 170 | "## Desventajas \n", 171 | "\n", 172 | "- Considerado **lento**\n", 173 | "- A pesar de ser muy usado, hay áreas en las que no tiene mucho impacto, como el desarrollo para móviles \n", 174 | "- Consume mucha memoria y facilita la escritura de código que, aunque funciona, no está muy optimizado \n", 175 | "- Puede hacer que otros lenguajes sean difíciles de trabajar, uno se acostumbra muy rápido a las bondades de Python, a mi de pronto ya se me olvida poner puntos y coma en C# \n", 176 | "\n", 177 | "## Razones para aprender \n", 178 | "\n", 179 | " - Quieres desarrollar aplicaciones web\n", 180 | " - Te interesa automatizar tareas repetitivas\n", 181 | " - Quieres analizar datos\n", 182 | " - Es entretenido\n", 183 | " - Una herramienta más en tus habilidades \n", 184 | " \n", 185 | "## IDEs \n", 186 | "\n", 187 | " - [Thonny](http://thonny.org/)\n", 188 | " - [PyCharm](https://www.jetbrains.com/pycharm/)\n", 189 | " - [PyScripter](https://github.com/pyscripter/pyscripter)\n", 190 | " - [Visual Studio](https://www.visualstudio.com/es/vs/python/) y [Visual Studio Code](https://code.visualstudio.com/docs/languages/python) (con plugins)\n", 191 | " - [PyDev (Eclipse)](http://www.pydev.org/)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "collapsed": true 198 | }, 199 | "source": [ 200 | "## Recursos para aprender\n", 201 | "\n", 202 | " - [LearnPython.org (en español)](https://www.learnpython.org/es/)\n", 203 | " - [\"Python instantáneo\"](http://rapto.arrakis.es/AprendaPython.html)\n", 204 | " - [Python in one pic](https://github.com/coodict/python3-in-one-pic/blob/master/notebooks/py3-in-one-pic.ipynb) and [Python in one pic (interactive)](http://coodict.github.io/python3-in-one-pic/)\n", 205 | " - [A Byte of Python](https://python.swaroopch.com/)\n", 206 | " - [Introduction to Programming with Python](https://mva.microsoft.com/en-US/training-courses/introduction-to-programming-with-python-8360)\n", 207 | " - [Automate the Boring Stuff with Python](http://automatetheboringstuff.com/)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "collapsed": true 215 | }, 216 | "outputs": [], 217 | "source": [] 218 | } 219 | ], 220 | "metadata": { 221 | "kernelspec": { 222 | "display_name": "Python 3", 223 | "language": "python", 224 | "name": "python3" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.6.2" 237 | } 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 2 241 | } 242 | -------------------------------------------------------------------------------- /problems/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 35, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/html": [ 11 | "
\n", 12 | "\n", 25 | "\n", 26 | " \n", 27 | " \n", 28 | " \n", 29 | " \n", 30 | " \n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | "
peaks
cromosomaorigen
chr1H3K27ac2499
H3K27ac2460
H3K27ac3568
H3K27ac1542
H3K27ac2192
H3K27ac1238
H3K27ac3207
H3K27ac2170
H3K27ac2109
H3K27ac765
H3K27ac4005
H3K27ac1103
H3K27ac2826
H3K27ac2230
H3K27ac2169
H3K27ac2346
H3K27ac5730
H3K27ac3416
H3K27ac2158
H3K27ac2474
H3K27ac3843
H3K27ac1233
H3K27ac2689
H3K27ac3184
H3K27ac4650
H3K27ac1356
H3K27ac2435
H3K27ac3478
H3K27ac3201
H3K27ac1431
.........
chrXH3K9me3183
H3K9me3151
H3K9me384
H3K9me398
H3K9me3141
H3K9me3132
H3K9me383
H3K9me3187
H3K9me3164
H3K9me3138
H3K9me3289
H3K9me359
H3K9me356
H3K9me3251
H3K9me3178
H3K9me3166
H3K9me318
H3K9me3181
H3K9me3170
H3K9me346
H3K9me3103
H3K9me3128
H3K9me3259
H3K9me3101
H3K9me3107
H3K9me3116
H3K9me335
H3K9me354
H3K9me340
H3K9me310
\n", 288 | "

32471 rows × 1 columns

\n", 289 | "
" 290 | ], 291 | "text/plain": [ 292 | " peaks\n", 293 | "cromosoma origen \n", 294 | "chr1 H3K27ac 2499\n", 295 | " H3K27ac 2460\n", 296 | " H3K27ac 3568\n", 297 | " H3K27ac 1542\n", 298 | " H3K27ac 2192\n", 299 | " H3K27ac 1238\n", 300 | " H3K27ac 3207\n", 301 | " H3K27ac 2170\n", 302 | " H3K27ac 2109\n", 303 | " H3K27ac 765\n", 304 | " H3K27ac 4005\n", 305 | " H3K27ac 1103\n", 306 | " H3K27ac 2826\n", 307 | " H3K27ac 2230\n", 308 | " H3K27ac 2169\n", 309 | " H3K27ac 2346\n", 310 | " H3K27ac 5730\n", 311 | " H3K27ac 3416\n", 312 | " H3K27ac 2158\n", 313 | " H3K27ac 2474\n", 314 | " H3K27ac 3843\n", 315 | " H3K27ac 1233\n", 316 | " H3K27ac 2689\n", 317 | " H3K27ac 3184\n", 318 | " H3K27ac 4650\n", 319 | " H3K27ac 1356\n", 320 | " H3K27ac 2435\n", 321 | " H3K27ac 3478\n", 322 | " H3K27ac 3201\n", 323 | " H3K27ac 1431\n", 324 | "... ...\n", 325 | "chrX H3K9me3 183\n", 326 | " H3K9me3 151\n", 327 | " H3K9me3 84\n", 328 | " H3K9me3 98\n", 329 | " H3K9me3 141\n", 330 | " H3K9me3 132\n", 331 | " H3K9me3 83\n", 332 | " H3K9me3 187\n", 333 | " H3K9me3 164\n", 334 | " H3K9me3 138\n", 335 | " H3K9me3 289\n", 336 | " H3K9me3 59\n", 337 | " H3K9me3 56\n", 338 | " H3K9me3 251\n", 339 | " H3K9me3 178\n", 340 | " H3K9me3 166\n", 341 | " H3K9me3 18\n", 342 | " H3K9me3 181\n", 343 | " H3K9me3 170\n", 344 | " H3K9me3 46\n", 345 | " H3K9me3 103\n", 346 | " H3K9me3 128\n", 347 | " H3K9me3 259\n", 348 | " H3K9me3 101\n", 349 | " H3K9me3 107\n", 350 | " H3K9me3 116\n", 351 | " H3K9me3 35\n", 352 | " H3K9me3 54\n", 353 | " H3K9me3 40\n", 354 | " H3K9me3 10\n", 355 | "\n", 356 | "[32471 rows x 1 columns]" 357 | ] 358 | }, 359 | "execution_count": 35, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "from glob import glob \n", 366 | "import pandas as pd\n", 367 | "\n", 368 | "len_prefix = len(\"MCF7_\")\n", 369 | "len_suffix = len(\"_narrowPeak.bed\")\n", 370 | "\n", 371 | "dataframes = []\n", 372 | "\n", 373 | "archivos = glob(\"MCF7*\")\n", 374 | "for file in archivos:\n", 375 | " f = file[len_prefix:-len_suffix]\n", 376 | " data = pd.read_csv(file, sep='\\t',header=None)\n", 377 | " cromosomas = data[0].values\n", 378 | " peaks = data[9].values\n", 379 | " origin = [f for i in range(peaks.size)]\n", 380 | " dataframes.append(pd.DataFrame({\n", 381 | " 'cromosoma' : cromosomas,\n", 382 | " 'origen': origin,\n", 383 | " 'peaks': peaks\n", 384 | " }))\n", 385 | "\n", 386 | "pd.concat(dataframes).set_index(['cromosoma','origen']).sort_index()" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [] 395 | } 396 | ], 397 | "metadata": { 398 | "kernelspec": { 399 | "display_name": "Python 3", 400 | "language": "python", 401 | "name": "python3" 402 | }, 403 | "language_info": { 404 | "codemirror_mode": { 405 | "name": "ipython", 406 | "version": 3 407 | }, 408 | "file_extension": ".py", 409 | "mimetype": "text/x-python", 410 | "name": "python", 411 | "nbconvert_exporter": "python", 412 | "pygments_lexer": "ipython3", 413 | "version": "3.6.4" 414 | } 415 | }, 416 | "nbformat": 4, 417 | "nbformat_minor": 2 418 | } 419 | -------------------------------------------------------------------------------- /sklearn/texts/the-office-us-01.txt: -------------------------------------------------------------------------------- 1 | All right, Jim. 2 | Your quarterlies look very good. 3 | - How are things at the library? - I told you. 4 | I couldn't close it. 5 | So you've come to the master for guidance? Is this what you're saying, grasshopper? Actually, you called me in here, but, yeah. 6 | All right. 7 | Well, let me show you how it's done. 8 | I'd like to speak to your office manager, please. 9 | Yes, hello. 10 | This is Michael Scott. 11 | I am the Regional Manager of Dunder Mifflin Paper Products. 12 | Just wanted to talk to you manager-a-manager. 13 | All right. 14 | Done deal. 15 | Thank you very much, sir. 16 | You're a gentleman and a scholar. 17 | Oh, I'm sorry. 18 | OK. 19 | I'm sorry. 20 | My mistake. 21 | That was a woman I was talking to, so She had a very low voice. 22 | Probably a smoker, so So that's the way it's done. 23 | I've, er, I've been at Dunder Mifflin for 12 years, the last four as Regional Manager. 24 | If you want to come through here We have the entire floor. 25 | So this is my kingdom, as far as the eye can see. 26 | This is our receptionist, Pam. 27 | Pam! Pam-Pam! Pam Beasley. 28 | Pam has been with us for forever. 29 | Right, Pam? Well, I don't know. 30 | If you think she's cute now, you should have seen her a few years ago. 31 | - What? - Any messages? Er, yeah. 32 | Just a fax. 33 | Oh! Pam, this is from Corporate. 34 | How many times have I told you? There's a special filing cabinet. 35 | - You haven't told me. 36 | - It's called the wastepaper basket! Look at that. 37 | Look at that face. 38 | People say I am the best boss. 39 | They go, "We've never worked in a place like this. 40 | You're hilarious. 41 | "And you get the best out of us. 42 | " I think that pretty much sums it up. 43 | I found it at Spencer Gifts. 44 | Shall I play for you? Pa rum pum pum pum I have no gifts for you Pa rum pum pum pum My job is to speak to clients on the phone about er, quantities and type of copier paper. 45 | You know, whether we can supply it to them. 46 | Whether they can pay for it. 47 | And I'm boring myself just talking about this. 48 | - Whassup! - Whassup! I still love that after seven years. 49 | - Whassup! - Whassup! Whass up! Whassup. 50 | What? - Nothing. 51 | - OK. 52 | All right. 53 | - See you later. 54 | - All right. 55 | - Take care. 56 | - Back to work. 57 | Just before lunch. 58 | That would be great. 59 | Corporate doesn't interfere with me. 60 | Jan Levitson Gould. 61 | Jan, hello. 62 | I call her Hillary Rodham Clinton. 63 | Right? Not to her face, because Well, not because I'm scared of her. 64 | Cos I'm not. 65 | But, um Yeah. 66 | Was there anything you wanted to add to the agenda? Umm Me no get an agenda. 67 | - What? I'm sorry? - I didn't get any agenda. 68 | I faxed one to you this morning. 69 | Really? I didn't Did we get a fax this morning? Uh, yeah, the one Why isn't it in my hand? A company runs on efficiency of communication, right? So what's the problem, Pam? Why didn't I get it? You put it in the garbage can that was the special filing cabinet. 70 | Yeah, that was a joke. 71 | That was a joke that was actually my brother's, and It was for bills. 72 | It doesn't work with faxes. 73 | - Do you want to look at mine? - Lovely. 74 | Thank you. 75 | OK. 76 | Since the last meeting, Ellen and the board decided we can't justify a Scranton branch and a Stamford branch. 77 | - OK. 78 | - Don't panic. 79 | - No, this is good. 80 | This is fine. 81 | - Listen. 82 | Don't panic. 83 | - Alarm bells are ring-ding-ding! - We haven't decided. 84 | I've spoken to Josh in Stamford and told him the same. 85 | It's up to you to convince me your branch can incorporate the other. 86 | But it does mean there'll be downsizing. 87 | Me no wanna hear that, Jan. 88 | Because downsizing is a bitch. 89 | It is a real bitch. 90 | And I wouldn't wish that on Josh's men. 91 | I certainly wouldn't wish it on my men. 92 | Or women, present company excluded. 93 | Sorry. 94 | Er, is Josh concerned about downsizing himself? Not downsizing himself but downsizing? Question. 95 | How long do we have to Todd Packer, terrific rep. 96 | Do you mind if I take it? Go ahead. 97 | - Packman. 98 | - Hey, you big queen. 99 | - That's not appropriate. 100 | - Is old Godzillary coming in today? - I don't know what you mean. 101 | - I've been meaning to ask her. 102 | - Does the carpet match the drapes? - Oh, my God! Oh! That's horrifying. 103 | Horrible. 104 | Horrible person. 105 | So do you think we could keep a lid on this for now? - I don't want to worry people. 106 | - Absolutely. 107 | Under this regime, it will not leave this office. 108 | Like that. 109 | - So what does downsizing actually mean? - Well you guys better update your résumés like I'm doing. 110 | I bet it's gonna be me. 111 | Probably gonna be me. 112 | Yeah, it'll be you. 113 | I have an important question for you. 114 | Are you going to Angela's cat party on Sunday? Stop. 115 | That is ridiculous. 116 | Am I going to tell them? No. 117 | I don't see the point. 118 | As a doctor, you would not tell a patient if they had cancer. 119 | - Hey. 120 | - Hey. 121 | - This is Mr. 122 | Scott. 123 | - Guilty as charged! Ryan Howard from the temp agency. 124 | Daniqua sent me to start today. 125 | Howard, like Moe Howard. 126 | Three Stooges. 127 | Watch this. 128 | This is Moe. 129 | Nyuck-nyuck-nyuck-nyuck-nyuck. 130 | Mee! Right here. 131 | Three Stooges. 132 | High five. 133 | Oh, Pam. 134 | It's a guy thing, Pam. 135 | I'm sort of a student of comedy. 136 | Watch this. 137 | Here we go. 138 | I'm Hitler. 139 | Adolf Hitler. 140 | I don't think it would be the worst thing if they let me go because then I might I don't think it's many little girls' dream to be a receptionist. 141 | I like to do illustrations. 142 | Erm Mostly watercolor. 143 | A few oil pencil. 144 | Erm, Jim thinks they're good. 145 | Dunder Mifflin. 146 | This is Pam. 147 | Sure. 148 | Mr. 149 | Davis, let me call you right back. 150 | Something just came up. 151 | Two minutes. 152 | Thank you. 153 | - Dwight, what are you doing? - What? - Clearing my desk. 154 | - It's not on your desk. 155 | It's overlapping. 156 | It's spilling over the edge. 157 | One word, two syllables. 158 | Demarcation. 159 | - You can't do that. 160 | - Why not? Safety violation. 161 | I could fall and pierce an organ. 162 | We'll see. 163 | This is why the whole downsizing thing doesn't bother me. 164 | Downsizing? I have no problem with that. 165 | I have been recommending downsizing since I first got here. 166 | I even brought it up in my interview. 167 | I say, bring it on. 168 | You have messages from yesterday. 169 | Relax. 170 | Everything's under control. 171 | Er, Yeah. 172 | Yeah. 173 | That's important. 174 | Right. 175 | This is so important, I should run to answer it. 176 | - What? - Come on. 177 | Six Million Dollar Man. 178 | Steve Austin. 179 | Actually, that would be a good salary for me, don't you think? Six million dollars? Memo to Jan. 180 | I deserve a raise. 181 | - Don't we all? - I'm sorry? Nothing. 182 | If you're unhappy with your compensation, take it up with HR. 183 | Not today, OK? Pam, just be professional. 184 | I think I'm a role model. 185 | I think I garner people's respect. 186 | Attention all employees. 187 | We have a meeting in the conference room, ASAP. 188 | The people I respect, heroes of mine, would be Bob Hope Abraham Lincoln, definitely. 189 | Bono. 190 | And probably God would be the fourth one. 191 | And I just think all those people really helped the world in so many ways that it's really beyond words. 192 | It's really incalculable. 193 | I want to set the record straight. 194 | I'm Assistant Regional Manager. 195 | I should know first. 196 | - Assistant to the Regional Manager. 197 | - Can you tell me? Tell me quietly. 198 | Whisper in my ear. 199 | - I'm about to. 200 | - Can you tell us? - Please, OK? Do you want me to tell 'em? - You don't know what it is. 201 | You tell 'em. 202 | With my permission. 203 | Go ahead. 204 | Corporate has deemed it appropriate to enforce an ultimatum upon me. 205 | And Jan is thinking about downsizing either the Stamford branch or this branch. 206 | I heard they might be closing this branch down. 207 | That's just the rumor going around. 208 | This is my first day. 209 | I don't really know. 210 | - What if they downsize here? - Not gonna happen. 211 | - It'll be out of your hands. 212 | - It won't be, Stanley. 213 | I promise you that. 214 | - Can you promise that? - On his mother's grave. 215 | - What? - Well, yes, it is a promise. 216 | Frankly, I'm a little insulted that you have to keep asking about it. 217 | We need to know. 218 | I know. 219 | Hold on. 220 | I think Pam wanted to say something. 221 | Pam, you had a look that you wanted to ask a question just then. 222 | I was in the meeting with Jan. 223 | She did say this branch could get the axe. 224 | Are you sure about that? Well, maybe you should stick to the ongoing confidentiality agreement of meetings. 225 | Pam, information is power. 226 | You can't say for sure whether it'll be us or them, can you? No, Stanley. 227 | You did not see me in there with her. 228 | I said if Corporate wants to come in here and interfere, they're gonna have to go through me. 229 | You can mess with Josh's people, but I'm the head of this family and you ain't gonna be messing with my chillun. 230 | If I left, what would I do with all this useless information in my head? You know? Tonnage price of manila folders? Er, Pam's favorite flavor of yogurt, which is mixed berry. 231 | Jim said mixed berries? Well, yeah, he's on to me. 232 | Watch out for this guy. 233 | Dwight Shrute in the building. 234 | - This is Ryan, the new temp. 235 | - Introduce yourself. 236 | Be polite. 237 | Dwight Shrute, Assistant Regional Manager. 238 | Assistant to the Regional Manager. 239 | So, tell him about the kung fu and the car and everything. 240 | Er I've got a '78280Z. 241 | Bought it for - His profit. 242 | - New engine, suspension, respray. 243 | I've got some photos. 244 | Damn it! Jim! OK. 245 | Hold on. 246 | The judge is in session. 247 | What is the problem? He put my stuff in Jell-O again. 248 | Real professional. 249 | The third time. 250 | It wasn't funny the first two. 251 | It's OK here, but people sometimes take advantage because it's so relaxed. 252 | I'm a volunteer Sheriff's Deputy on the weekends. 253 | And you cannot screw around there. 254 | That's sort of one of the rules. 255 | - What is that? - That is my stapler. 256 | No. 257 | Do not take it out. 258 | Eat it out, because there are starving people in the world which I hate, and it is a waste of that kind of food. 259 | You can be a witness. 260 | Can you reprimand him? - How do you know it was me? - It's always you. 261 | - Are you going to discipline him? - Discipline. 262 | Kinky! Here's the deal, you guys. 263 | The thing about a practical joke is you have to know when to start as well as when to stop. 264 | And, yeah, Jim. 265 | Now is the time to stop putting Dwight's personal effects into Jell-O. 266 | OK. 267 | Dwight, I'm sorry, because I have always been your biggest flan. 268 | Nice. 269 | That's the way it is around here. 270 | - It goes round and round. 271 | - You should put him in custardy. 272 | Hey! Yes! New guy! He scores. 273 | That's great. 274 | I guess what I'm most concerned with is damage to company property. 275 | That's all. 276 | Pudding. 277 | Pudding I'm trying to think of another dessert to do. 278 | Do you like going out at the end of the week for a drink? - Yeah. 279 | - That's why we're all going out. 280 | For an end-of-the-week drink. 281 | - When are we going out? - Tonight, hopefully. 282 | - Hey, man. 283 | - What's going on? Hi, baby. 284 | Roy's my fiancé. 285 | We've been engaged about three years. 286 | We were supposed to get married in September but I think we're gonna get married in the spring. 287 | Do you mind if I go out for a drink with these guys? No. 288 | Come on. 289 | Let's get out of here and go home. 290 | OK. 291 | I'm gonna be a few minutes. 292 | It's only twenty past five. 293 | I still have to do my faxes. 294 | You know what? You should come with us. 295 | It could be a good chance to see what people are like outside the office. 296 | - I think it could be fun. 297 | - It sounds good. 298 | Seriously, we gotta get going. 299 | Yeah, yeah. 300 | Um What's in the bag? - Tell her I'll talk to her later. 301 | - Definitely. 302 | All right, dude. 303 | Will do. 304 | Do I think I'll be invited to the wedding? So have you felt the vibe yet? We work hard, we play hard. 305 | Sometimes we play hard when we should be working hard. 306 | I guess the atmosphere that I've created here is that I'm a friend first, a boss second and probably an entertainer third. 307 | Just a second. 308 | Right? Oh, do you like The Jamie Kennedy Experiment? Punk'd and all that kind of stuff? You are gonna be my accomplice. 309 | Just go along with it, OK? Just follow my lead. 310 | Don't pip me, all right? Come in. 311 | - Corporate just said - You got a fax. 312 | Thank you. 313 | Can you come in here for a sec? I was gonna call you in anyway. 314 | You know Ryan. 315 | As you know, there is going to be downsizing. 316 | And you have made my life so much easier in that I am going to have to let you go first. 317 | - What? Why? - Why? Well, theft and stealing. 318 | Stealing? What am I supposed to have stolen? Post-it Notes. 319 | Post-it Notes? What are those worth, 50 cents? If you steal a thousand Post-it Notes at 50 cents a piece, you know, you've made a profit margin. 320 | You're gonna run us out of business, Pam. 321 | Are you serious? - Yeah. 322 | I am. 323 | - I can't believe this. 324 | I have never even stolen as much as a paperclip and you're firing me. 325 | The best thing about it is we're not going to have to give you any severance pay. 326 | Because that is gross misconduct and Just clean out your desk. 327 | I'm sorry. 328 | You been X'd, punk! Surprise! It's a joke. 329 | We were joking around. 330 | See? OK. 331 | He was in on it. 332 | He was my accomplice. 333 | It was kind of a morale booster thing. 334 | We were showing the new guy around, giving him the feel of the place. 335 | So you God, we totally got you. 336 | You're a jerk. 337 | I don't know about that. 338 | What is the most important thing for a company? Is it the cash flow? Is it the inventory? Nuh-uh. 339 | It's the people. 340 | The people. 341 | My proudest moment here was not when I increased profits by 17% or when I cut expenses without losing a single employee. 342 | No, no, no, no, no. 343 | It was a young Guatemalan guy. 344 | First job in the country, barely spoke English. 345 | He came to me, and said, "Mr. 346 | Scott, would you be the godfather of my child?" Wow. 347 | Wow. 348 | Didn't work out. 349 | We had to let him go. 350 | He sucked. 351 | - Hey. 352 | - Hey. 353 | How are you? I thought you were going for a drink with No. 354 | I just decided not to. 355 | - How's your headache? - It's better, thanks. 356 | - Good. 357 | That's great. 358 | - Is? - Yeah? - Um Are you - Am I walking out? - Yes. 359 | - Yes, I Do you want to - Yeah. 360 | Great. 361 | Let me just Oh, boy. 362 | - Listen, have a nice weekend. 363 | - Definitely. 364 | You too. 365 | Enjoy it. 366 | You know what, just come here. -------------------------------------------------------------------------------- /sklearn/texts/the-office-uk-01.txt: -------------------------------------------------------------------------------- 1 | I don't give shitty jobs. 2 | If a good man comes to me, says, 'Thank you, David, for the opportunity and continued support 'in the work-related arena, 'but I want to better myself, I want to move on. 3 | ' Then I can make that dream come true to AKA, for you. 4 | The point is you talk the talk, but do not walk the walk, vis-?vis you've not yet passed your forklift driver's test. 5 | The man who gives the jobs in the warehouse is a personal friend of mine. 6 | All right? I know you're the man for the job. 7 | Sammy, you old slag. 8 | It's the Brentmeister General. 9 | Have you advertised the forklift driver's job? Good, don't bother. 10 | I've got the man here. 11 | He's perfick. 12 | Has he passed his forklift driver's test? He gives the tests. 13 | Yeah, yeah. 14 | He's first-aid trained, yeah. 15 | Yeah. 16 | We'll get a C. 17 | V. 18 | Over to you this afternoon. 19 | i'm seeing you Sunday, aren't I? For my sins. 20 | How is Elaine? She left you yet? All right. 21 | See you then. 22 | She has left him. 23 | I forgot about that. 24 | David Brent. 25 | I've been in the business for twelve years, Been at Wernham Hogg as General Manager for eight of those, so putting together my team. 26 | Lovely Dawn. 27 | Dawn Tinsley. 28 | Receptionist. 29 | Been with us for ages, haven't you? - Yeah. 30 | id say, at one time or another, every bloke here has woken up at the crack of Dawn! What?! Can I have the mail, please? Yeah. 31 | Just a fax. 32 | Dawn, this is from Head Office. 33 | - I know. 34 | There's a special filing cabinet for things from Head Office. 35 | You haven't - The wastepaper basket! Better get that back. 36 | People say i'm the best boss. 37 | C'est la vie. 38 | If that's true, excellent. 39 | Be gentle with me today, Dawn. 40 | - Why is that? Oh, God, I had a skinful last night. 41 | I was out with Finchy - Chris Finch. 42 | Had us on a pub crawl. 43 | El vino did flow. 44 | I was blar tered. 45 | Bladdered. 46 | Blottoed. 47 | Don't ever come out with me and Finchy. 48 | No, I won't. 49 | You got to go for it. 50 | There's guys my age, and they look 50. 51 | How old do you think I look? Thirty si - Thirty, yeah. 52 | About that. 53 | But i'm going to have to slow down. 54 | Drinking a bit too much. 55 | If every single night of the week's too much! And every lunchtime! How many have I had this week? - What? How many pints have I had if you're counting? i'm not counting. 56 | - Aren't you? You seem to know a lot about my drinking. 57 | Does it offend you? You know, getting a little bit personal. 58 | Imagine if I started doing that with you. 59 | I could come up with something witty and biting, like, 'You're a bit' But I don't because i'm a professional. 60 | And professionalism is And that is what I want, OK? That's all. 61 | That's a shame. 62 | Wassup?! - Don't do that! All right. 63 | What is it, time of the month? Just the eight pints for me last night. 64 | That's all. 65 | Uh Oh, no. 66 | Oh, God. 67 | 'Boss and team leader in drunken night out shock horror,' it says here. 68 | It's not like I'm out again tonight with Oggy. 69 | That'd be a quiet night in at the library - not! I don't think. 70 | I'm a sales rep, which means that my job is to speak to clients on the phone about quantity and type of paper, and whether we can supply it with them, and whether they can pay for it. 71 | And I'm boring myself talking about it. 72 | Wassup?! Hey, wassup?! Wassup. 73 | You're fired, Keenan. 74 | Drunkard. 75 | Hypocrite warning. 76 | Oh, God. 77 | What's he been saying? It's all true. 78 | Guilty as charged. 79 | Went out with a few of his mates, didn't we? And he goes, 'Tag along if you want, 'but I must warn you, David, they get a bit rowdy. 80 | ' I went, 'ill see if I can take it. 81 | ' I was worse than them by the end! They're going, 'Who's that nutter?' 'That's my boss. 82 | ' 'We can't stand it, we're going. 83 | ' They just left! Oh, God. 84 | Absolutely mental. 85 | Resolve! What? Nothing. 86 | See you later. 87 | - See you later. 88 | Take care. 89 | Would you mind giving Maintenance a call? There's a nasty smell in the lift. 90 | Head Office don't really interfere with me, Jennifer might come down once a week, Jennifer Taylor-Clark. 91 | We call her Camilla Parker-Bowles! Not to her face. 92 | Not 'cause I'm scared of her. 93 | I got them off Nobby Burton who comes round with a suitcase. 94 | Two for a tenner. 95 | Yes, please, four. 96 | So OK. 97 | Meeting with Jennifer Taylor-Clarke present. 98 | Right - Shoot. 99 | Anything you wanted to add to the agenda? Did no get an agenda. 100 | Sorry? - Did not get an agenda, no. 101 | I did fax you one this morning. 102 | - Did we get a fax, Dawn? Yeah, we may have. 103 | Then why isn't it in my hand? Because a company runs on efficiency of communication. 104 | You put it in the bin that was a special filing cabinet. 105 | As a joke, yeah. 106 | It's meant to be bills. 107 | Doesn't really work with faxes. 108 | Do you want to look at mine? - Yeah. 109 | Yeah, sure, she'd say she's the boss, but There should be no ego when you're pulling together to do something good. 110 | Yeah? It's like Comic Relief. 111 | Yeah? I'm out here in Africa - I'm seeing the flies and the starvation - and she - if she is the boss - she's in the studio with Jonathan Ross and Lenny Henry. 112 | They're doing their bit counting the money, but their hands are clean, while I'm down here in the office with little starving kids. 113 | Right. 114 | Since the last meeting Alan and the board have decided that we can't justify a Swindon branch and a Slough branch. 115 | Oh - No, David. 116 | Don't panic. 117 | Should be good. 118 | Go on. 119 | There are alarm bells. 120 | - No, no. 121 | Don't panic. 122 | We haven't made any deci - Don't panic? We haven't made any decisions. 123 | ive spoken to Neil in Swindon. 124 | ive told him the same as you. 125 | It's up to either you or him to convince me that your branch could incorporate the other. 126 | OK. 127 | No problem. 128 | - This does, however, mean that there will be redundancies. 129 | - You see? Did I no want to hear that, Jenny. 130 | Redundancies are a tragedy always. 131 | I wouldn't wish that on Neiis men or on my men or women. 132 | Present company excepted. 133 | Is Neil concerned about redundancies? Of course, yes. 134 | - Good. 135 | Because I'm very concerned. 136 | Although I understand if they're necessary, as a businessman. 137 | Does he understand? - David. 138 | Let's not talk about redundancies. 139 | - We have to sooner or later. 140 | Yes, but now we have to decide if you take on Swindon's people at this branch or the other way round. 141 | Yeah, we'll take on theirs. 142 | - No No. 143 | You and I don't decide. 144 | I decide. 145 | - You do decide but You make your case. 146 | - Based on fact. 147 | OK. 148 | Is there a time II'mit on? Let it go onto the machine. 149 | Hl; mate, Not around at the moment, so please leave a mess-ARGE, It's Finch, - Chris Finch. 150 | Bloody good rep. 151 | Got a hangover, you big poof? - Ah, that's derogatory. 152 | You're in with Jennifer today, aren't you? Give her one for me, son, - Ah. 153 | Cheers, big ears, - Awful. 154 | Awful man. 155 | Stop looking up her skirt, David? - I wasn't. 156 | Can we keep a lid on this? I really don't want to worry people unduly. 157 | No. 158 | Under this regime, Jenny, this will not leave the office. 159 | So what does redundancy actually mean? So you'd just go, would you? Would you? - Well, I don't know. 160 | Keith and Jamie are having these little I know. 161 | They've gone mad. 162 | clandestine little chats about being out of work. 163 | I couldn't give a shit. 164 | This is Mr Brent. 165 | Guilty! - All right? Hi. 166 | Ricky Howard from the temping agency. 167 | Verna sent me to start today. 168 | Temporary. 169 | Staff. 170 | Only. 171 | Ricky? - Yeah. 172 | 'Ricky!' 'Ricky. 173 | No, Ricky!' What was his girlfriend's name on EastEnders? Bianca. 174 | - 'Ricky! Leave it!' Did she tell you I was mad? Yeah. 175 | She said you had a nervous breakdown. 176 | I haven't had a nervous breakdown. 177 | - That was a joke. 178 | She said you were a good laugh and, you know We all are, aren't we? Part of my job description now. 179 | Unofficially. 180 | OK. 181 | Let's get you started. 182 | Into the fray! What upsets me about the job? Um Wasted talent, yeah? People could come to me and they could go, 'David, you've been in the business twelve years. 183 | 'Can you spare us a moment to tell us how to run a team? 'How to keep them task-orientated as well as happy. 184 | ' But they don't. 185 | That's the tragedy. 186 | This is the er Accounts Department. 187 | All right? The number bods. 188 | Do not be fooled by their job descriptions. 189 | They are absolutely mad, all of them. 190 | Especially that one. 191 | He's mental. 192 | Not literally, obviously, that wouldn't work. 193 | Last place you'd want someone like that is in Accounts. 194 | This is the recycling bin. 195 | - Right. 196 | Obviously we get through a lot of paper. 197 | We make a lot! Actually, we sell it. 198 | - Yeah. 199 | It doesn't grow on trees! You know it is pulp, yeah? Yeah. 200 | Mr Davis, can I call you back? Something's come up. 201 | Two minutes. 202 | Thanks. 203 | Bye. 204 | What are you doing? What? - What are you doing? I'm just pushing this stuff off my desk. 205 | It wasn't on your desk. 206 | - It was overlapping. 207 | It's all coming over the edge. 208 | All right? One word, two syllables - demarcation. 209 | All right? David Brent, I presume! That's just to cheer these lot up. 210 | Send the girls out to get the plants to make them a bit happier. 211 | They can sometimes get a bit It's run out of batteries. 212 | Can we get some batteries for Billy Bigmouth? Take it out of petty cash. 213 | Can't put a price on comedy. 214 | Er You're a twat, Gareth. 215 | A twat and a knob end. 216 | I'm not listening, so it's not offending me. 217 | So you won't hear this. 218 | You're a cock, you're a cock, you're a cock. 219 | You're a cock. 220 | Here y'are, look. 221 | This is the sort of work we're doing. 222 | Cartoons. 223 | Does my bum look big in these? It's not sexist. 224 | It's the bloke saying it. 225 | At last. 226 | So All for that. 227 | All for that in the workplace. 228 | Managed to scrape a first in the end. 229 | You've met Tim, haven't you? Hello. 230 | - All right? Oh, careful. 231 | Watch this one. 232 | Gareth Keenan in the area! Ricky, the new temp. 233 | Introduce yourself. 234 | Gareth Keenan, Assistant Regional Manager. 235 | Assistant to the Regional Manager. 236 | My right-hand man. 237 | I need him beneath me - as an actress said to a bishop! No, he's not. 238 | Tell him about the car and the kung fu and everything. 239 | Yeah. 240 | ive got a TR3. 241 | I bought it for 1,200, done it up, now it's worth three grand. 242 | The profit on that's just under - New engine. 243 | A wreck - ive got some photos. 244 | Oh, what is that?! Whoa, whoa. 245 | - That is it. 246 | Slow down, you're moving too fast. 247 | Solomon's here. 248 | What's going on? He put my stapler inside a jelly again. 249 | That's the third time. 250 | It wasn't funny the first time. 251 | Why has he done that? - I told him I don't like jelly. 252 | I don't trust the way it moves. 253 | You show a weakness, he'll pounce. 254 | You should know that. 255 | What is in here? My stapler. 256 | Well, don't do that! Eat it out. 257 | There's people starving in the world, which I hate. 258 | It's a waste. 259 | How do you know it's yours? It's got my name on it in Tippex. 260 | Yeah. 261 | Don't eat it now. 262 | Chemicals. 263 | Right. 264 | You can be my witness. 265 | Give him an official warning. 266 | How do you know it was me? - It's always you. 267 | Mad here. 268 | - Can't you discipline him? Ooh, kinky! The thing about practical jokes, you've got to know when to stop, and now's the time to stop putting Gareth's possessions in jelly. 269 | Gareth, it's a trifling matter. 270 | Here we go. 271 | Always like this. 272 | You should put him in custard-y! He's going to fit in. 273 | We're like Vic and Bob and an extra one. 274 | Oh, God. 275 | Yeah. 276 | I'm more worried really about damage to company property. 277 | Trifling. 278 | I'm just trying to think of other desserts to do. 279 | Yeah, it's all right here, but people do sometimes take advantage because it's so relaxed. 280 | I like to have a laugh just as much as the next man, but this is a place of work. 281 | I was in the Territorial Army for three years, and you can't muck about there - it's one of the rules. 282 | OK, dude. 283 | Give it back. 284 | - I'm just using it for a second. 285 | It's got my name on it - Gareth. 286 | It says 'Garet' actually. 287 | Ask if you want to borrow it. 288 | - You always say no. 289 | That's why you should ask. 290 | Gareth, it was just there, OK? That's its home. 291 | Leave it there. 292 | OK. 293 | OK. 294 | Philip, get that off him! I'm going to let this go unless you stop acting like a fool. 295 | Well, you won't so - Well, I have, so What if that killed someone? They'll think you're the murderer. 296 | It's got your name on it. 297 | Why would a murderer put his name on a murder weapon? To stop people borrowing it. 298 | David. 299 | I hate the fact you bring me down to this. 300 | I really resent it. 301 | I don't know why you're laughing. 302 | - Leave her out of it. 303 | You carry on. 304 | Listen, you bring me down to this. 305 | What's that? Popcorn. 306 | Ben Elton. 307 | Funny? - It's all right, yeah. 308 | I had a bit of a scare earlier. 309 | Did you? I thought I found a lump. 310 | I examine myself regularly but It's fine. 311 | Terrifying. 312 | Testicular cancer. 313 | Cancer of them old testicles. 314 | What's that? - It's a bit of brie. 315 | From down there? - Mmm. 316 | See you later. 317 | ive just got a complaint from a very important client, saying that the figures I gave him are wrong, and Yeah. 318 | Well, basically, ive checked all other possibilities and it's come down to the calculator. 319 | Well, I don't know. 320 | Circuitry? Who is this I'm talking to? Redundancies. 321 | Now, it is true or not? OK. 322 | - Last in, first out. 323 | I don't know what source your little bird is from All right. 324 | Give me a sum. 325 | No. 326 | You give me a sum, ill try it out. 327 | All right. 328 | Yeah. 329 | Plus 52 equals All right. 330 | That time it was correct. 331 | There's nothing - Be straight with us. 332 | I am. 333 | I'm going to be straight with you now. 334 | I can't tell you at the moment. 335 | I don't like acting like a kid, do you know what I mean? But it's the effect he has. 336 | What are you doing? I don't want to have to look at you, Gareth. 337 | You can't do that. 338 | - Why not? Health and Safety. 339 | Why? Crushed by cardboard or what? No. 340 | Number one, blocking out light. 341 | Number two, misuse of company files. 342 | Misuse of files? Right. 343 | This is why the whole redundancy thing doesn't bother me, because if I have to work with him for another day, I will slit my throat. 344 | You won't do it like that though. 345 | You get the knife in behind the windpipe, pull it down like that. 346 | Or I could just apply for another job. 347 | Careful. 348 | OK. 349 | Um Thanks for coming in. 350 | This'll take a minute. 351 | Er Right. 352 | I am aware of the rumours that have been circulating, and I want to put the record straight. 353 | I'm team leader! I should know first. 354 | Yeah. 355 | I'm telling everyone now. 356 | Just tell me quickly. 357 | Just whisper it to me. 358 | Can you just tell us? - Yeah. 359 | All right. 360 | Shall I tell them? - You don't know what it is. 361 | All right. 362 | You tell them with my permission. 363 | I don't need your permission. 364 | Permission granted. 365 | You do as you wish. 366 | Head Office have deemed it appropriate to enforce an ultimatum upon me, and Jennifer is talking of either downsizing the Swindon branch or this branch. 367 | Are you going to let her? - No, Malcolm. 368 | You didn't see me in there. 369 | - For his eyes only! I said, 'Head Office have got me to contend with. 370 | 'You can fiddle with Neiis people, but I'm the head of this family. 371 | 'You're not going to fiddle with my children. 372 | ' But, David, if they do downsize here You think id let that happen? - It'd be out of your hands. 373 | It won't be out of my hands, and that's a promise. 374 | Can you promise that? On his mother's grave. 375 | Well I have promised it. 376 | OK? And it insults me that you have to ask. 377 | It's just that - Sorry. 378 | Dawn wants to speak. 379 | It was just that I was in the meeting with Jennifer, and she said it could be this branch that gets the chop. 380 | Well, if you were in the meeting with Jennifer, then maybe you should adhere to the ongoing confidentiality agreement of meeting. 381 | Yeah. 382 | Information is power. 383 | So you can't say whether it's going to be us or them. 384 | This is my ship and ive asked you to trust me, and you can't go wrong. 385 | It's not a question of trust. 386 | - It is, Malcolm. 387 | It is a question of trust. 388 | Do you trust me? Do you trust me? Yes or no? - Yes. 389 | I trust you. 390 | He does. 391 | So Meeting a-journed. 392 | - Good. 393 | Excellent. 394 | id have said much the same. 395 | In fact Can I just ask? Do you trust me? Hands up if you trust me. 396 | - You don't You asked them. 397 | - Put your hand down. 398 | I'm Assistant Regional Manager. 399 | Assistant to the Regional Manager. 400 | They're gone. 401 | I'm not worried for me. 402 | ill be all right. 403 | But if there does have to be a cull, then so be it. 404 | I mean, that's just natural selection. 405 | In the wild, some people wouldn't survive. 406 | I mean, Imagine a warehouse where a little midget fella is driving a forklift. 407 | He can't see over the top. 408 | He's got great big platform shoes on so he can reach the pedals because of his little legs. 409 | Anton's a lovely bloke - don't get me wrong - but should he be working here? Have you got a price for matt-coated SRA1? I can't see you, I can't hear you, Gareth. 410 | Just tell me, will you? - No. 411 | I can't hear you. 412 | Just tell me. 413 | - Give me a ring, OK? It's on voicemail. 414 | - Leave a message. 415 | Hi. 416 | It's me, Gareth. 417 | I need a price on matt-coated This is stupid! Yes. 418 | This is stupid. 419 | It's so Sorry, mate, what do you want? Er I need a tonnage price on matt-coated SRA1. 420 | ive got 160 here, but I'm sure that isn't right because when I spoke to Glynn earlier on, he Right. 421 | I know you're not there. 422 | Obviously you can't hear that, but I'm not talking to myself because they're filming. 423 | That feels nice, actually. 424 | Do that bit with your nails. 425 | - Sshh! That's no different. 426 | You can't do anything with your hair at all! Oh, no, trouble. 427 | Sanj. 428 | This is Sanj. 429 | This is Ricky. 430 | Hello, mate. 431 | - Nice to see you. 432 | This guy does the best Ali G Impersonation. 433 | I can't. 434 | You do it. 435 | Go on. 436 | - I don't. 437 | You're thinking of someone else. 438 | - Oh! Sorry. 439 | It's not you, it's the other one. 440 | - The other what? Um - Paki? Ah. 441 | That's racist. 442 | No, I don't have a great many ethnic employees, that's true, but it's not company policy I haven't got a sign on the door that says, 'White people only2'. 443 | I don't care if you're black, brown, yellow - Orientals make very good workers, for example. 444 | Do you like a drink at the end of a week? Yup. 445 | Well, this is why we're going out When are you going out? - To have an end of the week drink. 446 | When? - Tonight, hopefully. 447 | Er, yeah Hi, mate. 448 | - Hi, sweetheart. 449 | Are you ready, yeah? - Yeah. 450 | Do you mind if I go out for a drink with this lot? No. 451 | Let's go home, yeah? OK. 452 | ill be a couple of minutes because it's twenty past five. 453 | You should come. 454 | It'll be a laugh. 455 | - No. 456 | You're all right. 457 | We got to get off. 458 | Um What's in the bag? - Tell her ill see her later. 459 | Certainly will, mate. 460 | Take care. 461 | Dreaded first day. 462 | - Yeah. 463 | All right? - Yeah. 464 | All right. 465 | You've seen the vibe. 466 | Chilled out. 467 | Oh, dear. 468 | We work hard. 469 | I mean, we play hard. 470 | Play hard when we should be working hard sometimes, partly down to me. 471 | Um I let them get away with murder and they let me. 472 | The girls love me. 473 | Not in that way. 474 | But, er I suppose ive created an atmosphere where I'm a friend first and a boss second. 475 | Probably an entertainer third. 476 | Hold on! Practical jokes, yeah? Right. 477 | OK. 478 | Practical joke. 479 | Don't give me away. 480 | Come in. 481 | And then Head Office said Yeah. 482 | Fax for you. 483 | - Thanks. 484 | Don't go, Dawn. 485 | Pull up a chair. 486 | I was going to call you in. 487 | I need a quick word. 488 | Um As you are aware, there are going to be redundancies. 489 | You've made my life easier inasmuch as I'm going to let you go first. 490 | What? Why? Why? Stealing. 491 | Thieving. 492 | Thieving? - Yeah. 493 | Er What am I meant to have stolen? Post-It notes. 494 | Post-It notes? What are they worth, about 12p? Got your Bible on you, Ricky? - No. 495 | Thou shalt not steal unless it's only 12p. 496 | You steal a thousand Post-It notes at 12p, you've made a profit. 497 | Why would I steal Post-It notes? To make the little things in the end of joints Roaches. 498 | - Caught you, drug addict. 499 | Are you serious? - Yeah. 500 | I can't God ive never stolen as much as a paper clip and you're firing me. 501 | And I don't need to give you severance pay because it's gross misconduct. 502 | So you can go straight away. 503 | Oh, now That was a joke there. 504 | Good girl. 505 | It was a joke we were doing. 506 | Well done. 507 | Settling in. 508 | Practical jokes for the good. 509 | 'Thanks for these. 510 | Check them out. 511 | ' Better do these now actually. 512 | You wanker. 513 | Come on. 514 | You're such a sad little man. 515 | Am I? Didn't know that. 516 | What is the single most important thing for a company? Is it the building? Is it the stock? Is it the turnover? It's the people. 517 | Investment in people. 518 | Yeah? My proudest moment here wasn't when I increased profit by 17%, or cut expenditure without losing a single member of staff. 519 | No. 520 | A young Greek guy, first job in the country, hardly spoke English, he came to me and went, 'Mr Brent, will you be godfather to my child?' So Didn't happen. 521 | We had to let him go. 522 | He was rubbish. 523 | He was rubbish. -------------------------------------------------------------------------------- /sklearn/Scalers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 31, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler, RobustScaler\n", 12 | "import numpy as np\n", 13 | "import pandas as pd" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 32, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "
\n", 25 | "\n", 38 | "\n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | "
hundredsneg_thousandsoutlierstensthousands
01.0-1000.000000-3.212321e+061.01.0
112.0-777.777778-7.777778e+022.0112.0
223.0-555.555556-5.555556e+023.0223.0
334.0-333.333333-3.333333e+024.0334.0
445.0-111.111111-1.111111e+025.0445.0
556.0111.1111111.111111e+026.0556.0
667.0333.3333333.333333e+027.0667.0
778.0555.5555565.555556e+028.0778.0
889.0777.7777787.777778e+029.0889.0
9100.01000.0000003.212321e+0610.01000.0
\n", 132 | "
" 133 | ], 134 | "text/plain": [ 135 | " hundreds neg_thousands outliers tens thousands\n", 136 | "0 1.0 -1000.000000 -3.212321e+06 1.0 1.0\n", 137 | "1 12.0 -777.777778 -7.777778e+02 2.0 112.0\n", 138 | "2 23.0 -555.555556 -5.555556e+02 3.0 223.0\n", 139 | "3 34.0 -333.333333 -3.333333e+02 4.0 334.0\n", 140 | "4 45.0 -111.111111 -1.111111e+02 5.0 445.0\n", 141 | "5 56.0 111.111111 1.111111e+02 6.0 556.0\n", 142 | "6 67.0 333.333333 3.333333e+02 7.0 667.0\n", 143 | "7 78.0 555.555556 5.555556e+02 8.0 778.0\n", 144 | "8 89.0 777.777778 7.777778e+02 9.0 889.0\n", 145 | "9 100.0 1000.000000 3.212321e+06 10.0 1000.0" 146 | ] 147 | }, 148 | "execution_count": 32, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "tens = np.linspace(1, 10, 10)\n", 155 | "hundreds = np.linspace(1, 100, 10)\n", 156 | "thousands = np.linspace(1, 1000, 10)\n", 157 | "neg_thousands = np.linspace(-1000, 1000, 10)\n", 158 | "outliers = neg_thousands.copy()\n", 159 | "outliers[0] = -3212321\n", 160 | "outliers[9] = 3212321\n", 161 | "\n", 162 | "data = pd.DataFrame({\n", 163 | " 'tens': tens,\n", 164 | " 'hundreds': hundreds,\n", 165 | " 'thousands': thousands,\n", 166 | " 'neg_thousands': neg_thousands,\n", 167 | " 'outliers': outliers\n", 168 | "})\n", 169 | "columns = data.columns\n", 170 | "data.head(10)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 33, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/html": [ 181 | "
\n", 182 | "\n", 195 | "\n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | "
hundredsneg_thousandsoutlierstensthousands
00.0000000.0000000.0000000.0000000.000000
10.1111110.1111110.4998790.1111110.111111
20.2222220.2222220.4999140.2222220.222222
30.3333330.3333330.4999480.3333330.333333
40.4444440.4444440.4999830.4444440.444444
50.5555560.5555560.5000170.5555560.555556
60.6666670.6666670.5000520.6666670.666667
70.7777780.7777780.5000860.7777780.777778
80.8888890.8888890.5001210.8888890.888889
91.0000001.0000001.0000001.0000001.000000
\n", 289 | "
" 290 | ], 291 | "text/plain": [ 292 | " hundreds neg_thousands outliers tens thousands\n", 293 | "0 0.000000 0.000000 0.000000 0.000000 0.000000\n", 294 | "1 0.111111 0.111111 0.499879 0.111111 0.111111\n", 295 | "2 0.222222 0.222222 0.499914 0.222222 0.222222\n", 296 | "3 0.333333 0.333333 0.499948 0.333333 0.333333\n", 297 | "4 0.444444 0.444444 0.499983 0.444444 0.444444\n", 298 | "5 0.555556 0.555556 0.500017 0.555556 0.555556\n", 299 | "6 0.666667 0.666667 0.500052 0.666667 0.666667\n", 300 | "7 0.777778 0.777778 0.500086 0.777778 0.777778\n", 301 | "8 0.888889 0.888889 0.500121 0.888889 0.888889\n", 302 | "9 1.000000 1.000000 1.000000 1.000000 1.000000" 303 | ] 304 | }, 305 | "execution_count": 33, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "minmax = MinMaxScaler()\n", 312 | "minmax.fit(data)\n", 313 | "data_minmax_scaled = minmax.transform(data)\n", 314 | "pd.DataFrame(data_minmax_scaled, columns=columns)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 34, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "data": { 324 | "text/html": [ 325 | "
\n", 326 | "\n", 339 | "\n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | "
hundredsneg_thousandsoutlierstensthousands
00.01-1.000000-1.0000000.10.001
10.12-0.777778-0.0002420.20.112
20.23-0.555556-0.0001730.30.223
30.34-0.333333-0.0001040.40.334
40.45-0.111111-0.0000350.50.445
50.560.1111110.0000350.60.556
60.670.3333330.0001040.70.667
70.780.5555560.0001730.80.778
80.890.7777780.0002420.90.889
91.001.0000001.0000001.01.000
\n", 433 | "
" 434 | ], 435 | "text/plain": [ 436 | " hundreds neg_thousands outliers tens thousands\n", 437 | "0 0.01 -1.000000 -1.000000 0.1 0.001\n", 438 | "1 0.12 -0.777778 -0.000242 0.2 0.112\n", 439 | "2 0.23 -0.555556 -0.000173 0.3 0.223\n", 440 | "3 0.34 -0.333333 -0.000104 0.4 0.334\n", 441 | "4 0.45 -0.111111 -0.000035 0.5 0.445\n", 442 | "5 0.56 0.111111 0.000035 0.6 0.556\n", 443 | "6 0.67 0.333333 0.000104 0.7 0.667\n", 444 | "7 0.78 0.555556 0.000173 0.8 0.778\n", 445 | "8 0.89 0.777778 0.000242 0.9 0.889\n", 446 | "9 1.00 1.000000 1.000000 1.0 1.000" 447 | ] 448 | }, 449 | "execution_count": 34, 450 | "metadata": {}, 451 | "output_type": "execute_result" 452 | } 453 | ], 454 | "source": [ 455 | "maxabs = MaxAbsScaler()\n", 456 | "maxabs.fit(data)\n", 457 | "data_maxabs_scaled = maxabs.transform(data)\n", 458 | "pd.DataFrame(data_maxabs_scaled, columns=columns)" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 38, 464 | "metadata": { 465 | "scrolled": true 466 | }, 467 | "outputs": [ 468 | { 469 | "data": { 470 | "text/html": [ 471 | "
\n", 472 | "\n", 485 | "\n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | "
hundredsneg_thousandsoutlierstensthousands
0-1.000000-1.000000-3212.321000-1.000000-1.000000
1-0.777778-0.777778-0.777778-0.777778-0.777778
2-0.555556-0.555556-0.555556-0.555556-0.555556
3-0.333333-0.333333-0.333333-0.333333-0.333333
4-0.111111-0.111111-0.111111-0.111111-0.111111
50.1111110.1111110.1111110.1111110.111111
60.3333330.3333330.3333330.3333330.333333
70.5555560.5555560.5555560.5555560.555556
80.7777780.7777780.7777780.7777780.777778
91.0000001.0000003212.3210001.0000001.000000
\n", 579 | "
" 580 | ], 581 | "text/plain": [ 582 | " hundreds neg_thousands outliers tens thousands\n", 583 | "0 -1.000000 -1.000000 -3212.321000 -1.000000 -1.000000\n", 584 | "1 -0.777778 -0.777778 -0.777778 -0.777778 -0.777778\n", 585 | "2 -0.555556 -0.555556 -0.555556 -0.555556 -0.555556\n", 586 | "3 -0.333333 -0.333333 -0.333333 -0.333333 -0.333333\n", 587 | "4 -0.111111 -0.111111 -0.111111 -0.111111 -0.111111\n", 588 | "5 0.111111 0.111111 0.111111 0.111111 0.111111\n", 589 | "6 0.333333 0.333333 0.333333 0.333333 0.333333\n", 590 | "7 0.555556 0.555556 0.555556 0.555556 0.555556\n", 591 | "8 0.777778 0.777778 0.777778 0.777778 0.777778\n", 592 | "9 1.000000 1.000000 3212.321000 1.000000 1.000000" 593 | ] 594 | }, 595 | "execution_count": 38, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "robust = RobustScaler()\n", 602 | "robust.fit(data)\n", 603 | "data_robust_scaled = robust.transform(data)\n", 604 | "pd.DataFrame(data_robust_scaled, columns=columns)" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": 39, 610 | "metadata": {}, 611 | "outputs": [ 612 | { 613 | "data": { 614 | "text/html": [ 615 | "
\n", 616 | "\n", 629 | "\n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | "
hundredsneg_thousandsoutlierstensthousands
0-1.566699-1.566699-2.236068-1.566699-1.566699
1-1.218544-1.218544-0.000541-1.218544-1.218544
2-0.870388-0.870388-0.000387-0.870388-0.870388
3-0.522233-0.522233-0.000232-0.522233-0.522233
4-0.174078-0.174078-0.000077-0.174078-0.174078
50.1740780.1740780.0000770.1740780.174078
60.5222330.5222330.0002320.5222330.522233
70.8703880.8703880.0003870.8703880.870388
81.2185441.2185440.0005411.2185441.218544
91.5666991.5666992.2360681.5666991.566699
\n", 723 | "
" 724 | ], 725 | "text/plain": [ 726 | " hundreds neg_thousands outliers tens thousands\n", 727 | "0 -1.566699 -1.566699 -2.236068 -1.566699 -1.566699\n", 728 | "1 -1.218544 -1.218544 -0.000541 -1.218544 -1.218544\n", 729 | "2 -0.870388 -0.870388 -0.000387 -0.870388 -0.870388\n", 730 | "3 -0.522233 -0.522233 -0.000232 -0.522233 -0.522233\n", 731 | "4 -0.174078 -0.174078 -0.000077 -0.174078 -0.174078\n", 732 | "5 0.174078 0.174078 0.000077 0.174078 0.174078\n", 733 | "6 0.522233 0.522233 0.000232 0.522233 0.522233\n", 734 | "7 0.870388 0.870388 0.000387 0.870388 0.870388\n", 735 | "8 1.218544 1.218544 0.000541 1.218544 1.218544\n", 736 | "9 1.566699 1.566699 2.236068 1.566699 1.566699" 737 | ] 738 | }, 739 | "execution_count": 39, 740 | "metadata": {}, 741 | "output_type": "execute_result" 742 | } 743 | ], 744 | "source": [ 745 | "standard = StandardScaler()\n", 746 | "standard.fit(data)\n", 747 | "data_standard_scaled = standard.transform(data)\n", 748 | "pd.DataFrame(data_standard_scaled, columns=columns)" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": null, 754 | "metadata": { 755 | "collapsed": true 756 | }, 757 | "outputs": [], 758 | "source": [] 759 | } 760 | ], 761 | "metadata": { 762 | "kernelspec": { 763 | "display_name": "Python 3", 764 | "language": "python", 765 | "name": "python3" 766 | }, 767 | "language_info": { 768 | "codemirror_mode": { 769 | "name": "ipython", 770 | "version": 3 771 | }, 772 | "file_extension": ".py", 773 | "mimetype": "text/x-python", 774 | "name": "python", 775 | "nbconvert_exporter": "python", 776 | "pygments_lexer": "ipython3", 777 | "version": "3.6.1" 778 | } 779 | }, 780 | "nbformat": 4, 781 | "nbformat_minor": 2 782 | } 783 | --------------------------------------------------------------------------------