├── CategoryClassifer.ipynb ├── README.md ├── Tutorial.ipynb ├── data ├── category │ ├── Books_small.json │ ├── Clothing_small.json │ ├── Electronics_small.json │ ├── Grocery_small.json │ └── Patio_small.json └── sentiment │ ├── Books_small.json │ └── Books_small_10000.json ├── data_process.py └── models ├── category_classifier.pkl ├── category_vectorizer.pkl └── sentiment_classifier.pkl /CategoryClassifer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import random\n", 11 | "from sklearn.model_selection import train_test_split\n", 12 | "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", 13 | "from sklearn.metrics import f1_score\n", 14 | "\n", 15 | "import json" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Load In Data" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "#### Data Class" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 7, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "class Category:\n", 39 | " ELECTRONICS = \"ELECTRONICS\"\n", 40 | " BOOKS = \"BOOKS\"\n", 41 | " CLOTHING = \"CLOTHING\"\n", 42 | " GROCERY = \"GROCERY\"\n", 43 | " PATIO = \"PATIO\"\n", 44 | " \n", 45 | "class Sentiment:\n", 46 | " POSITIVE = \"POSITIVE\"\n", 47 | " NEGATIVE = \"NEGATIVE\"\n", 48 | " NEUTRAL = \"NEUTRAL\"\n", 49 | "\n", 50 | "class Review:\n", 51 | " def __init__(self, category, text, score):\n", 52 | " self.category = category\n", 53 | " self.text = text\n", 54 | " self.score = score\n", 55 | " self.sentiment = self.get_sentiment()\n", 56 | " \n", 57 | " def get_sentiment(self):\n", 58 | " if self.score <= 2:\n", 59 | " return Sentiment.NEGATIVE\n", 60 | " elif self.score == 3:\n", 61 | " return 
Sentiment.NEUTRAL\n", 62 | " else: # Amazon review is a 4 or 5\n", 63 | " return Sentiment.POSITIVE\n", 64 | " \n", 65 | "class ReviewContainer:\n", 66 | " def __init__(self, reviews):\n", 67 | " self.reviews = reviews\n", 68 | " \n", 69 | " def get_text(self):\n", 70 | " return [x.text for x in self.reviews]\n", 71 | " \n", 72 | " def get_x(self, vectorizer):\n", 73 | " return vectorizer.transform(self.get_text())\n", 74 | " \n", 75 | " def get_y(self):\n", 76 | " return [x.sentiment for x in self.reviews]\n", 77 | " \n", 78 | " def get_category(self):\n", 79 | " return [x.category for x in self.reviews]\n", 80 | " \n", 81 | " def evenly_distribute(self):\n", 82 | " negative = list(filter(lambda x: x.sentiment == Sentiment.NEGATIVE, self.reviews))\n", 83 | " positive = list(filter(lambda x: x.sentiment == Sentiment.POSITIVE, self.reviews))\n", 84 | " positive_shrunk = positive[:len(negative)]\n", 85 | " print(len(positive_shrunk))\n", 86 | " self.reviews = negative + positive_shrunk\n", 87 | " random.shuffle(self.reviews)\n", 88 | " print(self.reviews[0])\n", 89 | " " 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "#### Load in Data" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 9, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "file_names = ['./data/category/Electronics_small.json', './data/category/Books_small.json', './data/category/Clothing_small.json', './data/category/Grocery_small.json', './data/category/Patio_small.json']\n", 106 | "file_categories = [Category.ELECTRONICS, Category.BOOKS, Category.CLOTHING, Category.GROCERY, Category.PATIO]\n", 107 | "\n", 108 | "reviews = []\n", 109 | "for i in range(len(file_names)):\n", 110 | " file_name = file_names[i]\n", 111 | " category = file_categories[i]\n", 112 | " with open(file_name) as f:\n", 113 | " for line in f:\n", 114 | " review_json = json.loads(line)\n", 115 | " review = Review(category, 
review_json['reviewText'], review_json['overall'])\n", 116 | " reviews.append(review)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "### Data Prep" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 11, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "train, test = train_test_split(reviews, test_size = 0.33, random_state=42)\n", 133 | "\n", 134 | "train_container = ReviewContainer(train)\n", 135 | "#train_container.evenly_distribute()\n", 136 | "test_container = ReviewContainer(test)\n", 137 | "#test_container.evenly_distribute()\n", 138 | "\n", 139 | "corpus = train_container.get_text()\n", 140 | "# vectorizer = CountVectorizer(binary=True)\n", 141 | "# vectorizer.fit(corpus)\n", 142 | "vectorizer = TfidfVectorizer()\n", 143 | "vectorizer.fit(corpus)\n", 144 | "\n", 145 | "train_x = train_container.get_x(vectorizer)\n", 146 | "train_y = train_container.get_category()\n", 147 | "\n", 148 | "test_x = test_container.get_x(vectorizer)\n", 149 | "test_y = test_container.get_category()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "### Classification" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 12, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "SVC(C=16, cache_size=200, class_weight=None, coef0=0.0,\n", 168 | " decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',\n", 169 | " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", 170 | " tol=0.001, verbose=False)" 171 | ] 172 | }, 173 | "execution_count": 12, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "from sklearn import svm\n", 180 | "\n", 181 | "clf = svm.SVC(C=16, kernel='linear', gamma='auto')\n", 182 | "clf.fit(train_x, train_y)\n", 183 | "\n" 184 | ] 185 | }, 186 | { 187 | "cell_type": 
"code", 188 | "execution_count": 13, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/plain": [ 194 | "array(['CLOTHING', 'PATIO', 'ELECTRONICS'], dtype='" 449 | ] 450 | }, 451 | "execution_count": 29, 452 | "metadata": {}, 453 | "output_type": "execute_result" 454 | }, 455 | { 456 | "data": { 457 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZsAAAD4CAYAAAA6j0u4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3dd3wUdfrA8c+TAiQEEAGpKqAoSocgCKJ0QVHhDhVERU6PExA9BfSw/BTPwwqCino0AQEBESyAhSocSu9SBASREukllECyz++PneAaUzbJbiaDz5vXvNj5zndnntnJ7rPf73x3RlQVY4wxJpwi3A7AGGPMhc+SjTHGmLCzZGOMMSbsLNkYY4wJO0s2xhhjwi7K7QAuJGf3/nDBDe2Lrdja7RDComjBWLdDCLnjSafcDiEsoiIi3Q4hLM6c2SW5Xce5gz8F/ZkTXbJyrreXG9ayMcYYE3bWsjHGGK/ypbgdQdAs2RhjjFelJLsdQdAs2RhjjEep+twOIWiWbIwxxqt8lmyMMcaEm7VsjDHGhJ0NEDDGGBN21rIxxhgTbuqh0Wj2o05jjPEqny/4KRMiUkhElonIWhH5QUQGOOWVRGSpiGwVkckiUsApL+jMb3OWV8wqVEs2xhjjVeoLfspcEtBcVWsBtYE2ItIQeBV4U1WrAEeAB536DwJHVPVK4E2nXqYs2RhjjFf5UoKfMqF+ic5stDMp0ByY6pSPBdo7j+9w5nGWtxCRTK+9ZsnGGGO8KhstGxHpLiIrAqbugasSkUgRWQPsB2YD24Gjqpp6Ymg3UN55XB74BcBZfgwokVmoNkDAGGO8KhsDBFR1ODA8k+UpQG0RuQiYDlyTXjXn//RaMZlegdqSjTHGeFUYriCgqkdFZAHQELhIRKKc1ksFYK9TbTdwKbBbRKKAYsDhzNZr3WjGGONRqilBT5kRkVJOiwYRiQFaApuA+UBHp1pX4DPn8efOPM7yeapqLRtjjLkghe5HnWWBsSISib8RMkVVZ4jIRmCSiLwErAZGOfVHAR+KyDb8LZpOWW3Akk0+lHT2LA889ixnz54jJcVHq5uup1e3TkycPovxU2fwy94EFn46huLFigIw73/LeOeDj4gQITIykqce+Rt1a6TX3Zo/VahQjjGjh1K6TCl8Ph8jR07g7XdGZf3EfOjtd1+mdZtmHDxwiMYNbj1f/vd/3MdD/7iXlOQUvvl6AS8895qLUeZcwYIFWTDvEwoULEhUVCTTps1kwIuD3A4r16pUqcz48cPOz1eqdBkvvjiYd/L732GIutFUdR1QJ53yn4Dr0ik/A9yZnW1IFi0fRCQFWB9QNElVX3H69Pqq6oqAuk3xN7N2BNTvq6pzRKQMMASoj39M906gP78Nn7sM/4iGY8BB4CH8zbgtQAFgBfCgqp5ztnUDMBgo6jx/sHMCDBF5AXgSqKiq+52yRFWNS+fxVU5cVwHnnH3tDZwARgA18Z8MOwq0CRge+Aehui20qnL6zBliY2I4l5xM197P8FTvv1EgOpqiReL42z+fY9J/Xz+fbE6dPk1MoUKICFu276TvgEF8Me7tUISSJ7eFLlPmEsqWuYTVazYQF1eYZUu/4q8d/8amTVvDts1w3Rb6+sb1OZl4kveGv34+2
dzQpAFP9OtBp47dOXv2LCVLXszBg5l2b+dIXt0WunDhWE6ePEVUVBQLF0zn8SeeZ+myVWHbXl7fFjoiIoKfflrGjTfewa5de8K2nVDcFvrMyk+D/swpVK+9q7eFDqZlc1pVa2djnYtUtV1ggTP+ejowVlU7OWW1gaKp6xaRMcAMVZ3qzFcEtqtqbadpNxu4C5jgJK6JQHtVXSUiJYGvRWSPqs50NnsQ6AM8lVGgIlIImAk8oapfOGXNgFLA34BfVbWGU341/mQUdiJCbEwMAMnJKSSnJCMI11SpnG791LoAp88kkflo9/wnIWE/CQn7AUhMPMnmzVspX65MWJNNuHy/eDmXXlb+d2V/e+gehg4eztmzZwHCkmjy0smT/qQWHR1FVHQ0WX1h9ZrmzRuzY8eusCaakEnJk4+kkMirAQLNgHOq+n5qgaquUdVFwTzZGZK3jN/GePcCxqjqKmf5QfwtmX8FPG00cLeIXJzJqu8Bvk9NNM665qvqBvx9mHsCyreoalIw8YZCSkoKHR96gps6dKNhvVrUvPaqTOvPXbSE2+7vTa/+/+HFJx/JoyhD7/LLK1C7VnWWLlvtdighc8WVlbi+UTyz503liy8nUKduDbdDypWIiAhWLP+GfXvWMXfuQpYtv3COFcCdd97O5MmfZV0xPwjR5WryQjDJJkZE1gRMd2dRv0ma+lcA1YGVOQ3SaYE0AL5yiqqls74VTnmqRPwJ57FMVp1ZXKOBp0TkexF5SUSqZBDb+R9KjRz/cRZ7ErzIyEimjhzMnI9HsGHzNrbu+DnT+i2aNOSLcW8z9N9P8c7oj0IWR14qXDiWKZNH8ETf5zlxIsPeSs+Jioqk2EXFaNW8I88/+yqjxw51O6Rc8fl8xNdvzeWV4qkfX4dq1a52O6SQiY6O5tZbWzFt2sysK+cHobtcTdgFk2xOq2rtgGlyFvUXpam/PRfxXeH8ovUQsMs5iQX+cyjptd3Tlr0FdBWRounUzZSqrgEqA68DFwPLReQPZ91Vdbiqxqtq/EP3Zut8WVCKxhWmfu1qLA7ym358rWrs3pvAkWPHQx5LOEVFRfHx5BF89NF0Pv30S7fDCam9exKY8fnXAKxauQ6fTylRMrMGtzccO3acbxd+x82tm7odSsjcfHNT1qzZwP79B90OJTgXWMsmFH4A6uXgedudczpXAg1F5PaA9cWnqVsP2BhYoKpH8Z/b6ZmTuFQ1UVWnqWpPYDxwS/Z3IfsOHz3G8cSTAJxJSmLJynVUuqxChvV37dl3vt9844/bOZeczEVFi+RFqCEzYvggNm3expChGf7A2bNmzpjDjTddD8AVV1akQIFoDnn0vE3JkhdTzBmYUqhQIVo0b8KWLbn5Ppm/3HXXHUyZ4pEuNPBUssmroc/zgIEi8ndVHQEgIvWBWFX9Nqsnq+o+EfkX/tFrnwPDgKUiMk1V14hICfxXHX0xnacPBpaT/r5OBPqLyK2pAwtEpA3+czVFgY2qesS5rPa1wIJs7XUOHTh0hGdfeZsUnw/1+WjdtDE3XR/PhE9mMnrSdA4dPspfH3ycJg3qMqBfL2Yv/J4vvv6WqKhIChYswOv/14csromXrzRuVJ/77u3IuvUbWbH8GwCee+4VvvxqnsuRZd+I0W/SuMl1lChRnA2bF/HKwKFM+HAqb7/7MouXzuTs2XP0/MeTboeZY2XLlmb0qCFERkYQERHB1KlfMHPWHLfDComYmEK0aNGERx7p73YoQVMPDRDIydDnr1T1X87Q52v4bYTW9/iTQNqhzy+p6lQRKYd/iHE94Az+oc//VNWtznbG8MfRaDNUtbozL8Aa4BFVXSQiNwKDgCL4u9WGqOp7Tt0XgERVfcOZHww8rqrizAcOfa7qxHWFsy/r8J/nuRno66w7Av+otacy+5VsqIY+5yd5MfTZDeEa+uymvBr6nNfyeuhzXgnF0OfT80cG/ZkT0+whV7+BZplsTPAs2XiHJRvvsGSTsdNzhwefbFp0z
/e/szHGGJMf5YNRZsGyZGOMMV6VD078B8uSjTHGeJW1bIwxxoRdcvA3T3ObJRtjjPEqa9kYY4wJOztnY4wxJuysZWOMMSbsrGVjjDEm7KxlY4wxJuxsNJoxxpiw89DlxizZGGOMV9k5G2OMMWFnycYYY0zY2QABY4wxYZeS4nYEQbNkE0JFK7d1O4SQO713kdshhEVchZvcDiHkIiPy6i7veSvZ550P1Dxn3WjGGGPCzpKNMcaYsLNzNsYYY8JNffY7G2OMMeFm3WjGGGPCzkajGWOMCTtr2RhjjAk7DyWbC3NgvjHG/BmoBj9lQkQuFZH5IrJJRH4QkcfSLO8rIioiJZ15EZG3RGSbiKwTkbpZhWotG2OM8arQtWySgT6qukpEigArRWS2qm4UkUuBVsCugPptgSrO1AB4z/k/Q9ayMcYYr/Jp8FMmVHWfqq5yHp8ANgHlncVvAk8CgSu5AxinfkuAi0SkbGbbsJaNMcZ4VRhGo4lIRaAOsFREbgf2qOpaEQmsVh74JWB+t1O2L6P1WrIxxhiP0mx0o4lId6B7QNFwVR2epk4c8AnwT/xda88ArdNbXXrhZLZ9SzbGGONV2biCgJNYhme0XESi8SeaCao6TURqAJWA1FZNBWCViFyHvyVzacDTKwB7M9u+nbMxxhivUl/wUybEn01GAZtUdTCAqq5X1UtUtaKqVsSfYOqqagLwOXC/MyqtIXBMVTPsQgNr2RhjjHeF7tpojYH7gPUissYpe1pVZ2VQfxZwC7ANOAV0y2oDlmyMMcarkkMzQEBV/0f652EC61QMeKxAr+xsw5KNMcZ4lYduMWDnbDymd+8HWbVqDitXzmbcuLcpWLCg2yEFLSnpLJ0eeoy/dO3JHV3+wTsjPwRg4tTPaXvX36jeuC1Hjh47X//Y8RM82v9FOtzfg04PPcbWn3a6FHnOFStWlI8mvs+6tfNZu2YeDRpk+UPrfO9C3KeCBQvy/eIZrFwxm7Vr5vH8//VxO6TghOh3NnnBM8lGRFJEZI2IrBWRVSLSKGBZNRGZJyI/ishWEXlOAgaFi0h755IKm0VkvYi0D1g2RkQ6Oo8vFpHVItJNRCKcyzFscJ6zXEQq5e1e/165cqXp1asbjRrdSr16rYiIiOSuu25zM6RsKVAgmtFvvcK0se8ydewwFi9dydoNm6hT81pGDn2ZcmUu+V39EeMmU7XKFUwf9x4Dn+vLK0PedynynBs06AW+mb2AmrWaEV//ZjZv3uZ2SLl2Ie5TUlISLVvfRb34VtSLb83NrZvS4Lr8n0TV5wt6cptnkg1wWlVrq2otoD/wMoCIxOAfGfGKql4F1AIaAT2d5bWAN4A7VLUqcDvwhojUDFy5iBQDvsY/9vwD4G6gHFBTVWsAHYCj4d/NzEVFRRETU4jIyEhiY2PYt+9Xt0MKmogQGxsDQHJyMsnJyYgI11x1JeXLlv5D/e07d9GwXi0AKl9+KXv2/crBw0fyNObcKFIkjiY3NOCDDyYBcO7cOY4dO+5yVLlzIe5TqpMnTwEQHR1FVHQ0msX1xPIFa9mEXVEg9VPnHmCxqn4DoKqngEeAfznL+wIDVXWHs3wH/kTVL2B9ccCXwERVfc8pKwvsU/V3iqrqblV19ZNu795fefPN4WzduoSdO1dw/Phx5sxZ5GZI2ZaSksJfu/bixnadub5+HWpWq5ph3auvrMycb78DYP3GLez7dT+/7j+YV6HmWqVKl3HgwGFGjBjM0iVf8t57r51Ptl51Ie5TqoiICFYs/4Z9e9Yxd+5Cli1f7XZIWbNkExYxTjfaZmAk8G+nvBqwMrCiqm4H4kSkaHrLgRVOearBwP9U9c2AsinAbc42B4lInRDuS45cdFExbrutFVWrNqZSpfrExsbSuXMHt8PKlsjISD4ZO4y50z9k/cYfMz0P89B9d3L8RCJ/7dqLCVM/p2qVK4iMjMy7YHMpKiqKOnWqM3z4OBo0bMupk6fo1y9bA3jynQtxn1L5f
D7i67fm8krx1I+vQ7VqV7sdUtZSUoKfXOalZJPajVYVaAOMc87LCBlfJkEzWJ62bB5wh4icP2mgqruBq/F32fmAuSLSIu0GRKS7iKwQkRUpKYk53LXgNG9+Azt3/sLBg4dJTk7ms8++omHDemHdZrgULRJH/bo1+d+SFRnWiStcmJeeeYJPxg7j5ef6cuToMSqU+2N3W361Z88+du/Zx/Ll/p8tTJs+izq1q7scVe5ciPuU1rFjx/l24Xfc3Lqp26FkSX0a9OQ2LyWb81T1e6AkUAr4AYgPXC4ilYFE5+qlf1gO1AU2BsxPwn+J7FnO5bVTt5Okql+qaj9gINCeNFR1uKrGq2p8ZGRc7ncuE7/8sofrrqtLTEwhAJo1a+ypk7OHjxzl+Al/Qj6TlMSS5aupdPmlGdY/fiKRc+fOAfDJF19Rr3YN4goXzpNYQ+HXXw+we/c+rqpSGfAfr02btrocVe5ciPsEULLkxRQrVhSAQoUK0aJ5E7Zs2e5yVEHwUDeaJ39nIyJVgUjgEDABeFpEWqrqHGfAwFvAa071N4CPRWSequ50rmj6NNAxcJ2qOsS5RPZ0EbkFqA4kqOpeEYkAagLr8mD3MrR8+RqmT5/FkiWzSE5OYe3aHxg1aqKbIWXLgUNHeOalN0jx+VCfcnPzJjRt3IDxH3/GBxM+5uDhI/zl/p40ub4+L/b/Jz/9/AtP//sNIiMiqFzxMl7s/0+3dyHbHn/8OcaMeZsCBaLZsWMXf+/ukSG1mbgQ96ls2dKMHjWEyMgIIiIimDr1C2bOmuN2WFnLB6PMgiWeGHGBf+gzsD51Fv+lFGY6y2oAb+M/qR8JfAi86PzKFRH5CzAAiAbOAc+r6jRn2RhghqpOdeY/AGKBsfjPC6X+kGUZ0FNVz2QUY6FCl3njxcyGE7sXuB1CWMRVuMntEEyQUjz0gZodyWf3ZPqL/WCc6Nk26M+cIu9+mevt5YZnWjaqmuGZYVVdDzTNZPk0YFoGyx5IMx94jZ+MrgtkjDHuywfdY8HyTLIxxhjze5rinVafJRtjjPEqa9kYY4wJt/wwpDlYlmyMMcarLNkYY4wJO++csrFkY4wxXqXJ3sk2lmyMMcarvJNrLNkYY4xX2QABY4wx4WctG2OMMeFmLRtjjDHhZy0bY4wx4abJbkcQPEs2xhjjUWotG2OMMWFnycYYY0y4WcvGGGNM2Fmy+ZOKK1DI7RBCLqZcE7dDCIvE5SPcDiHkLmrwsNshhIWrt5fM5zTFO6+OJRtjjPEoa9kYY4wJO/VZy8YYY0yYWcvGGGNM2Klay8YYY0yYeallE+F2AMYYY3LGlyJBT1kRkdEisl9ENgSU1RaRJSKyRkRWiMh1TrmIyFsisk1E1olI3azWb8nGGGM8Sn0S9BSEMUCbNGWvAQNUtTbwf848QFugijN1B97LauWWbIwxxqNCmWxUdSFwOG0xUNR5XAzY6zy+AxinfkuAi0SkbGbrt3M2xhjjUZqN29mISHf8rZBUw1V1eBZP+yfwtYi8gb9x0sgpLw/8ElBvt1O2L6MVWbIxxhiPys7vbJzEklVySasH8LiqfiIidwGjgJakf2GHTFOfdaMZY4xHqUrQUw51BaY5jz8GrnMe7wYuDahXgd+62NJlycYYYzwqJUWCnnJoL3CT87g5sNV5/DlwvzMqrSFwTFUz7EID60YzxhjPCuWPOkXkI6ApUFJEdgPPA38HhopIFHCG3875zAJuAbYBp4BuWa3fko0xxnhUKK+NpqqdM1hUL526CvTKzvot2RhjjEdlZzSa2yzZGGOMR9lVn40xxoRdis87Y7ws2XjA0GEDad2mGQcPHKJJw3YAVK9xDW8MGUDBggVJSU6mX58BrF65zuVIc6ZChXKMGT2U0mVK4fP5GDlyAm+/M8rtsIKScPAIzwybyKGjxxEROra8ni633MSWnXt4acTHnDpzlnKlivPyo/cRF+u/k+uPP+/l38OnkHj6DBESwcSXH6dggWiX9
yQ4VapUZvz4YefnK1W6jBdfHMw7HjleWYmIiGDpki/ZsyeB9h26uh1OlqwbLR0iUgYYAtQHkoCd+H+dOk1Vq6epK8Az+Md4K7AHeERVfxCRpUBB4GIgxlkG0B5YAMSr6kFnPU2BvqraTkQecJY9IiIvAE8CFVV1v1M3UVXjnMelgTeBhsAR4CzwmqpOD+2rEpxJE6Yxavh4hv33tfNlz/+7H6+/8g5zZy+kZeubeOHFftxx631uhJdrycnJ9HtyAKvXbCAurjDLln7FnLkL2bRpa9ZPdllkZAR977udaypfysnTZ+j0r8E0rHk1A/47mSfuu534a69k+ryljPl8Ho90uoXklBSefns8/3mkC1dXLM/REyeJiop0ezeCtnXrTzRo0BbwfzD/9NMyPv/8K5ejCp1Hez/Eps1bKVqkiNuhBMXnoVsM5EkbzEke04EFqnqFql4LPA2UzuApvfBfFqGWql4FvAx8LiKFVLVBwEXhJqtqbWfamc2wDgJ9Moj1U2ChqlZW1XpAJ/w/WnLF99+t4MiRY78rU1WKFIkDoGjROBIS9rsRWkgkJOxn9Rr/hWYTE0+yefNWypcr43JUwSlVvBjXVPb/tq1wTCEqly/N/sPH2Ll3P/WuuQKA62texdyl/lbn92u3UOWyclxdsTwAFxUpTGSEd7pCAjVv3pgdO3axa9eerCt7QPnyZWnbtgWjR3/kdihBy4MfdYZMXrVsmgHnVPX91AJVXSMiFTOo/xTQVFVPOXW/EZHvgC74L5cQCqOBB0TkVVUNvPhcc+Bsmlh/Bt4O0XZD4pmnBvLx9FEMeOkpIiIiaNvqbrdDConLL69A7VrVWbpstduhZNue/YfZvGM3Na68nCsvLcuCFRtoVr8G3yxZS8KhowD8vO8AIvDwf97nyPFE2jSqQ7c7Wrgcec7ceeftTJ78mdthhMygQQPo3/8l4pwvcV7gpW60vPpKVR1YGUxFESkKFFbV7WkWrQCqBbGK+c69F9YAIzOpl4g/4TyWprwasCqYWJ14uzv3eVhx5uyxrJ8QIt0e6syz/QdS69qbeLb/QIa+MzDPth0uhQvHMmXyCJ7o+zwnTiS6HU62nDqTRJ9BH9DvgQ7ExRZiQI9OTPr6f3R6ahCnTp8h2ukqS0nxsXrzDl7ufS9jXnyUecvWs3T9jy5Hn33R0dHcemsrpk2b6XYoIXHLLS05sP8gq1avdzuUbPGpBD25zUvtdyGLC705mqV2rQEPZVH3LaCrk+DS36jIMBFZKyLL01uuqsNVNV5V4wsVKBZEeKHRqXMHZnz+DQCfTf+SuvVq5tm2wyEqKoqPJ4/go4+m8+mnX7odTracS07hiUEfcEuTerRs4D8OlcqX5r/P9mDSq31o07guFUqXBOCSEsWIv/YKiheNI6ZgAW6ocy2bdux2M/wcufnmpqxZs4H9+w+6HUpINGoUT7t2rdn64xImjH+XZs0aM3bMW26HlaUUX0TQk9vyKoIfSOdXqOlR1ePASRGpnGZRXWBjKINS1aPARKBnQPEPzrZS6/QCWgClQrnt3EpI2E/jG/zXxGty0/X8tH2nuwHl0ojhg9i0eRtDhmb3orTuUlVeeH8SlcuX5v52Tc+XHzp2AgCfz8eIabO5s5X/yuyNa1Xlx117OZ10luSUFFZu2kblChmdusy/7rrrDqZMuXC60J599hUqVY6nylUN6XJvT+bPX0zXBx51O6wsaTYmt+XVOZt5wEAR+buqjgAQkfpAbAb1XwfeEpE7VfW0iLQEbgD+EYbYBgPL+e21SI21h6qm3n0uozjzxPDRg2l8w3VcXKI46zYt5NWBb/F472cZ+OozREZFkZSUxBOPPedmiLnSuFF97ru3I+vWb2TFcn9r7bnnXuHLr+a5HFnWVm/ZwYyFK6hyWVnu6vc6AL0738quhANM+noxAC2uq0H7Zv4vBkXjYrnv1qbc038wIkKTOtdwY91geofzj5iYQrRo0YRHHunvdih/evmheyxYo
nl0hklEyuEf+lwP/wXdduIf+rwR+DWg6uPAVPyjze4DUoAE/EOf1wes7wGcocwBZTsJfuhzoqq+4dQbjP+eDeLMl8U/9LkBcAA4CbyvqpMz28eSRa/KD18gQuromZNuhxAWictHuB1CyF3U4GG3QwiLFF+K2yGExbmze3KdKRaX6Rj0Z07jhKmuZqY8SzZ/BpZsvMOSjXdYssnYomwkmyYuJxu7goAxxniUpnvDzPzJko0xxnhUsofO2ViyMcYYj7KWjTHGmLDzuR1ANliyMcYYj7KWjTHGmLCzlo0xxpiwS7GWjTHGmHDz0F2hLdkYY4xX+axlY4wxJty8dMkSSzbGGONRNkDAGGNM2PnEutGMMcaEmZcuUWrJxhhjPMpGoxljjAk7G432J3XsAr33y4WoxPU9s67kMcdWfuB2CGFRrF43t0PIt2w0mjHGmLCzbjRjjDFhZ0OfjTHGhF2Kh1o2EW4HYIwxJmd82ZiyIiKjRWS/iGwIKHtdRDaLyDoRmS4iFwUs6y8i20Rki4jcnNX6LdkYY4xHhTLZAGOANmnKZgPVVbUm8CPQH0BErgU6AdWc57wrIpGZrdySjTHGeJRK8FOW61JdCBxOU/aNqiY7s0uACs7jO4BJqpqkqjuAbcB1ma3fko0xxnhUdlo2ItJdRFYETN2zubm/AV86j8sDvwQs2+2UZcgGCBhjjEdl53I1qjocGJ6T7YjIM0AyMCG1KL1NZLYOSzbGGONRefE7GxHpCrQDWqhqakLZDVwaUK0CsDez9Vg3mjHGeFSIBwj8gYi0AZ4CblfVUwGLPgc6iUhBEakEVAGWZbYua9kYY4xHhfJHnSLyEdAUKCkiu4Hn8Y8+KwjMFv/tDJao6sOq+oOITAE24u9e66WqmfbqWbIxxhiPCuW10VS1czrFozKp/x/gP8Gu35KNMcZ4lF0bzRhjTNjZzdOMMcaEnc9DNxmwZGOMMR5lV302xhgTdt5p11iy8aSIiAiWLvmSPXsSaN+hq9vh5FrBggVZMO8TChQsSFRUJNOmzWTAi4PcDitH3nv/Ndq2ac6BA4eoX99/IdzixYsxbtw7XHZ5BXb9vJv77uvF0aPHXY40c0lnz9Ht2cGcPZdMis9Hy+vr0KtTO3b/epAnB4/meOJJrql0KQMfe4Do6CjOnjvHM0PHsvGnXyhWpDCv93mQ8peUcHs3sqV37wfp1q0zqsoPP2zm73/vS1JSktthZcpLLRtXftQpIqVFZKKI/CQiK0XkexHpICJNReSYiKx2Lmv9RprntXcudb1ZRNaLSPs0y/s6yzaIyFoRud8pX+BcBnuNM011yl8QkT1O2UYR6eyUdxeRyQHrLSoi250fL7nu0d4PsWnzVrfDCJmkpCRatr6LevGtqBffmptbN6XBdXXdDitHxn84lfbtf/8FoE+fHixY8B21ajZjwYLv6NMn/9+SukB0FCMHPMbUN31PRtIAABcsSURBVJ9hyqCnWbx6I2u37GDIh59y323NmTFsAEXjYpk29zsAps35jqJxscx8dwD33dacIeOmu7wH2VOuXGl69epGo0a3Uq9eKyIiIrnrrtvcDitLyaJBT27L82Qj/l8GfQosVNXKqloP/6WqU68mukhV6wB1gHYi0th5Xi3gDeAOVa0K3A68ISI1neUPA62A61S1OnAjv79+TxdVre1MHQPK31TV2vivYvpfEYkGRgAVRKSlU+dFYLRzdVNXlS9flrZtWzB69EduhxJSJ0/6f5wcHR1FVHQ0v10Vw1sWL17G4cPHfld2a7tWTJgwFYAJE6bS7rZWboSWLSJCbEwhAJJTUkhOTkEElq3fQqvr6wBwe7OGzF+2FoAFy9dxe7OGALS6vg5L12/x3DGMiooiJqYQkZGRxMbGsG/fr26HlCXNxuQ2N1o2zYGzqvp+aoGq/qyqbwdWUtXTwBp+u5JoX2Bg6ge+8//LQD9n+dNAT1U97iw/pqpjgw1KVbcCp4DizvV/egBDRCQeaAG8n
u09DYNBgwbQv/9L+HxeakBnLSIighXLv2HfnnXMnbuQZctXux1SyFxySSkSEg4AkJBwgFKlSrocUXBSUnzc+cRAmnZ7iutrVeXSMqUoUjiWqEj/bUtKl7iIXw8dBeDXQ0cpXaI4AFGRkcTFxnD0xEnXYs+uvXt/5c03h7N16xJ27lzB8ePHmTNnkdthZSncl6sJJTeSTTVgVVaVRKQ4/uvtLAx43so01VYA1USkCFBEVbdnssoJAd1of0gcIlIX2Kqq+wFUdR3wNTAXeFRVz2YVc7jdcktLDuw/yKrV690OJeR8Ph/x9VtzeaV46sfXoVq1q90O6U8vMjKCjwc/zewR/2HDtp38tDvhD3WcS5ikK5NF+c5FFxXjtttaUbVqYypVqk9sbCydO3dwO6ws+dCgJ7e5fiFOERnmnF9Z7hQ1EZF1QAIwQ1VT/8KFP7YGU8vSW5ZWYDdav4Dyx0VkC7AUeCHNc4YBe1R1fibxn79HhM8X3m9yjRrF065da7b+uIQJ49+lWbPGjB3zVli3mdeOHTvOtwu/4+bWTd0OJWT27z9AmTKlAChTphQHDhx0OaLsKVo4lvhqV7Huxx2cOHmK5BT/Twl/PXSUSy4uBqS2co4A/m63xFOnKRZX2LWYs6t58xvYufMXDh48THJyMp999hUNG9ZzO6wsWTda5n4Azp/9VdVe+LupSjlFi5xbkNYAeohI7YDnxadZV11go9N1dlJEKucgnjdV9WrgbmCciBQKWJZlC1RVh6tqvKrGR0SE98317LOvUKlyPFWuakiXe3syf/5iuj7waFi3mRdKlryYYsWKAlCoUCFaNG/Cli2ZNVK9ZdbMOXTp4j9N2KVLR2bOmO1yRFk7fOwEx53zaGeSzrJk3WYqVyhD/epXMft7fxfn5/OX0LR+TQCa1q/J5/OXADD7+9VcV+PqTFs9+c0vv+zhuuvqEuOcp2rWrDGbN29zOaqseakbzY2hz/OAgSLSQ1Xfc8pi01ZS1R9F5GX8l7fujH9wwMciMk9Vd4pIRfznaVJP9r8MDBORu1X1uIgUBTo5NwzKkqpOc+7b0BX4by72z2RT2bKlGT1qCJGREURERDB16hfMnDXH7bByZMyYt2hyY0NKlCjOj1u/56WX3mTQoPf48MNh3N/1Lnb/spd7783/o9EOHjnGs2+PI8Xnw+dTbm5cj5via3BFhbI8OXgU70z8gqqVKvCXlo0A6NCiEU8PHcOtPZ+nWFwsrz3xoMt7kD3Ll69h+vRZLFkyi+TkFNau/YFRoya6HVaWUvJFmyU44saIEREpC7wJNAAOACeB94Ffgb6q2s6pF4P/3tY3qOoOEfkLMACIBs4Bz6vqNKeu4B8s8KCz7BwwSFXHi8gCoCxw2gnhoKq2FJEXgERVfcNZRz1gInCNqvqchDbDGd2WpegC5b1z5IN0we2Qo2BUtNshhNzh5RleoNfTitXr5nYIYXHmzK5cN/0eq9gp6Lfo0J2TXG1qupJsLlSWbLzDko13WLLJ2KMV7w76LfrWzsmuJhu7goAxxnhUfjgXEyxLNsYY41H5YUhzsCzZGGOMR3kn1ViyMcYYz0r2ULqxZGOMMR6llmyMMcaEmw0QMMYYE3bWsjHGGBN21rIxxhgTdike+lG+JRtjjPEo+52NMcaYsLNzNsYYY8LOztkYY4wJO+tGM8YYE3bWjWaMMSbsbDSaMcaYsLNutD+pAhfgDbmSks+5HUJYnL0A96tE/YfcDiEsjs5/1e0Q8i0vDRCIcDsAY4wxOaPZ+JcVEblIRKaKyGYR2SQi14vIxSIyW0S2Ov8Xz2mslmyMMcajfGjQUxCGAl+palWgFrAJ+BcwV1WrAHOd+RyxZGOMMR6lqkFPmRGRosCNwChnvWdV9ShwBzDWqTYWaJ/TWC3ZGGOMR6WgQU8i0l1EVgRM3QNWVRk4AHwgIqtFZKSIFAZKq+o+AOf/S3Iaqw0QMMYYj8rOaDRVHQ4Mz2BxFFAX6K2qS0VkKLnoM
kuPtWyMMcajQtWNBuwGdqvqUmd+Kv7k86uIlAVw/t+f01gt2RhjjEeFaoCAqiYAv4jI1U5RC2Aj8DnQ1SnrCnyW01itG80YYzwqxJer6Q1MEJECwE9AN/wNkiki8iCwC7gzpyu3ZGOMMR4VysvVqOoaID6dRS1CsX5LNsYY41F2uRpjjDFhZ8nGGGNM2AUxyizfsGRjjDEeZS0bY4wxYWc3TzPGGBN2KeqdmwxYsjHGGI+yczbGGGPCzs7ZmJB67/3XaNumOQcOHKJ+/ZsBKF68GOPGvcNll1dg18+7ue++Xhw9etzlSHOmQoVyjBk9lNJlSuHz+Rg5cgJvvzPK7bBCIiIigqVLvmTPngTad+ia9RPyoXfff/X839919dsA0KHDLTz9zGNcXfVKbrqxPatXrXc5yuAkHDrGMyOmc+hYIiJCx6b16NK6IZt/3sdLY2dw9lwykZERPH3/rdSoXIETp87w9H+nkXD4GMkpPrq2bUT7JnXc3o3zvHTOxvPXRhORFBFZIyIbRORjEYkNWNZBRFREqjrzNZy6a0TksIjscB7PEZGKIrIh4Lk3iMgy5651m9NcjjtPjf9wKu3b//6Dqk+fHixY8B21ajZjwYLv6NOnp0vR5V5ycjL9nhxAjZpNaXzDbfTo8QDXXFPF7bBC4tHeD7Fp81a3w8iVCR9+Qvv2D/yubOPGLdzTuQeL/7fMnaByKDIygr6dWvPpy48w/rmHmDR3Gdv37OfNKbN5uH1Tpvy7Bz07NGPI5NkATJ67jMrlS/Hxv3sw6l8PMGjS15xLTnZ5L37jUw16cpvnkw1wWlVrq2p14CzwcMCyzsD/gE4AqrreqVsb/wXm+jnzLQNXKCJlgInAw85d624A/iEit+bB/vzB4sXLOHz42O/Kbm3XigkTpgIwYcJU2t3Wyo3QQiIhYT+r1/jzfGLiSTZv3kr5cmVcjir3ypcvS9u2LRg9+iO3Q8mVxYuXceTw0d+Vbdmyna1bf3IpopwrdVERrqlYDoDCMQWpXK4U+4+cQERIPJ0EQOLpJEoVLwKAiHDqTBKqyqmksxQrHENkRP752AzlbaHD7ULrRlsE1AQQkTigMdAMf2J5IRvr6QWMUdVVAKp6UESedNYxM4Tx5tgll5QiIeEAAAkJByhVqqTLEYXG5ZdXoHat6ixdttrtUHJt0KAB9O//EnFF4twOxaRjz4EjbP55HzWuKM+T97ShxxsfMnjyN/h8yrhnHwSgU4vreHToR7T85yBOnknitR53EpGPko2XRqPln1ctl0QkCmgLpHYet8d/P+0fgcMiUjcbq6sGrExTtsIpT7vd83e/S04+kYPITarChWOZMnkET/R9nhMnEt0OJ1duuaUlB/YfZNVqb5zL+LM5dSaJPu9Mod89bYiLKcSUecvp17kN3wx+gn733MwLo/1X0v9uwzaqXlaGOUP6MOXFh3l5/CwST59xOfrfWDda3ooRkTX4k8EunHto4+9Cm+Q8nuTMB0sg3XbnH8pUdbiqxqtqfFRUkWxsInf27z9AmTKlAChTphQHDhzMs22HQ1RUFB9PHsFHH03n00+/dDucXGvUKJ527Vqz9cclTBj/Ls2aNWbsmLfcDssA55JTeOKdKdxyfQ1axl8LwBeL19Ii/hoAWtevxoaf9gDw2aI1tKh3DSLCZaVLUL7URezYl3/ea17qRrsQkk3qOZvaqtpbVc+KSAmgOTBSRHYC/YC7RUSCXOcP/PFS2/Xw30woX5g1cw5dunQEoEuXjsycMdvliHJnxPBBbNq8jSFDM7prrbc8++wrVKocT5WrGtLl3p7Mn7+Yrg886nZYf3qqygujP6Ny2ZLc36bR+fJSFxVhxeadACzbtIPLSpcAoEyJYizd6D83dehYIjv3HaJCqeJ5HndGvNSyudDO2aTqCIxT1X+kFojIt/hP9C8K4vnDgKUiMk1V1zjJ61XgxbBEm4UxY96iyY0NKVGiOD9u/Z6XXnqTQYPe48MPh3F/17vY/
cte7r3Xu6PRGjeqz333dmTd+o2sWP4NAM899wpffjXP5cgMwAdjhp7/+9uy9Tv+89IQjhw5yhuDXqBkyYv55JPRrFu3kfZ35P+h3au37mLGd+uoUuES7nruPQB6d2zB/3W7jdcmfEWKz0eB6Cj+r9ttAHS//UaeG/kpf332XVSVf97VkuJFCru5C7+TH1oswRIv/QI1PSKSqKpxacoWAK+o6lcBZY8C16hqD2d+DDBDVac68xWd+erO/I3AIKAI/m61Iar6XmaxFI6t6O0XMx1JyefcDiEsgm3ieknBqAJuhxAWB+cOdDuEsCh0fedc/xleXqJm0J85Px9a5+qfvedbNmkTjVPWNJ2yt9LMP5BmfidQPWB+IVA/RGEaY0zIeamx4PlkY4wxf1Z2uRpjjDFhZy0bY4wxYZcfRpkFy5KNMcZ4lJdGo1myMcYYj/LS5Wos2RhjjEfZORtjjDFhZ+dsjDHGhJ21bIwxxoSd/c7GGGNM2FnLxhhjTNjZaDRjjDFhZwMEjDHGhJ11oxljjAk7u4KAMcaYsLOWjTHGmLDz0jkbz9+p889KRLqr6nC34wi1C3G/LsR9ggtzvy7EfcovItwOwORYd7cDCJMLcb8uxH2CC3O/LsR9yhcs2RhjjAk7SzbGGGPCzpKNd12o/coX4n5diPsEF+Z+XYj7lC/YAAFjjDFhZy0bY4wxYWfJxhhjTNhZsskmEUkRkTUB07+c8gUiEp+mblMROZamfktnWRkRmSQi20Vko4jMEpFaAfUOi8gO5/EcEakoIqed+Y0iMk5EogO2dYOILBORzc7UPWDZCyJySkQuCShLzODxVU4s20Rkk4hMEZHSIhIrIhNEZL2IbBCR/4lIXAavzVoRWSUijQKWVROReSLyo4hsFZHnREQClrcXkXVO7OtFpH3AsjEi0tF5fLGIrBaRbiISISJvOfGsF5HlIlIpB8c0vWNxlYhsSKeuiMizzj78KCLzRaSas2yps/+7RORAwLGsKCI7RaRkwHqaisgM5/EDIvJOkMeqtIhMFJGfRGSliHwvIh0y2bd06wf8ba52XvM30jwvw+PhLO/rLNvgHO/7nfIFIrIlYN+nBuzXnoC/385OeXcRmRyw3qLOccj2cUxn31P/HjeIyMciEhuwrIOIqIhUdeZrSObvvQ0Bz83wvWYyoao2ZWMCEjMoXwDEpylrCsxIp64A3wMPB5TVBpoEzI8BOgbMVwQ2OI8jgXlAF2e+DLALqOvMlwRWArc68y84y19Nbz9SHwOFgK3AbQHLmgHVgf7A4IDyq4GCGb02wM3At87jGGA70NqZjwW+BHo587WAbUAlZ76SM18z8LUAigHLgR5OeWdgKhDhzFcAimfzeGZ4LFJf7zT1HwFmAbHOfGtn3woF1HkAeCfN83YCJdP72wisn9mxyiDWy4He2di3y4HeabYfA2wGGgd5PB4GvgaKOvPFgK4ZvQ8C9quv87gKcByIdmJcDLR0lg0Bngn1exWYADwRMD8FWAS8kM7zxpDxey/T95pNGU/WsnFHM+Ccqr6fWqCqa1R1UTBPVtUUYBlQ3inqBYxR1VXO8oPAk8C/Ap42GrhbRC7OZNX3AN+r6hcB25qvqhuAssCegPItqpqUybqKAkcC1rtYVb9xnnsK/4d2anx9gYGqusNZvgN4GegXsL44/Alqoqq+55SVBfap+m/qoaq7VfUI2ZPusQB+yaD+U/g/3E85db8BvgO6ZHO7mcnoWDUHzqaJ9WdVfTuD9QRVX1VPA2v47e8pq+PxNNBTVY87y4+p6thgd05VtwKn8H8xUKAHMET8PQMtgNeDXVc2LAKuBBB/i7wx8CDQKZvrCea9ZtJhySb7YuT33WJ3Z1G/SZr6V+BvKazMaQAiUghoAHzlFFVLZ30rnPJUifg/xB7LZNWZxTUaeMrphnlJRKqkUyf1tdkMjAT+nVF8qrodiBORokHGPxj4n6q+GVA2BbjN2eYgEamTyb5lJOhj4cRa2Ik9s
1gzMj/17wD/65ORjI5VNWBVMLFmp76IFMff2lgY8Lx0j4eIFAGKpPMaBJoQ8Pf+h8QhInWBraq6H0BV1+FvKc0FHlXVs1nFnB0iEgW0BdY7Re2Br1T1R+CwE0+wgvlbNemwZJN9p1W1dsA0OYv6i9LUz+xNmpUrnA+qQ8Au500K/q6I9Mawpy17C+jqfGhmi/NtvzL+b50XA8tF5Jo01VJfm6pAG2CciEgm8aXGmN7ytGXzgDsCz2Wo6m783Xn9AR8wV0RaZHffQiCz/QvULPXvAHgoi7pZHisRGeacL1keVJB/rN9ERNYBCfi71BJSq5Lx8QhmX7sE/L0Htk4fF5EtwFL83WqBhgF7VHV+MPsSpBjn/bICf9fXKKe8MzDJeTzJmQ9WsO81k4YlG3f8ANTLwfO2Ox9UVwINReT2gPXFp6lbD9gYWKCqR4GJQM+cxKWqiao6TVV7AuOBWzKp+z3+/uxS6cUnIpXx96mfyCD+umninwS8B8xyvl2nbidJVb90PtQG4v/Wmh1BHwun2+ikE3tmseZaBsfqB2dbqXV64e92KpXBarKqv0hVawI1gB4iUjvgeekej0xeg2C8qapXA3fj/yJSKGCZz5lCKfCLYW9VPSsiJfB3L44UkZ34uwbvdr4UBSOo95r5I0s27pgHFBSRv6cWiEh9EbkpmCer6j78fcT9naJhwAOpHxbOG+pV4LV0nj4Y+Afp315iItBIRG4NiKuNM1KnsdPdgogUAK4Ffs4oRmeUTyT+VtgE4Ab5bSReDP5v7qnxvQH0F5GKzvKK+M8LDEqz30Pwd7VMF5ECIlJXRMo5z4kAamYWUwbSPRb4T6Sn53XgLWcfcPbpBvyvXailPVbzgEIi0iOgTuwfnvWboOo73Ukv4z8fBVkfj5eBYamtLmcEWdAjslR1Gv7WRtdgnxNCHYFxqnq5qlZU1UuBHfiPYTCy814zASzZZF/aczavBCybKSK7neljpyztOZuOzknRDkAr8Q/z/AF/t8LebMTxKRArIk2c5HMvMMI5X/IdMDrwRH8q54TmdKBgOstOA+2A3uIf2rsR/0ip/cAVwLcish5Yjf/D4pOMXhtgMv4RSinOeu8AnnW6UdbjH1X2jrPdNfg/6L5w4v8CeNIpTxvjU/hP3n+If2TQF86w1HVAcuo6g5XFsbg64HjuFpE7gbed2Nc7+/IccIezjyGV9lg5sbYHbhL/0NxlwFh+SxLp7Vuw9d8HbhSRSkEcj/eA+fi7UjcA3+I/4Z8q8JzNnAx270XgCedLQl7qjP81DfQJ/kEsWcrOe838nl2uxhhjTNhZy8YYY0zYWbIxxhgTdpZsjDHGhJ0lG2OMMWFnycYYY0zYWbIxxhgTdpZsjDHGhN3/A+iqdZYy9hT3AAAAAElFTkSuQmCC\n", 458 | "text/plain": [ 459 | "
" 460 | ] 461 | }, 462 | "metadata": { 463 | "needs_background": "light" 464 | }, 465 | "output_type": "display_data" 466 | } 467 | ], 468 | "source": [ 469 | "from sklearn.metrics import confusion_matrix\n", 470 | "import seaborn as sn\n", 471 | "import pandas as pd\n", 472 | "import matplotlib as plt\n", 473 | "\n", 474 | "y_pred = clf.predict(test_x)\n", 475 | "\n", 476 | "labels = [Category.ELECTRONICS, Category.BOOKS, Category.CLOTHING, Category.GROCERY, Category.PATIO]\n", 477 | "\n", 478 | "cm = confusion_matrix(test_y, y_pred, labels=labels)\n", 479 | "df_cm = pd.DataFrame(cm, index=labels, columns=labels)\n", 480 | "\n", 481 | "sn.heatmap(df_cm, annot=True, fmt='d')\n" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [] 490 | } 491 | ], 492 | "metadata": { 493 | "kernelspec": { 494 | "display_name": "Python 3", 495 | "language": "python", 496 | "name": "python3" 497 | }, 498 | "language_info": { 499 | "codemirror_mode": { 500 | "name": "ipython", 501 | "version": 3 502 | }, 503 | "file_extension": ".py", 504 | "mimetype": "text/x-python", 505 | "name": "python", 506 | "nbconvert_exporter": "python", 507 | "pygments_lexer": "ipython3", 508 | "version": "3.7.3" 509 | } 510 | }, 511 | "nbformat": 4, 512 | "nbformat_minor": 2 513 | } 514 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sklearn 2 | Data & code associated with my tutorial on the scikit-learn machine learning library in Python 3 | 4 | Video link: https://youtu.be/M9Itm95JzL0 5 | 6 | Tutorial.ipynb is the file that I worked on during the video. 7 | 8 | The data directory contains several files of 1000+ Amazon reviews across different departments. 
If you want the raw data from which I created these files, see: http://jmcauley.ucsd.edu/data/amazon/ 9 | -------------------------------------------------------------------------------- /Tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Data Class" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import random\n", 17 | "\n", 18 | "class Sentiment:\n", 19 | " NEGATIVE = \"NEGATIVE\"\n", 20 | " NEUTRAL = \"NEUTRAL\"\n", 21 | " POSITIVE = \"POSITIVE\"\n", 22 | "\n", 23 | "class Review:\n", 24 | " def __init__(self, text, score):\n", 25 | " self.text = text\n", 26 | " self.score = score\n", 27 | " self.sentiment = self.get_sentiment()\n", 28 | " \n", 29 | " def get_sentiment(self):\n", 30 | " if self.score <= 2:\n", 31 | " return Sentiment.NEGATIVE\n", 32 | " elif self.score == 3:\n", 33 | " return Sentiment.NEUTRAL\n", 34 | " else: #Score of 4 or 5\n", 35 | " return Sentiment.POSITIVE\n", 36 | "\n", 37 | "class ReviewContainer:\n", 38 | " def __init__(self, reviews):\n", 39 | " self.reviews = reviews\n", 40 | " \n", 41 | " def get_text(self):\n", 42 | " return [x.text for x in self.reviews]\n", 43 | " \n", 44 | " def get_sentiment(self):\n", 45 | " return [x.sentiment for x in self.reviews]\n", 46 | " \n", 47 | " def evenly_distribute(self):\n", 48 | " negative = list(filter(lambda x: x.sentiment == Sentiment.NEGATIVE, self.reviews))\n", 49 | " positive = list(filter(lambda x: x.sentiment == Sentiment.POSITIVE, self.reviews))\n", 50 | " positive_shrunk = positive[:len(negative)]\n", 51 | " self.reviews = negative + positive_shrunk\n", 52 | " random.shuffle(self.reviews)\n", 53 | " \n", 54 | " " 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "### Load Data" 62 | ] 63 | }, 64 | { 
65 | "cell_type": "code", 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "'I hoped for Mia to have some peace in this book, but her story is so real and raw. Broken World was so touching and emotional because you go from Mia\\'s trauma to her trying to cope. I love the way the story displays how there is no \"just bouncing back\" from being sexually assaulted. Mia showed us how those demons come for you every day and how sometimes they best you. I was so in the moment with Broken World and hurt with Mia because she was surrounded by people but so alone and I understood her feelings. I found myself wishing I could give her some of my courage and strength or even just to be there for her. Thank you Lizzy for putting a great character\\'s voice on a strong subject and making it so that other peoples story may be heard through Mia\\'s.'" 73 | ] 74 | }, 75 | "execution_count": 2, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "import json\n", 82 | "\n", 83 | "file_name = './data/sentiment/books_small_10000.json'\n", 84 | "\n", 85 | "reviews = []\n", 86 | "with open(file_name) as f:\n", 87 | " for line in f:\n", 88 | " review = json.loads(line)\n", 89 | " reviews.append(Review(review['reviewText'], review['overall']))\n", 90 | " \n", 91 | "reviews[5].text\n", 92 | " " 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### Prep Data" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 39, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "from sklearn.model_selection import train_test_split\n", 109 | "\n", 110 | "training, test = train_test_split(reviews, test_size=0.33, random_state=42)\n", 111 | "\n", 112 | "train_container = ReviewContainer(training)\n", 113 | "\n", 114 | "test_container = ReviewContainer(test)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | 
"execution_count": 40, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "436\n", 127 | "436\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "train_container.evenly_distribute()\n", 133 | "train_x = train_container.get_text()\n", 134 | "train_y = train_container.get_sentiment()\n", 135 | "\n", 136 | "test_container.evenly_distribute()\n", 137 | "test_x = test_container.get_text()\n", 138 | "test_y = test_container.get_sentiment()\n", 139 | "\n", 140 | "print(train_y.count(Sentiment.POSITIVE))\n", 141 | "print(train_y.count(Sentiment.NEGATIVE))" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "#### Bag of words vectorization" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 49, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "I read this book over a year ago & enjoyed the various stories, the author takes you on a journey of life as it pretty much is in today's world & society, as you end one story you look forward to starting the next, relaxed reading I highly recommend it for peps who enjoy stories from back in their grand-ma & grand-dad days in the South. I will peruse more books by this author for future purchase.\n", 161 | "[[0. 0. 0. ... 0. 0. 
0.]]\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", 167 | "\n", 168 | "# This book is great !\n", 169 | "# This book was so bad\n", 170 | "\n", 171 | "vectorizer = TfidfVectorizer()\n", 172 | "train_x_vectors = vectorizer.fit_transform(train_x)\n", 173 | "\n", 174 | "test_x_vectors = vectorizer.transform(test_x)\n", 175 | "\n", 176 | "print(train_x[0])\n", 177 | "print(train_x_vectors[0].toarray())\n", 178 | "\n", 179 | "\n", 180 | "\n" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "## Classification" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "#### Linear SVM\n", 195 | "\n", 196 | "\n", 197 | "\n", 198 | "\n", 199 | "\n", 200 | "\n", 201 | "\n", 202 | "\n", 203 | "\n", 204 | "\n", 205 | "\n", 206 | "\n", 207 | "\n", 208 | "\n", 209 | "\n", 210 | "\n", 211 | "\n" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 50, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "array(['POSITIVE'], dtype='