├── .gitignore ├── 01 - Why scikit-learn.ipynb ├── 02 - First Steps.ipynb ├── 03 - Unsupervised Transformers.ipynb ├── 04 - API Summary.ipynb ├── 05 - Cross-validation.ipynb ├── 06 - Grid Searches for Hyper Parameters.ipynb ├── 07 - Preprocessing and Pipelines.ipynb ├── 08 - Working With Text Data.ipynb ├── 08.5 Feature Union.ipynb ├── 09 - Out Of Core Learning.ipynb ├── 10 - Other tools.ipynb ├── 11 - Pystruct.ipynb ├── README.md ├── data ├── test_with_solutions.csv ├── train.csv ├── train_0.csv ├── train_1.csv ├── train_2.csv ├── train_3.csv ├── train_4.csv ├── train_5.csv ├── train_6.csv ├── train_7.csv ├── train_8.csv └── train_9.csv ├── figures ├── bag_of_words.svg ├── grid_search_cross_validation.svg ├── hashing_vectorizer.svg ├── pipeline_cross_validation.svg └── randomized_search.png ├── intro_to_structured_prediction.pdf └── solutions ├── cross_validation_iris.py ├── digits_tsne.py ├── grid_search_forest.py ├── letters_graph_crf.py ├── pipeline_iris.py ├── text_pipeline.py └── train_iris.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | -------------------------------------------------------------------------------- /01 - Why scikit-learn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# What is Scikit-learn?\n", 8 | "* A machine learning library in python\n", 9 | "\n", 10 | "## What does it do?\n", 11 | "* Easy to use, efficient implementations of standard algorithms:\n", 12 | " * SVMs, linear models, tree models, K-Means, T-SNE, NMF, ICA, ...\n", 13 | "* tools for preprocessing, model selection, model evaluation, building workflows\n", 14 | " * cross validation, grid search, randomized search, pipelines, scoring functions, roc curve, ...\n", 15 | "* not necessarily easy to adapt the algorithms\n", 16 | "\n", 17 | "## What does it not do?\n", 18 | "*
No reinforcement learning.\n", 19 | "* limited deep learning (no GPU).\n", 20 | "* No structured prediction / sequence algorithms.\n", 21 | "* limited support for missing values.\n", 22 | "\n", 23 | "## Why should you care?\n", 24 | "* Makes it trivial to apply baselines to your problem.\n", 25 | "* Provides \"standard\" bits of your ML workflow.\n", 26 | "* Tooling in which you can plug in your custom model with ease." 27 | ] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "Python 2", 33 | "language": "python", 34 | "name": "python2" 35 | }, 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 2 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython2", 46 | "version": "2.7.6" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 0 51 | } 52 | -------------------------------------------------------------------------------- /02 - First Steps.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "Get some data to play with" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "collapsed": false 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "from sklearn.datasets import load_digits\n", 32 | "digits = load_digits()\n", 33 | "digits.keys()" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "digits.images.shape" 45 | ] 46 | }, 47 | { 48 |
"cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "print(digits.images[0])" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "\n", 67 | "\n", 68 | "plt.matshow(digits.images[0], cmap=plt.cm.Greys)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "digits.data.shape" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "digits.target.shape" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "digits.target" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "**Data is always a numpy array (or sparse matrix) of shape (n_samples, n_features)**" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Split the data to get going" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "from sklearn.cross_validation import train_test_split\n", 127 | "X_train, X_test, y_train, y_test = train_test_split(digits.data,\n", 128 | " digits.target)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "Really Simple API\n", 136 | "-------------------\n", 137 | "0) Import your model class" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [], 
147 | "source": [ 148 | "from sklearn.svm import LinearSVC" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "1) Instantiate an object and set the parameters" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "svm = LinearSVC(C=0.1)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "2) Fit the model" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "svm.fit(X_train, y_train)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "3) Apply / evaluate" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": false 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "print(svm.predict(X_train))\n", 203 | "print(y_train)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "collapsed": false 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "svm.score(X_train, y_train)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "collapsed": false 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "svm.score(X_test, y_test)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "And again\n", 233 | "---------" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "from sklearn.ensemble import RandomForestClassifier" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | 
"metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "rf = RandomForestClassifier(n_estimators=50)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "rf.fit(X_train, y_train)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "collapsed": false 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "rf.score(X_test, y_test)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "collapsed": false 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "#%load from github" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": { 295 | "collapsed": false 296 | }, 297 | "outputs": [], 298 | "source": [ 299 | "import numpy as np\n", 300 | "import pylab as pl\n", 301 | "from matplotlib.colors import ListedColormap\n", 302 | "from sklearn.cross_validation import train_test_split\n", 303 | "from sklearn.preprocessing import StandardScaler\n", 304 | "from sklearn.datasets import make_moons, make_circles, make_classification\n", 305 | "from sklearn.neighbors import KNeighborsClassifier\n", 306 | "from sklearn.svm import SVC\n", 307 | "from sklearn.tree import DecisionTreeClassifier\n", 308 | "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", 309 | "from sklearn.naive_bayes import GaussianNB\n", 310 | "from sklearn.lda import LDA\n", 311 | "from sklearn.qda import QDA\n", 312 | "\n", 313 | "h = .02 # step size in the mesh\n", 314 | "\n", 315 | "names = [\"Nearest Neighbors\", \"Linear SVM\", \"RBF SVM\", \"Decision Tree\",\n", 316 | " \"Random Forest\", \"AdaBoost\", \"Naive Bayes\", \"LDA\", \"QDA\"]\n", 317 | "classifiers = [\n", 318 | " KNeighborsClassifier(3),\n", 319 | " SVC(kernel=\"linear\", C=0.025),\n", 320 | " 
SVC(gamma=2, C=1),\n", 321 | " DecisionTreeClassifier(max_depth=5),\n", 322 | " RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),\n", 323 | " AdaBoostClassifier(),\n", 324 | " GaussianNB(),\n", 325 | " LDA(),\n", 326 | " QDA()]\n", 327 | "\n", 328 | "X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,\n", 329 | " random_state=1, n_clusters_per_class=1)\n", 330 | "rng = np.random.RandomState(2)\n", 331 | "X += 2 * rng.uniform(size=X.shape)\n", 332 | "linearly_separable = (X, y)\n", 333 | "\n", 334 | "datasets = [make_moons(noise=0.3, random_state=0),\n", 335 | " make_circles(noise=0.2, factor=0.5, random_state=1),\n", 336 | " linearly_separable\n", 337 | " ]\n", 338 | "\n", 339 | "figure = pl.figure(figsize=(27, 9))\n", 340 | "i = 1\n", 341 | "# iterate over datasets\n", 342 | "for ds in datasets:\n", 343 | " # preprocess dataset, split into training and test part\n", 344 | " X, y = ds\n", 345 | " X = StandardScaler().fit_transform(X)\n", 346 | " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)\n", 347 | "\n", 348 | " x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n", 349 | " y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n", 350 | " xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", 351 | " np.arange(y_min, y_max, h))\n", 352 | "\n", 353 | " # just plot the dataset first\n", 354 | " cm = pl.cm.RdBu\n", 355 | " cm_bright = ListedColormap(['#FF0000', '#0000FF'])\n", 356 | " ax = pl.subplot(len(datasets), len(classifiers) + 1, i)\n", 357 | " # Plot the training points\n", 358 | " ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)\n", 359 | " # and testing points\n", 360 | " ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)\n", 361 | " ax.set_xlim(xx.min(), xx.max())\n", 362 | " ax.set_ylim(yy.min(), yy.max())\n", 363 | " ax.set_xticks(())\n", 364 | " ax.set_yticks(())\n", 365 | " i += 1\n", 366 | "\n", 367 | " # iterate over classifiers\n", 368 | 
" for name, clf in zip(names, classifiers):\n", 369 | " ax = pl.subplot(len(datasets), len(classifiers) + 1, i)\n", 370 | " clf.fit(X_train, y_train)\n", 371 | " score = clf.score(X_test, y_test)\n", 372 | "\n", 373 | " # Plot the decision boundary. For that, we will assign a color to each\n", 374 | " # point in the mesh [x_min, m_max]x[y_min, y_max].\n", 375 | " if hasattr(clf, \"decision_function\"):\n", 376 | " Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n", 377 | " else:\n", 378 | " Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n", 379 | "\n", 380 | " # Put the result into a color plot\n", 381 | " Z = Z.reshape(xx.shape)\n", 382 | " ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)\n", 383 | "\n", 384 | " # Plot also the training points\n", 385 | " ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)\n", 386 | " # and testing points\n", 387 | " ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,\n", 388 | " alpha=0.6)\n", 389 | "\n", 390 | " ax.set_xlim(xx.min(), xx.max())\n", 391 | " ax.set_ylim(yy.min(), yy.max())\n", 392 | " ax.set_xticks(())\n", 393 | " ax.set_yticks(())\n", 394 | " ax.set_title(name)\n", 395 | " ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),\n", 396 | " size=15, horizontalalignment='right')\n", 397 | " i += 1\n", 398 | "\n", 399 | "figure.subplots_adjust(left=.02, right=.98)\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "# Exercises\n", 407 | "Load the iris dataset from the ``sklearn.datasets`` module using the ``load_iris`` function.\n", 408 | "What is the number of classes, features and data points in this dataset?\n", 409 | "\n", 410 | "Split it into training and test set using ``train_test_split``.\n", 411 | "Then train an evaluate a classifier of your choice.\n" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "metadata": { 418 | "collapsed": false 419 | }, 420 | 
"outputs": [], 421 | "source": [ 422 | "from sklearn.datasets import load_iris\n", 423 | "iris = load_iris()\n", 424 | "print(iris.DESCR)" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": { 431 | "collapsed": false 432 | }, 433 | "outputs": [], 434 | "source": [ 435 | "# %load solutions/train_iris.py" 436 | ] 437 | } 438 | ], 439 | "metadata": { 440 | "kernelspec": { 441 | "display_name": "Python 2", 442 | "language": "python", 443 | "name": "python2" 444 | }, 445 | "language_info": { 446 | "codemirror_mode": { 447 | "name": "ipython", 448 | "version": 2 449 | }, 450 | "file_extension": ".py", 451 | "mimetype": "text/x-python", 452 | "name": "python", 453 | "nbconvert_exporter": "python", 454 | "pygments_lexer": "ipython2", 455 | "version": "2.7.6" 456 | } 457 | }, 458 | "nbformat": 4, 459 | "nbformat_minor": 0 460 | } 461 | -------------------------------------------------------------------------------- /03 - Unsupervised Transformers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "from sklearn.datasets import load_digits\n", 25 | "from sklearn.cross_validation import train_test_split\n", 26 | "import numpy as np\n", 27 | "np.set_printoptions(suppress=True)\n", 28 | "\n", 29 | "digits = load_digits()\n", 30 | "X, y = digits.data, digits.target\n", 31 | "X_train, X_test, y_train, y_test = train_test_split(X, y)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "Removing mean and scaling variance\n", 39 | 
"===================================" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "from sklearn.preprocessing import StandardScaler" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "1) Instantiate the model" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "scaler = StandardScaler()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "2) Fit using only the data." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "scaler.fit(X_train)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "3) `transform` the data (not `predict`)." 
94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "X_train_scaled = scaler.transform(X_train)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "X_train.shape" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "X_train_scaled.shape" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "The transformed version of the data has the mean removed:" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "X_train_scaled.mean(axis=0)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "X_train_scaled.std(axis=0)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "X_test_transformed = scaler.transform(X_test)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "Principal Component Analysis\n", 174 | "=============================" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "0) Import the model" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "collapsed": false 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "from sklearn.decomposition import PCA" 193 | ] 194 | }, 195 | { 196 | 
"cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "1) Instantiate the model" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "pca = PCA(n_components=2)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "2) Fit to training data" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "collapsed": false 225 | }, 226 | "outputs": [], 227 | "source": [ 228 | "pca.fit(X)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "3) Transform to lower-dimensional representation" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": false 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "print(X.shape)\n", 247 | "X_pca = pca.transform(X)\n", 248 | "X_pca.shape" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Visualize\n", 256 | "----------" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": { 263 | "collapsed": false 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": { 273 | "collapsed": false 274 | }, 275 | "source": [ 276 | "Manifold Learning\n", 277 | "==================" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "collapsed": true 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "from sklearn.manifold import Isomap\n", 289 | "isomap = Isomap()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 
| "source": [ 300 | "X_isomap = isomap.fit_transform(X)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "plt.scatter(X_isomap[:, 0], X_isomap[:, 1], c=y)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": { 317 | "collapsed": true 318 | }, 319 | "source": [ 320 | "# Exercises\n", 321 | "Visualize the digits dataset using the TSNE algorithm from the sklearn.manifold module (it runs for a couple of seconds).\n" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": { 328 | "collapsed": false 329 | }, 330 | "outputs": [], 331 | "source": [ 332 | "# %load solutions/digits_tsne.py" 333 | ] 334 | } 335 | ], 336 | "metadata": { 337 | "kernelspec": { 338 | "display_name": "Python 2", 339 | "language": "python", 340 | "name": "python2" 341 | }, 342 | "language_info": { 343 | "codemirror_mode": { 344 | "name": "ipython", 345 | "version": 2 346 | }, 347 | "file_extension": ".py", 348 | "mimetype": "text/x-python", 349 | "name": "python", 350 | "nbconvert_exporter": "python", 351 | "pygments_lexer": "ipython2", 352 | "version": "2.7.6" 353 | } 354 | }, 355 | "nbformat": 4, 356 | "nbformat_minor": 0 357 | } 358 | -------------------------------------------------------------------------------- /04 - API Summary.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A recap on Scikit-learn's estimator interface\n" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "``X`` : data, 2d numpy array or scipy sparse matrix of shape (n_samples, n_features)\n", 15 | "\n", 16 | "``y`` : targets, 1d numpy array of shape (n_samples,)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | 
"source": [ 23 | "## Methods" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "\n", 31 | "\n", 32 | "\n", 33 | "\n", 34 | "\n", 35 | "\n", 36 | "\n", 37 | "
<table>
<tr><th colspan=2>``model.fit(X_train, [y_train])``</th></tr>
<tr><th>``model.predict(X_test)``</th><th>``model.transform(X_test)``</th></tr>
<tr><td>Classification</td><td>Preprocessing</td></tr>
<tr><td>Regression</td><td>Dimensionality Reduction</td></tr>
<tr><td>Clustering</td><td>Feature Extraction</td></tr>
<tr><td></td><td>Feature selection</td></tr>
</table>
" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Efficient alternatives, methods for models that don't generalize\n", 45 | "``model.fit_predict(X)`` (clustering)\n", 46 | "\n", 47 | "``model.fit_transform(X)`` (manifold learning)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Additional methods\n", 55 | "__Model evaluation__ : ``score(X, [y])``\n", 56 | "\n", 57 | "__Uncertainties from Classifiers__: ``decision_function(X)`` and ``predict_proba(X)``." 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Attributes\n", 65 | "__Classifiers__: ``classes_``\n", 66 | "\n", 67 | "__Clustering__: ``labels_``\n", 68 | "\n", 69 | "__Manifold Learning__: ``embedding_``\n", 70 | "\n", 71 | "__Linear models__: ``coef_``\n", 72 | "\n", 73 | "__Linear Decompositions__: ``components_``" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 2", 80 | "language": "python", 81 | "name": "python2" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 2 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython2", 93 | "version": "2.7.9" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 0 98 | } 99 | -------------------------------------------------------------------------------- /05 - Cross-validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 |
"Cross-Validation\n", 21 | "----------------------------------------" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from sklearn.datasets import load_iris" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "collapsed": false 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "iris = load_iris()\n", 44 | "X = iris.data\n", 45 | "y = iris.target" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "from sklearn.cross_validation import cross_val_score\n", 57 | "from sklearn.svm import LinearSVC" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "cross_val_score(LinearSVC(), X, y, cv=5)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "cross_val_score(LinearSVC(), X, y, cv=5, scoring=\"f1_macro\")" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Let's go to a binary task for a moment" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "y % 2" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "cross_val_score(LinearSVC(), X, y % 2)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "cross_val_score(LinearSVC(), X, y % 
2, scoring=\"average_precision\")" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "cross_val_score(LinearSVC(), X, y % 2, scoring=\"roc_auc\")" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "from sklearn.metrics.scorer import SCORERS\n", 142 | "print(SCORERS.keys())" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "Implementing your own scoring metric:" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "def my_accuracy_scoring(est, X, y):\n", 161 | " return np.mean(est.predict(X) == y)\n", 162 | "\n", 163 | "cross_val_score(LinearSVC(), X, y, scoring=my_accuracy_scoring)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "def my_super_scoring(est, X, y):\n", 175 | " return np.mean(est.predict(X) == y) - np.mean(est.coef_ != 0)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "from sklearn.grid_search import GridSearchCV\n", 187 | "\n", 188 | "y = iris.target\n", 189 | "grid = GridSearchCV(LinearSVC(C=.01, dual=False),\n", 190 | " param_grid={'penalty' : ['l1', 'l2']},\n", 191 | " scoring=my_super_scoring)\n", 192 | "grid.fit(X, y)\n", 193 | "print(grid.best_params_)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "There are other ways to do cross-validation" 201 | ] 202 | }, 203 | { 204 | "cell_type":
"code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "from sklearn.cross_validation import ShuffleSplit\n", 212 | "\n", 213 | "shuffle_split = ShuffleSplit(len(X), 10, test_size=.4)\n", 214 | "cross_val_score(LinearSVC(), X, y, cv=shuffle_split)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "collapsed": true 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "from sklearn.cross_validation import StratifiedKFold, KFold, ShuffleSplit\n", 226 | "\n", 227 | "def plot_cv(cv, n_samples):\n", 228 | " masks = []\n", 229 | " for train, test in cv:\n", 230 | " mask = np.zeros(n_samples, dtype=bool)\n", 231 | " mask[test] = 1\n", 232 | " masks.append(mask)\n", 233 | " plt.matshow(masks)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "plot_cv(StratifiedKFold(y, n_folds=5), len(y))" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "plot_cv(KFold(len(iris.target), n_folds=5), len(iris.target))" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "plot_cv(ShuffleSplit(len(iris.target), n_iter=20, test_size=.2), \n", 267 | " len(iris.target))" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": { 273 | "collapsed": false 274 | }, 275 | "source": [ 276 | "# Exercises\n", 277 | "Use KFold cross validation and StratifiedKFold cross validation (3 or 5 folds) for LinearSVC on the iris dataset.\n", 278 | "Why are the results so different? How could you get more similar results?" 
279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "collapsed": false 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "# %load solutions/cross_validation_iris.py" 290 | ] 291 | } 292 | ], 293 | "metadata": { 294 | "kernelspec": { 295 | "display_name": "Python 2", 296 | "language": "python", 297 | "name": "python2" 298 | }, 299 | "language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 2 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython2", 309 | "version": "2.7.6" 310 | } 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 0 314 | } 315 | -------------------------------------------------------------------------------- /06 - Grid Searches for Hyper Parameters.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "Grid Searches\n", 21 | "=================" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Grid-Search with built-in cross validation" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "from sklearn.grid_search import GridSearchCV\n", 47 | "from sklearn.svm import SVC" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 |
"outputs": [], 57 | "source": [ 58 | "from sklearn.datasets import load_digits\n", 59 | "from sklearn.cross_validation import train_test_split\n", 60 | "digits = load_digits()\n", 61 | "X_train, X_test, y_train, y_test = train_test_split(digits.data,\n", 62 | " digits.target)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Define parameter grid:" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "import numpy as np\n", 81 | "\n", 82 | "param_grid = {'C': 10. ** np.arange(-3, 3),\n", 83 | " 'gamma' : 10. ** np.arange(-5, 0)}\n", 84 | "\n", 85 | "np.set_printoptions(suppress=True)\n", 86 | "print(param_grid)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "grid_search = GridSearchCV(SVC(), param_grid, verbose=3)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "A GridSearchCV object behaves just like a normal classifier." 
105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": false, 112 | "scrolled": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "grid_search.fit(X_train, y_train)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "grid_search.predict(X_test)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "grid_search.score(X_test, y_test)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "grid_search.best_params_" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "# We extract just the scores\n", 161 | "\n", 162 | "scores = [x.mean_validation_score for x in grid_search.grid_scores_]\n", 163 | "scores = np.array(scores).reshape(6, 5)\n", 164 | "\n", 165 | "plt.matshow(scores)\n", 166 | "plt.xlabel('gamma')\n", 167 | "plt.ylabel('C')\n", 168 | "plt.colorbar()\n", 169 | "plt.xticks(np.arange(5), param_grid['gamma'])\n", 170 | "plt.yticks(np.arange(6), param_grid['C']);" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Nested Cross-validation in scikit-learn:" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": false 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "from sklearn.cross_validation import cross_val_score\n", 189 | "cross_val_score(GridSearchCV(SVC(), param_grid),\n", 190 | " digits.data, digits.target)" 191 | ] 192 | }, 193 | { 194 | 
"cell_type": "markdown", 195 | "metadata": { 196 | "collapsed": true 197 | }, 198 | "source": [ 199 | "# Exercises\n", 200 | "Use GridSearchCV to adjust max_depth and max_features of a RandomForestClassifier (from ``sklearn.ensemble``) on the digits dataset.\n", 201 | "\n", 202 | "Visualize the results as a heat map.\n", 203 | "\n", 204 | "Should you also adjust ``n_estimators``?" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": false 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "# %load solutions/grid_search_forest.py" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "# Randomized Search" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": { 236 | "collapsed": true 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "from sklearn.datasets import load_iris\n", 241 | "iris = load_iris()\n", 242 | "X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "collapsed": false 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "from scipy.stats import expon\n", 254 | "plt.hist([expon.rvs(scale=0.001) for x in range(10000)], bins=100, normed=True);" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": { 261 | "collapsed": false 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "from sklearn.grid_search import RandomizedSearchCV\n", 266 | "\n", 267 | "param_distributions = {'C': expon(), 'gamma': expon()}\n", 268 | "rs = RandomizedSearchCV(SVC(), param_distributions=param_distributions,\n", 269 | " n_iter=50)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | 
"metadata": { 276 | "collapsed": false 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "rs.fit(X_train, y_train)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "rs.best_params_" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "collapsed": false 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "rs.best_score_" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "scores, Cs, gammas = zip(*[(score.mean_validation_score, score.parameters['C'], score.parameters['gamma'])\n", 314 | " for score in rs.grid_scores_])" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": { 321 | "collapsed": false 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "plt.scatter(Cs, gammas, s=50, c=scores, linewidths=0)\n", 326 | "plt.xlabel(\"C\")\n", 327 | "plt.ylabel(\"gamma\")\n", 328 | "plt.xscale(\"log\")\n", 329 | "plt.yscale(\"log\")\n", 330 | "plt.colorbar()" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "# Exercise\n", 338 | "Add parameters that are not relevant (like ``coef0``) to param_distribution. Observe that it doesn't change the runtime and be happy." 
339 | ] 340 | } 341 | ], 342 | "metadata": { 343 | "kernelspec": { 344 | "display_name": "Python 2", 345 | "language": "python", 346 | "name": "python2" 347 | }, 348 | "language_info": { 349 | "codemirror_mode": { 350 | "name": "ipython", 351 | "version": 2 352 | }, 353 | "file_extension": ".py", 354 | "mimetype": "text/x-python", 355 | "name": "python", 356 | "nbconvert_exporter": "python", 357 | "pygments_lexer": "ipython2", 358 | "version": "2.7.6" 359 | } 360 | }, 361 | "nbformat": 4, 362 | "nbformat_minor": 0 363 | } 364 | -------------------------------------------------------------------------------- /07 - Preprocessing and Pipelines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "Preprocessing and Pipelines\n", 21 | "=============================" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from sklearn.datasets import load_digits\n", 33 | "from sklearn.cross_validation import train_test_split\n", 34 | "digits = load_digits()\n", 35 | "X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "When cross-validating a model that includes scaling, we need to estimate the mean and standard deviation separately for each fold.\n", 43 | "To do that, we build a pipeline." 
44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "from sklearn.pipeline import Pipeline, make_pipeline\n", 55 | "from sklearn.svm import SVC\n", 56 | "from sklearn.preprocessing import StandardScaler" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "pipeline = Pipeline([(\"scaler\", StandardScaler()), (\"svm\", SVC())])\n", 68 | "# in new versions:\n", 69 | "# make_pipeline(StandardScaler(), SVC())" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "pipeline.fit(X_train, y_train)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "pipeline.predict(X_test)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "Cross-validation with a pipeline\n", 106 | "---------------------------------" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "from sklearn.cross_validation import cross_val_score\n", 118 | "cross_val_score(pipeline, X_train, y_train)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Grid Search with a pipeline\n", 126 | "===========================" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": false 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "from sklearn.grid_search 
import GridSearchCV\n", 138 | "\n", 139 | "param_grid = {'svm__C': 10. ** np.arange(-3, 3),\n", 140 | " 'svm__gamma' : 10. ** np.arange(-3, 3)}\n", 141 | "\n", 142 | "grid_pipeline = GridSearchCV(pipeline, param_grid=param_grid, n_jobs=-1)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "grid_pipeline.fit(X_train, y_train)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "grid_pipeline.score(X_test, y_test)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "source": [ 173 | "# Exercises\n", 174 | "Add random features to the iris dataset using ``np.random.uniform`` and ``np.hstack``.\n", 175 | "\n", 176 | "Build a pipeline using the SelectKBest univariate feature selection from the sklearn.feature_selection module and the LinearSVC on the iris dataset.\n", 177 | "\n", 178 | "Use GridSearchCV to adjust C and the number of features selected in SelectKBest." 
179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "collapsed": false, 186 | "scrolled": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "# %load solutions/pipeline_iris.py" 191 | ] 192 | } 193 | ], 194 | "metadata": { 195 | "kernelspec": { 196 | "display_name": "Python 2", 197 | "language": "python", 198 | "name": "python2" 199 | }, 200 | "language_info": { 201 | "codemirror_mode": { 202 | "name": "ipython", 203 | "version": 2 204 | }, 205 | "file_extension": ".py", 206 | "mimetype": "text/x-python", 207 | "name": "python", 208 | "nbconvert_exporter": "python", 209 | "pygments_lexer": "ipython2", 210 | "version": "2.7.6" 211 | } 212 | }, 213 | "nbformat": 4, 214 | "nbformat_minor": 0 215 | } 216 | -------------------------------------------------------------------------------- /08 - Working With Text Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# Working with Text Data" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "import pandas as pd\n", 39 | "import os\n", 40 | "\n", 41 | "data = pd.read_csv(os.path.join(\"data\", \"train.csv\"))" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "len(data)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | 
"execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "data" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "y_train = np.array(data.Insult)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "y_train" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "text_train = data.Comment.tolist()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "text_train[6]" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "data_test = pd.read_csv(os.path.join(\"data\", \"test_with_solutions.csv\"))" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "text_test, y_test = data_test.Comment.tolist(), np.array(data_test.Insult)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "from sklearn.feature_extraction.text import CountVectorizer" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "cv = CountVectorizer()\n", 152 | "cv.fit(text_train)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 
158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "len(cv.vocabulary_)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": false, 171 | "scrolled": true 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "print(cv.get_feature_names()[:50])\n", 176 | "print(cv.get_feature_names()[-50:])" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": false 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "X_train = cv.transform(text_train)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": false 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "X_train" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": false 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "text_train[6]" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "X_train[6, :].nonzero()[1]" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": false 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "X_test = cv.transform(text_test)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "from sklearn.svm import LinearSVC\n", 243 | "svm = LinearSVC()" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "collapsed": false 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "svm.fit(X_train, y_train)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | 
"metadata": { 261 | "collapsed": false 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "svm.score(X_train, y_train)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": { 272 | "collapsed": false 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "svm.score(X_test, y_test)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": { 283 | "collapsed": false 284 | }, 285 | "outputs": [], 286 | "source": [ 287 | "def visualize_coefficients(classifier, feature_names, n_top_features=25):\n", 288 | " # get coefficients with large absolute values \n", 289 | " coef = classifier.coef_.ravel()\n", 290 | " positive_coefficients = np.argsort(coef)[-n_top_features:]\n", 291 | " negative_coefficients = np.argsort(coef)[:n_top_features]\n", 292 | " interesting_coefficients = np.hstack([negative_coefficients, positive_coefficients])\n", 293 | " # plot them\n", 294 | " plt.figure(figsize=(15, 5))\n", 295 | " colors = [\"red\" if c < 0 else \"blue\" for c in coef[interesting_coefficients]]\n", 296 | " plt.bar(np.arange(50), coef[interesting_coefficients], color=colors)\n", 297 | " feature_names = np.array(feature_names)\n", 298 | " plt.xticks(np.arange(1, 51), feature_names[interesting_coefficients], rotation=60, ha=\"right\");\n" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": { 305 | "collapsed": false 306 | }, 307 | "outputs": [], 308 | "source": [ 309 | "visualize_coefficients(svm, cv.get_feature_names())" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "# Exercises\n", 317 | "* Create a pipeine using the count vectorizer and SVM (see 07). 
Train and score using the pipeline.\n", 318 | "* Vary the ``ngram_range`` in the count vectorizer and visualize the changed coefficients.\n", 319 | "* Grid search the C in the LinearSVC using the pipeline.\n", 320 | "* Grid search the C in the LinearSVC together with the ``ngram_range`` (try (1, 1), (1, 2), (2, 2))" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": { 327 | "collapsed": false 328 | }, 329 | "outputs": [], 330 | "source": [ 331 | "# %load solutions/text_pipeline.py\n" 332 | ] 333 | } 334 | ], 335 | "metadata": { 336 | "kernelspec": { 337 | "display_name": "Python 2", 338 | "language": "python", 339 | "name": "python2" 340 | }, 341 | "language_info": { 342 | "codemirror_mode": { 343 | "name": "ipython", 344 | "version": 2 345 | }, 346 | "file_extension": ".py", 347 | "mimetype": "text/x-python", 348 | "name": "python", 349 | "nbconvert_exporter": "python", 350 | "pygments_lexer": "ipython2", 351 | "version": "2.7.6" 352 | } 353 | }, 354 | "nbformat": 4, 355 | "nbformat_minor": 0 356 | } 357 | -------------------------------------------------------------------------------- /08.5 Feature Union.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "Aside: Feature Union\n", 21 | "======================" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import pandas as pd\n", 33 | "import os\n", 34 | "\n", 35 | "data = pd.read_csv(os.path.join(\"data\", \"train.csv\"))\n", 36 | "y_train = 
np.array(data.Insult)\n", 37 | "text_train = data.Comment.tolist()" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "from sklearn.pipeline import make_union, make_pipeline\n", 49 | "from sklearn.feature_extraction.text import CountVectorizer\n", 50 | "from sklearn.svm import LinearSVC\n", 51 | "from sklearn.grid_search import GridSearchCV\n", 52 | "\n", 53 | "char_and_word = make_union(CountVectorizer(analyzer=\"char\"),\n", 54 | " CountVectorizer(analyzer=\"word\"))\n", 55 | "\n", 56 | "text_pipe = make_pipeline(char_and_word, LinearSVC(dual=False))\n", 57 | "param_grid = {'linearsvc__C': 10. ** np.arange(-3, 3)}\n", 58 | "\n", 59 | "grid = GridSearchCV(text_pipe, param_grid=param_grid, cv=3, verbose=10)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "grid.fit(text_train, y_train)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "param_grid = {'featureunion__countvectorizer-1__ngram_range':\n", 82 | " [(1, 3), (1, 5), (2, 5)],\n", 83 | " 'featureunion__countvectorizer-2__ngram_range':\n", 84 | " [(1, 1), (1, 2), (2, 2)],\n", 85 | " 'linearsvc__C': 10. 
** np.arange(-3, 3)}" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python 2", 92 | "language": "python", 93 | "name": "python2" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 2 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython2", 105 | "version": "2.7.6" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 0 110 | } 111 | -------------------------------------------------------------------------------- /09 - Out Of Core Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# write out some toy data\n", 12 | "from sklearn.datasets import load_digits\n", 13 | "import cPickle\n", 14 | "\n", 15 | "digits = load_digits()\n", 16 | "\n", 17 | "X, y = digits.data, digits.target\n", 18 | "\n", 19 | "for i in range(10):\n", 20 | " cPickle.dump((X[i::10], y[i::10]), open(\"data/batch_%02d.pickle\" % i, \"w\"), -1)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "collapsed": false 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "from sklearn.linear_model import SGDClassifier\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "sgd = SGDClassifier()\n", 43 | "\n", 44 | "for i in range(9):\n", 45 | " X_batch, y_batch = cPickle.load(open(\"data/batch_%02d.pickle\" % i))\n", 46 | " sgd.partial_fit(X_batch, y_batch, classes=range(10))" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 
56 | "source": [ 57 | "X_test, y_test = cPickle.load(open(\"data/batch_09.pickle\"))\n", 58 | "\n", 59 | "sgd.score(X_test, y_test)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Text\n", 67 | "=====" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "import pandas as pd\n", 79 | "from sklearn.feature_extraction.text import HashingVectorizer\n", 80 | "\n", 81 | "sgd = SGDClassifier()\n", 82 | "hashing_vectorizer = HashingVectorizer()\n", 83 | "\n", 84 | "for i in range(10):\n", 85 | " data_batch = pd.read_csv(\"data/train_%d.csv\" % i)\n", 86 | " text_batch = data_batch.Comment.tolist()\n", 87 | " y_batch = data_batch.Insult.values\n", 88 | " X_batch = hashing_vectorizer.transform(text_batch)\n", 89 | " sgd.partial_fit(X_batch, y_batch, classes=range(10))" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "data_test = pd.read_csv(\"data/test_with_solutions.csv\")\n", 101 | "X_test = hashing_vectorizer.transform(data_test.Comment.tolist())\n", 102 | "y_test = data_test.Insult.values\n", 103 | "sgd.score(X_test, y_test)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Kernel Approximations\n", 111 | "=======================" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "from sklearn.kernel_approximation import RBFSampler\n", 123 | "\n", 124 | "sgd = SGDClassifier()\n", 125 | "kernel_approximation = RBFSampler(gamma=.001, n_components=400)\n", 126 | "\n", 127 | "for i in range(9):\n", 128 | " X_batch, y_batch = cPickle.load(open(\"data/batch_%02d.pickle\" % i))\n", 129 | " if i == 0:\n", 130 
| " kernel_approximation.fit(X_batch)\n", 131 | " X_transformed = kernel_approximation.transform(X_batch)\n", 132 | " sgd.partial_fit(X_transformed, y_batch, classes=range(10))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "X_test, y_test = cPickle.load(open(\"data/batch_09.pickle\"))\n", 144 | "\n", 145 | "sgd.score(kernel_approximation.transform(X_test), y_test)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [] 156 | } 157 | ], 158 | "metadata": { 159 | "kernelspec": { 160 | "display_name": "Python 2", 161 | "language": "python", 162 | "name": "python2" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 2 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython2", 174 | "version": "2.7.6" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 0 179 | } 180 | -------------------------------------------------------------------------------- /10 - Other tools.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Use the tools that work for you\n", 8 | "\n", 9 | "## Python libraries:\n", 10 | "* pystruct (structured prediction)\n", 11 | "* lightning (great linear models)\n", 12 | "* pybrain (reinforcement learning)\n", 13 | "* theano / lasagne / keras (deep learning)\n", 14 | "* pymc (MCMC)\n", 15 | "\n", 16 | "## Lua:\n", 17 | "* Torch / nn (Deep learning)\n", 18 | "\n", 19 | "## C++ libraries:\n", 20 | "* MLpack (fast, tree-based models)\n", 21 | "* dlib (structured)\n", 22 | "* bidmach\n", 23 | "\n", 24 | 
"## Many-language interfaces:\n", 25 | "* Shogun (C++ implementations, many algorithms, focused on kernels)\n", 26 | "* H2O (java implementation, many algorithms)\n", 27 | "* dato / graphlab (C++ implementations, many algorithms)\n", 28 | "\n", 29 | "## Scala\n", 30 | "* Factorie (PGMs)\n", 31 | "\n", 32 | "## Julia\n", 33 | "* [many that I don't know very well]" 34 | ] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 2", 40 | "language": "python", 41 | "name": "python2" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 2 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython2", 53 | "version": "2.7.9" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 0 58 | } 59 | -------------------------------------------------------------------------------- /11 - Pystruct.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# Pystruct" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Structured Prediction! 
Chains first\n", 28 | "from ``pystruct/examples/plot_letters.py``\n", 29 | "\n", 30 | "http://pystruct.github.io/auto_examples/plot_letters.html" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "from pystruct.datasets import load_letters\n", 42 | "\n", 43 | "letters = load_letters()\n", 44 | "X, y, folds = letters['data'], letters['labels'], letters['folds']\n", 45 | "\n", 46 | "print(\"length of X: %d\" % len(X))\n", 47 | "print(\"shape of first data point: %s\" % (X[0].shape,))\n", 48 | "print(\"shape of 200th data point: %s\" % (X[200].shape,))\n", 49 | "\n", 50 | "print(\"length of y: %d\" % len(y))\n", 51 | "print(\"shape of first label: %s\" % (y[0].shape,))\n", 52 | "print(\"y[0]: %s\" % y[0])\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "_, axes = plt.subplots(1, len(X[0]))\n", 64 | "for ax, x in zip(axes, X[0]):\n", 65 | " ax.matshow(x.reshape(16, 8), cmap=\"gray\")\n", 66 | " # (c)ommanding" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "# we convert the lists to object arrays, as that makes slicing much more\n", 78 | "# convenient\n", 79 | "X, y = np.array(X), np.array(y)\n", 80 | "X_train, X_test = X[folds == 1], X[folds != 1]\n", 81 | "y_train, y_test = y[folds == 1], y[folds != 1]" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "from sklearn.svm import LinearSVC\n", 93 | "# Train linear SVM\n", 94 | "svm = LinearSVC(dual=False, C=.1)\n", 95 | "# flatten input\n", 96 | "svm.fit(np.vstack(X_train), np.hstack(y_train))" 97 | ] 98 | }, 99 | { 100 | "cell_type": 
"code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "print(\"Test score with linear SVM: %f\" %\n", 108 | " svm.score(np.vstack(X_test), np.hstack(y_test)))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "# Model + Solver + Inference" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "from pystruct.models import ChainCRF\n", 131 | "from pystruct.learners import FrankWolfeSSVM\n", 132 | "\n", 133 | "# Train linear chain CRF\n", 134 | "model = ChainCRF()\n", 135 | "ssvm = FrankWolfeSSVM(model=model, C=.1, max_iter=11)\n", 136 | "ssvm.fit(X_train, y_train)\n", 137 | "\n", 138 | "print(\"Test score with chain CRF: %f\" % ssvm.score(X_test, y_test))" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "# plot some word sequences\n", 150 | "abc = \"abcdefghijklmnopqrstuvwxyz\"\n", 151 | "\n", 152 | "n_words = 4\n", 153 | "rnd = np.random.RandomState(1)\n", 154 | "selected = rnd.randint(len(y_test), size=n_words)\n", 155 | "max_word_len = max([len(y_) for y_ in y_test[selected]])\n", 156 | "fig, axes = plt.subplots(n_words, max_word_len, figsize=(10, 10))\n", 157 | "fig.subplots_adjust(wspace=0)\n", 158 | "for ind, axes_row in zip(selected, axes):\n", 159 | " y_pred_svm = svm.predict(X_test[ind])\n", 160 | " y_pred_chain = ssvm.predict([X_test[ind]])[0]\n", 161 | " for i, (a, image, y_true, y_svm, y_chain) in enumerate(\n", 162 | " zip(axes_row, X_test[ind], y_test[ind], y_pred_svm, y_pred_chain)):\n", 163 | " a.matshow(image.reshape(16, 8), cmap=plt.cm.Greys)\n", 164 | " a.text(0, 3, abc[y_true],
color=\"#00AA00\", size=25)\n", 165 | " a.text(0, 14, abc[y_svm], color=\"#5555FF\", size=25)\n", 166 | " a.text(5, 14, abc[y_chain], color=\"#FF5555\", size=25)\n", 167 | " a.set_xticks(())\n", 168 | " a.set_yticks(())\n", 169 | " for ii in range(i + 1, max_word_len):\n", 170 | " axes_row[ii].set_visible(False)\n", 171 | "\n", 172 | "plt.matshow(ssvm.w[26 * 8 * 16:].reshape(26, 26))\n", 173 | "plt.title(\"Transition parameters of the chain CRF.\")\n", 174 | "plt.xticks(np.arange(25), abc)\n", 175 | "plt.yticks(np.arange(25), abc);\n" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": { 181 | "collapsed": true 182 | }, 183 | "source": [ 184 | "# Different models get different X!" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "# Exercise\n", 192 | "* Use the GraphCRF model, in which each x is a tuple of features and edges, to solve the sequence prediction task, see http://pystruct.github.io/user_guide.html#graphcrf set the inference_method, the default may be slow! What does \"directed\" do?\n", 193 | "* Use the EdgeFeatureGraphCRF model, in which each x is a tuple of features, edges and edge features, to solve the sequence prediction task, see http://pystruct.github.io/user_guide.html#edgefeaturegraphcrf set the inference_method, the default may be slow!"
194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": { 200 | "collapsed": false 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "# %load solutions/letters_graph_crf.py\n", 205 | "from pystruct.models import GraphCRF, EdgeFeatureGraphCRF\n", 206 | "\n", 207 | "def make_edges(n_nodes):\n", 208 | " return np.c_[np.arange(n_nodes - 1), np.arange(1, n_nodes)]\n", 209 | "\n", 210 | "X_graph = np.array([(x, make_edges(len(x))) for x in X])\n", 211 | "X_graph_train, X_graph_test = X_graph[folds == 1], X_graph[folds != 1]\n", 212 | "\n", 213 | "\n", 214 | "graph_model = GraphCRF(inference_method=\"max-product\")\n", 215 | "ssvm = FrankWolfeSSVM(model=graph_model, C=.1, max_iter=11)\n", 216 | "ssvm.fit(X_graph_train, y_train)\n", 217 | "print(\"score with GraphCRF %f\" % ssvm.score(X_graph_test, y_test))\n" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "collapsed": false 225 | }, 226 | "outputs": [], 227 | "source": [ 228 | "# %load solutions/letters_graph_crf.py" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "# Awesome example" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": true 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "# %load http://pystruct.github.io/_downloads/plot_snakes.py\n", 247 | "\"\"\"\n", 248 | "==============================================\n", 249 | "Conditional Interactions on the Snakes Dataset\n", 250 | "==============================================\n", 251 | "This example uses the snake dataset introduced in\n", 252 | "Nowozin, Rother, Bagon, Sharp, Yao, Kohli: Decision Tree Fields ICCV 2011\n", 253 | "\n", 254 | "This dataset is specifically designed to require the pairwise interaction terms\n", 255 | "to be conditioned on the input, in other words to use non-trivial edge-features.\n", 256 | "\n",
257 | "The task is as follows: a \"snake\" of length ten wandered over a grid. For\n", 258 | "each cell, it had the option to go up, down, left or right (unless it came from\n", 259 | "there). The input consists of these decisions, while the desired output is an\n", 260 | "annotation of the snake from 0 (head) to 9 (tail). See the plots for an\n", 261 | "example.\n", 262 | "\n", 263 | "As input features we use a 3x3 window around each pixel (and pad with background\n", 264 | "where necessary). We code the five different input colors (for up, down, left, right,\n", 265 | "background) using a one-hot encoding. This is a rather naive approach, not using any\n", 266 | "information about the dataset (other than that it is a 2d grid).\n", 267 | "\n", 268 | "The task can not be solved using the simple DirectionalGridCRF - which can only\n", 269 | "infer head and tail (which are also possible to infer just from the unary\n", 270 | "features). If we add edge-features that contain the features of the nodes that are\n", 271 | "connected by the edge, the CRF can solve the task.\n", 272 | "\n", 273 | "From an inference point of view, this task is very hard.
QPBO move-making is\n", 274 | "not able to solve it alone, so we use the relaxed AD3 inference for learning.\n", 275 | "\n", 276 | "PS: This example runs a bit (5 minutes on 12 cores, 20 minutes on one core for me).\n", 277 | "But it does work as well as Decision Tree Fields ;)\n", 278 | "\"\"\"\n", 279 | "import numpy as np\n", 280 | "import matplotlib.pyplot as plt\n", 281 | "\n", 282 | "from sklearn.preprocessing import label_binarize\n", 283 | "from sklearn.metrics import confusion_matrix, accuracy_score\n", 284 | "\n", 285 | "from pystruct.learners import OneSlackSSVM\n", 286 | "from pystruct.datasets import load_snakes\n", 287 | "from pystruct.utils import make_grid_edges, edge_list_to_features\n", 288 | "from pystruct.models import EdgeFeatureGraphCRF\n", 289 | "\n", 290 | "\n", 291 | "def one_hot_colors(x):\n", 292 | " x = x / 255\n", 293 | " flat = np.dot(x.reshape(-1, 3), 2 ** np.arange(3))\n", 294 | " one_hot = label_binarize(flat, classes=[1, 2, 3, 4, 6])\n", 295 | " return one_hot.reshape(x.shape[0], x.shape[1], 5)\n", 296 | "\n", 297 | "\n", 298 | "def neighborhood_feature(x):\n", 299 | " \"\"\"Add a 3x3 neighborhood around each pixel as a feature.\"\"\"\n", 300 | " # we could also use a four neighborhood, that would work even better\n", 301 | " # but one might argue then we are using domain knowledge ;)\n", 302 | " features = np.zeros((x.shape[0], x.shape[1], 5, 9))\n", 303 | " # position 3 is background.\n", 304 | " features[:, :, 3, :] = 1\n", 305 | " features[1:, 1:, :, 0] = x[:-1, :-1, :]\n", 306 | " features[:, 1:, :, 1] = x[:, :-1, :]\n", 307 | " features[:-1, 1:, :, 2] = x[1:, :-1, :]\n", 308 | " features[1:, :, :, 3] = x[:-1, :, :]\n", 309 | " features[:-1, :-1, :, 4] = x[1:, 1:, :]\n", 310 | " features[:-1, :, :, 5] = x[1:, :, :]\n", 311 | " features[1:, :-1, :, 6] = x[:-1, 1:, :]\n", 312 | " features[:, :-1, :, 7] = x[:, 1:, :]\n", 313 | " features[:, :, :, 8] = x[:, :, :]\n", 314 | " return features.reshape(x.shape[0] * x.shape[1], -1)\n", 
315 | "\n", 316 | "\n", 317 | "def prepare_data(X):\n", 318 | " X_directions = []\n", 319 | " X_edge_features = []\n", 320 | " for x in X:\n", 321 | " # get edges in grid\n", 322 | " right, down = make_grid_edges(x, return_lists=True)\n", 323 | " edges = np.vstack([right, down])\n", 324 | " # use 3x3 patch around each point\n", 325 | " features = neighborhood_feature(x)\n", 326 | " # simple edge feature that encodes just if an edge is horizontal or\n", 327 | " # vertical\n", 328 | " edge_features_directions = edge_list_to_features([right, down])\n", 329 | " # edge feature that contains features from the nodes that the edge connects\n", 330 | " edge_features = np.zeros((edges.shape[0], features.shape[1], 4))\n", 331 | " edge_features[:len(right), :, 0] = features[right[:, 0]]\n", 332 | " edge_features[:len(right), :, 1] = features[right[:, 1]]\n", 333 | " edge_features[len(right):, :, 0] = features[down[:, 0]]\n", 334 | " edge_features[len(right):, :, 1] = features[down[:, 1]]\n", 335 | " edge_features = edge_features.reshape(edges.shape[0], -1)\n", 336 | " X_directions.append((features, edges, edge_features_directions))\n", 337 | " X_edge_features.append((features, edges, edge_features))\n", 338 | " return X_directions, X_edge_features\n", 339 | "\n", 340 | "\n", 341 | "print(\"Please be patient. 
Learning will take 5-20 minutes.\")\n", 342 | "snakes = load_snakes()\n", 343 | "X_train, Y_train = snakes['X_train'], snakes['Y_train']\n", 344 | "\n", 345 | "X_train = [one_hot_colors(x) for x in X_train]\n", 346 | "Y_train_flat = [y_.ravel() for y_ in Y_train]\n", 347 | "\n", 348 | "X_train_directions, X_train_edge_features = prepare_data(X_train)\n", 349 | "\n", 350 | "inference = 'qpbo'\n", 351 | "# first, train on X with directions only:\n", 352 | "crf = EdgeFeatureGraphCRF(inference_method=inference)\n", 353 | "ssvm = OneSlackSSVM(crf, inference_cache=50, C=.1, tol=.1, max_iter=100,\n", 354 | " n_jobs=1)\n", 355 | "ssvm.fit(X_train_directions, Y_train_flat)\n", 356 | "\n", 357 | "# Evaluate using confusion matrix.\n", 358 | "# Clearly the middle of the snake is the hardest part.\n", 359 | "X_test, Y_test = snakes['X_test'], snakes['Y_test']\n", 360 | "X_test = [one_hot_colors(x) for x in X_test]\n", 361 | "Y_test_flat = [y_.ravel() for y_ in Y_test]\n", 362 | "X_test_directions, X_test_edge_features = prepare_data(X_test)\n", 363 | "Y_pred = ssvm.predict(X_test_directions)\n", 364 | "print(\"Results using only directional features for edges\")\n", 365 | "print(\"Test accuracy: %.3f\"\n", 366 | " % accuracy_score(np.hstack(Y_test_flat), np.hstack(Y_pred)))\n", 367 | "print(confusion_matrix(np.hstack(Y_test_flat), np.hstack(Y_pred)))\n", 368 | "\n", 369 | "# now, use more informative edge features:\n", 370 | "crf = EdgeFeatureGraphCRF(inference_method=inference)\n", 371 | "ssvm = OneSlackSSVM(crf, inference_cache=50, C=.1, tol=.1, switch_to='ad3',\n", 372 | " n_jobs=-1)\n", 373 | "ssvm.fit(X_train_edge_features, Y_train_flat)\n", 374 | "Y_pred2 = ssvm.predict(X_test_edge_features)\n", 375 | "print(\"Results using also input features for edges\")\n", 376 | "print(\"Test accuracy: %.3f\"\n", 377 | " % accuracy_score(np.hstack(Y_test_flat), np.hstack(Y_pred2)))\n", 378 | "print(confusion_matrix(np.hstack(Y_test_flat), np.hstack(Y_pred2)))\n", 379 | "\n", 380 | "#
plot stuff\n", 381 | "fig, axes = plt.subplots(2, 2)\n", 382 | "axes[0, 0].imshow(snakes['X_test'][0], interpolation='nearest')\n", 383 | "axes[0, 0].set_title('Input')\n", 384 | "y = Y_test[0].astype(np.int)\n", 385 | "bg = 2 * (y != 0) # enhance contrast\n", 386 | "axes[0, 1].matshow(y + bg, cmap=plt.cm.Greys)\n", 387 | "axes[0, 1].set_title(\"Ground Truth\")\n", 388 | "axes[1, 0].matshow(Y_pred[0].reshape(y.shape) + bg, cmap=plt.cm.Greys)\n", 389 | "axes[1, 0].set_title(\"Prediction w/o edge features\")\n", 390 | "axes[1, 1].matshow(Y_pred2[0].reshape(y.shape) + bg, cmap=plt.cm.Greys)\n", 391 | "axes[1, 1].set_title(\"Prediction with edge features\")\n", 392 | "for a in axes.ravel():\n", 393 | " a.set_xticks(())\n", 394 | " a.set_yticks(())\n", 395 | "plt.show()\n" 396 | ] 397 | } 398 | ], 399 | "metadata": { 400 | "kernelspec": { 401 | "display_name": "Python 2", 402 | "language": "python", 403 | "name": "python2" 404 | }, 405 | "language_info": { 406 | "codemirror_mode": { 407 | "name": "ipython", 408 | "version": 2 409 | }, 410 | "file_extension": ".py", 411 | "mimetype": "text/x-python", 412 | "name": "python", 413 | "nbconvert_exporter": "python", 414 | "pygments_lexer": "ipython2", 415 | "version": "2.7.6" 416 | } 417 | }, 418 | "nbformat": 4, 419 | "nbformat_minor": 0 420 | } 421 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Machine Learning Open Source Software 2 | ===================================== 3 | Notebooks / material for the machine learning software practical. 4 | 5 | This tutorial covers some of the tools provided by scikit-learn, as well as an 6 | introduction to pystruct. 7 | 8 | You need at least scikit-learn v0.15 and ipython v3.0 for this practical. 
9 | If you don't already have an installation, I recommend using the anaconda distribution: 10 | http://continuum.io/downloads 11 | 12 | For the last part, you will also need pystruct, which you can install using 13 | 14 | pip install pystruct 15 | 16 | If it errors on installing PyQPBO or AD3, don't worry too much. 17 | 18 | Check out the documentation of scikit-learn at http://scikit-learn.org/dev/documentation.html 19 | and pystruct at http://pystruct.github.io 20 | -------------------------------------------------------------------------------- /data/train_1.csv: -------------------------------------------------------------------------------- 1 | ,Insult,Date,Comment 2 | 1,0,20120528192215Z,"""i really don't understand your point.\xa0 It seems that you are mixing apples and oranges.""" 3 | 11,0,20120530044519Z,"""Be careful,Jimbo.OG has a fork with your name on it.""" 4 | 21,0,20120611234235Z,"""http://www.youtube.com/watch?v=tLYLLPHKRU4""" 5 | 31,0,20120529115214Z,"""A good article, which, for all the good it will do, could have been delivered to a brick wall.""" 6 | 41,1,20120620075045Z,"""Is that it?? your sum total to any conversation is first abuse and then calling them a troll.. You are a typical socialist idiot. Keep up your non existent class warfare, it is as pointless as you.""" 7 | 51,1,20120320131007Z,"""Even as a troll you are a pathetic failure.""" 8 | 61,1,20120619033424Z,"""god, you're tiresome. get a life, you loser.""" 9 | 71,0,20120620154731Z,"""you mean like your off topic Obama rant bellow?""" 10 | 81,0,20120619201802Z,"""M\xe1tenlos!!\nhttp://1.bp.blogspot.com/-YVSZmVv2iRw/Tmo8KFaUGMI/AAAAAAAAA0k/NlLAahKNnis/s1600/kill_it_with_fire_KILL_IT_KILL_IT_WITH_FIRE-s670x394-132457.jpg""" 11 | 91,0,,"""They should put all the gay and lesbians on an island\\xe2\\x80\\xa6\\xc2\\xa0 Oh wait there is Manhattan. """ 12 | 101,0,20120619182936Z,"""Powers,\n\nMSNBC Is far far more biased and hostile in it's comments than FOX. 
At least at FOX there are usually opposing views. MSNBC is just a shill for the president. And they keep having to fire their hosts for all the outrageous remarks they make.\n\nNow, CNN is getting almost as bad as MSNBC.""" 13 | 111,0,20120529153517Z,"""Exactly. Like, I don't really give a shit how fat a person may or may not feel, but it really is shitty when your clothes don't fit. And not in a ""I'm so gross"" way but in a ""I can't very well leave the house naked!!"" way.""" 14 | 121,0,20120620181659Z,"""Oops! \xa0How embarrassing (for me)... you are right People1st... I bow to the superior movie intellect of your son :)""" 15 | 131,0,20120529130655Z,"""@Axlrodes\xa0Were you here for the Wide Receiver drama?""" 16 | 141,1,20120612083701Z,"""You are one sick puppy.""" 17 | 151,0,20120527181201Z,"""We afford what we HAVE to afford, Marco.""" 18 | 161,0,20120529224055Z,"""Anything better than the Grover Norquist, Tea party, Wall Street, corporatist GOP.""" 19 | 171,0,20120529093527Z,"""Yes but remember - the government is listening to the views of the owner of Wonga.\n\nWonga will loan your business\xa0money (at astonomical rates)""" 20 | 181,0,20120610074630Z,"""With inbreeds being the longest running malcontents.""" 21 | 191,0,20120611051049Z,"""dude, it looks NOTHING like the ""fag-box."" It looks more towards PlayStation-meets-Alienware console. It has already been proven that modern consoles continually progress towards a PC. So please take your\xa0idiotically painful comments\xa0elsewhere.""" 22 | 201,0,20120619063951Z,"""This man's drunk as fuck, he's on drugs, he's a male prostitute.""" 23 | 211,0,20120610200600Z,"""The bad part of the story is the dumb redneck Republicans, like yourself.\n\nNot sure if these folks are sociopaths like you, though, so you're probably a downgrade from them...""" 24 | 221,0,20120609174056Z,"""Holder and Obama will be hit back from all directions and 1000 times as hard. 
Dozens inside the DOJ and the WH who have dignity and patriotism will turn on them both like rabid dogs. This administration is goind down and right into federal prison. Nothing can stop it now. Nothing at all. The people are pissed off and out for heads.\n\nHolder = Peice of Shit\n\nObama = Fucked Up Sociopath""" 25 | 231,0,20120529135802Z,"""oh hey, I have yousaid.it if you like.""" 26 | 241,1,20120609203104Z,"""Why did you do that?"" ""Because you smell like dog poo. You're so stupid, just like dog poo. You're made outta poo!""" 27 | 251,1,,"""ugly bitch go prostitute yourself so dat you can buy a big enough pant bottom to cover your gigantic ass """ 28 | 261,0,20120515172041Z,"""""""You all, including your son, deserve a slow, painful, and miserable death.""""\n\nThat's how Zimmerman stands his ground.""" 29 | 271,0,20120528200032Z,"""come on Sweeney show us again man""" 30 | 281,0,20120619134407Z,"""The guy is an idiot if he sticks his dick around without protection.""" 31 | 291,0,20120610015101Z,"""Your little girl will probably look just like her :) lol""" 32 | 301,1,20120515152922Z,"""More people on here than you, you cunt.""" 33 | 311,0,20120619145414Z,"""I guess their coverage was IDIOTIC to YOU, especially when they were all but anointing your boy as the champion after day one of the Open. \xa0 Tht is what I dearly love about the SYCOPHANTS, they have to have it both ways...ALL THE TIME.\xa0 Crawl back under your rock CRAPI.""" 34 | 321,0,20120611190542Z,"""Libero ha per vicedirettore un signore che ha pubblicato l'audio di una telefonata PRIVATA con un parlamentare, Crosetto. Libero fa campagne contro le intercettazioni e poi pubblica telefonate PRIVATE (""quel testa di C \xe8 andato a Milano"" diceva Crosetto del suo boss). Il DIRETTORE di Libero si \xe8 fatto un attentato da solo, o forse \xe8 stata la guardia del corpo recidiva. 
Pansa, Il megagiornalista, su Libero ha scritto (e mai smentito) che ENRICO BERLINGUER ha telefonato a Curzi per raccomandare la figlia, nel 1991. Da MORTO! Questo \xe8 libero. Che si prendano Freda.""" 35 | 331,0,20120618192916Z,"""Mine too. So I just assume all the time over what it was. \n\nMost of the time, they are correct assumptions.""" 36 | 341,0,,"""Cant wait, this guy is amazing.""" 37 | 351,0,,"""@Doremus Jessup 2.0 ""Little bastard shot me in the ass!"" --The Waco Kid""" 38 | 361,0,20120612035858Z,"""I have one too! And I snagged a black and white maxi skirt from target last week! It's so comfy!! :) Where did you get your dress?""" 39 | 371,0,20120610080953Z,"""Great Pin Bea. This photo just tells a story.""" 40 | 381,1,20120619090606Z,"""why dony you fuck off""" 41 | 391,0,,"""Democrats believe the government should take care of them... republicans feel you should get off you lazy ass and make something of yourself.....""" 42 | 401,0,20120610141834Z,"""The comments about this sad story only proves how ""smart"" some people are in this once great nation. Kenya is an US ally and have been for years, including during W. Bush's administration. If this ""crash"" was an act of terror, this would not be the first time Kenyans faced acts of terror due to their alliance with the US. Remember the US embassy terror bombings in the late 1990's? By the way Kenya is 80% Christian and the people are not starving to death, for those who somehow believe Kenyans are all terrorist trying to destroy America because Obama's father was Kenyan. Get an education before posting ""smart"" comments about a nation you ""smart"" posters obviously know nothing about. 
My heart goes out to the good people of Kenya during this tragic event.""" 43 | 411,0,20120502232044Z,"""Sling the term liberal around a little more because people might actually begin to believe you aren't a pro-life democrat.""" 44 | 421,0,,"""Joe Paterno let an accused Pedophile roam the campuses free....when Sandusky was charged in 1998....Paterno simply let him ""retire""----and why did no other college in the NATION try to recruit Sandusky? Because they KNEW he was dirty filthy pedophile...yet Paterno continue to befriend him...even letting him use the locker rooms...so that...another boy could be ANAL RAPED in the locker rooms of Penn State....Ask yourself...why would Paterno even CONSIDER letting a Man (friend or not)----back on campus after accused of being a Pedophile?!?! Unless Paterno didn\\'t care? Paterno is just as much to blame..was his stupid decision to let the man back on campus after a D.A. was investigating and continuing to investigate Sandusky up until 2005 when the D.A. went missing and is presumed DEAD (oh and his laptop and hard drive were destroyed too...can you say cover up?!!""" 45 | 431,0,20120515201255Z,"""Are you stebbers?""" 46 | 441,0,,"""Last I checked the constitution still said ""All men are created equal"", even the gay ones. \\xc2\\xa0How would straight folks feel if their right to marry was taken away? \\xc2\\xa0Come on. \\xc2\\xa0Fair is fair and equality is equally. \\xc2\\xa0\\xc2\\xa0Everyone should have the same opportunities as the rest of us. Just because you don\\'t agree with the lifestyle doesn\\'t give anyone in a fee country the right to force that opinion on others. \\xc2\\xa0We spout freedom as our most treasured of ideas yet we want to limit the freedom of some individuals. \\xc2\\xa0That is what doesn\\'t make sense. \\xc2\\xa0If you don\\'t agree with gay marriage, don\\'t be in one. \\xc2\\xa0But don\\'t restrict someone else from conducting their lives as they wish. 
\\xc2\\xa0That kind of control is communism.""" 47 | 451,1,,"""What is the point of switching between accounts?\\nJust so you know, you are being software tracked.\\nIts useless you stupid imbecile. """ 48 | 461,0,,"""two words....WHO CARES.....Needs to sort his prepubescent face out bless him..has she been made to sign the sex offenders register? Silly girl""" 49 | 471,0,20120514050954Z,"""you are amazzzzing! Give up your full time job and START PAINTING AGAIN1 These are remarkable!""" 50 | 481,0,20120515143841Z,"""No, but you can't just treat someone like a piece-of-shit just because you think your shit don't stink. No one is above the law, so by law you have the right to ask a person to leave your property if they are disturbing the peace or damaging property, or stealling.""" 51 | 491,0,20120530132325Z,"""I would like to see EVERYTHING in japan. yes everything because I am dying to go japan soon. but im afraid that I dont know japanese. Can you tell us your itinerary?? any hotel reccomend ? shop that couldnt miss... and japanese culture =) hehee\n\ntwitter is @rumtumcat""" 52 | 501,0,20120619105734Z,"""Here he is out of character where he mentions why he won't let his kids watch Colbert Report. \nhttp://www.youtube.com/watch?v=DNvJZCFpdp8""" 53 | 511,0,20120620110804Z,"""Il ponte sullo stretto, la maddalena, la new town di L'Aquila, l'expo 2012, le olimpiadi, gli europei e via dicendo...""" 54 | 521,0,,"""@Coach P @pastert33 I do agree on Herrera. I love the way he plays ball. You realize next year we may have Jenkins, JHC, Foster and Herrera as our starting 4.""" 55 | 531,0,20120620141559Z,"""not to mention all the LAME BROTHER IN-LAWS she'll be getting""" 56 | 541,0,20120612020801Z,"""Life, motherfucker. Lmfao gets me every time.""" 57 | 551,0,20120619002607Z,"""Kennedys where sure man whores.""" 58 | 561,0,20120530010807Z,"""Best MMO that will be out in 2012. 
\xa0It will be niche however, because 90% of MMO gamers are clickers and lazy gamers and this game does not support clickers and the lazy crowd.""" 59 | 571,1,20120610142641Z,"""Well you just lost all credibility when you said you were a student of Limbaugh University! Now you are just a TROLL! Nobody cares what you think or say, you are being flagged every time you post one of your degrading comments! Enjoy while it lasts!""" 60 | 581,1,20120609180947Z,"""\nExactly how often did your daddy r\xe4pe you? Weekly Hourly?\n\nYou're easily the most depressingly buttf++ked piece of shite ever to vomit their halfassed moronic excuses for the English language on this site. \xa0You think you're being clever, but your grasp of colloquialisms and sarcasm are hilariously pathetic. You're a fool. Your posts are puerile and you personally have no value. \xa0Nobody here likes you. H\xe9ll, nobody here even tolerates you. If these people could recognize you on the street they would spit on you. My best guess is you're the result of unprotected buttsex between your mother and the family dog. \xa0An abomination with no justifiable reason to exist.\n\nNow do us all a huge favor and go fuckyourself with a chainsaw, ""Sparky"". \xa0Slowly and painfully.\n\nEveryone! Blow this pathetic Obamatroll a big sticky sphincter kiss!!!\n\nMMMMMMMWWWWWWAAAAAAAAAAAAAA!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n\n\u2026\u2026\u2026\u2026\u2026\u2026\u2026./\xb4\xaf/)\n\u2026\u2026\u2026\u2026\u2026\u2026..,/\xaf../\n\u2026\u2026\u2026\u2026\u2026\u2026./\u2026./\n\u2026\u2026\u2026\u2026./\xb4\xaf/'\u2026'/\xb4\xaf\xaf`\xb7\xb8\n\u2026\u2026\u2026./'/\u2026/\u2026./\u2026\u2026./\xa8\xaf\\\n\u2026\u2026..('(\u2026\xb4\u2026\xb4\u2026. \xaf~/'\u2026')\n\u2026\u2026\u2026\\\u2026\u2026\u2026\u2026\u2026..'\u2026../\n\u2026\u2026..\u2026\\\u2026.....\u2026\u2026. 
_.\xb7\xb4\n\u2026\u2026\u2026\u2026\\\u2026\u2026\u2026\u2026..(\n\u2026\u2026\u2026\u2026..\\\u2026\u2026\u2026\u2026.\\\n\n""" 61 | 591,0,20120530101807Z,"""bosh is ready they just dont need him to play""" 62 | 601,0,20120611210213Z,"""@BabaPuppe: thats right negro, run off the site because you are no match for me anyway. only ""PU"" to the ""C"" like you is no match for me.""" 63 | 611,0,20120527181535Z,"""God is not a Republican, nor an American, nor a Conservative""" 64 | 621,0,20120619183949Z,"""FUCK YOU NEWSDAY.""" 65 | 631,1,20120619222716Z,"""FUCK ALLA YALL YMCB TAKEOVER WEEZY JUST TIRED OF THE BEEF DAS WHY HE SAID THAT\xa0http://www.youtube.com/watch?v=5n7oA_Kk1IQ""" 66 | 641,0,,"""@jakesnake99 @skywaker9 (David) There's enough Rhino for the both of you.""" 67 | 651,1,20120320211949Z,"""Laffin?\xa0 You don't laugh at anything you jihadist puke.""" 68 | 661,1,20120619002826Z,"""AWE.......\xa0 Poor wittle troll on Patrol is TOO $TUPID to speak onlyEnglish, SO I'M THE BAD GUY.\xa0 SERIOUSLY?\xa0 LMFAO!""" 69 | 671,0,20120611204033Z,"""Did you really use the word ""evoking""?""" 70 | 681,0,20120610042525Z,"""You are a theif are you????""" 71 | 691,0,20120619173940Z,"""A pic from Queen NIcki concert last night.\xa0\nhttp://twitpic.com/9xz941\xa0Now you tell me..who the fuck is winning!""" 72 | 701,0,,"""Jay-- My condolences to you and your family. It's a terrible way to get a wake-up call, but I'm glad you've found a way to channel this terrible loss into something positive for you and yours. Enjoy life in the (slightly) slower lane. """ 73 | 711,0,20120619191032Z,"""Like our first ever credit rating downgrade.""" 74 | 721,0,20120529200551Z,"""She was kind of a main character.""" 75 | 731,0,,"""Look at the bright side, the libs/dems have this David Crock/Brock character supporting their side, a strong wind will blow him from LA to Boston.""" 76 | 741,0,20120528114402Z,"""The U.S. 
govermet spends billions of dollars every year guarding against al qaeda.""" 77 | 751,1,20120515185538Z,"""lol, your full of chit , just another stupid liberal""" 78 | 761,1,20120610031032Z,"""@peter8888,\n\nYou showed your true colors. In addition to comment misdirection, you are now using name calling and innuendo.\n\nName calling, innuendo, comment misdirection, the last refuge of someone who has no viable, logical, or well reasoned comment.""" 79 | 771,0,20120619202639Z,"""And who protects us from you when you decide you don't like our political views or our religion or the color of our skin? And how is asking for some reasonable safety precautions taking away your rights?""" 80 | 781,1,20120619170743Z,"""Doubt it.. again showing your awesome reading comprehension skills.. i said you're acting like a b!tch not you are a b!tch big difference there genius""" 81 | 791,0,20120527185632Z,"""who gives a fyck this is his second year""" 82 | 801,0,20120530053012Z,"""as a browns fan, i softly weep.""" 83 | 811,1,20120320180328Z,"""You need some serious psycho;logical help.""" 84 | 821,0,20120527215030Z,"""Obama does what he thinks is best for the country, GOP does what they think is best for the Republican party......it should be an easy choice come November.""" 85 | 831,0,20120612071856Z,"""Wake up and smell the coffee, who is the current premier of the WC?""" 86 | 841,0,,"""what a great way to run a business.""" 87 | 851,0,,"""First of all its the SCOTUS moron, and secondly the ruling by the 9th is \\nwritten for Kennedy bro, have you read it? It mirrors Romer v. Evans, I \\nwouldn't be so sure about your slap down, we only need one vote from the\\n Supremes and judging by their rulings on gay rights and equal \\nprotection in the past, Perry v. 
Brown is a slam dunk\\xc2\\xa0 for our side.""" 88 | 861,0,20120619181811Z,"""SO everyone is on board for that mosque to be built near the Twin Towers?""" 89 | 871,1,20120609235122Z,"""lol ur dumb/retarded""" 90 | 881,1,20120620163353Z,"""says the child who does not understand what relevancy means. It's ok Schimidt, we get the fact that you are a closed book. Thankfully, your impact on society is meaningless. I accomplish more in the first hour of my day then you have your entire life""" 91 | 891,1,20120620135033Z,"""I wish the same to you uglyandpoor. You have nothing to say of your own, so you post things I have said - out of context. You are a horrible little person. flag yet again.""" 92 | 901,1,20120320142343Z,"""Coolest president ever? Your a complete waste of oxygen and resources. Phucking idiot""" 93 | 911,0,20120514051030Z,"""You just went full retard. Never go full retard.""" 94 | 921,0,20120612063755Z,"""im sorry but dont nobody just pick somebody out randomly and start attacking...like she went ham for a reason. kimora seem like one of those low-key stank bitches. it might not be all true but kimora played a role. aint no black chick just just gon go off like that.""" 95 | 931,1,20120612034208Z,"""huck huck huck, no... but giant pictures of them beating the_crap out of idiots like you are ok!!""" 96 | 941,0,20120609192115Z,"""I notice you haven't responded. You only care when you are telling other people how dumb they are. Again who is your team UJ???""" 97 | 951,0,20120514050850Z,"""@cheatingrangers\xa0\n\xa0\nWe shall see only a fool predicts the future, you qualify.""" 98 | 961,0,20120502132655Z,"""You are such a talented photographer!""" 99 | 971,0,20120619200631Z,"""That is kind of a sexy photo though. Who took it?""" 100 | 981,0,20120612034735Z,"""I imagine he lives in Virginia, he's wouldn't live with the riffraff in DC.""" 101 | 991,1,20120619222807Z,"""Klein, you are a coward. Did you really interview these guys, just to mock them. 
\n\nBest part is that you stop short of belittling Scheartz. Even you know that would probably be the last straw in your downward career.""" 102 | 1001,0,20120528045218Z,"""why the hate man? Done nothing to you....ever....""" 103 | 1011,0,20120609221240Z,"""Warren Buffet owes corporate taxes back to 2003..\xa0 He doesn't want to pay his fair share...""" 104 | 1021,0,20120611235414Z,"""In Chicago, we say gas station.""" 105 | 1031,1,20120620014312Z,"""No actually, you've shown me no source, all recent data has been shipments, idiot. Your other comments would hurt if you were actually relevant, moron of the year. Explain how that's still not dominant, kid.""" 106 | 1041,0,20120619143245Z,"""Citizen9,\xa0\n\n\xa0you can't keep liberal style lying your way thru life.\xa0\n\nThe truth will prevail. I know thats anathema to you.\xa0\n\nYou need help.""" 107 | 1051,0,20120612145338Z,"""Votto was in our farm before Krusty ever came here. So was Bruce and Cueto. Phillips was here too but not from our farm you idiot.""" 108 | 1061,0,20120612060219Z,"""\xc1no, v\u0161etci \u010do ho pozn\xe1me vieme, \u017ee hr\xe1 len do vlastn\xe9ho vrecka !!!!!""" 109 | 1071,0,,"""\\xc2\\xa0Supposedly, a very rare pattern in the ""C"" position, or sometimes referred to as the ""bullish doggy-style manuever"", take your pick!\\xc2\\xa0 LOL!http://www.screencast.com/users/hoof_hearted/folders/Default/media/63e5082f-2692-4214-865c-4701ba356a88Very suspect, though!(*if someone has posted this, my apologies, I\\'ve not seen it)""" 110 | 1081,0,20120620022954Z,"""Faux is a better source of animus.""" 111 | 1091,0,20120612031140Z,"""Based on the architecture, stucco building and foliage....oh, and the dancing gay black kid, I'm betting this is from Florida.""" 112 | 1101,1,20120612024132Z,"""Your Mother is the clubhouse whore...""" 113 | 1111,0,20120612143504Z,"""There's far worse things happening in Africa,and where's your humanity towards africa.More than half of Mali has been taken over by FSA 
best mates Al Quaida,Mauritania still has a slavery system going on,Chad,Niger,Darfur,South Sudan are on the brink of famine,Somalia once again FSA best mates Al Quaida ruining the country,north eastern congo there's deaths and mass rapes,don't you reckon some of the issues in Africa are far worse than syrian regime battling terrorists.""" 114 | 1121,0,20120530145624Z,"""Laird ans Avila (and Santos) may be among the Tigers' most marketable players- at least as far as trading for a starting pitcher or a starting 2B is concerned.""" 115 | 1131,0,20120528144444Z,"""Is this a defensive stand for Theo or an offensive jab at paolo? They really aren't the same thing. If you've been reading him THAT long and dislike him THAT much then ignore his comments. The irony is , many of paolo's views and opinions bring the GOOD Cub Fan out of many of us .( rahrah) The guy was a Military Drill Instructor devoted to creating fearless fighters. He just does it on a different scale now creating fearless fans out of fair weather fans. Just Sayin man.""" 116 | 1141,0,20120514051016Z,"""Ha ha wait until you have your own kids:)""" 117 | 1151,0,20120611202808Z,"""I think I'm in love with you...""" 118 | 1161,0,,"""and rape and incest? \\xc2\\xa0That's self control too?""" 119 | 1171,1,20120620162014Z,"""But, you couldn't show ANY proof of that, even if it was true.\nGo away. You are an embarassment to your Klan brothers, your family, your neighbors, your teachers and America as a whole.\nFind something to say that is not directly quoting Nazi or Stalinist propaganda. \nThe most pathetic poster on the web. You are below Oberbite, a Soros trool or the ignorant little heap that wwrote ""Waiting to Exhale"".""" 120 | 1181,1,20120612115443Z,"""Are you fucking dumb? 
""making it clear that the group was delaying their comeback due to album production and not to\xa0avoid competition with other groups."" \xa0You should be thankful that 2ne1 didn't cameback this june, if 2ne1 did, those awards will be owned by 2ne1. \xa0Fuckin insicured fx fan.""" 121 | 1191,0,20120515185619Z,"""Mom looks like an ol' ho from way back herself. Screw her and her pervert a$$ son.""" 122 | 1201,0,20120515232334Z,"""You would know a thing or two about having trains ran on yourself sending messages from theo lacy...""" 123 | 1211,0,,"""He's not an Isolationist. He just does not want a foreign policy that ends in America stomping all over the world like Babylonians. \\n\\nOnly RON PAUL will defend Israel when they blow up Iran's nuclear sites\\njust like only RON PAUL defended Israel when they blew up Iraq's nuclear sites.\\n\\nRon Paul is hardly an isolationist, he just recognizes a lot of tough truths about America's foreign policy that the self-righteous warmongers just can't seem to swallow. """ 124 | 1221,0,20120619201706Z,"""Allan Evans......some who actually pay to see you......\n\nMore fool them if they disagree with his tax status.""" 125 | 1231,0,20120610151609Z,"""Kelly made Idol. They owe her as much as she owes them. She is an international superstar because of her voice and hard work not because of Idol. Kelly would have been famous with or without Idol because she lives for music and has a gift from God.""" 126 | 1241,0,20120611054037Z,"""Why do you white people keep coming here trying to act like they are one of us. Go burn in the sun. I hope they kick all of you pale devils out of Africa.You fools are going to bring pain on to yourselves.You watch we watch. In a world of the lost the battle of open minds is becoming a reality.""" 127 | 1251,0,20120529130419Z,"""@Hoss \xa0Yeah, they came out with an album last week.\n\xa0\nFirst one in 5 years.\n\xa0\nIt's pretty fucking awesome. 
Solid rock album.""" 128 | 1261,0,20120527211008Z,"""Hey there Less, good to see you!""" 129 | 1271,0,,"""All that hatred inside of you must have eaten away what little capacity for clear thought you may have had. You lack the basic intelligence and knowledge to have real discussions or debates, so you come here insulting people into having arguments. Your insults are childish. You're proving all the negatives about your party every time you type. How about posting your picture... what are you ashamed of? You wouldn't have the balls to push all 'teh stupid' under your real name.""" 130 | 1281,1,20120620095810Z,"""Oh I think I do. Let's put it this way, it's not like watching United.\n\nOnly muppets like yourself can see that's good football. Listen good on them for qualifying, but the football is dross and a 100 odd idiots posting here isn't going to change that.\n\nSo think again about who the thickos are and add your name to the list.""" 131 | 1291,0,20120619001224Z,"""I shot the computer with a pistol, for being a fucking cocksucker. Last time it fucks with me in the morning.""" 132 | 1301,0,20120610042312Z,"""Hot_damn.... 
it's_obozo_bitch_, watch_them_teeth_fold_back_and_them_legs_spread_when_they_start_playing_hell_to_the_chief....""" 133 | 1311,1,,"""how do you know mother fucker stop assuming shit you don t know you don t know that man like you hypocrite son of a bitch""" 134 | 1321,1,20120610151941Z,"""Go back to MediaMatters, troll.\n\nYou and your pond scum buddies are toast.""" 135 | 1331,0,20120514050748Z,"""okay now I know your trolling, good laugh, you had me for a while""" 136 | 1341,0,20120530023506Z,"""Trump refused to give any ""evidence"", because he has none.""" 137 | 1351,1,20120612131944Z,"""It's been years and you birther idiots have yet to find ONE piece if evidence to support your claims, so who is the stupid one?\n\nYou are gullible and ignorant if you believe our president was born in Kenya""" 138 | 1361,0,,"""Conservatives are to blame for some of this for wimping out on it. Why are we not working on laws to ban gay marriage on a federal level and let\\xe2\\x80\\x99s not stop there? No gay adoptions we need a man and a woman in these kids lives. Regardless of what liberal propaganda tells us, conservatives out number liberals 3 to 1 why do you think fox is so big and liberal radio shows are basically non existent. Why is it 85% of Americans were against Obama care and yet because of the majority in congress it was passed anyway? Conservatives need to get off their keesters. In Arizona these things are not radical, there mellow. Start working on making homosexuality illegal start far to the right just as the liberals start far to the left and they get it (Obama care etc.) no this is the conservative\\xe2\\x80\\x99s fault. Gay people need to feel shame in what their doing, not heroic as the liberal media makes them sound. Make it illegal and that will help in giving them the shame they should already feel. 
It\\xe2\\x80\\x99s just not right it\\xe2\\x80\\x99s not a matter of hate I guess I hate rapists to because I don\\xe2\\x80\\x99t feel that\\xe2\\x80\\x99s right either.\\nIf we glorified rapists and made them sound all heroic would it become a RIGHT to be who they are? That's how they were born. They didn't choose to be that way they just are so they should have the RIGHT to rape any man woman child or animal. This is the liberal logic. Next their going to want to pass laws that being a rapist is just sexual orientation. You would even see ignorant people trying to say the bible says its ok. If it were ok then 2 men could have a baby without help from any women and 2 women could have a child without using any sperm from a man. Nope sorry just not possible. """ 139 | 1371,0,20120528091752Z,"""You will never get a job in Government, too much common sense \xa0and stating the bleeding obvious. Bettie Windsor and Diana Ross seem to prefer Northolt ( for different reasons ) so why not look again at the utilisation.""" 140 | 1381,1,20120610010421Z,"""fuck this gane this fucking fake""" 141 | 1391,0,20120611194257Z,"""not funny dude....but I get it.""" 142 | 1401,1,20120610202120Z,"""_FUCK_OFF_NIGGER_YOU'RE_NOT-AMERICAN_YOU_POS-FAGGOT!""" 143 | 1411,1,20120620074946Z,"""idiot nayar you are mother fucker. you are saying attack on Shri harmandir sahib was a right action, this shows you negative thinking about sikh community. All siks are agree with building memorial thats why any sikh organisation does not oppose. You have no concern with issues of punjab. 
The wording you used in this article is very rude and this shows your hate about sikh community,,,,,,you are barking dog,,,we don't care you""" 144 | 1421,0,20120610011820Z,"""Looking up a definition doesn't make me an authority on labeling people.\xa0 Therefore, your diagnosis is invalid.""" 145 | 1431,0,,"""aaahhh, how sweet........read your post ..........it is the sign of an enabler.....oh yeah, but aaaah, how sweet.........puke""" 146 | 1441,1,20120610081951Z,"""Steve,go brush your teeth with a revolver you dumb bastard""" 147 | 1451,0,20120527183423Z,"""""...erections.""\r\nThen why are they targeting women?""" 148 | 1461,0,,"""all these police and security yet people get in with flairs meanwhile in england a 100k+ a week player gets called a name and there is uproar go figure""" 149 | 1471,0,20120619051756Z,"""@Rourke needs to give me a fucking like\xa0I am from a family of nurses. \xa0They are much more liberal than I am (my sister in law supports a single payer system, I support getting rid of the hypocratic oath and the AMA). \xa0I have gotten their diagnosis, even had them get me scripts from their diagnosis.""" 150 | 1481,0,20120529220046Z,"""Your name is so ironic.""" 151 | 1491,0,20120502195543Z,"""Stop glorifying these idiots and you'll be better off. Go Browns !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!""" 152 | 1501,0,20120619183203Z,"""Too thin? Yes that is what she said, while laughing and pointing, and then serving me divorce papers. :(\n\n8mm is about as thin as I think it should be. Just my preference.""" 153 | 1511,1,20120620053436Z,"""Fuck you, you fucking dork.""" 154 | 1521,0,20120609220419Z,"""All the more reason to slap the dog shit out of them RT""TenkYouPlease: Alcohols got you feeling like you cant get slapped""""" 155 | 1531,0,20120620172839Z,"""Were hundreds of people killed by gun runners in 2007??""" 156 | 1541,0,20120619195729Z,"""Holy shit. 
\xa0Son is droppin science.""" 157 | 1551,1,20120502230100Z,"""thankyou EPS\nYOUR AN IDIOT RICDOSE""" 158 | 1561,0,20120610015244Z,"""You are soo right.Muslims fight Muslims everywhere.Muslim men degrade Muslim women,everywhere. Until all this can be repaired and Muslims become \xf6ne people"", you will not go forth.\nYou have about as much chance of this,as I have to see Rocking Horse Dung.""" 159 | 1571,0,20120611232806Z,"""How about bring the troops home to their families and stop this farce.""" 160 | 1581,1,20120612012805Z,"""That may be but you are asad, little pathetic WHITE \xa0man man ewww...100 times worse you child molesting mass murdering welfare bum! you must be deported!""" 161 | 1591,0,20120528002453Z,"""All pitchers have bad days at the office, and if the possible signing of Oswalt to replace Lilly is an indication of things, I personally feel it is that Lilly may be headed to the DL rather than traded, unless Billingsley is headed to the DL.""" 162 | 1601,0,20120503040043Z,"""F you, DEA. You and your useless war on drugs. Stick to prescription med fraud. At least lay off the weed, that shit is safer than alcohol.""" 163 | 1611,0,20120530132112Z,"""You're not the boss of CNN.""" 164 | 1621,0,20120527190448Z,"""fuck i keep talking to myself! 
i guess i need to get smart to before i can suck some DELICIOUS WHITE COCK""" 165 | 1631,0,20120612130416Z,"""that sucks...plug in F550.....wake the fuck up\xa0\xa0 douch bags...i can see my new ramp truck having batterys in it....why dont you just put in a real large spring in it and wind it up just before you to on a tow...do you think we will run out of crude oil...i dont think so...the oil companys will just be making more money and the goverment is being controled by them...fuck heads!""" 166 | 1641,0,20120619002737Z,"""..........RON PAUL....is / was.....ALWAYS...for YEARS....BATSHIT CRAZY!!""" 167 | 1651,0,20120528031934Z,"""I didn't realize Pop has a weak eye...""" 168 | 1661,0,,"""lol they did forget to put the feet on in the scene in the kitchen""" 169 | 1671,0,,"""Tell you what though,the lasses in Newcastle will still be out on Saturday night, in their mini skirts and skimpy tops!""" 170 | 1681,1,20120320204322Z,"""narrow minded little fool you are""" 171 | 1691,0,20120619213252Z,"""@PPW I would add Marvin Bullett to that list... He's a better choice IMO""" 172 | 1701,0,20120528142053Z,"""Every team has the ability to beat anybody. That's why they are professionals. Come playoff time that changes.""" 173 | 1711,0,20120612142600Z,"""umm theres somthing called the internet go to Google and type in Porn there ya go and guess what the best part is its free....""" 174 | 1721,0,,"""\\xc2\\xa0Oh no it's much cooler to be able to blow up the planet 1000 times over then simply 10 times over.""" 175 | 1731,0,20120529144509Z,"""So the pools aren't open Sundays? 
The hell are my taxes paying for?""" 176 | 1741,0,20120610091522Z,"""morcar ...you are hilarious\n\xa0\nThe whole world was at war with the nazis and it took 6 years to defeat them\xa0but u expect a small village full of farmers and the likes to have stopped them???\n\xa0\nOn what planet were you born???""" 177 | 1751,0,20120502230035Z,"""F*&K YOU MTV.""" 178 | 1761,0,20120530065950Z,"""Whoever had the aldasity to compare that quitter to jordan has to be born from 2003 on up never seen a champion at work""" 179 | 1771,0,20120502174256Z,"""Actually the US needs to walk its own talk! We preach to others about human rights but it's the US killing people (and its own citizens) with drones on presidential say-so! Have you been living under a rock for thepast decade? Our Consitution (Bill of Rights) is being shredded! The US is trying to suck Uzbekistani balls for the energy game and to establish US forces in the region to ""contain""Chinese influence in their OWN BACKYARD! (The Usbek gov't makes China look like a bunch of liberals!)""" 180 | 1781,0,20120529210140Z,"""Yeah its difficult if your chick thought you didn't give a fuck, and then all of a sudden you act like you can't live without her.\n\nOn the bright side though, now she knows you do give a fuck. Which is a good thing. 
Now though, the only play is to back off, and let her miss you.""" 181 | 1791,0,20120610151606Z,"""But, Dante, with all due respect, you ARE a hard core partisan.\n\nYou DO NOT accept facts which counter your stated positions.\n\nYou falsely call government websites ""right wing blogs""\n\nYou NEVER furnish a documented fact in support of your own positions while demanding them of others.\n\nYou REFUSE to even DISCUSS any opposing positions in a civil manner, choosing instead to call your conversants, liars, stupid, etc.\n\nIn short, your posts DEMONSTRATE you to be a hard core ideologue, devoid of ANY ability to think critically.""" 182 | 1801,0,20120620050018Z,"""Exactly!\nSee, you don't learn quality facts like that in school. They tell you you have to spend weeks in the hospital, well, fuck them.""" 183 | 1811,0,20120609202644Z,"""Wait, let me get my boots on. It's getting deep here. You do know you are just repeating right wing pablum that has been proven false, don't you?""" 184 | 1821,0,20120528194147Z,"""Are you fucking kidding me ?!""" 185 | 1831,0,20120530003957Z,"""Claudell Washington was a starter too.""" 186 | 1841,0,20120612033148Z,"""Yeah, I got the same thing about 3-4 weeks ago. \xa0Just couldn't post with it anymore. \xa0No e-mail message or anything. \xa0Even when I tried to post. \xa0Just frozen.""" 187 | 1851,0,20120610005241Z,"""don't threaten me Steve......never ever threaten me............""" 188 | 1861,0,20120610113045Z,"""no another idiot Ezra""" 189 | 1871,1,20120529024955Z,"""them some ugly motherfuckers. Proof you don't need to be beautiful to make it in Hollywood, you just have to know who to blow.""" 190 | 1881,0,20120530054130Z,"""Hey, Rob-Me likes dogs, he might like that thing on Trumps head.""" 191 | 1891,1,20120609211003Z,"""this nigga gay, i would have been fucked that bitch.""" 192 | 1901,1,20120620080626Z,"""Commenter, why are you still trolling here? 
You are the loser!""" 193 | 1911,1,20120502142507Z,"""Stupid fucking idiots,,,what more can you say. These freaks should put signs on their foreheads saying ""This Space for Rent.""""" 194 | 1921,0,20120528190028Z,"""lol-they will find a way to lose....""" 195 | 1931,0,,"""maybe the MPs should take a pay cut. to show the way...........""" 196 | 1941,0,20120502224253Z,"""Just to educate you my brother, you and Kim may be the bigots. If he's liking your post because he's ""Asian too"".""" 197 | 1951,1,20120612013439Z,"""Dude, if you change you username to\xa0specify\xa0against someone then you're a butthurt fag""" 198 | 1961,0,,"""Here is a video of why republicans suck.\\xc2\\xa0 http://otpoliticalposts.forumotion.com/t1495-why-we-suck""" 199 | 1971,1,20120610131611Z,"""Leftist White race traitor\xa0or shit skinned\xa0degenerate; you are a hard one to call.\n\nMongoloid.""" 200 | 1981,0,20120619210436Z,"""I am almost more excited than you!""" 201 | 1991,0,20120527160654Z,"""not only the #3 team in their own division, but also the #3 team in the nation before the LSU game""" 202 | 2001,0,20120528163856Z,"""So when is Crawford coming back? I drafted yhat bum.""" 203 | 2011,1,20120320122805Z,"""Get thatdick outta your mouth so you can talk.""" 204 | 2021,1,20120612021335Z,"""Stfu your existence offends me""" 205 | 2031,0,20120619183457Z,"""Waaaaaaaaaa!!!!! Waaaaaaaaaaaa!!!!!!\n\n\nListen to those libturds whine!\xa0\xa0 They know they are toast so they\xa0yap and snip like\xa0little Chihauhaus!\n\n(No offense to Chihauhaus intended).""" 206 | 2041,1,20120612003605Z,"""You are simply comically idiotic.\xa0 Or you're just a depressing fool.\xa0 Either way, you're still just a liar and an Internet coward.\xa0 Cheers!""" 207 | 2051,0,20120611190127Z,"""A Frank yesterday, a Frank today, a Frank tomorrow. 
I have been and always will be one big Frank.""" 208 | 2061,0,,"""SHOLA'WHAT PLANET DO YOU LIVE ON YOU NEED TO GET DOWN TO SPECSAVERS BLIND OR STUPID?""" 209 | 2071,0,20120529022715Z,"""LOTS OF FOULS . . . LOTS OF FREE THROWS MADE. . . . COMIN' UP!""" 210 | 2081,0,20120620112903Z,"""I admire him, he is a true warrior king of old.\n\nThe difference between a general and a warlord is that a general only follows orders, a warlord is only constrained by the rules he agrees to.\n\nIt was a pathetic joke that Obama was given an honorary black belt in Taekwondo without earning it.\n\n Putin is a better man than that... your standards of measuring a man (even an adversary) is as pathetic.\n\nNo, I think Putin is more honest than you will admit.""" 211 | 2091,1,20120320113848Z,"""Wally the bigot, and\xa0 cmorris the racist. The new faces of ignorance for the new south. No matter how often they wash their black sheet, it will still carry the smell. Good luck guys, and all those like you. God has a special section for you to sit in when you arrive and your resting spot.""" 212 | 2101,1,20120619024417Z,"""I would slap you silly but clearly you are already there.""" 213 | 2111,1,20120502101915Z,"""Get I life u bitch!!!\navande ammede oru lekhanam...""" 214 | 2121,1,20120619192141Z,"""The difference between your wife and a rooster HoldYourOwn? The rooster says cock-a-doodle doo, your mama says any-cock'll do.""" 215 | 2131,1,20120530024344Z,"""you're right; there weren't enough toothpick hangovers and tailwhips... 
Go fuck yourself""" 216 | 2141,1,,"""\\xc2\\xa0Stop suxing dik for cigarette money hole""" 217 | 2151,0,20120619041609Z,"""If he comes back, we continue.\n:)""" 218 | 2161,0,20120529163552Z,"""you are on to something there""" 219 | 2171,0,20120619152712Z,"""True...but if you are around someone who abuses drugs contstantly and the drugs are always around and you feel hopeless because you married a loser like Bobby Brown and he treats you like crap you sure would be tempted....""" 220 | 2181,0,20120619090109Z,"""Is that the war we decided to lose, because the Kings Freemasons wrote the constitution, Act of Independence that never was and set the US up as a crown (not royal) corporation and is that why a relatively new CIA has its officers sware an oath to the Crown??lol\n\nThe poor Amerikan sheeple are really living and working in a Crown work camp anf now you might understand why Amerikans are probably the most isolated and narrow minded extreme thinkers on the planet""" 221 | 2191,0,20120515165233Z,"""Lots of women use it to burn body fat and improve skin quality. but you don't get one injection, you get a daily injection over a course of 6 months""" 222 | 2201,0,,"""my screen is stuck in the black screen help somebody""" 223 | 2211,0,20120609173356Z,"""Type your comment here.sfsdfasdfsad""" 224 | 2221,0,20120530023817Z,"""Are you watching a Quantum Leap marathon?""" 225 | 2231,0,20120529171258Z,"""even if the bucks won the nba, they wouldnt catapult past the bulls in popularity""" 226 | 2241,1,20120620082015Z,"""Go away, Flora. \xa0Take your spam with you. \xa0And if you MUST comment, then\n\ndon't steal words from other posts.\n\nSPAM ALERT \xa0 Flag her post.""" 227 | 2251,0,20120528114520Z,"""it won't be as dramatic as that... I mean we england fans who go to Poland have been given the safest guarantee that we won't stay long there anyway, after the first round we'll all be back to England with our team of heroes who battled and fought but were just unlucky. 
LOL""" 228 | 2261,0,20120611225727Z,"""No, it's because your liberal Messiah is SUCH a monumental LOSER!""" 229 | 2271,0,20120612121009Z,"""He also identified the unit that took out Ben Laden.\xa0 When asked, in 1942, where the aircraft that bombed Japan, in the Doolittle Raid, came from FDR answered, ""Shangri La.""\xa0 President Obama should also have side stepped the questiong.""" 230 | 2281,0,20120612141041Z,"""Once again I give a ""tip of_the_fuckyou_hat"" to_Faux_""News"". \xa0So, we can comment only on articles that you see to be appropriate?????""" 231 | 2291,0,20120529005211Z,"""Ky, come on man, the leading returning senior WR has 16 catches.........""" 232 | 2301,1,20120320180032Z,"""fuck you all to hell""" 233 | 2311,0,20120529113600Z,"""Interesting story on the Canadian prime minister's reaction to a nude painting done of him\nhttp://www.iol.co.za/the-star/art-helps-to-expose-the-naked-truth-1.1305811""" 234 | 2321,0,20120619200244Z,"""At the beginning of the season if you send him down, no one will pick him up. No one violates that rule.""" 235 | 2331,0,20120529233743Z,"""And the Heat shot 5-25 on 3's and still won by 14. So let's not act like the Heat played at their best either..""" 236 | 2341,1,20120320142848Z,"""you are sickening and pathetic !!!!!!!!!!!!!!""" 237 | 2351,0,20120528100240Z,"""Compared with the USA, Zimbabwe looks clean.""" 238 | 2361,1,20120530123011Z,"""Damn it Nicole, fuck it up for us, just once. Please?""" 239 | 2371,1,20120620164342Z,"""Not sure if you are joking or just dumb/have no knowledge in comics.""" 240 | 2381,1,,"""Captain, You need to quit standing on your head, You are have a cranial recital inversion problem, You have inserted your cranium so far you your recital cavity you are seeing everything upside down.\\n""" 241 | 2391,1,20120610152113Z,"""FUCK ALL HEAT HATERS.YALL CAN SUCK MY DICK , I GOT NOTHING AGAINST OKC BUT FUCK EM TOOO !! 
WE WINNING THIS YEAR WADE COUNTY KING JAMES BITCHES , 305""" 242 | 2401,1,20120618212610Z,"""You are a born m o r o n.""" 243 | 2411,1,20120610173833Z,"""grrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr i hate that motherfuck her dad need\xa0 to be burned at stake""" 244 | 2421,0,20120609190924Z,"""It is one of the few things the Federal Government is Constitutionally SUPPOSED to pay for.""" 245 | 2431,0,20120612004409Z,"""i just want to know why billboard did not post my comment. seriously? and all of these negative comments are on this blog. screw you!""" 246 | 2441,0,20120609232539Z,"""The collapse is now very near, i think!\n\nHey EU people (and US people, by the way), let's just face it once and for all:\n\nWe are bankrupt!\nThis is jut too much debt. Everybody is drowning in a sea of....debt.\nDerivatives exceed, by far, the world GDP.\n\nSorry.....but it seems to me: game (soon) over!""" 247 | 2451,0,20120612012811Z,"""A Nickelback riot is when their two fans get into an argument.""" 248 | 2461,0,20120619194129Z,"""@Rourke needs to give me a fucking like\xa0you mean Polenta?""" 249 | 2471,0,,"""@gdlow The fact that he was picked in the 7th round should have no bearing on the decision. He was stuck behind JaMarcus Russell for three years and his stock drop was strange to say the least. Irrelevant through, you have to look at the fact that he worked with one of the game's best QBs and best QB coaches for 4 years. Not to mention that Philbin knows how to make him succeed.""" 250 | 2481,1,,"""Your pussy. Stop hitting woman and fight me bitch ass nigga""" 251 | 2491,0,20120527192245Z,"""The Miami heat are 2012 NBA Champions....""" 252 | 2501,0,,"""The American commerce system runs on sex. It's used to sell everything from condoms to condominiums. 
You'd think, the party of Big Business, would recognize this and not try and kill the golden goose, (or choke the golden chicken, as it were.)""" 253 | 2511,1,20120609210158Z,"""and you are a MORON.""" 254 | 2521,1,20120611051450Z,"""Victor Moore you are one dumb ass writher or you are smoking some bad crack. WTF suck hawks nock off the NINERS. WTF. dude buy some good coke. Stay off that bad shit...DUDE are you hanging with Bobby Brown. Dang....""" 255 | 2531,0,20120619061117Z,"""#FuckTrolls""" 256 | 2541,0,20120619234331Z,"""Self defense? That little girl must pack a mean punch!""" 257 | 2551,0,,"""In Thailand, the Iranian blew his own legs off but\\xc2\\xa0didn't harm any Israelis.\\xc2\\xa0 Is this an example of Iran's\\xc2\\xa0military might?\\xc2\\xa0\\xc2\\xa0In the end, the Iranians will blow the legs out from under their own country.""" 258 | 2561,1,20120529051327Z,"""so its sad you idiot. Ray is one of the best pure shooters... so dumb""" 259 | 2571,0,20120609205559Z,"""Ha! I prefer something for him in the polar ice cap region.\n\nBTW, Welcome back.""" 260 | 2581,0,20120610060056Z,"""If it's not your thing, make her do it.""" 261 | 2591,0,,"""@ABXXV25 Yeah, I've been following along. Just wanted to make sure you knew.""" 262 | 2601,0,20120529142232Z,"""You guys have an awesome slate of games this year as far as the whole smack talk thing will go. Aside from the divisional games, you get rematches against the Giants, Lions, and Saints. Plus the Packer/Niner game that never happened last year.""" 263 | 2611,0,20120619041228Z,"""Zuckerberg\xa0is jewish you know...""" 264 | 2621,0,20120619073323Z,"""How could someone named Emmit Smith be racist??""" 265 | 2631,0,20120620061134Z,"""Thank you for your comment! :) Sorry to hear about the banking soda...let me know how everything is going! 
Keep in mind you can post a picture in your comment on here as well if you would like to do that!""" 266 | 2641,0,20120611222005Z,"""Footlongs next favorite holiday is Christmas ..he dresses up as Santa and gets children to sit in his lap alll day long""" 267 | 2651,0,,"""@shooter Subban.""" 268 | 2661,0,20120619033910Z,"""Hi Kathy.\xa0 Well at least you'd have something to show for your money.\xa0 LOL""" 269 | 2671,1,20120618200932Z,"""Keep telling yourself that, you dumb tea maggot.\xa0 The only true racists left are turds like you and your worthless tea maggot friends.""" 270 | 2681,0,20120620132143Z,"""This will hurt the Rs more than help. To be sure.""" 271 | 2691,0,20120611215102Z,"""You are talking apples and oranges.""" 272 | 2701,0,,"""@thejeanrose\\\\n\\\\n \\\\n\\\\n"" Imprisoning black men disproportionately. Obama does it""\\\\n\\\\n \\\\n\\\\nAre you seriuos?\\\\n\\\\n \\\\n\\\\nThat\\\\\\'s a terribly ignorant statement.""" 273 | 2711,0,20120528042728Z,"""They do that. Even on channels with old tv shows. S01/Ep2. (1960) for example.""" 274 | 2721,0,20120612033701Z,"""WOW my second bias dancing a cover to my ultimate bias song! IU ur too amazing. \xa0And haters it's nothing serious she was just having fun.""" 275 | 2731,1,20120609221256Z,"""fake pic u hoe ass nigga!""" 276 | 2741,0,20120528033032Z,"""Too funny seeing the cop run up all tough.. then jump back like ""WTF!!?""""" 277 | 2751,0,20120612002824Z,"""dude dont wanna hear your pillow talk about double pent.""" 278 | 2761,0,20120502200037Z,"""I wish you could smell it just by clicking on it. ;)""" 279 | 2771,0,20120527190216Z,"""Where, exactly? You are questioning logic without an argument.""" 280 | 2781,1,20120611204113Z,"""Go and tell him to fuck himself, the twat""" 281 | 2791,0,20120612145449Z,"""@NaturalBlues:disqus -- I used the cars from my train set as pretend rollerskates all the time. 
The worst accident though was when I tried to use them to go down the stairs in our apartment complex, not ride down a hill. I think that was an even worse idea than yours.""" 282 | 2801,0,20120527203107Z,"""they make up for 85% of the scoring""" 283 | 2811,0,,"""So unfair again a young mother. I have doubts to me is suspicious as missed a party makes me wonder why?""" 284 | 2821,0,20120528185254Z,"""donnie\xa0 should have taken him out sooner.....True...oh so true....""" 285 | 2831,1,20120611202811Z,"""If you think it's is so insignificant, why are you spending time commenting, unless you are a troll and can't help yourself. Get a life.""" 286 | 2841,1,20120610151744Z,"""Ha ha, Anthony is idiot. \xa0He thinks he knows basketball but is too dumb to know he is dumb.""" 287 | 2851,0,,"""Great review. I would love to try one of these.""" 288 | 2861,0,20120611234528Z,"""waaaah\nbut I promise you're a WINNER\nWe ARE ALL WINNERS!!!\n\nYAY!!!!!\nHIP HIP HOORAY!!!!!!!!!!!!!!!\n\nFeel better now?""" 289 | 2871,0,20120620152110Z,"""the Star box allows you to link your comment to Facebook and Twitter. You can go to your profile and change settings. If you click back on yellow star it will return to grey""" 290 | 2881,0,20120620014647Z,"""ugh they're so fucking flawless and gorgeous. Can I have them both?""" 291 | 2891,1,20120620143048Z,"""Yet here you are, day after day, supporting the paper by clicking on links and making comments, increasing site traffic.\n\nYou just cant cure stupid.""" 292 | 2901,1,20120618221909Z,"""If its so lame why did you read this or for that matter search this topic maybe you should think a little more before you make yourself look like a fucking hippocrate.""" 293 | 2911,1,20120620124806Z,"""I am not ""dude""....you moron! 
Go back to your mothers""" 294 | 2921,0,20120527233726Z,"""Are you in Thailand?""" 295 | 2931,1,20120612153437Z,"""What's funny as hell is Rooster and you being married, retard.""" 296 | 2941,0,20120528210022Z,"""Probably, it is Memorial Day after all, a good joke on Barajas part and a sign of a wounded vet""" 297 | 2951,1,20120620003324Z,"""Taxing people at different levels is not redistribution of wealth you fucking retard.\xa0 That's like saying a fat person is redistributing fat by eating more.""" 298 | 2961,0,,"""\\xc2\\xa0Too many. Like it or not, we're suck with him for another 4 years unless the Republicans can pull a rabbit out of a hat. The best we can hope for is a Republican controlled house and senate.""" 299 | 2971,0,20120619030929Z,"""Racism will die when white people stop being racist.""" 300 | 2981,0,20120611234239Z,"""a lot like you and your life huh?""" 301 | 2991,0,20120529000020Z,"""The first draft dodger who became president was Bill Clinton""" 302 | 3001,0,20120528222634Z,"""Must be what happened when they saw you Soledad.\xa0 Wearing your thong and\xa0 nipple ring as you went to sleep in your fox hole...secretly dreaming of the soldier next to you.""" 303 | 3011,0,20120619014445Z,"""cy said 22\xa0 june wins that is looking\xa0 pretty\xa0 close right now""" 304 | 3021,0,20120514051328Z,"""Hey Wade, why don't stop doing your hater and start argue with arguements? 
Actually, you are not really useful, not at all.""" 305 | 3031,0,20120612123803Z,"""Thats why you use a hollow point.""" 306 | 3041,0,,"""teach your children:TO be a parasitical glommer-onner, a worthless vile wretched puke who does not create a thing, but who loves to lie, swindle, cheat, its way into possession of the works of others and then take over their lives and tell them what is best for them, even if they must slay innocents to do it- takes a special kind of criminally insane hysterical pig freakery that can only be summed up in one word: democrats.\\n""" 307 | 3051,0,20120620043138Z,"""You liberal, socialist, communist democrats better understand what you are wishing for. \xa0If you really believe what you say you believe, why don't you just move to Russia or some other communist country and enjoy the ride there.""" 308 | 3061,1,20120610035135Z,"""i think both of you need to grow up and stop arguing over a fuckin computer all day and do something with your lives like maybe go outside for a change or get out of your moms basement and find a job. :D""" 309 | 3071,0,20120528105428Z,"""Oh dear, now you will be sorry, in fact you will probably wish you had never been born. If you had smuggled it into the UK and were unlucky enough to get caught, you would have got a slapped wrist by one of our weak judges. Asia is different, drugs are not tolerated, there are NO human rights, sentences are long, prisons are real prisons not like the UK world famous holiday camps!!""" 310 | 3081,1,20120320095046Z,"""cockneywideboy\n\nSo your the obese twat taking up all the pavement...get a diet going and come back when you're cockneyslimboy.""" 311 | 3091,0,20120528145250Z,"""Those guys are hardcore! \xa0I usually play as close air support and those dudes ride my shit about going off mission after I blow up whatever command has lased.""" 312 | 3101,0,20120619224519Z,"""Haley these girls are just ribbing you. 
sisterly love is what it is""" 313 | 3111,0,20120609211311Z,"""wow he's fucking sexy i love the white underwear :p""" 314 | 3121,1,20120527190600Z,"""you are an idiot pbgallag. it has been a rough 2012 for halladay. but with the career he has had to say he sucks is ignorant and reflects more on your lack of intelligence.""" 315 | 3131,0,20120610030949Z,"""@EatSleepThunder Mine is OKC in 5""" 316 | 3141,1,20120610014810Z,"""Still here? \xa0You love getting smacked around ,huh? \xa0You just keep coming back for more.""" 317 | 3151,0,20120527165316Z,"""Period at the end of\xa0your sentence.\xa0 :-)""" 318 | 3161,1,20120620022759Z,"""Let me rephrase my initial comment about you and your post;\nPathetic little boy.""" 319 | 3171,0,20120527172907Z,"""He only comes here at times to post""" 320 | 3181,0,20120529141822Z,"""You make a good point. Despite being ""challenged"", she does come here and play along with the idiot.\n\nWhen I do reply, I soften it up a bit.""" 321 | 3191,0,20120529153843Z,"""You is so right""" 322 | 3201,1,,"""S T F U and pay your dues""" 323 | 3211,0,20120528234353Z,"""Didn't take you any time at all to try to turn a solemn day into another attempt at racial conflict, did it?\n\nThe Facts:The earliest known evidence of\xa0Decoration Day observance goes back to various women's auxillary groups in the North and South, when ladies organized events to honor their war dead by decorating graves. The earliest recorded event took place on April 25, 1866 in Columbus, Mississippi when a group of women formed an association to decorate the graves of civil war soldiers, starting with those who died in the Battle of Shiloh.The towns of Macon, Georgia, Columbus, Georgia, and Richmond, Virginia all claim to be the birthplace of Decoration Day, having first celebrated it in 1866. The town of Boalsburg, Pennsylvania claims to have celebrated the first Decoration Day in 1868. Carbondale, Illinois claims to have celebrated it first on April 29, 1866. 
In all, some 25 cities claim to be the birthplace of Decoration Day, most of them in the South.To settle the dispute, President Lyndon Johnson issued a proclaimation in 1966 naming Waterloo, New York to be the official birthplace of Memorial Day.""" 324 | 3221,1,20120619024840Z,"""You stupid racist motherfucker! I bet you'd never say that to a black man or woman't face would you?""" 325 | 3231,1,20120612051630Z,"""You wicked shit, you have the mitigated gall to call the Lord's name in your filthy beer smelling mouth.""" 326 | 3241,0,20120620160236Z,"""det er jo d\xf8nn samme greia.\nfuck verden, jeg leser aldri mer en overskriften, det en min stil, lever litt p\xe5 kanten kan du si. eller?""" 327 | 3251,0,20120619232038Z,"""I think Bon Vivant's go outside every once in a while.""" 328 | 3261,0,20120612072139Z,"""Nanette Lepore:\xa0http://bit.ly/MykDd3\n\nIf you wait a bit, it'll definitely go on sale. It doesn't look very flattering on the hanger so a lot of people pass over it (so says the sales lady who helped me), so you'll be able to get it for less than $398.""" 329 | 3271,1,20120618200053Z,"""OVER promoted idiot \xa0- get your facts right!""" 330 | 3281,1,20120619083143Z,"""What you are, jussi, is a plain old fashioned IDIOT. I am no fan of the USA any more than I am a fan of airchair bigot warriors like yourself.\n\n\nI am talking about the politics inherent in warfare AND made a comment as to why Hamas chooses NOW to accept responsibility openly, you fool.\n\nThis is the area I am trained in, political science. 
Not superstition and religiously-motivated jargon, like you.""" 331 | 3291,0,20120528221638Z,"""nah I will take it over being a giants fan\n\nvery successful in the short time we have had as a franchise""" 332 | 3301,0,20120611214921Z,"""Why is she wearing a sea urchin on her head?""" 333 | 3311,1,,"""i will fuckin kill u in real life track u down than shove a katana u ur ass just bcuz we asians r smarted than u dicks doesnt mean u could beat him up i swear i will be at chicago in 2 hours""" 334 | 3321,0,20120530041600Z,"""I agree what is being said by the SEC but Muschamp and Pinkel, really???\n\nThose two are bums.\n\nAny decent coach would have won with Florida's talent, Muschamp is a terrible coach.""" 335 | 3331,0,,"""The Ronald Reagan Alumni Association? Your kidding right? I think I'm gonna puke!""" 336 | 3341,1,20120320052024Z,"""hey cock sucker i have a decent job so before you go running that big dick sucker of yours get the facts straight and if you were actually a hard ass you would come say this shit to my face you pansy piece of shit next time you want to talk shit to me post it with your mother fucking name nigga""" 337 | 3351,0,,"""I too am guilty of over moisturizing my hair. Its hard to get that balance sometimes. I started using protein based conditioners such as Aubrey Organics GPB weekly and heavy duty protein treatments every 6 weeks and that's been helping a lot. """ 338 | 3361,0,20120618234615Z,"""They prefer it when they're working for the police, or when they're not the target of the police investigation. \xa0But... \xa0:)""" 339 | 3371,0,,"""Dont mention social security or they will arrive by the coach load.""" 340 | 3381,0,20120620010941Z,"""@Nonstopdrivel\xa0\xa0@Natesweet\xa0\xa0@LKP No Limits\xa0\nAppeal to authority is a classic logical fallacy.""" 341 | 3391,1,20120618194243Z,"""do you have any proof that Romney stole anything? 
Or are you just a stupid libtard troll?""" 342 | 3401,0,20120529011350Z,"""@Savage_Beast\xa0\xa0@Marbbles\xa0dude's gonna be a hell of a defenseman. \xa0Him and McBain are going to be a real solid pairing for the Canes. \xa0They're gonna be pretty good in a few years, they got a lot of young talent""" 343 | 3411,0,20120502192711Z,"""He killed himself like a little sissy.\n\nGOOD RIDDANCE SCUMBAG""" 344 | 3421,1,20120619031813Z,"""make your mother stop spreading Herpes""" 345 | 3431,0,20120611180304Z,"""hows come you don't get nobody clicking your like button? \xa0That should be something you think about alot, it means something, it means you are wrong.""" 346 | 3441,0,20120611223957Z,"""360 Balls on your chin""" 347 | 3451,0,20120609210845Z,"""Why not vote Libertarian? Most of your progressive agenda will be fulfilled\n\nHilarious.""" 348 | 3461,1,,"""Yeah you probably havew a dick to you ugly fucking monkey black whore, you are not even a women to me, no black women are I think of you as monkeys that should be in the zoo. Now go get your cunt sterilized slus so you dont make any more nixxers""" 349 | 3471,0,20120619233540Z,"""Obama does not believe in our three separate but equal branches of governent.""" 350 | 3481,0,20120515164922Z,"""Zooey, you are NEVER going to be cast as Power Girl. Stop wearing her costume knock-offs. Also, the nude bra is distracting. It's like a fungus or lichen edging toward her trachea. 
Worrisome.""" 351 | 3491,0,20120611235036Z,"""Sorry your dad didn't make parole.""" 352 | 3501,0,20120530115615Z,"""<{:-)\xa0 Now you're just teasing me.""" 353 | 3511,0,20120619024133Z,"""Leecherius Collapse \n The higher you rise , the more isolated you become..===============One liner BS...""" 354 | 3521,0,,"""Great idea!\\xc2\\xa0 Who the hell empowered free speech or believe it serves any real purpose?\\xc2\\xa0 \\nConsider the fact our president and governors, their cabinet czars, our members of congress and state legislatures have exempted themselves from accountability for what they do AND set their own rules!\\nWE vote for such royalty so obviously we love what we have!\\nStop complaining for what you voted for!\\xc2\\xa0 \\nGet on our knees, kiss ass, and praise the lords we elect!""" 355 | 3531,1,20120610033411Z,"""Actually just got told to get to my bed. Fuck off Jackie ya old cow.""" 356 | 3541,0,20120529223527Z,"""Here's an idea.....Why don't we get the h ell out of their country.""" 357 | 3551,0,,"""You rock my world Alice.""" 358 | 3561,1,20120609191838Z,"""Maybe you are the braindead moron, religious bigot.""" 359 | 3571,1,20120529172557Z,"""your ego makes you less qualified.""" 360 | 3581,0,20120620124033Z,"""The greatest generation that won World War II and saved our nation and the world from Hitler are the old white men and women.""" 361 | 3591,0,20120611104900Z,"""sure u are the baddest nigger""" 362 | 3601,0,20120529211838Z,"""Not all of us are...only some, dead and alive.\xa0 When your leader is a loser, then everyone is lost!""" 363 | 3611,0,20120529041257Z,"""The article really doesn't say much. Israel has an increasing number of illegal aliens\xa0to deal with. Got it. 
\n\nAppears that author Ghitis just felt compelled by her congenital left wing bias to write something that almost sounds negative about Israel\xa0as propaganda intended to morally normalize what moslems are doing every day in places like Syria.""" 364 | 3621,0,20120610135343Z,"""They're ALL taking orders from the Globalist bankers because they are ALL bought and paid for. But you've obviously been very well programmed to be a seriously nasty opponent of ALL things American...you're an idiot plain and simple.""" 365 | 3631,0,20120611184448Z,"""More jurk off material for Apple fan boys. Those not living in mom's basement could care less""" 366 | 3641,1,20120612145516Z,"""Looks like the\xa0dipshat left the building.\xa0 What a joke.\xa0 Go back to where you came from Bob the slob.""" 367 | 3651,0,20120611215900Z,"""A Sony troll?? That's something new. How are you dude?""" 368 | 3661,0,,"""The reason most people are gay is because they are to ugly or stupid to find a mate of the opposite sex. Fundmentals of science, the preservation of the species does not lend its self well to qweers and lezzies. You are obomination before nature, To bad your parents were not gay.""" 369 | 3671,1,20120620155440Z,"""You can't see it because you're a clueless idiot. Show me how he is helping the middle class you stupid Obama semen slurper""" 370 | 3681,0,,"""The day when the Sharia or the book of Leviticus becomes law you will be right. For now we have laws made by man and these state that being gay is legal. Don't like it? Move to Iran. They say over there that there are no gays there,.""" 371 | 3691,1,20120612144047Z,"""And what are you going to do tough guy?""" 372 | 3701,0,,"""@daddymention thanks man.""" 373 | 3711,1,20120320172805Z,"""hahaha wtf? 
""jesus christ"" you retard, don't embarrass yourself on the internet.""" 374 | 3721,0,,"""but remember....this is the same guy who's logic said if we\\xc2\\xa0spend\\xc2\\xa0trillions of dollars, we would subtract from the debt.....and, if we divided wealth, it would multiply......and, if we Mirandized\\xc2\\xa0terrorists\\xc2\\xa0we wouldn't have to be cruel and blow them up silently from the skies\\xc2\\xa0\\n\\nthe man is a psychopath\\xc2\\xa0""" 375 | 3731,1,20120619221350Z,"""""Conservative"" scum bag retort to any criticism of Grifter Queen Bitch:\n\n\nWAAAAA!!!!!!!! They're reporting on stuff!!!! Evil Libruls!!\xa0\n\nAre you Tina Tampa? You must be....""" 376 | 3741,1,20120619235827Z,"""it's shit, I dont think my comment will get here. But its not free and comes with lots of crap you dont need. You M F""" 377 | 3751,1,,"""BITCH ASS NIGGA, WE KNOW YOU A COWARD, YOU SCARED TO PUT UP A PROFILE PIC! GET THE FUCK OUT HERE!""" 378 | 3761,0,20120528231014Z,"""I'll take your word for it.\n\nJust out of dumbfounded curiosity one time, I took the figures for ""existing nuclear weapons"" and divided it by time.\n\nI found that, at that time, there were enough nuclear weapons on Earth to detonate one each second for thirteen hours ... 
and more (I no longer remember the exact amount of time.)\n\nWe had enough nuclear weapons to detonate one every second of every minute for over thirteen hours.\n\nIt was about then that I became clear what ""insanity"" was.""" 379 | 3771,0,20120515214423Z,"""You know your history...""" 380 | 3781,0,,"""1) This is a sham to begin with, because there is no law in existence which keeps homosexual people from spending their lives together in anyway they see fit -- this is a manufactured ""injustice"" in attempt to force people to call them ""normal"".\\n2) To answer your question directly, once you blur the lines to make one group of people feel better about themselves ( pretend that men ""marrying"" men or women ""marrying"" women is normal ), then who is the next group waiting for their chance to blur the lines further?\\n\\nThe question really becomes, why have any lines at all?\\xc2\\xa0 What is marriage after all?\\n""That is why a man leaves his father and mother and is united to his wife, and they become one flesh.""\\xc2\\xa0 Genesis 2:24\\nWhether you regard the Bible as a moral authority, it does show that the concept of marriage was between a man and a woman as far back as at least 6000 years -- I think that makes it fairly well established.\\n\\nThere are a lot of people in the world who desire evil -- people who want to live their lives pursuing ""feeling good"" -- these people use drugs, child molestation, rape -- anything that makes them feel good for the moment.\\xc2\\xa0 How is that different from homosexuality?\\xc2\\xa0 This is something that is clearly not by design, yet it makes you feel good -- so do you do what\\'s right or do you do what makes you feel good?\\nIn your pursuit of doing what makes you feel good rather than what is right, you do it long enough, and then you say that guilty feeling is because of other people and that you need to force them to change because after all, it\\'s totally normal that you are doing what you are doing ( 
rationalization ).\\n\\nYou may not believe in an ultimate authority, but what happens when you have helped convert the whole world to your way of thinking -- reduced any shred of morality to the lowest common denominator?\\xc2\\xa0 When all of us are free to pursue whatever lust happens to pop into our hearts when we wake up in the morning?\\xc2\\xa0 It\\'s already happening now really -- schools are pushing harder and harder for younger and younger kids to be ""instructed"" in every last detail of the most pornographic forms of sex -- desensitizing the nation.\\xc2\\xa0 Why?""" 381 | 3791,1,,"""haha you don't own an iphone? man your fucking poor""" 382 | 3801,1,20120619143902Z,"""I SEE YOU ARE DETALINGING WHAT HAPPENED IN YOUR FAMILY, YOU FORGET THE PART WHERE YOUR FATHER ATE THE EBST PART OF YOU OFF YOUR MOTHERS AZZ_CRACK""" 383 | 3811,0,20120610191029Z,"""it say something about them 2 coward ass cops...you cant arrest 1 man fucking pussies""" 384 | 3821,1,20120610155224Z,"""Another loser comment.\xa0 I'm am honored that everyone gets to see what an as%hole you really are.\xa0 But I'm sure your co - workers at McDonald's already knew that.""" 385 | 3831,0,,"""@eldlazar@matt48 ""An uncle raping a niece? In Texas? Couldn\\\\\\'t happen.""\\\\n\\\\nOh it happens -- but in Texas they call it a ""family reunion"".""" 386 | 3841,0,20120528051657Z,"""That is totally correct. Hitler could never have done what he did without the approvel of most of most Germans.""" 387 | 3851,1,20120618221650Z,"""YOU SHUT YOUR FUCKlNG MOUTH YOU DlRTY WHORlNG DRUNK BlTCH""" 388 | 3861,0,20120515205037Z,"""konstigt n\xe4r barca inte sl\xe4pper bollen utan st\xe5r d\xe4r och passar bollen till varandra i 89 min och sen kommer p\xe5 Oh shit vi m\xe5ste g\xf6ra m\xe5l ocks\xe5, och d\xe5 f\xe5r dem panik sen kommer messi och s\xe4tter bollen i n\xe4tet 2 min i fr\xe5n matchslut d\xe5 \xe4r det ganska sv\xe5rt att sl\xe4ppa in m\xe5l s\xe5... 
jag f\xf6rst\xe5r att han f\xe5r det d\xe4r j\xe4vla priset vart enda j\xe4vla \xe5r.""" 389 | 3871,0,20120528105319Z,"""Oh dear, now you will be sorry, in fact you will probably wish you had never been born. If you had smuggled it into the UK and were unlucky enough to get caught, you would have got a slapped wrist by one of our weak judges. Asia is different, drugs are not tolerated, there are NO human rights, sentences are long, prisons are real prisons not like the UK world famous holiday camps!!""" 390 | 3881,0,,"""Love another?.. But don't love another more than we love our gawd, or else he'll throw us to hell for all eternity - as jesus says in the bible, multiple times.\\n\\nBut don't love gay people. Kill them. Kill them all. Kill all nonbelievers as well. And witches. And servants of other gods. Disobeying children? Must be slaughtered. Children who call our prophets bald? Must be torn apart by bears sent by god. Adulterers? Pfft, them too. And so on, ad infinitum. Kill everyone. Except the mindless zealots.\\n\\nPlease. The intellectual inconsistency you demonstrate here makes me dizzy.\\n\\nIt was SECULARISM and HUMANIST IDEAS that pulled our little society out from the muck of religious people - like you, and worse.\\n\\nDon't you dare to tell me, or any other human being that we were born faulty, sinful, worthless.""" 391 | 3891,0,20120502183946Z,"""Sounds like you should dump him.""" 392 | 3901,0,20120610041303Z,"""A hammer. A hammer works. Break in with one of those hard rubber ones, if you're a sensitive sort.""" 393 | 3911,0,20120610155951Z,"""I can't bash you on the fact you get to spend your time around (usually) attractive, (generally) unintelligent females (at least around here they are, usually give damn good head, though). However, most of the male cheerleaders I know are skinny SOBs, same with the chicks. 
Throwing a hundred and fifty pound dude in the air, or a hundred and twenty pound chick, is not the same as pushing/pulling around a three or four (or more) hundred pound barbell.\n\n\nSo please, you've got what you do, we've got what we do. Don't act fucking superior and most of us wont either.""" 394 | 3921,0,20120609221726Z,"""by your logic also legalize Heroin.""" 395 | 3931,1,20120618222326Z,"""You goofy Repub thugs don't have a clue, not the tiniest shred of a fact on how health care works in most other countries around the globe. The US is the only developed country without universal, single-payer or socialized medical care for all its citizens. Some systems differ, but all achieve near 100% access to a doctor and hospital at way lower cost than we here in The United States of America. In some countries, people pay some small amount per doctor visit, like a co-pay. In other countries, you are covered, no co-pay. In most countries, the option to have private health insurance is always an option. This gives the wealthy or well-heeded access to some private doctors who want to only see rich patients. Their choice. \nBut no one, no mom, dad, child, uncle, aunt, grandma, grandpa, sister, brother, business associate, boss, garbage man, ditch digger, or nuclear physicist goes bankrupt from a disease or medical condition like here in the USA. You need not fear the collection agency coming after you because you fell and broke your arm and had no insurance. No insurance company denies your claim or drops your coverage after finding out you have cancer or heart disease or MS. Get it? \n\nWe are in the dark ages of medical care in the US and the Neanderthal republicans want to keep it that way because their wealthy benefactors in the giant Insurance-Pharma industrial complex like it that way. 
These craven business people who lust after profits like an alcoholic lusts for a drink, make huge boatloads of cash and mega profits off us and do not want that gravy train to stop.""" 396 | 3941,0,,"""@Sara Besleaga Griji, doruri sau dorin\\xc8\\x9be... Au \\xc8\\x99i copiii mai mici ! :)\\n\\nMul\\xc8\\x9bumesc de urare, \\xc8\\x99i \\xc8\\x9bie la fel, Sara, c\\xc4\\x83 avem nevoie !""" 397 | -------------------------------------------------------------------------------- /figures/bag_of_words.svg: -------------------------------------------------------------------------------- 1 | 2 | image/svg+xmlThis is how you get ants.” 537 | [0, …, 0, 1, 0, … , 0, 1 , 0, …, 0, 1, 0, …., 0 ] 556 | ants 573 | get 590 | you 607 | aardvak 624 | zyxst 641 | ['this','is','how','you','get', 'ants'] 686 | tokenizer 725 | Sparse matrix encoding 742 | Build a vocabulary over all documents 770 | ['aardvak','amsterdam','ants', ...'you','your', 'zyxst'] 813 | -------------------------------------------------------------------------------- /figures/randomized_search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/mlss_2015/063b252c51701510c4e9f2ba4b06b5339ee3a7e9/figures/randomized_search.png -------------------------------------------------------------------------------- /intro_to_structured_prediction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amueller/mlss_2015/063b252c51701510c4e9f2ba4b06b5339ee3a7e9/intro_to_structured_prediction.pdf -------------------------------------------------------------------------------- /solutions/cross_validation_iris.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_iris 2 | from sklearn.cross_validation import StratifiedKFold, KFold 3 | iris = load_iris() 4 | X, y = iris.data, iris.target 5 | 6 | 
print(cross_val_score(LinearSVC(), X, y, cv=KFold(len(X), 3))) 7 | print(cross_val_score(LinearSVC(), X, y, cv=StratifiedKFold(y, 3))) 8 | -------------------------------------------------------------------------------- /solutions/digits_tsne.py: -------------------------------------------------------------------------------- 1 | from sklearn.manifold import TSNE 2 | tsne = TSNE() 3 | X_tsne = tsne.fit_transform(X) 4 | plt.title("All classes") 5 | plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y) 6 | -------------------------------------------------------------------------------- /solutions/grid_search_forest.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import RandomForestClassifier 2 | 3 | param_grid = {'max_depth': [1, 3, 5, 7, 10], 'max_features': [5, 8, 10, 20]} 4 | 5 | grid = GridSearchCV(RandomForestClassifier(), param_grid=param_grid) 6 | grid.fit(X_train, y_train) 7 | print("best parameters: %s" % grid.best_params_) 8 | print("Training set accuracy: %s" % grid.score(X_train, y_train)) 9 | print("Test set accuracy: %s" % grid.score(X_test, y_test)) 10 | 11 | scores = [x.mean_validation_score for x in grid.grid_scores_] 12 | scores = np.array(scores).reshape(5, 4) 13 | plt.matshow(scores) 14 | plt.xlabel("max_features") 15 | plt.ylabel("max_depth") 16 | -------------------------------------------------------------------------------- /solutions/letters_graph_crf.py: -------------------------------------------------------------------------------- 1 | from pystruct.models import GraphCRF, EdgeFeatureGraphCRF 2 | 3 | def make_edges(n_nodes): 4 | return np.c_[np.arange(n_nodes - 1), np.arange(1, n_nodes)] 5 | 6 | X_graph = np.array([(x, make_edges(len(x))) for x in X]) 7 | X_graph_train, X_graph_test = X_graph[folds == 1], X_graph[folds != 1] 8 | 9 | 10 | graph_model = GraphCRF(inference_method="max-product", directed=True) 11 | ssvm = FrankWolfeSSVM(model=graph_model, C=.1, max_iter=11) 12 | 
ssvm.fit(X_graph_train, y_train) 13 | print("score with GraphCRF %f" % ssvm.score(X_graph_test, y_test)) 14 | 15 | 16 | X_edge_features = np.array([(x, make_edges(len(x)), np.ones(len(x)) - 1)[:, np.newaxis] for x in X]) 17 | X_edge_features_train, X_edge_features_test = X_edge_features[folds == 1], X_edge_features[folds != 1] 18 | 19 | edge_feature_model = EdgeFeatureGraphCRF(inference_method="max-product") 20 | ssvm = FrankWolfeSSVM(model=edge_feature_model, C=.1, max_iter=11) 21 | ssvm.fit(X_edge_features_train, y_train) 22 | print("score with GraphCRF %f" % ssvm.score(X_edge_features_test, y_test)) 23 | -------------------------------------------------------------------------------- /solutions/pipeline_iris.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_iris 2 | from sklearn.feature_selection import SelectKBest 3 | from sklearn.pipeline import make_pipeline 4 | from sklearn.svm import LinearSVC 5 | 6 | rng = np.random.RandomState(42) 7 | iris = load_iris() 8 | X = np.hstack([iris.data, rng.uniform(size=(len(iris.data), 5))]) 9 | X_train, X_test, y_train, y_test = train_test_split(X, iris.target, random_state=2) 10 | 11 | selection_pipe = make_pipeline(SelectKBest(), LinearSVC()) 12 | param_grid = {'linearsvc__C': 10. 
** np.arange(-3, 3), 13 | 'selectkbest__k': [1, 2, 3, 4, 5, 7]} 14 | grid = GridSearchCV(selection_pipe, param_grid, cv=5) 15 | grid.fit(X_train, y_train) 16 | print("Best parameters: %s" % grid.best_params_) 17 | print("Test set performance: %s" % grid.score(X_test, y_test)) 18 | -------------------------------------------------------------------------------- /solutions/text_pipeline.py: -------------------------------------------------------------------------------- 1 | from sklearn.pipeline import Pipeline 2 | from sklearn.grid_search import GridSearchCV 3 | 4 | pipeline = Pipeline([('vectorizer', cv), ('classifier', svm)]) 5 | pipeline.fit(text_train, y_train) 6 | print("Pipeline test score: %f" % pipeline.score(text_test, y_test)) 7 | 8 | param_grid = {'classifier__C': 10. ** np.arange(-3, 3)} 9 | 10 | grid_search = GridSearchCV(pipeline, param_grid=param_grid) 11 | grid_search.fit(text_train, y_train) 12 | print("best parameters : %s" % grid_search.best_params_) 13 | print("Grid-searched test score: %f" % grid_search.score(text_test, y_test)) 14 | 15 | 16 | param_grid = {'classifier__C': 10. 
** np.arange(-3, 3), 17 | "vectorizer__ngram_range": [(1, 1), (1, 2), (2, 2)]} 18 | grid_search = GridSearchCV(pipeline, param_grid=param_grid, n_jobs=3) 19 | grid_search.fit(text_train, y_train) 20 | 21 | print("best parameters with n-gram search: %s" % grid_search.best_params_) 22 | print("test set score with n-gram search: %s" % grid_search.score(text_test, y_test)) 23 | -------------------------------------------------------------------------------- /solutions/train_iris.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_iris 2 | from sklearn.neighbors import KNeighborsClassifier 3 | 4 | iris = load_iris() 5 | X, y = iris.data, iris.target 6 | 7 | print("Dataset size: %d number of features: %d number of classes: %d" 8 | % (X.shape[0], X.shape[1], len(np.unique(y)))) 9 | 10 | X_train, X_test, y_train, y_test = train_test_split(X, y) 11 | 12 | knn = KNeighborsClassifier(n_neighbors=3) 13 | knn.fit(X_train, y_train) 14 | 15 | print("test set score of knn: %f" % knn.score(X_test, y_test)) 16 | --------------------------------------------------------------------------------