├── .DS_Store ├── README.md ├── main.py ├── output ├── code.png ├── gen.png └── pub.ipynb └── test.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utility-code/code_to_readme/13234ac1d1968bb81f0784156c7151fe7b5f39cf/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # README 2 | ## Contents 3 | 1. THE README FOR THIS PROGRAM IS ALSO GENERATED BY THE CODE ITSELF. 4 | 5 | 2. ADDS ARGUMENTS 6 | 7 | 3. FIND THE SIDE COMMENTS AND CREATE CONTENTS LIST 8 | 9 | 4. FORMAT THE FILE AND SAVE 10 | 11 | 5. MAIN FUNCTION 12 | 13 | ## Docstrings 14 | >This program converts all the comments in your code to a README.md file and saves a lot of your time provided you write comments in your code. It is obvious how easy it is to actually generate the README.md file and save your time. Using the argparse module to get the file path. Note that to run the file 15 | ```bash 16 | python main.py -p 17 | ``` 18 | >For now this program only supports python. 19 | ## Side comments 20 | - Initial file 21 | 22 | - Output file 23 | 24 | **GENERATED README** 25 | ![generatedoutput](output/gen.png) 26 | **INITIAL CODE** 27 | ![initialcode](output/code.png) 28 | 29 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This program converts all the comments in your code to a README.md file and saves a lot of your time provided you write comments in your code. It is obvious how easy it is to actually generate the README.md file and save your time. Using the argparse module to get the file path. 
Note that to run the file 3 | ```bash 4 | python main.py -p 5 | ``` 6 | 7 | ''' 8 | #The readme for this program is also generated by the code itself except the images added. 9 | import argparse 10 | import re 11 | 12 | #adds arguments 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument("-p", type=str, help="enter the file path") 15 | args = parser.parse_args() 16 | file = open(args.p, 'r') 17 | f = file.readlines() 18 | 19 | #To find Block Comments 20 | def blockComm(): 21 | p = [(x, f[x][0]) for x in range(len(f))] 22 | fip = [] 23 | for a in range(len(p)): 24 | try: 25 | if p[a][1] == "'": 26 | fip.append(a) 27 | except IndexError: 28 | pass 29 | 30 | fip = [[fip[x], fip[x + 1]] for x in range(0, len(fip) - 1, 2)] 31 | contents = [] 32 | sidecomm = [] 33 | blockcomm = [''.join(f[x[0] + 1:x[1]]).strip() for x in fip] 34 | return blockcomm 35 | ''' 36 | For now this program only supports python. 37 | ''' 38 | 39 | # find the side comments and create contents list 40 | def content_sidecomm(): 41 | contents = [] 42 | sidecomm = [] 43 | for a in range(0, len(f)): 44 | finder = f[a].find('#') 45 | try: 46 | if finder != -1: 47 | if finder == 0: 48 | # fin_out.write('### {}\n'.format(f[a][1::]).upper()) 49 | contents.append(f[a][1::].upper()) 50 | else: 51 | # fin_out.write('> {}\n'.format(f[a][finder+1::]).capitalize()) 52 | sidecomm.append(f[a][finder + 1::].capitalize()) 53 | except IndexError: 54 | pass 55 | 56 | return [contents, sidecomm] 57 | 58 | # Format the file and save 59 | def format_and_save(): 60 | #initial file 61 | fin_out = open('README.md', 'w+') #output file 62 | fin_out.write('# README\n') 63 | fin_out.write('## Contents\n') 64 | cs = content_sidecomm() 65 | for a in range(len(cs[0])): 66 | fin_out.write('{}. 
{}\n'.format(a+1,cs[0][a])) 67 | fin_out.write('## Docstrings\n') 68 | bc = blockComm() 69 | for a in range(len(bc)): 70 | fin_out.write('>{}\n'.format(bc[a])) 71 | 72 | fin_out.write('## Side comments\n') 73 | for a in range(len(cs[1])): 74 | fin_out.write('- {}\n'.format(cs[1][a])) 75 | 76 | 77 | fin_out.close() 78 | 79 | # Main function 80 | format_and_save() 81 | -------------------------------------------------------------------------------- /output/code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utility-code/code_to_readme/13234ac1d1968bb81f0784156c7151fe7b5f39cf/output/code.png -------------------------------------------------------------------------------- /output/gen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utility-code/code_to_readme/13234ac1d1968bb81f0784156c7151fe7b5f39cf/output/gen.png -------------------------------------------------------------------------------- /output/pub.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "pub.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "wcSDC5SBH4n4", 21 | "colab_type": "text" 22 | }, 23 | "source": [ 24 | "Intro, Problems, Future Scope" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "metadata": { 30 | "id": "h86K2uTXFty7", 31 | "colab_type": "code", 32 | "outputId": "29a4f477-0236-41e2-c013-03bb7de577ab", 33 | "colab": { 34 | "base_uri": "https://localhost:8080/", 35 | "height": 76 36 | } 37 | }, 38 | "source": [ 39 | "from google.colab import drive\n", 40 | "drive.mount('/gdrive')\n", 41 | "%cd /gdrive" 
42 | ], 43 | "execution_count": 89, 44 | "outputs": [ 45 | { 46 | "output_type": "stream", 47 | "text": [ 48 | "Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount(\"/gdrive\", force_remount=True).\n", 49 | "/gdrive\n" 50 | ], 51 | "name": "stdout" 52 | } 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "metadata": { 58 | "id": "wh9yqQVBF2o9", 59 | "colab_type": "code", 60 | "colab": {} 61 | }, 62 | "source": [ 63 | "import pandas as pd\n", 64 | "import numpy as np\n", 65 | "import re\n", 66 | "import nltk\n", 67 | "from nltk.tokenize import word_tokenize \n", 68 | "from nltk.corpus import stopwords\n", 69 | "stop_words = set(stopwords.words('english'))\n", 70 | "from keras.models import Sequential\n", 71 | "from keras.layers import Dense\n", 72 | "from keras.layers import Flatten\n", 73 | "from keras.layers.embeddings import Embedding" 74 | ], 75 | "execution_count": 0, 76 | "outputs": [] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "metadata": { 81 | "id": "OacNAx1vF2mZ", 82 | "colab_type": "code", 83 | "outputId": "cb69303e-cfab-4125-9c14-3bc4a2a0baf6", 84 | "colab": { 85 | "base_uri": "https://localhost:8080/", 86 | "height": 102 87 | } 88 | }, 89 | "source": [ 90 | "nltk.download('stopwords')\n", 91 | "nltk.download('punkt')" 92 | ], 93 | "execution_count": 91, 94 | "outputs": [ 95 | { 96 | "output_type": "stream", 97 | "text": [ 98 | "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", 99 | "[nltk_data] Package stopwords is already up-to-date!\n", 100 | "[nltk_data] Downloading package punkt to /root/nltk_data...\n", 101 | "[nltk_data] Package punkt is already up-to-date!\n" 102 | ], 103 | "name": "stdout" 104 | }, 105 | { 106 | "output_type": "execute_result", 107 | "data": { 108 | "text/plain": [ 109 | "True" 110 | ] 111 | }, 112 | "metadata": { 113 | "tags": [] 114 | }, 115 | "execution_count": 91 116 | } 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "metadata": { 122 | "id": "RM6WjSnjF2rp", 
123 | "colab_type": "code", 124 | "colab": {} 125 | }, 126 | "source": [ 127 | "data = pd.read_csv('/gdrive/My Drive/Colab_ML/publish/init_data.csv')\n", 128 | "data = data.head(20000)" 129 | ], 130 | "execution_count": 0, 131 | "outputs": [] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "metadata": { 136 | "id": "XjK94dtIGQcs", 137 | "colab_type": "code", 138 | "outputId": "2352d540-c6af-44e0-961a-1bcc263914cc", 139 | "colab": { 140 | "base_uri": "https://localhost:8080/", 141 | "height": 563 142 | } 143 | }, 144 | "source": [ 145 | "data.tail(10)" 146 | ], 147 | "execution_count": 93, 148 | "outputs": [ 149 | { 150 | "output_type": "execute_result", 151 | "data": { 152 | "text/html": [ 153 | "
\n", 154 | "\n", 167 | "\n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | "
Unnamed: 0urltitlescore
1999019990http://pycon.blogspot.com/2012/08/pycon-us-201...PyCon 2013 March 13-21, Santa Clara - Keynotes...69
1999119991http://forum.dlang.org/thread/ko7qgc$14i1$1@di...DMD 2.063 released with 260 bugfixes and enhan...69
1999219992http://spyrestudios.com/mega-collection-of-che...Cheatsheets for Designers & Developers69
1999319993http://www.llvm.org/releases/2.8/docs/ReleaseN...LLVM 2.8 Released69
1999419994http://www.kurzweilai.net/a-cure-for-type-1-di...A cure for type 1 diabetes in dogs69
1999519995http://www.youtube.com/watch?v=qd95SrdNkgEConstruder - A Minecraft-inspired OpenGL game ...69
1999619996http://www.theregister.co.uk/2011/10/24/email_...The Register accidentally emails 46,524 user d...69
1999719997https://thelab.o2.com/2013/05/my-experience-wi...My experience with Firefox OS69
1999819998http://www.kansas.com/news/politics-government...WSU statistician sues seeking Kansas voting ma...69
1999919999http://www.reddit.com/r/IAmA/comments/t1ygb/ia...Nobel Prize-winning economist Paul Krugman is ...69
\n", 250 | "
" 251 | ], 252 | "text/plain": [ 253 | " Unnamed: 0 ... score\n", 254 | "19990 19990 ... 69\n", 255 | "19991 19991 ... 69\n", 256 | "19992 19992 ... 69\n", 257 | "19993 19993 ... 69\n", 258 | "19994 19994 ... 69\n", 259 | "19995 19995 ... 69\n", 260 | "19996 19996 ... 69\n", 261 | "19997 19997 ... 69\n", 262 | "19998 19998 ... 69\n", 263 | "19999 19999 ... 69\n", 264 | "\n", 265 | "[10 rows x 4 columns]" 266 | ] 267 | }, 268 | "metadata": { 269 | "tags": [] 270 | }, 271 | "execution_count": 93 272 | } 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": { 278 | "id": "cJ-y7Wa8EVnT", 279 | "colab_type": "text" 280 | }, 281 | "source": [ 282 | "# Regex to extract companies from url\n" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "metadata": { 288 | "id": "FC3spUDEGORy", 289 | "colab_type": "code", 290 | "colab": {} 291 | }, 292 | "source": [ 293 | "regex = '.*://(.[^/]+)\\.?'\n", 294 | "counter = 0\n", 295 | "for a in data['url']:\n", 296 | " try:\n", 297 | " temp = re.findall(regex,a)[0]\n", 298 | " except Exception as e:\n", 299 | " print(a)\n", 300 | " print(e)\n", 301 | " try:\n", 302 | " temp = temp.replace('www.','')\n", 303 | " temp = temp.replace('-','')\n", 304 | " except Exception as e:\n", 305 | " print(e)\n", 306 | "# print(temp)\n", 307 | " temp = ''.join(temp.split('.')[:-1])\n", 308 | " data.loc[data.index[counter], 'publisher'] = temp\n", 309 | " counter+=1\n", 310 | " " 311 | ], 312 | "execution_count": 0, 313 | "outputs": [] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "metadata": { 318 | "id": "JpjLA3ONGOUn", 319 | "colab_type": "code", 320 | "outputId": "49ef45bb-724a-4b47-c543-7f3bb0a80280", 321 | "colab": { 322 | "base_uri": "https://localhost:8080/", 323 | "height": 204 324 | } 325 | }, 326 | "source": [ 327 | "data['publisher'].head(10)" 328 | ], 329 | "execution_count": 95, 330 | "outputs": [ 331 | { 332 | "output_type": "execute_result", 333 | "data": { 334 | "text/plain": [ 335 | "0 alsoplouie\n", 336 
| "1 torrentfreak\n", 337 | "2 devstand\n", 338 | "3 erlangfactory\n", 339 | "4 businessinsider\n", 340 | "5 zorter\n", 341 | "6 blogtippingpointlabs\n", 342 | "7 youtube\n", 343 | "8 thebottomlinecpaaustraliacom\n", 344 | "9 codebrief\n", 345 | "Name: publisher, dtype: object" 346 | ] 347 | }, 348 | "metadata": { 349 | "tags": [] 350 | }, 351 | "execution_count": 95 352 | } 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": { 358 | "id": "Hs3CdNipEcEs", 359 | "colab_type": "text" 360 | }, 361 | "source": [ 362 | "# Drop useless columns" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "metadata": { 368 | "id": "NKRo5Va1GOXT", 369 | "colab_type": "code", 370 | "colab": {} 371 | }, 372 | "source": [ 373 | "data = data.drop(['url','Unnamed: 0','score'],axis = 1)" 374 | ], 375 | "execution_count": 0, 376 | "outputs": [] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "metadata": { 381 | "id": "XAWAO6lkgxd9", 382 | "colab_type": "code", 383 | "colab": {} 384 | }, 385 | "source": [ 386 | "# data = data.loc[data.publisher.value_counts()[data.publisher.value_counts()>50] ]" 387 | ], 388 | "execution_count": 0, 389 | "outputs": [] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "metadata": { 394 | "id": "ZhznXecAF22L", 395 | "colab_type": "code", 396 | "colab": {} 397 | }, 398 | "source": [ 399 | "counter = 0\n", 400 | "for a in data['title']:\n", 401 | " word_tokens = word_tokenize(a) \n", 402 | " filtered_sentence = [w for w in word_tokens if not w in stop_words] \n", 403 | " \n", 404 | " filtered_sentence = [] \n", 405 | " \n", 406 | " for w in word_tokens: \n", 407 | " if w not in stop_words: \n", 408 | " filtered_sentence.append(w)\n", 409 | " \n", 410 | " filtered_sentence = ' '.join(filtered_sentence)\n", 411 | " filtered_sentence = re.sub(r'[^a-zA-Z\\d\\s]', '', filtered_sentence)\n", 412 | " filtered_sentence = filtered_sentence.lower()\n", 413 | " data.loc[data.index[counter], 'title'] = filtered_sentence\n", 414 | " 
counter+=1\n" 415 | ], 416 | "execution_count": 0, 417 | "outputs": [] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "metadata": { 422 | "id": "RKC3KxgNF23R", 423 | "colab_type": "code", 424 | "colab": {} 425 | }, 426 | "source": [ 427 | "data.to_csv('/gdrive/My Drive/Colab_ML/publish/cleaned.csv')" 428 | ], 429 | "execution_count": 0, 430 | "outputs": [] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "metadata": { 435 | "id": "iUoRYE8ULpRp", 436 | "colab_type": "code", 437 | "outputId": "a6fe0f77-f1df-44e8-b6cd-803a2b08c716", 438 | "colab": { 439 | "base_uri": "https://localhost:8080/", 440 | "height": 204 441 | } 442 | }, 443 | "source": [ 444 | "data.head()" 445 | ], 446 | "execution_count": 100, 447 | "outputs": [ 448 | { 449 | "output_type": "execute_result", 450 | "data": { 451 | "text/html": [ 452 | "
\n", 453 | "\n", 466 | "\n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | "
titlepublisher
0portfolio review justintvalsoplouie
1dutch isps ordered block the pirate baytorrentfreak
2stunning 3d examples html5 artworkdevstand
32010 sf bay area erlang factory programmeerlangfactory
4razors dealbusinessinsider
\n", 502 | "
" 503 | ], 504 | "text/plain": [ 505 | " title publisher\n", 506 | "0 portfolio review justintv alsoplouie\n", 507 | "1 dutch isps ordered block the pirate bay torrentfreak\n", 508 | "2 stunning 3d examples html5 artwork devstand\n", 509 | "3 2010 sf bay area erlang factory programme erlangfactory\n", 510 | "4 razors deal businessinsider" 511 | ] 512 | }, 513 | "metadata": { 514 | "tags": [] 515 | }, 516 | "execution_count": 100 517 | } 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "metadata": { 523 | "id": "pyHIldu2_WVD", 524 | "colab_type": "code", 525 | "outputId": "72c6ccfd-631b-4458-c45b-045ff10dce23", 526 | "colab": { 527 | "base_uri": "https://localhost:8080/", 528 | "height": 34 529 | } 530 | }, 531 | "source": [ 532 | "len(data['publisher'].unique())" 533 | ], 534 | "execution_count": 101, 535 | "outputs": [ 536 | { 537 | "output_type": "execute_result", 538 | "data": { 539 | "text/plain": [ 540 | "9520" 541 | ] 542 | }, 543 | "metadata": { 544 | "tags": [] 545 | }, 546 | "execution_count": 101 547 | } 548 | ] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": { 553 | "id": "qIjl5EHdEgH-", 554 | "colab_type": "text" 555 | }, 556 | "source": [ 557 | "# Check for na" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "metadata": { 563 | "id": "E6KCj22FLpUd", 564 | "colab_type": "code", 565 | "outputId": "0d435a61-67ae-4be7-c062-f3261737e665", 566 | "colab": { 567 | "base_uri": "https://localhost:8080/", 568 | "height": 51 569 | } 570 | }, 571 | "source": [ 572 | "for a in [data['title'],data['publisher']]:\n", 573 | " print(a.isna().sum())" 574 | ], 575 | "execution_count": 102, 576 | "outputs": [ 577 | { 578 | "output_type": "stream", 579 | "text": [ 580 | "0\n", 581 | "0\n" 582 | ], 583 | "name": "stdout" 584 | } 585 | ] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": { 590 | "id": "0BG2l0F_HqWx", 591 | "colab_type": "text" 592 | }, 593 | "source": [ 594 | "# Categorical " 595 | ] 596 | }, 597 | { 598 | 
"cell_type": "code", 599 | "metadata": { 600 | "id": "kiLSGMjpHJpa", 601 | "colab_type": "code", 602 | "colab": {} 603 | }, 604 | "source": [ 605 | " dictionary = dict(zip(data['publisher'], data['publisher'].astype('category').cat.codes))" 606 | ], 607 | "execution_count": 0, 608 | "outputs": [] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "metadata": { 613 | "id": "_kq5p-hDHveU", 614 | "colab_type": "code", 615 | "outputId": "30bc6e4e-63e9-42e8-9730-0765a7079823", 616 | "colab": { 617 | "base_uri": "https://localhost:8080/", 618 | "height": 1000 619 | } 620 | }, 621 | "source": [ 622 | "dictionary" 623 | ], 624 | "execution_count": 104, 625 | "outputs": [ 626 | { 627 | "output_type": "execute_result", 628 | "data": { 629 | "text/plain": [ 630 | "{'alsoplouie': 291,\n", 631 | " 'torrentfreak': 8726,\n", 632 | " 'devstand': 2818,\n", 633 | " 'erlangfactory': 3284,\n", 634 | " 'businessinsider': 1910,\n", 635 | " 'zorter': 9506,\n", 636 | " 'blogtippingpointlabs': 1647,\n", 637 | " 'youtube': 9451,\n", 638 | " 'thebottomlinecpaaustraliacom': 8412,\n", 639 | " 'codebrief': 2254,\n", 640 | " 'flatworldknowledge': 3497,\n", 641 | " 'reddit': 7110,\n", 642 | " 'rednovalabs': 7122,\n", 643 | " 'helpknow': 4137,\n", 644 | " 'lovewithweed': 5293,\n", 645 | " 'businessweek': 1918,\n", 646 | " 'moodbe': 5792,\n", 647 | " 'vpsplansdabbledb': 9096,\n", 648 | " 'briancray': 1830,\n", 649 | " 'technozooo': 8302,\n", 650 | " 'siliconisland': 7655,\n", 651 | " 'cakemail': 1942,\n", 652 | " 'dimdim': 2877,\n", 653 | " 'itunesapple': 4588,\n", 654 | " 'technotab': 8299,\n", 655 | " 'blogexpensify': 1079,\n", 656 | " 'vimeo': 9047,\n", 657 | " 'blogicyse': 1175,\n", 658 | " 'readwriteweb': 7080,\n", 659 | " 'peoplehbs': 6583,\n", 660 | " 'ngmnationalgeographic': 6123,\n", 661 | " 'zetetic': 9497,\n", 662 | " 'startupplays': 7964,\n", 663 | " 'paywallkillerblogspot': 6549,\n", 664 | " 'grumpytesting': 4005,\n", 665 | " 'refaelos': 7132,\n", 666 | " 'spreadsheetsgoogle': 7894,\n", 
667 | " 'yetanotherstartupblogblogspot': 9432,\n", 668 | " 'github': 3830,\n", 669 | " 'newrepublic': 6044,\n", 670 | " 'time': 8642,\n", 671 | " 'alleyinsider': 273,\n", 672 | " 'blogjonasbandi': 1230,\n", 673 | " 'winestockwebdesign': 9308,\n", 674 | " 'chromepowered': 2133,\n", 675 | " 'gistgithub': 3827,\n", 676 | " 'airbnb': 213,\n", 677 | " 'cloudtweaks': 2216,\n", 678 | " 'thinkingserious': 8581,\n", 679 | " 'bitsblogsnytimes': 814,\n", 680 | " 'lu': 5304,\n", 681 | " 'google': 3898,\n", 682 | " 'zakgreant': 9473,\n", 683 | " 'blograndylubin': 1448,\n", 684 | " 'beautifulmind': 667,\n", 685 | " 'gymcalls': 4020,\n", 686 | " 'computerworldcom': 2381,\n", 687 | " 'textweight': 8374,\n", 688 | " 'blogmaverick': 1304,\n", 689 | " 'blogcreapptives': 1018,\n", 690 | " 'unsealedherokuapp': 8906,\n", 691 | " 'techcrunch': 8256,\n", 692 | " 'rezmason': 7196,\n", 693 | " 'jacobian': 4611,\n", 694 | " 'onlineuniversityrankings2010': 6350,\n", 695 | " 'uxmag': 8963,\n", 696 | " 'vaidikkapoor': 8971,\n", 697 | " 'dealtaker': 2707,\n", 698 | " 'flickr': 3502,\n", 699 | " 'mlanzagithub': 5735,\n", 700 | " 'kosherswitch': 4988,\n", 701 | " 'ec267202321compute1amazonaws': 3091,\n", 702 | " 'friendshuffle': 3620,\n", 703 | " 'swoopo': 8158,\n", 704 | " 'mattmaroon': 5531,\n", 705 | " 'blogjakerlevine': 1203,\n", 706 | " 'devosaurus': 2817,\n", 707 | " 'morganclaypool': 5803,\n", 708 | " 'economist': 3102,\n", 709 | " 'joyent': 4817,\n", 710 | " 'stainedglasslabs': 7934,\n", 711 | " 'storeunity3d': 8049,\n", 712 | " 'itworld': 4591,\n", 713 | " 'blogcitrusbyte': 989,\n", 714 | " 'tonywright': 8715,\n", 715 | " 'codemarks': 2283,\n", 716 | " 'blogrescuetime': 1466,\n", 717 | " 'playpi': 6740,\n", 718 | " 'slate': 7732,\n", 719 | " 'panarmenian': 6478,\n", 720 | " 'blogappboy': 891,\n", 721 | " 'codegoogle': 2271,\n", 722 | " 'tuxradar': 8803,\n", 723 | " 'boston': 1773,\n", 724 | " 'startupgeek': 7953,\n", 725 | " 'tech': 8237,\n", 726 | " 'snaptalent': 7772,\n", 727 | " 
'gravity7': 3962,\n", 728 | " 'reuters': 7184,\n", 729 | " 'blogjambool': 1205,\n", 730 | " 'aidwatchers': 210,\n", 731 | " 'diycomputerscience': 2904,\n", 732 | " 'smarterware': 7749,\n", 733 | " 'moxiecode': 5824,\n", 734 | " 'effectgames': 3143,\n", 735 | " 'ideamonkblogspot': 4323,\n", 736 | " 'espn': 3299,\n", 737 | " 'getfireplug': 3779,\n", 738 | " 'thatcriticguy': 8380,\n", 739 | " 'chatterous': 2077,\n", 740 | " 'hypertextbook': 4283,\n", 741 | " 'lawyerclock': 5100,\n", 742 | " 'dropbox': 3025,\n", 743 | " 'valleywag': 8975,\n", 744 | " 'mathstackexchange': 5504,\n", 745 | " 'eff': 3142,\n", 746 | " 'theartofthepossible': 8397,\n", 747 | " 'claretegithub': 2170,\n", 748 | " 'scaleconf': 7411,\n", 749 | " 'sydneyedu': 8162,\n", 750 | " 'bmc2': 1739,\n", 751 | " 'webstartupcalculator': 9222,\n", 752 | " 'buildingof': 1891,\n", 753 | " 'flourishconf': 3509,\n", 754 | " 'startupsrule': 7972,\n", 755 | " 'changefeed': 2053,\n", 756 | " 'phrack': 6662,\n", 757 | " 'nicklothian': 6133,\n", 758 | " 'freshtechweb': 3615,\n", 759 | " 'docsgoogle': 2945,\n", 760 | " 'isthesingularityhereyet': 4568,\n", 761 | " 'imagine27': 4367,\n", 762 | " 'mathmaster': 5498,\n", 763 | " 'insidecrm': 4454,\n", 764 | " 'progscrape': 6889,\n", 765 | " 'fauzism': 3424,\n", 766 | " 'thenextweb': 8502,\n", 767 | " 'utilitymill': 8960,\n", 768 | " 'wired': 9313,\n", 769 | " 'phillyemergingtech': 6643,\n", 770 | " 'sciencedaily': 7427,\n", 771 | " 'signonsandiego': 7645,\n", 772 | " 'washingtonpost': 9141,\n", 773 | " 'robertsosinski': 7256,\n", 774 | " 'sphinxsearch': 7873,\n", 775 | " 'rippleqblogspot': 7227,\n", 776 | " 'swissmiss': 8153,\n", 777 | " 'itulip': 4587,\n", 778 | " 'scribblesheetco': 7468,\n", 779 | " 'wpengine': 9346,\n", 780 | " 'bloghackfwd': 1143,\n", 781 | " 'opencs': 6373,\n", 782 | " 'sethlevinetypepad': 7564,\n", 783 | " 'borismus': 1766,\n", 784 | " 'picasagoogle': 6678,\n", 785 | " 'gamasutra': 3686,\n", 786 | " 'xobni': 9396,\n", 787 | " 'engwealthfront': 
3238,\n", 788 | " 'vccafe': 8995,\n", 789 | " 'nerdscentralblogspot': 5998,\n", 790 | " 'messagify': 5616,\n", 791 | " 'arnklint': 488,\n", 792 | " 'geniuswiki': 3755,\n", 793 | " 'listsubuntu': 5235,\n", 794 | " '16systems': 13,\n", 795 | " 'bitcointalk': 795,\n", 796 | " 'bitbet': 788,\n", 797 | " 'centernetworks': 2034,\n", 798 | " 'newsyahoo': 6098,\n", 799 | " 'nature': 5951,\n", 800 | " 'johnndege': 4768,\n", 801 | " 'facebook': 3383,\n", 802 | " 'icu64blogspot': 4321,\n", 803 | " 'themetricsystemrjmetrics': 8494,\n", 804 | " 'linkfindr': 5199,\n", 805 | " 'lonewolfgamesgithub': 5274,\n", 806 | " 'timrosenblatt': 8661,\n", 807 | " 'buxfer': 1920,\n", 808 | " 'realworldocaml': 7095,\n", 809 | " 'kiwimotion': 4963,\n", 810 | " 'bbcco': 657,\n", 811 | " 'koi': 4980,\n", 812 | " 'siavashgposterous': 7638,\n", 813 | " 'cnn': 2231,\n", 814 | " 'shoutfast': 7630,\n", 815 | " 'paprikaapp': 6493,\n", 816 | " 'keybase': 4935,\n", 817 | " 'ifdefined': 4339,\n", 818 | " 'metromodemedia': 5635,\n", 819 | " 'pubnub': 6931,\n", 820 | " 'anassina': 334,\n", 821 | " 'showhnappherokuapp': 7632,\n", 822 | " 'wiredco': 9314,\n", 823 | " 'ddl': 2700,\n", 824 | " 'newaer': 6029,\n", 825 | " 'chyrp': 2140,\n", 826 | " 'nytimes': 6276,\n", 827 | " 'ninthdivision': 6158,\n", 828 | " 'urshoutbox': 8919,\n", 829 | " 'networkworld': 6022,\n", 830 | " 'thesetimes': 8532,\n", 831 | " 'markevanstech': 5452,\n", 832 | " 'bribe': 1839,\n", 833 | " 'webinsightlab': 9187,\n", 834 | " 'bzg': 1930,\n", 835 | " 'opera': 6406,\n", 836 | " 'devhikeblogspotcom': 2808,\n", 837 | " 'faceroulette': 3386,\n", 838 | " 'adammcnamara': 156,\n", 839 | " 'hecticgeek': 4124,\n", 840 | " 'libramatic': 5178,\n", 841 | " 'upbeat': 8909,\n", 842 | " 'noteslate': 6217,\n", 843 | " 'blogheroku': 1155,\n", 844 | " 'blogethanjfast': 1072,\n", 845 | " 'newsycombinator': 6100,\n", 846 | " 'toutapp': 8731,\n", 847 | " 'developeramazonwebservices': 2784,\n", 848 | " 'sitemeter': 7697,\n", 849 | " 'reinventingemail': 
7138,\n", 850 | " 'jottit': 4805,\n", 851 | " 'patentlyo': 6515,\n", 852 | " 'blogmozilla': 1339,\n", 853 | " 'tornadovideos': 8725,\n", 854 | " 'dieselweb': 2851,\n", 855 | " 'livestream': 5246,\n", 856 | " 'alistapart': 261,\n", 857 | " 'blogkeen': 1240,\n", 858 | " 'angulartranslategithub': 376,\n", 859 | " 'groupsgoogle': 3998,\n", 860 | " 'blaketechwordpress': 836,\n", 861 | " 'thedailywtf': 8438,\n", 862 | " 'bartkiosk': 636,\n", 863 | " 'fretwar': 3616,\n", 864 | " 'mixturtle': 5728,\n", 865 | " 'highscalability': 4164,\n", 866 | " 'arstechnica': 492,\n", 867 | " 'saddlegithub': 7366,\n", 868 | " 'mikepalmer': 5688,\n", 869 | " 'fseek': 3633,\n", 870 | " 'blogpagefair': 1398,\n", 871 | " 'wsgr': 9357,\n", 872 | " 'boulder': 1781,\n", 873 | " 'blogsskype': 1586,\n", 874 | " 'asymco': 547,\n", 875 | " 'bugzillamozilla': 1886,\n", 876 | " 'blogoscoped': 1394,\n", 877 | " 'singularityhub': 7687,\n", 878 | " 'recordit': 7102,\n", 879 | " 'jinaldesai': 4731,\n", 880 | " 'bloomberg': 1731,\n", 881 | " 'coffeescript': 2326,\n", 882 | " 'simvla': 7682,\n", 883 | " 'googlecodeblogspot': 3906,\n", 884 | " 'xpud': 9398,\n", 885 | " 'bloglocalmind': 1284,\n", 886 | " 'videry': 9034,\n", 887 | " 'pythonhistoryblogspot': 6962,\n", 888 | " 'internetsansfrontieres': 4493,\n", 889 | " 'codon': 2325,\n", 890 | " 'moneymorning': 5784,\n", 891 | " 'alexanderletmego': 240,\n", 892 | " 'DMCAInjury': 80,\n", 893 | " 'plusgoogle': 6755,\n", 894 | " 'drawium': 3011,\n", 895 | " 'batsov': 649,\n", 896 | " 'rogerdudlergithub': 7273,\n", 897 | " 'techzimco': 8326,\n", 898 | " 'jeffmillergithub': 4693,\n", 899 | " 'fairphone': 3399,\n", 900 | " 'venturebeat': 9004,\n", 901 | " 'dsigniocom': 3035,\n", 902 | " 'hnmood': 4187,\n", 903 | " 'cryptome': 2496,\n", 904 | " 'betamss': 733,\n", 905 | " 'scribd': 7469,\n", 906 | " 'lifebeyondfife': 5183,\n", 907 | " 'claymath': 2175,\n", 908 | " 'forumtrapexit': 3571,\n", 909 | " 'enwikipedia': 3261,\n", 910 | " 'blogmilesgrimshaw': 1319,\n", 911 | 
" 'alonhorev': 284,\n", 912 | " 'sloanstanford': 7743,\n", 913 | " 'bgr': 750,\n", 914 | " 'ryanbigg': 7349,\n", 915 | " 'slideshare': 7739,\n", 916 | " 'goosh': 3927,\n", 917 | " 'twtpoll': 8830,\n", 918 | " 'servus': 7561,\n", 919 | " 'onthemedia': 6355,\n", 920 | " 'tweetping': 8810,\n", 921 | " 'newteevee': 6103,\n", 922 | " 'pcsx2': 6558,\n", 923 | " 'videogoogle': 9026,\n", 924 | " 'theatlantic': 8398,\n", 925 | " 'scobleizer': 7450,\n", 926 | " 'iprlwz': 4533,\n", 927 | " 'here': 4145,\n", 928 | " 'reghardware': 7135,\n", 929 | " 'thingsgnod': 8576,\n", 930 | " 'joelonsoftware': 4753,\n", 931 | " 'yafla': 9408,\n", 932 | " 'bvb': 1926,\n", 933 | " 'buzzscale': 1925,\n", 934 | " 'thevarguy': 8563,\n", 935 | " 'supportgodaddy': 8124,\n", 936 | " 'usps': 8953,\n", 937 | " 'launch': 5082,\n", 938 | " 'markchangtumblr': 5438,\n", 939 | " 'dondodgetypepad': 2968,\n", 940 | " 'pastebin': 6511,\n", 941 | " 'examiner': 3347,\n", 942 | " 'foobub': 3542,\n", 943 | " 'wikidarkpatterns': 9276,\n", 944 | " 'wmhartnett': 9326,\n", 945 | " 'ieet': 4337,\n", 946 | " 'facultyumfmaine': 3391,\n", 947 | " 'hackernewspapergilesb': 4042,\n", 948 | " 'rachelbaker': 7008,\n", 949 | " 'billmoyers': 771,\n", 950 | " 'htmlandcsstutorial': 4248,\n", 951 | " 'tlrobinson': 8679,\n", 952 | " 'brainspl': 1797,\n", 953 | " 'jiraskype': 4732,\n", 954 | " 'buildstarted': 1894,\n", 955 | " 'labsgoogle': 5031,\n", 956 | " 'eriwen': 3281,\n", 957 | " 'arkafrica': 485,\n", 958 | " 'embedplnkr': 3182,\n", 959 | " 'medium': 5583,\n", 960 | " 'americaaljazeera': 307,\n", 961 | " 'smashingmagazine': 7756,\n", 962 | " 'commandcenterblogspot': 2355,\n", 963 | " 'linkedin': 5198,\n", 964 | " 'foldable': 3531,\n", 965 | " 'speakerdeck': 7859,\n", 966 | " 'ycombinator': 9424,\n", 967 | " 'samsoff': 7377,\n", 968 | " 'edmunds': 3114,\n", 969 | " 'w3': 9105,\n", 970 | " 'johnpavluswordpress': 4773,\n", 971 | " 'avc': 581,\n", 972 | " 'sourcefoundry': 7835,\n", 973 | " 'paulgraham': 6531,\n", 974 | " 
'blogasmartbear': 906,\n", 975 | " 'pando': 6482,\n", 976 | " 'shopjolla': 7622,\n", 977 | " 'blogacolyer': 865,\n", 978 | " 'blogszdnet': 1625,\n", 979 | " 'thecodist': 8427,\n", 980 | " 'theverge': 8565,\n", 981 | " 'blogwired': 1703,\n", 982 | " 'speirs': 7866,\n", 983 | " 'stuartsierra': 8078,\n", 984 | " 'jamesiryblogspot': 4631,\n", 985 | " 'bitsbook': 815,\n", 986 | " 'oreillynet': 6422,\n", 987 | " 'codinghorror': 2314,\n", 988 | " 'omninerd': 6329,\n", 989 | " 'pkn': 6713,\n", 990 | " 'twitter': 8821,\n", 991 | " 'anandtypepad': 331,\n", 992 | " 'weblogjamisbuck': 9194,\n", 993 | " 'freelancersunion': 3607,\n", 994 | " 'financefortunecnn': 3472,\n", 995 | " 'steveblank': 8025,\n", 996 | " 'lawstanford': 5099,\n", 997 | " 'djangoproject': 2914,\n", 998 | " 'apenwarr': 410,\n", 999 | " 'monstersuniversity': 5789,\n", 1000 | " 'mathstatdal': 5505,\n", 1001 | " 'swannodettegithub': 8141,\n", 1002 | " 'theglobeandmail': 8459,\n", 1003 | " 'experimentgardenblogspot': 3358,\n", 1004 | " 'blogscoutapp': 1505,\n", 1005 | " 'paidcontent': 6469,\n", 1006 | " 'chron': 2135,\n", 1007 | " 'yadazula': 9407,\n", 1008 | " 'dotnetfoundation': 2987,\n", 1009 | " 'allthingsd': 280,\n", 1010 | " 'blogtwitter': 1667,\n", 1011 | " 'guardianco': 4010,\n", 1012 | " 'cdsmithwordpress': 2028,\n", 1013 | " 'wakeupworld': 9120,\n", 1014 | " 'forbes': 3549,\n", 1015 | " 'codeboje': 2252,\n", 1016 | " 'motherboardvice': 5812,\n", 1017 | " 'hackeducation': 4030,\n", 1018 | " 'stevesouders': 8035,\n", 1019 | " 'satellitetoday': 7395,\n", 1020 | " 'buzzfeed': 1923,\n", 1021 | " 'blogvalbonneconsulting': 1679,\n", 1022 | " 'newscientist': 6052,\n", 1023 | " 'designobserver': 2761,\n", 1024 | " 'markmanson': 5457,\n", 1025 | " 'ted': 8329,\n", 1026 | " 'newscnet': 6053,\n", 1027 | " 'googleresearchblogspot': 3920,\n", 1028 | " 'mapthefallen': 5422,\n", 1029 | " 'howgoogleworks': 4228,\n", 1030 | " 'googlemacblogspot': 3912,\n", 1031 | " 'ibm': 4307,\n", 1032 | " 'bizyahoo': 824,\n", 1033 | " 
'lcamtufblogspot': 5107,\n", 1034 | " 'mitchfournier': 5720,\n", 1035 | " 'eweek': 3345,\n", 1036 | " 'ibiblio': 4306,\n", 1037 | " 'techdirt': 8258,\n", 1038 | " 'blogairbnb': 872,\n", 1039 | " 'sixkidsandafulltimejobblogspot': 7704,\n", 1040 | " 'opinionatorblogsnytimes': 6407,\n", 1041 | " 'mesosphere': 5615,\n", 1042 | " 'pennyarcade': 6576,\n", 1043 | " 'theinquirer': 8476,\n", 1044 | " 'thinqco': 8591,\n", 1045 | " 'scripting': 7470,\n", 1046 | " 'seomoz': 7555,\n", 1047 | " 'laboratoryequipment': 5026,\n", 1048 | " 'ebizmba': 3088,\n", 1049 | " 'troyangrignon': 8771,\n", 1050 | " 'inquisitr': 4448,\n", 1051 | " 'kazimirmajorincblogspot': 4902,\n", 1052 | " 'securityledger': 7519,\n", 1053 | " 'mcarthurgfx': 5555,\n", 1054 | " 'hackermonthly': 4038,\n", 1055 | " 'cartoonbank': 1993,\n", 1056 | " 'rawstory': 7058,\n", 1057 | " 'breitbart': 1818,\n", 1058 | " 'useit': 8931,\n", 1059 | " 'blogswsj': 1622,\n", 1060 | " 'andismithgithub': 337,\n", 1061 | " 'blogcloudflare': 994,\n", 1062 | " 'fallible': 3407,\n", 1063 | " 'casasciuswordpress': 1995,\n", 1064 | " 'themarshallproject': 8491,\n", 1065 | " 'docsracketlang': 2950,\n", 1066 | " 'joshuatopolsky': 4803,\n", 1067 | " 'betabeat': 728,\n", 1068 | " 'technologyreview': 8294,\n", 1069 | " 'poorbuthappy': 6780,\n", 1070 | " 'jezkempblogspot': 4722,\n", 1071 | " 'socialfixer': 7790,\n", 1072 | " 'scalenpm': 7412,\n", 1073 | " 'novainfosecportal': 6229,\n", 1074 | " 'nobugs': 6182,\n", 1075 | " 'smhcom': 7761,\n", 1076 | " 'blogintigi': 1191,\n", 1077 | " 'blogguykawasaki': 1139,\n", 1078 | " 'yahoo': 9409,\n", 1079 | " 'stackoverflow': 7932,\n", 1080 | " 'freedomtotinker': 3606,\n", 1081 | " 'dailydot': 2592,\n", 1082 | " 'mixergy': 5723,\n", 1083 | " 'garrettdimon': 3710,\n", 1084 | " 'cdnjs': 2024,\n", 1085 | " 'ureqsolusipse': 8918,\n", 1086 | " 'imdb': 4373,\n", 1087 | " 'theguardian': 8464,\n", 1088 | " 'nymag': 6270,\n", 1089 | " 'bayfronttechnologies': 655,\n", 1090 | " 'askslashdot': 526,\n", 1091 | " 
'mashable': 5486,\n", 1092 | " 'b2bwikiccgatech': 608,\n", 1093 | " 'telegraphco': 8337,\n", 1094 | " 'blogycombinator': 1712,\n", 1095 | " 'inc': 4397,\n", 1096 | " 'podcastrubyonrails': 6762,\n", 1097 | " 'wepwhartonupenn': 9235,\n", 1098 | " 'tapbots': 8214,\n", 1099 | " 'newsbbcco': 6047,\n", 1100 | " 'digitalmediastrategyeye': 2866,\n", 1101 | " 'blogsmsdn': 1561,\n", 1102 | " 'rubyinside': 7319,\n", 1103 | " 'theamericanscholar': 8391,\n", 1104 | " 'googleblogblogspot': 3900,\n", 1105 | " 'bloghackensplat': 1141,\n", 1106 | " 'alexkrupptypepad': 251,\n", 1107 | " 'linuxmag': 5216,\n", 1108 | " 'davidedicillo': 2674,\n", 1109 | " '37signals': 39,\n", 1110 | " 'financialcryptography': 3474,\n", 1111 | " 'neowin': 5991,\n", 1112 | " 'timdorr': 8641,\n", 1113 | " 'zdnet': 9475,\n", 1114 | " 'daringfireball': 2639,\n", 1115 | " 'blosxom': 1733,\n", 1116 | " 'wagerlabs': 9110,\n", 1117 | " 'financialpost': 3475,\n", 1118 | " 'iphonehacks': 4529,\n", 1119 | " 'sciam': 7420,\n", 1120 | " 'igizmodo': 4342,\n", 1121 | " 'pgpf': 6631,\n", 1122 | " 'successfulsoftware': 8101,\n", 1123 | " 'sfgate': 7574,\n", 1124 | " 'pbs': 6550,\n", 1125 | " 'theregisterco': 8524,\n", 1126 | " 'marshallyang': 5471,\n", 1127 | " 'ukbusinessinsider': 8861,\n", 1128 | " 'nationalpost': 5944,\n", 1129 | " 'narrowthegapp': 5929,\n", 1130 | " 'gigaom': 3815,\n", 1131 | " 'macrumors': 5354,\n", 1132 | " 'onlinewsj': 6351,\n", 1133 | " 'etexteditor': 3307,\n", 1134 | " 'occamsoftwareblogspot': 6288,\n", 1135 | " 'mymilliondollarideasblogspot': 5890,\n", 1136 | " 'newyorker': 6105,\n", 1137 | " 'rawsyntax': 7059,\n", 1138 | " 'micromathwordpress': 5667,\n", 1139 | " 'songkick': 7826,\n", 1140 | " 'messynessychic': 5618,\n", 1141 | " 'improvetheweb': 4388,\n", 1142 | " 'pragprog': 6825,\n", 1143 | " 'mmpcbamit': 5741,\n", 1144 | " 'pixability': 6705,\n", 1145 | " 'cringely': 2474,\n", 1146 | " 'blogsuperadditive': 1614,\n", 1147 | " 'siliconvalley': 7656,\n", 1148 | " 'joachimbreitner': 4743,\n", 
1149 | " 'csunm': 2540,\n", 1150 | " 'itproco': 4582,\n", 1151 | " 'bothsidesofthetable': 1780,\n", 1152 | " 'newssqueak': 6085,\n", 1153 | " 'principlesofchaos': 6851,\n", 1154 | " 'lob': 5258,\n", 1155 | " 'mailhaskell': 5372,\n", 1156 | " 'kurzweilai': 5011,\n", 1157 | " 'webdesignerwall': 9177,\n", 1158 | " 'cocoawithlove': 2241,\n", 1159 | " 'success': 8100,\n", 1160 | " 'eecsharvard': 3131,\n", 1161 | " 'wafflewootest': 9108,\n", 1162 | " 'e4awssilverdr': 3071,\n", 1163 | " 'thedailybeast': 8436,\n", 1164 | " 'hanselman': 4085,\n", 1165 | " 'eater': 3081,\n", 1166 | " 'sbeattyconsulting': 7403,\n", 1167 | " 'futureoftheinternet': 3662,\n", 1168 | " 'blogregehr': 1462,\n", 1169 | " 'thinkvitamin': 8590,\n", 1170 | " 'quora': 7001,\n", 1171 | " 'ustream': 8955,\n", 1172 | " 'msdnmicrosoft': 5840,\n", 1173 | " 'y2kemo': 9403,\n", 1174 | " 'dancohen': 2615,\n", 1175 | " 'howithappened': 4230,\n", 1176 | " 'cdixon': 2022,\n", 1177 | " 'ririanproject': 7228,\n", 1178 | " 'awsamazon': 594,\n", 1179 | " 'libgit2github': 5175,\n", 1180 | " 'rampantgames': 7029,\n", 1181 | " 'segment': 7532,\n", 1182 | " 'bright': 1843,\n", 1183 | " 'andrewchentypepad': 343,\n", 1184 | " 'blognella': 1348,\n", 1185 | " 'trueplayergear': 8775,\n", 1186 | " 'minyanville': 5710,\n", 1187 | " 'geek': 3732,\n", 1188 | " 'newsquelsolaar': 6077,\n", 1189 | " 'fora': 3548,\n", 1190 | " 'luvit': 5329,\n", 1191 | " 'longnow': 5276,\n", 1192 | " 'sethgodintypepad': 7563,\n", 1193 | " 'cimgf': 2145,\n", 1194 | " 'acidealogblogspot': 129,\n", 1195 | " 'jeremymansonblogspot': 4707,\n", 1196 | " 'hostedap': 4220,\n", 1197 | " 'sivers': 7702,\n", 1198 | " 'codedjangoproject': 2263,\n", 1199 | " 'infoq': 4431,\n", 1200 | " 'jessenoller': 4715,\n", 1201 | " 'recode': 7101,\n", 1202 | " 'labnol': 5025,\n", 1203 | " 'ejohn': 3153,\n", 1204 | " 'insidefacebook': 4458,\n", 1205 | " 'scienceray': 7435,\n", 1206 | " 'weblogsasp': 9199,\n", 1207 | " 'beyondvc': 747,\n", 1208 | " 'wikisecondlife': 9282,\n", 
1209 | " 'katharsis': 4895,\n", 1210 | " 'instacached': 4467,\n", 1211 | " 'blogarduino': 902,\n", 1212 | " 'jacquesmattheij': 4614,\n", 1213 | " 'ilovetypography': 4362,\n", 1214 | " 'catonmat': 2006,\n", 1215 | " 'fsharpforfunandprofit': 3635,\n", 1216 | " 'lethain': 5157,\n", 1217 | " 'blogdavidchartier': 1032,\n", 1218 | " 'digbysblogblogspot': 2854,\n", 1219 | " 'myappleguide': 5884,\n", 1220 | " 'marsinvasiongithub': 5472,\n", 1221 | " 'engadget': 3198,\n", 1222 | " 'plentyoffishwordpress': 6746,\n", 1223 | " 'blogsharvardbusiness': 1534,\n", 1224 | " 'socialentrepreneurshipchange': 7788,\n", 1225 | " 'mattmazur': 5532,\n", 1226 | " 'beyond3d': 744,\n", 1227 | " 'polishlinux': 6773,\n", 1228 | " 'mint': 5709,\n", 1229 | " 'fndrs': 3523,\n", 1230 | " 'zwitserloot': 9515,\n", 1231 | " 'profounder': 6872,\n", 1232 | " 'azfamily': 604,\n", 1233 | " 'bokardo': 1748,\n", 1234 | " 'ft': 3637,\n", 1235 | " 'iospressmetapress': 4516,\n", 1236 | " 'allanjosephbatac': 269,\n", 1237 | " 'thesunco': 8549,\n", 1238 | " 'computerworlduk': 2382,\n", 1239 | " 'mattcutts': 5515,\n", 1240 | " 'blogsalescrunch': 1489,\n", 1241 | " 'blogsforbes': 1527,\n", 1242 | " 'longtail': 5278,\n", 1243 | " 'blogcarlmercier': 977,\n", 1244 | " 'founderinstitute': 3577,\n", 1245 | " 'newssoftpedia': 6084,\n", 1246 | " 'gametableapp': 3702,\n", 1247 | " 'macobserver': 5351,\n", 1248 | " 'blogsubtledisruption': 1610,\n", 1249 | " 'nobitypepad': 6181,\n", 1250 | " 'leftnode': 5139,\n", 1251 | " 'giantrobotlasers': 3806,\n", 1252 | " 'arabcrunch': 454,\n", 1253 | " 'siliconangle': 7653,\n", 1254 | " 'jasoncrawford': 4648,\n", 1255 | " 'bakerycakephp': 624,\n", 1256 | " 'astartupadaywordpress': 537,\n", 1257 | " 'esciencenews': 3294,\n", 1258 | " 'babblingvctypepad': 609,\n", 1259 | " 'nvie': 6262,\n", 1260 | " 'aeon': 188,\n", 1261 | " 'jvns': 4872,\n", 1262 | " 'xuanwulabgithub': 9400,\n", 1263 | " 'blogwolfram': 1705,\n", 1264 | " 'structuredprocrastination': 8075,\n", 1265 | " 'aftau': 
196,\n", 1266 | " 'schneier': 7418,\n", 1267 | " 'etymonline': 3313,\n", 1268 | " 'tedunangst': 8333,\n", 1269 | " 'betterexplained': 740,\n", 1270 | " 'newsxbox': 6097,\n", 1271 | " 'blogphusion': 1414,\n", 1272 | " 'phenomenanationalgeographic': 6636,\n", 1273 | " 'blogbitbucket': 942,\n", 1274 | " 'wingolog': 9310,\n", 1275 | " 'researcherwatsonibm': 7160,\n", 1276 | " 'startupboy': 7949,\n", 1277 | " 'boingboing': 1747,\n", 1278 | " 'hicksdesignco': 4160,\n", 1279 | " 'macheist': 5344,\n", 1280 | " 'nautil': 5952,\n", 1281 | " 'loweringthebar': 5294,\n", 1282 | " 'blogsteveklabnik': 1604,\n", 1283 | " 'slashfilm': 7730,\n", 1284 | " 'sciencenews': 7433,\n", 1285 | " 'photomatt': 6653,\n", 1286 | " 'openelectronics': 6377,\n", 1287 | " 'lexifi': 5167,\n", 1288 | " 'gizmodo': 3847,\n", 1289 | " 'nilkanth': 6151,\n", 1290 | " 'hackaday': 4026,\n", 1291 | " 'unfinishedman': 8887,\n", 1292 | " 'bestofmetafilter': 724,\n", 1293 | " 'peoplecsailmit': 6580,\n", 1294 | " 'blogvideojs': 1682,\n", 1295 | " 'fastcoexist': 3419,\n", 1296 | " 'hnrdnsalias': 4190,\n", 1297 | " 'amberlang': 304,\n", 1298 | " 'politico': 6774,\n", 1299 | " 'thenational': 8498,\n", 1300 | " 'pcworld': 6559,\n", 1301 | " 'blogstackoverflow': 1592,\n", 1302 | " 'elaineou': 3156,\n", 1303 | " 'imgur': 4378,\n", 1304 | " 'reviewszdnetco': 7189,\n", 1305 | " 'mobilexweb': 5762,\n", 1306 | " 'engineeringflipboard': 3205,\n", 1307 | " 'kybelepsychcornell': 5017,\n", 1308 | " 'ostatic': 6435,\n", 1309 | " 'weblogsmozillazine': 9201,\n", 1310 | " 'weierophinney': 9230,\n", 1311 | " 'detroitnews': 2770,\n", 1312 | " 'adage': 145,\n", 1313 | " 'weblogrubyonrails': 9198,\n", 1314 | " 'afternoontrickgithub': 198,\n", 1315 | " 'gapingvoid': 3708,\n", 1316 | " 'oregonlive': 6420,\n", 1317 | " 'slashdot': 7729,\n", 1318 | " 'techvalleystartupweekend': 8322,\n", 1319 | " 'freelancingjob': 3609,\n", 1320 | " 'kevinvanzonneveld': 4931,\n", 1321 | " 'xkcd': 9394,\n", 1322 | " 'getinstinct': 3784,\n", 1323 | " 
'blogunderdog': 1671,\n", 1324 | " 'prog21dadgum': 6873,\n", 1325 | " 'cloudspace': 2215,\n", 1326 | " 'blogdopplr': 1049,\n", 1327 | " 'spreedly': 7896,\n", 1328 | " 'mackross': 5347,\n", 1329 | " 'dealbooknytimes': 2704,\n", 1330 | " 'gastospublicoscom': 3714,\n", 1331 | " 'extremetech': 3375,\n", 1332 | " 'archive': 465,\n", 1333 | " 'coovtech': 2419,\n", 1334 | " 'blogbacktype': 924,\n", 1335 | " 'senzeeblogspot': 7552,\n", 1336 | " 'revkme': 7190,\n", 1337 | " 'gadling': 3677,\n", 1338 | " 'phoronix': 6652,\n", 1339 | " 'gimado': 3820,\n", 1340 | " 'radaroreilly': 7014,\n", 1341 | " 'chronicle': 2136,\n", 1342 | " 'matthewpaulmoore': 5523,\n", 1343 | " 'andrewgavinmarshall': 345,\n", 1344 | " 'freakonomicsblogsnytimes': 3599,\n", 1345 | " 'livescience': 5245,\n", 1346 | " 'mdconnects': 5559,\n", 1347 | " 'vnet': 9081,\n", 1348 | " 'pcauthoritycom': 6551,\n", 1349 | " 'moneycnn': 5783,\n", 1350 | " 'scientificamerican': 7438,\n", 1351 | " 'thehindu': 8467,\n", 1352 | " 'haaretz': 4022,\n", 1353 | " 'darkreading': 2640,\n", 1354 | " 'manycorante': 5410,\n", 1355 | " 'crunchgear': 2489,\n", 1356 | " 'thestartupcafe': 8544,\n", 1357 | " 'blogshbr': 1535,\n", 1358 | " 'swombat': 8157,\n", 1359 | " 'macgasm': 5343,\n", 1360 | " 'paulkedrosky': 6534,\n", 1361 | " 'swisscsailmit': 8151,\n", 1362 | " 'aynrand': 599,\n", 1363 | " 'techcrunchit': 8257,\n", 1364 | " 'badrixwordpress': 618,\n", 1365 | " 'zacharyburt': 9465,\n", 1366 | " 'localmotors': 5260,\n", 1367 | " 'qz': 7005,\n", 1368 | " 'thinkgene': 8579,\n", 1369 | " 'fee': 3437,\n", 1370 | " '10zenmonkeys': 6,\n", 1371 | " 'molcalc': 5777,\n", 1372 | " 'paulrouget': 6538,\n", 1373 | " 'jwz': 4874,\n", 1374 | " 'factorlanguageblogspot': 3387,\n", 1375 | " 'cultofmac': 2556,\n", 1376 | " 'brontecapitalblogspot': 1851,\n", 1377 | " 'articlegmane': 497,\n", 1378 | " 'hbr': 4116,\n", 1379 | " 'grayre': 3966,\n", 1380 | " 'blogxkcd': 1711,\n", 1381 | " 'halogensoftware': 4076,\n", 1382 | " 'clusterhq': 2223,\n", 1383 | 
" 'blogdevontechnologies': 1037,\n", 1384 | " 'hueniverse': 4254,\n", 1385 | " 'vator': 8991,\n", 1386 | " 'developermarvel': 2792,\n", 1387 | " 'priceonomics': 6846,\n", 1388 | " 'brianoberkirch': 1832,\n", 1389 | " 'universetoday': 8895,\n", 1390 | " 'sitesgoogle': 7700,\n", 1391 | " 'blogcodeship': 1000,\n", 1392 | " 'illusioncontestneuralcorrelate': 4361,\n", 1393 | " 'christianreber': 2125,\n", 1394 | " 'conal': 2384,\n", 1395 | " 'greengoose': 3974,\n", 1396 | " 'parislemon': 6500,\n", 1397 | " 'scottberkun': 7455,\n", 1398 | " 'alexgolecgithub': 247,\n", 1399 | " 'foundread': 3582,\n", 1400 | " 'msnbcmsn': 5842,\n", 1401 | " 'quantopian': 6982,\n", 1402 | " 'macstories': 5355,\n", 1403 | " 'colinsteele': 2341,\n", 1404 | " 'getbirdly': 3773,\n", 1405 | " 'edibleapple': 3110,\n", 1406 | " 'tavivootuniversewordpress': 8223,\n", 1407 | " 'againstmonopoly': 199,\n", 1408 | " 'perpetuallybeta': 6603,\n", 1409 | " 'nikolaplejic': 6148,\n", 1410 | " 'nicolasbcom': 6135,\n", 1411 | " 'jisiguo': 4733,\n", 1412 | " 'devzeraweb': 2830,\n", 1413 | " 'tineye': 8663,\n", 1414 | " 'spectrumieee': 7863,\n", 1415 | " 'askubuntu': 528,\n", 1416 | " 'newsku': 6062,\n", 1417 | " 'tribalwriter': 8761,\n", 1418 | " 'adchap': 161,\n", 1419 | " 'cbsnews': 2011,\n", 1420 | " 'lemire': 5145,\n", 1421 | " 'exploitdb': 3366,\n", 1422 | " 'grahamcluley': 3951,\n", 1423 | " 'apijquery': 416,\n", 1424 | " 'wheretowatch': 9255,\n", 1425 | " 'tbray': 8231,\n", 1426 | " 'oracle': 6416,\n", 1427 | " 'thestandard': 8542,\n", 1428 | " 'blogauthy': 914,\n", 1429 | " 'openmaterials': 6385,\n", 1430 | " 'mlive': 5736,\n", 1431 | " 'eetimes': 3136,\n", 1432 | " 'rachelbythebay': 7009,\n", 1433 | " 'chicagotribune': 2096,\n", 1434 | " 'lshift': 5300,\n", 1435 | " 'stormpath': 8053,\n", 1436 | " 'supportmicrosoft': 8127,\n", 1437 | " 'discussjoelonsoftware': 2894,\n", 1438 | " 'nearlyfreespeech': 5973,\n", 1439 | " 'financeyahoo': 3473,\n", 1440 | " 'ycsearch': 9426,\n", 1441 | " 'skynet': 7723,\n", 
1442 | " 'merttol': 5613,\n", 1443 | " 'w3fools': 9106,\n", 1444 | " 'codemonkeyism': 2285,\n", 1445 | " 'storytotell': 8055,\n", 1446 | " 'fastcompany': 3421,\n", 1447 | " 'computerworld': 2379,\n", 1448 | " 'alternet': 292,\n", 1449 | " 'zi': 9499,\n", 1450 | " 'scrapages': 7462,\n", 1451 | " 'horsesaysinternet': 4217,\n", 1452 | " 'blogslickedit': 1557,\n", 1453 | " 'newsmongabay': 6065,\n", 1454 | " 'felipecwordpress': 3444,\n", 1455 | " 'techcitynews': 8253,\n", 1456 | " 'vimuniversity': 9048,\n", 1457 | " 'thechangelog': 8423,\n", 1458 | " 'projectwordsworth': 6898,\n", 1459 | " 'python': 6957,\n", 1460 | " 'theintercept': 8478,\n", 1461 | " 'lericsonblogg': 5152,\n", 1462 | " 'gabrielweinberg': 3673,\n", 1463 | " 'containersolutions': 2398,\n", 1464 | " 'physorg': 6675,\n", 1465 | " 'toolsietf': 8718,\n", 1466 | " 'blogezyang': 1082,\n", 1467 | " 'educer': 3118,\n", 1468 | " 'plope': 6749,\n", 1469 | " 'feld': 3443,\n", 1470 | " 'marco': 5428,\n", 1471 | " 'mwrc2009confreaks': 5881,\n", 1472 | " 'rubypond': 7324,\n", 1473 | " 'voidspaceorg': 9088,\n", 1474 | " 'blogsanctumgeek': 1493,\n", 1475 | " 'interledger': 4486,\n", 1476 | " 'rgruetfree': 7200,\n", 1477 | " 'blogprediction': 1427,\n", 1478 | " 'pipelinedb': 6697,\n", 1479 | " 'fortune': 3560,\n", 1480 | " 'lambdatheultimate': 5055,\n", 1481 | " 'roughtype': 7292,\n", 1482 | " 'mjg59dreamwidth': 5729,\n", 1483 | " 'blahgres0l': 832,\n", 1484 | " 'interfacelab': 4485,\n", 1485 | " 'alpblogheroku': 285,\n", 1486 | " 'usability': 8923,\n", 1487 | " 'blogfairsoftware': 1085,\n", 1488 | " 'foreignpolicy': 3551,\n", 1489 | " 'playfish': 6736,\n", 1490 | " 'windytan': 9307,\n", 1491 | " 'whydoeseverythingsuck': 9271,\n", 1492 | " 'iplawforstartups': 4530,\n", 1493 | " 'yarivsblog': 9421,\n", 1494 | " 'nasaspaceflight': 5932,\n", 1495 | " 'spudworks': 7904,\n", 1496 | " 'martinatsunset': 5476,\n", 1497 | " 'pressroomnvidia': 6840,\n", 1498 | " 'wikifranklinheathco': 9277,\n", 1499 | " 'vox': 9092,\n", 1500 | " 
'tflgov': 8376,\n", 1501 | " 'blogpmarca': 1421,\n", 1502 | " 'sciencefocus': 7429,\n", 1503 | " 'fsf': 3634,\n", 1504 | " 'jpost': 4822,\n", 1505 | " 'slash7': 7728,\n", 1506 | " 'solutionwatch': 7814,\n", 1507 | " 'databaseprogrammerblogspot': 2650,\n", 1508 | " 'skorks': 7717,\n", 1509 | " 'weblograganwald': 9197,\n", 1510 | " 'htus': 4251,\n", 1511 | " 'journaldedasys': 4807,\n", 1512 | " 'rt': 7310,\n", 1513 | " 'orientdbleaksblogspot': 6425,\n", 1514 | " 'sproutly': 7903,\n", 1515 | " 'blixtsystems': 851,\n", 1516 | " 'rossignolcream': 7289,\n", 1517 | " 'ftalphavilleft': 3638,\n", 1518 | " 'lwn': 5331,\n", 1519 | " 'massivegreatness': 5487,\n", 1520 | " 'brooksreview': 1855,\n", 1521 | " 'villagevoice': 9044,\n", 1522 | " 'blogcircleshare': 987,\n", 1523 | " 'blogsfgate': 1525,\n", 1524 | " '8thdev': 69,\n", 1525 | " 'basement': 642,\n", 1526 | " 'lifebuzz': 5184,\n", 1527 | " 'scriptmag': 7471,\n", 1528 | " 'ukpcmag': 8863,\n", 1529 | " 'elusiveconsumerwordpress': 3176,\n", 1530 | " 'seekingalpha': 7527,\n", 1531 | " 'browserforthebetter': 1861,\n", 1532 | " 'i100independentco': 4288,\n", 1533 | " 'neilbowerswordpress': 5977,\n", 1534 | " 'weblogsjava': 9200,\n", 1535 | " 'motherjones': 5816,\n", 1536 | " 'newsstanford': 6086,\n", 1537 | " 'wry': 9356,\n", 1538 | " 'blognetbsd': 1350,\n", 1539 | " 'blognewcomb': 1355,\n", 1540 | " 'nltk': 6170,\n", 1541 | " 'independentco': 4406,\n", 1542 | " 'blogslawharvard': 1555,\n", 1543 | " 'nubyonrails': 6253,\n", 1544 | " 'statwonk': 8007,\n", 1545 | " 'popularmechanics': 6788,\n", 1546 | " 'blogsjanestreet': 1550,\n", 1547 | " 'venublog': 9012,\n", 1548 | " 'blogsapache': 1495,\n", 1549 | " 'newfacebook': 6035,\n", 1550 | " 'blogheadius': 1150,\n", 1551 | " 'cdscern': 2027,\n", 1552 | " 'predictablyirrational': 6831,\n", 1553 | " 'cbinsights': 2010,\n", 1554 | " 'mirage': 5713,\n", 1555 | " 'nirmalpatel': 6162,\n", 1556 | " 'showkr': 7633,\n", 1557 | " 'measuringmeasures': 5565,\n", 1558 | " 
'polymerdesignerappspot': 6778,\n", 1559 | " 'blogpythonlibrary': 1438,\n", 1560 | " 'mbstoriesquora': 5553,\n", 1561 | " 'antipope': 397,\n", 1562 | " 'foundersatwork': 3580,\n", 1563 | " 'baekdal': 620,\n", 1564 | " 'appleinsider': 437,\n", 1565 | " 'utf8everywhere': 8959,\n", 1566 | " 'popvox': 6790,\n", 1567 | " 'billkerr2blogspot': 770,\n", 1568 | " 'eutechcrunch': 3322,\n", 1569 | " 'ericsink': 3278,\n", 1570 | " 'usfirst': 8948,\n", 1571 | " 'uswaretech': 8956,\n", 1572 | " 'dcsobralblogspot': 2695,\n", 1573 | " 'gwtsmalltalkwordpress': 4019,\n", 1574 | " 'chromeblogspot': 2130,\n", 1575 | " 'sumsar': 8109,\n", 1576 | " 'thehill': 8466,\n", 1577 | " 'duartes': 3043,\n", 1578 | " 'friedcpuwordpress': 3618,\n", 1579 | " 'apexdesigns': 411,\n", 1580 | " 'ayende': 598,\n", 1581 | " 'lmaugustintypepad': 5255,\n", 1582 | " 'continuations': 2401,\n", 1583 | " 'trueslant': 8776,\n", 1584 | " 'ioncannon': 4511,\n", 1585 | " 'amazon': 303,\n", 1586 | " 'forumssilverlight': 3568,\n", 1587 | " 'cgi2csrpi': 2044,\n", 1588 | " 'hacksmozilla': 4065,\n", 1589 | " 'gmailblogblogspot': 3867,\n", 1590 | " 'physicsmcgill': 6669,\n", 1591 | " 'particletree': 6507,\n", 1592 | " 'cstufts': 2537,\n", 1593 | " 'blogsopenforum': 1573,\n", 1594 | " 'actualeurope': 141,\n", 1595 | " 'publicstatic': 6929,\n", 1596 | " 'growlposterous': 4000,\n", 1597 | " 'bret': 1821,\n", 1598 | " 'technabob': 8283,\n", 1599 | " 'tgdaily': 8377,\n", 1600 | " 'hoist': 4197,\n", 1601 | " 'robsanheim': 7266,\n", 1602 | " 'curiouscapitalistblogstime': 2560,\n", 1603 | " 'codersatwork': 2300,\n", 1604 | " 'secretgeek': 7511,\n", 1605 | " 'derekflanzraich': 2750,\n", 1606 | " 'semiaccurate': 7539,\n", 1607 | " 'danah': 2613,\n", 1608 | " 'govcheck': 3939,\n", 1609 | " 'ubuntu': 8849,\n", 1610 | " 'quicksprout': 6991,\n", 1611 | " 'studenthacks': 8082,\n", 1612 | " 'anandtech': 330,\n", 1613 | " 'thedroidguy': 8446,\n", 1614 | " 'blogscnet': 1502,\n", 1615 | " 'wsj': 9359,\n", 1616 | " 'ijango': 4351,\n", 1617 
| " 'theincidentaleconomist': 8471,\n", 1618 | " 'wallyapp': 9127,\n", 1619 | " 'pulseturbobytes': 6938,\n", 1620 | " 'correspondentstheatlantic': 2436,\n", 1621 | " 'eisp': 3152,\n", 1622 | " 'mattmight': 5534,\n", 1623 | " 'blogwikimedia': 1701,\n", 1624 | " 'xent': 9391,\n", 1625 | " 'sigops': 7650,\n", 1626 | " 'hexanews': 4153,\n", 1627 | " 'nybooks': 6264,\n", 1628 | " 'drdobbs': 3015,\n", 1629 | " 'cepr': 2039,\n", 1630 | " ...}" 1631 | ] 1632 | }, 1633 | "metadata": { 1634 | "tags": [] 1635 | }, 1636 | "execution_count": 104 1637 | } 1638 | ] 1639 | }, 1640 | { 1641 | "cell_type": "code", 1642 | "metadata": { 1643 | "id": "jwL3vsHiKbXW", 1644 | "colab_type": "code", 1645 | "colab": {} 1646 | }, 1647 | "source": [ 1648 | "data['publisher']=data['publisher'].astype('category').cat.codes" 1649 | ], 1650 | "execution_count": 0, 1651 | "outputs": [] 1652 | }, 1653 | { 1654 | "cell_type": "code", 1655 | "metadata": { 1656 | "id": "M7WoV1m8lSPb", 1657 | "colab_type": "code", 1658 | "colab": { 1659 | "base_uri": "https://localhost:8080/", 1660 | "height": 1000 1661 | }, 1662 | "outputId": "0c44be37-03bd-4da6-90fd-eb485ce3e2e8" 1663 | }, 1664 | "source": [ 1665 | "data['publisher']" 1666 | ], 1667 | "execution_count": 106, 1668 | "outputs": [ 1669 | { 1670 | "output_type": "execute_result", 1671 | "data": { 1672 | "text/plain": [ 1673 | "0 291\n", 1674 | "1 8726\n", 1675 | "2 2818\n", 1676 | "3 3284\n", 1677 | "4 1910\n", 1678 | "5 9506\n", 1679 | "6 1647\n", 1680 | "7 9451\n", 1681 | "8 8412\n", 1682 | "9 2254\n", 1683 | "10 3497\n", 1684 | "11 7110\n", 1685 | "12 7122\n", 1686 | "13 9451\n", 1687 | "14 1910\n", 1688 | "15 4137\n", 1689 | "16 5293\n", 1690 | "17 1918\n", 1691 | "18 5792\n", 1692 | "19 9096\n", 1693 | "20 1830\n", 1694 | "21 8302\n", 1695 | "22 7655\n", 1696 | "23 1942\n", 1697 | "24 2877\n", 1698 | "25 4588\n", 1699 | "26 4588\n", 1700 | "27 8299\n", 1701 | "28 1079\n", 1702 | "29 9047\n", 1703 | " ... 
\n", 1704 | "19970 8256\n", 1705 | "19971 1765\n", 1706 | "19972 8726\n", 1707 | "19973 2582\n", 1708 | "19974 6147\n", 1709 | "19975 1622\n", 1710 | "19976 656\n", 1711 | "19977 8164\n", 1712 | "19978 8121\n", 1713 | "19979 8256\n", 1714 | "19980 755\n", 1715 | "19981 35\n", 1716 | "19982 7932\n", 1717 | "19983 1758\n", 1718 | "19984 6044\n", 1719 | "19985 3484\n", 1720 | "19986 4948\n", 1721 | "19987 3283\n", 1722 | "19988 5492\n", 1723 | "19989 349\n", 1724 | "19990 6950\n", 1725 | "19991 3561\n", 1726 | "19992 7905\n", 1727 | "19993 5254\n", 1728 | "19994 5011\n", 1729 | "19995 9451\n", 1730 | "19996 8524\n", 1731 | "19997 8481\n", 1732 | "19998 4883\n", 1733 | "19999 7110\n", 1734 | "Name: publisher, Length: 20000, dtype: int16" 1735 | ] 1736 | }, 1737 | "metadata": { 1738 | "tags": [] 1739 | }, 1740 | "execution_count": 106 1741 | } 1742 | ] 1743 | }, 1744 | { 1745 | "cell_type": "markdown", 1746 | "metadata": { 1747 | "id": "zuzWZTi7LHvm", 1748 | "colab_type": "text" 1749 | }, 1750 | "source": [ 1751 | "# Split" 1752 | ] 1753 | }, 1754 | { 1755 | "cell_type": "code", 1756 | "metadata": { 1757 | "id": "UWzzuu_ZCTTI", 1758 | "colab_type": "code", 1759 | "colab": {} 1760 | }, 1761 | "source": [ 1762 | " from sklearn.model_selection import train_test_split\n", 1763 | " X_train, X_test, y_train, y_test = train_test_split(data['title'], data['publisher'], test_size=0.2, random_state=1)\n", 1764 | "\n", 1765 | " X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=1)" 1766 | ], 1767 | "execution_count": 0, 1768 | "outputs": [] 1769 | }, 1770 | { 1771 | "cell_type": "markdown", 1772 | "metadata": { 1773 | "id": "AsxEBkIaLXMp", 1774 | "colab_type": "text" 1775 | }, 1776 | "source": [ 1777 | "# Tokenize" 1778 | ] 1779 | }, 1780 | { 1781 | "cell_type": "code", 1782 | "metadata": { 1783 | "id": "1vMU1BfSAw1Z", 1784 | "colab_type": "code", 1785 | "colab": {} 1786 | }, 1787 | "source": [ 1788 | "vocab_size = 20000 \n", 1789 | 
"max_seq = 50" 1790 | ], 1791 | "execution_count": 0, 1792 | "outputs": [] 1793 | }, 1794 | { 1795 | "cell_type": "markdown", 1796 | "metadata": { 1797 | "id": "Kj6LqTVkN3Zg", 1798 | "colab_type": "text" 1799 | }, 1800 | "source": [ 1801 | "## Download Glove" 1802 | ] 1803 | }, 1804 | { 1805 | "cell_type": "code", 1806 | "metadata": { 1807 | "id": "Jh0Kv1lkN2tF", 1808 | "colab_type": "code", 1809 | "colab": {} 1810 | }, 1811 | "source": [ 1812 | "# %cd /content/\n", 1813 | "# !wget http://nlp.stanford.edu/data/glove.840B.300d.zip" 1814 | ], 1815 | "execution_count": 0, 1816 | "outputs": [] 1817 | }, 1818 | { 1819 | "cell_type": "code", 1820 | "metadata": { 1821 | "id": "c-_llHKijbsm", 1822 | "colab_type": "code", 1823 | "colab": {} 1824 | }, 1825 | "source": [ 1826 | "" 1827 | ], 1828 | "execution_count": 0, 1829 | "outputs": [] 1830 | }, 1831 | { 1832 | "cell_type": "code", 1833 | "metadata": { 1834 | "id": "1OQCbRV5Pxm9", 1835 | "colab_type": "code", 1836 | "colab": {} 1837 | }, 1838 | "source": [ 1839 | "# !unzip \"/content/glove.840B.300d.zip\"" 1840 | ], 1841 | "execution_count": 0, 1842 | "outputs": [] 1843 | }, 1844 | { 1845 | "cell_type": "code", 1846 | "metadata": { 1847 | "colab_type": "code", 1848 | "id": "Y7lY_--tjPv7", 1849 | "colab": {} 1850 | }, 1851 | "source": [ 1852 | "# def sent2vec(s):\n", 1853 | "# words = str(s).lower()\n", 1854 | "# words = word_tokenize(words)\n", 1855 | "# words = [w for w in words if not w in stop_words]\n", 1856 | "# words = [w for w in words if w.isalpha()]\n", 1857 | "# M = []\n", 1858 | "# for w in words:\n", 1859 | "# try:\n", 1860 | "# M.append(embeddings_index[w])\n", 1861 | "# except:\n", 1862 | "# continue\n", 1863 | "# M = np.array(M)\n", 1864 | "# v = M.sum(axis=0)\n", 1865 | "# if type(v) != np.ndarray:\n", 1866 | "# return np.zeros(300)\n", 1867 | "# return v / np.sqrt((v ** 2).sum())" 1868 | ], 1869 | "execution_count": 0, 1870 | "outputs": [] 1871 | }, 1872 | { 1873 | "cell_type": "code", 1874 | "metadata": 
{ 1875 | "id": "d5E4MJgJQnWl", 1876 | "colab_type": "code", 1877 | "colab": {} 1878 | }, 1879 | "source": [ 1880 | "# from tqdm import tqdm" 1881 | ], 1882 | "execution_count": 0, 1883 | "outputs": [] 1884 | }, 1885 | { 1886 | "cell_type": "code", 1887 | "metadata": { 1888 | "colab_type": "code", 1889 | "id": "o43xcHzXjPoZ", 1890 | "colab": {} 1891 | }, 1892 | "source": [ 1893 | "# embeddings_index = {}\n", 1894 | "# f = open('/content/glove.840B.300d.txt')\n", 1895 | "# for line in tqdm(f):\n", 1896 | "# values = line.split()\n", 1897 | "# word = values[0]\n", 1898 | "# try:\n", 1899 | "# coefs = np.asarray(values[1:], dtype='float32')\n", 1900 | "# embeddings_index[word] = coefs\n", 1901 | "# except:\n", 1902 | "# pass\n", 1903 | "# f.close()\n", 1904 | "\n", 1905 | "# print('Found %s word vectors.' % len(embeddings_index))" 1906 | ], 1907 | "execution_count": 0, 1908 | "outputs": [] 1909 | }, 1910 | { 1911 | "cell_type": "code", 1912 | "metadata": { 1913 | "colab_type": "code", 1914 | "id": "RIGmhAF_jPex", 1915 | "colab": {} 1916 | }, 1917 | "source": [ 1918 | "# embedding_matrix = zeros((vocab_size,100))\n", 1919 | "# for word, index in " 1920 | ], 1921 | "execution_count": 0, 1922 | "outputs": [] 1923 | }, 1924 | { 1925 | "cell_type": "code", 1926 | "metadata": { 1927 | "id": "HjzqZkaXjgfd", 1928 | "colab_type": "code", 1929 | "colab": {} 1930 | }, 1931 | "source": [ 1932 | "embeddings_dictionary = dict()\n", 1933 | "glove_file = open('/content/glove.840B.300d.txt', encoding=\"utf8\")" 1934 | ], 1935 | "execution_count": 0, 1936 | "outputs": [] 1937 | }, 1938 | { 1939 | "cell_type": "code", 1940 | "metadata": { 1941 | "id": "xomKxUkelrVn", 1942 | "colab_type": "code", 1943 | "colab": { 1944 | "base_uri": "https://localhost:8080/", 1945 | "height": 59 1946 | }, 1947 | "outputId": "8bb0ae40-fb51-4e65-809e-f96cbd757a6e" 1948 | }, 1949 | "source": [ 1950 | "print(records)" 1951 | ], 1952 | "execution_count": 116, 1953 | "outputs": [ 1954 | { 1955 | "output_type": 
"stream", 1956 | "text": [ 1957 | "['zulchzulu', '-0.07969', '-0.22905', '0.80366', '-0.78865', '-0.40567', '-0.15716', '-0.42302', '0.64081', '-0.13215', '-1.4109', '0.73118', '-0.37391', '-0.36422', '0.024199', '-0.24359', '1.014', '0.00065176', '-0.89537', '0.8054', '-0.073101', '0.20257', '0.59553', '-0.0034971', '-0.28126', '0.58631', '-0.17115', '0.12428', '0.53392', '0.48289', '0.36989', '-0.091151', '-0.23874', '0.38864', '-0.16403', '-0.85745', '0.19', '0.4145', '0.35958', '-0.018726', '0.55213', '-0.0091331', '-0.48204', '-0.64685', '0.61736', '-0.27128', '0.13459', '0.94729', '-0.42939', '-0.32462', '-0.088466', '0.37337', '0.29062', '-0.0074411', '0.1984', '-0.42686', '-0.071294', '-0.043443', '-0.0033026', '-0.10519', '0.20885', '-0.30217', '0.27366', '-0.35602', '-0.89143', '0.28561', '-0.11656', '0.2246', '-0.021561', '-0.016219', '-0.96267', '0.85239', '-1.2714', '-0.8429', '0.25947', '0.10074', '-0.1253', '0.016124', '0.12488', '0.16413', '-0.46028', '0.32825', '-0.51367', '-0.16456', '0.5641', '0.092562', '1.1196', '0.15936', '0.66175', '-1.0068', '-0.086162', '0.17847', '-0.46644', '0.12672', '0.31786', '-0.25533', '0.73502', '-0.010719', '0.0054341', '-0.013019', '0.60229', '0.061846', '0.061026', '0.75747', '0.6877', '0.026887', '-0.36918', '-0.17628', '-0.77614', '-0.69935', '0.53906', '0.097763', '-0.23648', '-0.23217', '-0.35618', '0.049942', '-0.048307', '-0.71276', '-0.80498', '-0.00497', '-0.14976', '0.51274', '-0.30659', '0.12332', '0.46295', '0.51516', '-1.1373', '-0.57126', '0.5135', '0.29104', '-0.86347', '0.44613', '-0.81658', '-0.29672', '-0.71397', '-0.33071', '-0.12573', '-0.16253', '0.31273', '-0.59367', '-0.03315', '1.2405', '0.26456', '0.10989', '-0.33882', '0.26638', '0.049057', '-0.36959', '-0.40592', '-0.22758', '0.6045', '-0.37629', '0.24219', '0.27733', '-0.63102', '0.26867', '0.077098', '0.41419', '-0.12265', '-0.10442', '0.6554', '-0.28348', '0.01612', '-0.11086', '0.38989', '0.98121', '1.3837', '-0.48673', '0.19253', 
'-0.83225', '-0.61103', '-0.13101', '0.010166', '0.21825', '0.80634', '0.48111', '-0.31594', '0.10402', '-0.60965', '-0.42259', '-0.26895', '-0.43522', '0.62865', '0.091043', '-0.059981', '0.28502', '-0.31621', '0.036937', '0.13772', '-0.30015', '0.37415', '0.33253', '-0.14745', '0.1421', '-0.41723', '-0.088119', '-0.52391', '0.019749', '1.0092', '0.6747', '-0.70984', '-0.44131', '0.019539', '-0.1318', '-0.11064', '-0.57117', '0.14544', '-0.54714', '-0.24794', '0.64769', '0.083943', '0.67539', '0.63883', '0.00097277', '0.70608', '-0.19377', '-0.59744', '0.4864', '-0.056739', '0.26321', '0.06788', '-0.12797', '0.072588', '-0.20607', '0.28131', '0.80015', '-0.46605', '0.19531', '-0.36227', '-0.20949', '-0.11307', '0.4254', '0.10286', '-0.28195', '-0.1157', '0.47033', '0.31709', '0.33137', '-0.14531', '-0.10918', '-0.23374', '-0.19897', '0.12693', '-0.03747', '-0.45309', '-0.34972', '-0.3382', '0.70407', '-0.1055', '0.78543', '0.034963', '-0.68759', '0.74536', '0.025425', '-0.20919', '-0.0022691', '-0.90022', '-0.71534', '0.6525', '-0.10571', '-0.49702', '-0.38476', '-0.39235', '0.1116', '-0.20217', '-0.43163', '0.42698', '0.20545', '0.4036', '-0.86946', '0.57366', '-0.13683', '0.65796', '0.61283', '0.27316', '-0.73551', '-0.70123', '-0.39056', '-0.43813', '-0.32104', '-0.61864', '-0.74312', '-0.49329', '-0.70878', '-0.35697', '0.79095', '0.62299', '-0.36023', '0.66178', '-0.54589', '0.10604', '0.64657', '-0.41591', '0.1424', '-0.051749', '0.38925', '-0.20522', '0.26878', '-0.083561', '0.48532', '-0.7313']\n" 1958 | ], 1959 | "name": "stdout" 1960 | } 1961 | ] 1962 | }, 1963 | { 1964 | "cell_type": "code", 1965 | "metadata": { 1966 | "id": "TnFDD1_4mv5V", 1967 | "colab_type": "code", 1968 | "colab": {} 1969 | }, 1970 | "source": [ 1971 | "from keras.preprocessing.text import Tokenizer\n", 1972 | "word_tokenizer = Tokenizer()\n", 1973 | "word_tokenizer.fit_on_texts(data['title'])" 1974 | ], 1975 | "execution_count": 0, 1976 | "outputs": [] 1977 | }, 1978 | { 1979 | 
"cell_type": "code", 1980 | "metadata": { 1981 | "id": "0B7nGk5qjqGX", 1982 | "colab_type": "code", 1983 | "colab": {} 1984 | }, 1985 | "source": [ 1986 | "for line in glove_file:\n", 1987 | " records = line.split()\n", 1988 | " word = records[0]\n", 1989 | " try:\n", 1990 | " vector_dimensions = np.asarray(records[1:], dtype='float32')\n", 1991 | " except Exception as e:\n", 1992 | " pass\n", 1993 | " embeddings_dictionary [word] = vector_dimensions\n", 1994 | "\n", 1995 | "glove_file.close()" 1996 | ], 1997 | "execution_count": 0, 1998 | "outputs": [] 1999 | }, 2000 | { 2001 | "cell_type": "code", 2002 | "metadata": { 2003 | "id": "DhMvC66kjqB8", 2004 | "colab_type": "code", 2005 | "colab": {} 2006 | }, 2007 | "source": [ 2008 | "embedding_matrix = np.zeros((vocab_size, 300))\n", 2009 | "for word, index in word_tokenizer.word_index.items():\n", 2010 | " embedding_vector = embeddings_dictionary.get(word)\n", 2011 | " if embedding_vector is not None:\n", 2012 | " try:\n", 2013 | " embedding_matrix[index] = embedding_vector\n", 2014 | " except:\n", 2015 | " pass" 2016 | ], 2017 | "execution_count": 0, 2018 | "outputs": [] 2019 | }, 2020 | { 2021 | "cell_type": "code", 2022 | "metadata": { 2023 | "id": "jY8UsJuEpNjc", 2024 | "colab_type": "code", 2025 | "colab": {} 2026 | }, 2027 | "source": [ 2028 | "from keras.preprocessing.sequence import pad_sequences\n", 2029 | "word_tokenizer = Tokenizer()\n", 2030 | "embedded_sentences = word_tokenizer.texts_to_sequences(data['title'])\n", 2031 | "padded_sentences = pad_sequences(embedded_sentences, max_seq, padding='post')" 2032 | ], 2033 | "execution_count": 0, 2034 | "outputs": [] 2035 | }, 2036 | { 2037 | "cell_type": "code", 2038 | "metadata": { 2039 | "id": "kGDtNeeVjp4m", 2040 | "colab_type": "code", 2041 | "colab": {} 2042 | }, 2043 | "source": [ 2044 | "from keras.layers import GlobalMaxPooling1D, Conv1D, MaxPooling1D, Flatten, Bidirectional, SpatialDropout1D\n", 2045 | "from keras.layers.recurrent import LSTM, 
GRU\n", 2046 | "from keras.layers.core import Dense, Activation, Dropout\n", 2047 | "from keras.layers.embeddings import Embedding\n", 2048 | "from keras.layers.normalization import BatchNormalization\n", 2049 | "model = Sequential()\n", 2050 | "embedding_layer = Embedding(vocab_size, 300, weights=[embedding_matrix], input_length=max_seq, trainable=False)\n", 2051 | "\n", 2052 | "# model.add(Bidirectional(LSTM(300, dropout=0.3, recurrent_dropout=0.3)))\n", 2053 | "\n", 2054 | "model.add(Dense(1024, activation='relu'))\n", 2055 | "model.add(Dropout(0.8))\n", 2056 | "\n", 2057 | "model.add(Dense(1024, activation='relu'))\n", 2058 | "model.add(Dropout(0.8))\n", 2059 | "\n", 2060 | "model.add(Dense(3))\n", 2061 | "model.add(Activation('softmax'))\n", 2062 | "model.compile(loss='categorical_crossentropy', optimizer='adam')" 2063 | ], 2064 | "execution_count": 0, 2065 | "outputs": [] 2066 | }, 2067 | { 2068 | "cell_type": "code", 2069 | "metadata": { 2070 | "id": "N57iNDDRr2JB", 2071 | "colab_type": "code", 2072 | "colab": { 2073 | "base_uri": "https://localhost:8080/", 2074 | "height": 1000 2075 | }, 2076 | "outputId": "0e264de9-6a3d-483c-9df3-5ce0d82c6f9f" 2077 | }, 2078 | "source": [ 2079 | "model.fit(padded_sentences, data['publisher'],epochs=100, verbose = 1)" 2080 | ], 2081 | "execution_count": 139, 2082 | "outputs": [ 2083 | { 2084 | "output_type": "stream", 2085 | "text": [ 2086 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3733: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", 2087 | "Instructions for updating:\n", 2088 | "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n", 2089 | "WARNING:tensorflow:Large dropout rate: 0.8 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. 
Please ensure that this is intended.\n", 2090 | "WARNING:tensorflow:Large dropout rate: 0.8 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.\n" 2091 | ], 2092 | "name": "stdout" 2093 | }, 2094 | { 2095 | "output_type": "error", 2096 | "ename": "ValueError", 2097 | "evalue": "ignored", 2098 | "traceback": [ 2099 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 2100 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 2101 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpadded_sentences\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'publisher'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 2102 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[1;32m 1087\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[0mclass_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mclass_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1089\u001b[0;31m 
batch_size=batch_size)\n\u001b[0m\u001b[1;32m 1090\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1091\u001b[0m \u001b[0;31m# Prepare validation data.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 2103 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m 706\u001b[0m \u001b[0;34m'either a single '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 707\u001b[0m \u001b[0;34m'array or a list of arrays. '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 708\u001b[0;31m 'You passed: y=' + str(y))\n\u001b[0m\u001b[1;32m 709\u001b[0m \u001b[0;31m# Typecheck that all inputs are *either* value *or* symbolic.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 710\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0my\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 2104 | "\u001b[0;31mValueError\u001b[0m: Please provide as model targets either a single array or a list of arrays. You passed: y=0 291\n1 8726\n2 2818\n3 3284\n4 1910\n5 9506\n6 1647\n7 9451\n8 8412\n9 2254\n10 3497\n11 7110\n12 7122\n13 9451\n14 1910\n15 4137\n16 5293\n17 1918\n18 5792\n19 9096\n20 1830\n21 8302\n22 7655\n23 1942\n24 2877\n25 4588\n26 4588\n27 8299\n28 1079\n29 9047\n ... 
\n19970 8256\n19971 1765\n19972 8726\n19973 2582\n19974 6147\n19975 1622\n19976 656\n19977 8164\n19978 8121\n19979 8256\n19980 755\n19981 35\n19982 7932\n19983 1758\n19984 6044\n19985 3484\n19986 4948\n19987 3283\n19988 5492\n19989 349\n19990 6950\n19991 3561\n19992 7905\n19993 5254\n19994 5011\n19995 9451\n19996 8524\n19997 8481\n19998 4883\n19999 7110\nName: publisher, Length: 20000, dtype: int16" 2105 | ] 2106 | } 2107 | ] 2108 | }, 2109 | { 2110 | "cell_type": "code", 2111 | "metadata": { 2112 | "id": "jSwhLEMIAw_8", 2113 | "colab_type": "code", 2114 | "colab": { 2115 | "base_uri": "https://localhost:8080/", 2116 | "height": 374 2117 | }, 2118 | "outputId": "824e6465-58a8-43a7-b8b0-d230b519d429" 2119 | }, 2120 | "source": [ 2121 | "\n", 2122 | "print(model.summary())" 2123 | ], 2124 | "execution_count": 140, 2125 | "outputs": [ 2126 | { 2127 | "output_type": "stream", 2128 | "text": [ 2129 | "Model: \"sequential_12\"\n", 2130 | "_________________________________________________________________\n", 2131 | "Layer (type) Output Shape Param # \n", 2132 | "=================================================================\n", 2133 | "dense_20 (Dense) (None, 1024) 52224 \n", 2134 | "_________________________________________________________________\n", 2135 | "dropout_11 (Dropout) (None, 1024) 0 \n", 2136 | "_________________________________________________________________\n", 2137 | "dense_21 (Dense) (None, 1024) 1049600 \n", 2138 | "_________________________________________________________________\n", 2139 | "dropout_12 (Dropout) (None, 1024) 0 \n", 2140 | "_________________________________________________________________\n", 2141 | "dense_22 (Dense) (None, 3) 3075 \n", 2142 | "_________________________________________________________________\n", 2143 | "activation_6 (Activation) (None, 3) 0 \n", 2144 | "=================================================================\n", 2145 | "Total params: 1,104,899\n", 2146 | "Trainable params: 1,104,899\n", 2147 | 
"Non-trainable params: 0\n", 2148 | "_________________________________________________________________\n", 2149 | "None\n" 2150 | ], 2151 | "name": "stdout" 2152 | } 2153 | ] 2154 | }, 2155 | { 2156 | "cell_type": "code", 2157 | "metadata": { 2158 | "id": "e9lKSYndqLFa", 2159 | "colab_type": "code", 2160 | "colab": {} 2161 | }, 2162 | "source": [ 2163 | "len(data['publisher'])" 2164 | ], 2165 | "execution_count": 0, 2166 | "outputs": [] 2167 | }, 2168 | { 2169 | "cell_type": "code", 2170 | "metadata": { 2171 | "id": "5uXF8gZVqPhB", 2172 | "colab_type": "code", 2173 | "colab": {} 2174 | }, 2175 | "source": [ 2176 | "\n" 2177 | ], 2178 | "execution_count": 0, 2179 | "outputs": [] 2180 | }, 2181 | { 2182 | "cell_type": "code", 2183 | "metadata": { 2184 | "id": "1rp-uIOxo1VY", 2185 | "colab_type": "code", 2186 | "colab": { 2187 | "base_uri": "https://localhost:8080/", 2188 | "height": 330 2189 | }, 2190 | "outputId": "30f78d8f-7bc7-4a5a-a71e-ee86165e7986" 2191 | }, 2192 | "source": [ 2193 | "" 2194 | ], 2195 | "execution_count": 124, 2196 | "outputs": [ 2197 | { 2198 | "output_type": "error", 2199 | "ename": "ValueError", 2200 | "evalue": "ignored", 2201 | "traceback": [ 2202 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 2203 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 2204 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpadded_sentences\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'publisher'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 2205 | 
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[1;32m 1087\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[0mclass_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mclass_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1089\u001b[0;31m batch_size=batch_size)\n\u001b[0m\u001b[1;32m 1090\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1091\u001b[0m \u001b[0;31m# Prepare validation data.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 2206 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m 793\u001b[0m \u001b[0mfeed_output_shapes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 794\u001b[0m \u001b[0mcheck_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Don't enforce the batch size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 795\u001b[0;31m exception_prefix='target')\n\u001b[0m\u001b[1;32m 796\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 797\u001b[0m \u001b[0;31m# Generate sample-wise weight values given the `sample_weight` and\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 2207 | 
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/keras/engine/training_utils.py\u001b[0m in \u001b[0;36mstandardize_input_data\u001b[0;34m(data, names, shapes, check_batch_axis, exception_prefix)\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0;34m': expected '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' to have shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' but got array with shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 141\u001b[0;31m str(data_shape))\n\u001b[0m\u001b[1;32m 142\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 2208 | "\u001b[0;31mValueError\u001b[0m: Error when checking target: expected dense_3 to have shape (26,) but got array with shape (1,)" 2209 | ] 2210 | } 2211 | ] 2212 | } 2213 | ] 2214 | } -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This program converts all the comments in your code to a README.md file and saves a lot of your time provided you write comments in your code. It is obvious how easy it is to actually generate the README.md file and save your time.Using the argparse module to get the file path. Note that to run the file 3 | ```bash 4 | python main.py -p 5 | ``` 6 | 7 | ''' 8 | #The readme for this program is also generated by the code itself. 
import argparse


#adds arguments
parser = argparse.ArgumentParser()
parser.add_argument("-p", type=str, help="enter the file path")
args = parser.parse_args()
# Read all lines up front; the context manager closes the handle afterwards.
with open(args.p, 'r') as file:
    f = file.readlines()


def blockComm():
    """Collect the text enclosed by top-level quote-delimited blocks.

    Every line of ``f`` that starts with a single quote is treated as a
    block delimiter. Delimiters are paired in order of appearance (first
    with second, third with fourth, ...); an unpaired trailing delimiter
    is ignored.

    Returns:
        list[str]: for each delimiter pair, the lines strictly between the
        two delimiters joined with ``'\\n'`` and stripped of surrounding
        whitespace.
    """
    fip = [idx for idx, line in enumerate(f) if line.startswith("'")]
    # Even positions open a block, odd positions close it; zip drops a
    # dangling opener that has no matching closer.
    pairs = zip(fip[0::2], fip[1::2])
    return ['\n'.join(f[start + 1:end]).strip() for start, end in pairs]
'''
For now this program only supports python.
'''
# find the side comments and create contents list
def content_sidecomm():
    """Split the ``#`` comments found in ``f`` into headings and side notes.

    A line whose first ``#`` is at column 0 contributes a "contents" entry
    (the text after the ``#``, upper-cased). A line whose first ``#`` is
    further right contributes a "side comment" entry (the text after that
    ``#``, capitalized). Lines without ``#`` are skipped.

    Returns:
        list: ``[contents, sidecomm]``, two lists of strings.
    """
    contents = []
    sidecomm = []
    for line in f:
        finder = line.find('#')
        if finder == -1:
            continue
        if finder == 0:
            contents.append(line[1:].upper())
        else:
            sidecomm.append(line[finder + 1:].capitalize())

    return [contents, sidecomm]

#Format the file and save
def format_and_save():
    """Write the generated document to ``README.md`` in the working directory.

    The file receives, in order: the title, the numbered contents list, the
    block comments as quoted paragraphs, and the side comments as bullets.
    """
    #initial file
    cs = content_sidecomm()
    bc = blockComm()
    # The context manager closes the file even if a write fails.
    with open('README.md', 'w+') as fin_out: #output file
        fin_out.write(' README\n')
        fin_out.write('Major contents\n')
        for number, entry in enumerate(cs[0], start=1):
            fin_out.write('{}. {}\n'.format(number, entry))
        fin_out.write('Block comments\n')
        for block in bc:
            fin_out.write('>{}\n'.format(block))
        fin_out.write('Side comments\n')
        for comment in cs[1]:
            fin_out.write('- {}\n'.format(comment))


# Main function
format_and_save()