├── .gitignore ├── README.md ├── SmartStoplist.txt ├── __init__.py ├── example.png ├── group contributions ├── .gitmodules ├── Isaac │ ├── DataFrames │ │ └── RedditData_Nov-06-2017 │ ├── PRAW.ipynb │ ├── READme.txt │ ├── SmartStoplist.txt │ ├── flaskTest │ │ ├── READme.txt │ │ └── flaskTest │ │ │ ├── SmartStoplist.txt │ │ │ ├── __init__.py │ │ │ ├── flask_session │ │ │ ├── 606f784a4e50e2c62bd2988e5a5378ea │ │ │ ├── d149ceaaaf93012427d506eb33b1a8fb │ │ │ └── dc1127ffee6208bf37499f62ca026a4b │ │ │ ├── main.py │ │ │ ├── rake.py │ │ │ ├── run.py │ │ │ └── templates │ │ │ ├── _navbar.html │ │ │ ├── docs_findings.html │ │ │ ├── docs_team.html │ │ │ ├── docs_tools.html │ │ │ ├── examples.html │ │ │ ├── home.html │ │ │ ├── layout.html │ │ │ └── program.html │ └── rake.py ├── bvacek2 │ ├── praw_test.ipynb │ ├── praw_test.py │ ├── reddit.py │ └── sqlTest │ │ ├── SQLTest.ipynb │ │ ├── Untitled.ipynb │ │ ├── reddit.db │ │ └── reddit_data.csv ├── garyab2 │ ├── PRAW_Keyword_Extraction_Visualization.ipynb │ ├── PRAW_test.ipynb │ └── dict_to_graph.py └── testFolder │ ├── CNN+Articles+Test.ipynb │ ├── Keyword_Extraction_Jayam.ipynb │ ├── Keyword_Extraction_Runtime_Testing.ipynb │ ├── Newspaper + RAKE Testing.ipynb │ ├── PRAW_test-Copy1.ipynb │ ├── SmartStoplist.txt │ ├── data │ ├── docs │ │ ├── fao_test │ │ │ ├── v9094e.key │ │ │ ├── v9094e.txt │ │ │ ├── v9570e.key │ │ │ ├── v9570e.txt │ │ │ ├── v9650e.key │ │ │ ├── v9650e.txt │ │ │ ├── w1750e.key │ │ │ ├── w1750e.txt │ │ │ ├── w2167e.key │ │ │ ├── w2167e.txt │ │ │ ├── w3727e.key │ │ │ ├── w3727e.txt │ │ │ ├── w4442e.key │ │ │ ├── w4442e.txt │ │ │ ├── w4979e.key │ │ │ ├── w4979e.txt │ │ │ ├── w5367e.key │ │ │ ├── w5367e.txt │ │ │ ├── w5631e.key │ │ │ ├── w5631e.txt │ │ │ ├── w6206e.key │ │ │ ├── w6206e.txt │ │ │ ├── w6864e.key │ │ │ ├── w6864e.txt │ │ │ ├── w7295e.key │ │ │ ├── w7295e.txt │ │ │ ├── w7415e.key │ │ │ ├── w7415e.txt │ │ │ ├── w7483e.key │ │ │ ├── w7483e.txt │ │ │ ├── w7519e.key │ │ │ └── w7519e.txt │ │ ├── fao_train │ │ │ ├── ab387e.key │ │ │ ├── ab387e.txt │ │ │ ├── ad692e.key │ │ │ ├── ad692e.txt │ │ │ ├── ae050e.key │ │ │ ├── ae050e.txt │ │ │ ├── ae190e.key │ │ │ ├── ae190e.txt │ │ │ ├── ae385e.key │ │ │ ├── ae385e.txt │ │ │ ├── faobetf_fb29fe.key │ │ │ ├── faobetf_fb29fe.txt │ │ │ ├── faobetf_fb35be.key │ │ │ ├── faobetf_fb35be.txt │ │ │ ├── foodfirst_ff08ne.key │ │ │ ├── foodfirst_ff08ne.txt │ │ │ ├── gtz_g16foe.key │ │ │ ├── gtz_g16foe.txt │ │ │ ├── gtz_g24ine.key │ │ │ ├── gtz_g24ine.txt │ │ │ ├── gtz_g32bie.key │ │ │ ├── gtz_g32bie.txt │ │ │ ├── nri_nr12me.key │ │ │ ├── nri_nr12me.txt │ │ │ ├── t0178e.key │ │ │ ├── t0178e.txt │ │ │ ├── t0512e.key │ │ │ ├── t0512e.txt │ │ │ ├── t0522e.key │ │ │ ├── t0522e.txt │ │ │ ├── t0715e.key │ │ │ ├── t0715e.txt │ │ │ ├── v3600e.key │ │ │ ├── v3600e.txt │ │ │ ├── v5030e.key │ │ │ ├── v5030e.txt │ │ │ ├── v6718e.key │ │ │ ├── v6718e.txt │ │ │ ├── v8047e.key │ │ │ ├── v8047e.txt │ │ │ ├── v9909e.key │ │ │ ├── v9909e.txt │ │ │ ├── w2227e.key │ │ │ ├── w2227e.txt │ │ │ ├── w3241e.key │ │ │ ├── w3241e.txt │ │ │ ├── w3618e.key │ │ │ ├── w3618e.txt │ │ │ ├── w4233e.key │ │ │ ├── w4233e.txt │ │ │ ├── w4338e.key │ │ │ ├── w4338e.txt │ │ │ ├── w6679e.key │ │ │ ├── w6679e.txt │ │ │ ├── w7407e.key │ │ │ ├── w7407e.txt │ │ │ ├── w7714e.key │ │ │ ├── w7714e.txt │ │ │ ├── w8302e.key │ │ │ ├── w8302e.txt │ │ │ ├── w8594e.key │ │ │ ├── w8594e.txt │ │ │ ├── w9900e.key │ │ │ ├── w9900e.txt │ │ │ ├── wb_wb01oe.key │ │ │ ├── wb_wb01oe.txt │ │ │ ├── wb_wb41me.key │ │ │ ├── wb_wb41me.txt │ │ │ ├── winrock_wi10ce.key │ │ 
│ ├── winrock_wi10ce.txt │ │ │ ├── x0176e.key │ │ │ ├── x0176e.txt │ │ │ ├── x0185e.key │ │ │ ├── x0185e.txt │ │ │ ├── x0198e.key │ │ │ ├── x0198e.txt │ │ │ ├── x0199e.key │ │ │ ├── x0199e.txt │ │ │ ├── x0451e.key │ │ │ ├── x0451e.txt │ │ │ ├── x5082e.key │ │ │ ├── x5082e.txt │ │ │ ├── x5352e.key │ │ │ ├── x5352e.txt │ │ │ ├── x5644e.key │ │ │ ├── x5644e.txt │ │ │ ├── x5668e.key │ │ │ ├── x5668e.txt │ │ │ ├── x5676e.key │ │ │ ├── x5676e.txt │ │ │ ├── x5681e.key │ │ │ ├── x5681e.txt │ │ │ ├── x6759e.key │ │ │ ├── x6759e.txt │ │ │ ├── x6802e.key │ │ │ ├── x6802e.txt │ │ │ ├── y0500e.key │ │ │ ├── y0500e.txt │ │ │ ├── y1128e.key │ │ │ └── y1128e.txt │ │ ├── french │ │ │ └── frwikinews-test-1000.txt │ │ └── spanish │ │ │ └── text.txt │ ├── models │ │ └── readme.txt │ └── vocabulary │ │ └── agrovoc_en.rdf.gz │ ├── dict_to_graph.py │ ├── evaluate_rake.py │ ├── rake.py │ ├── test.py │ ├── testFolder2 │ └── thisIsATextFile.txt │ ├── test_data.py │ └── times.csv ├── libraries.md ├── main.py ├── rake.py ├── run.py └── templates ├── _navbar.html ├── docs_team.html ├── docs_tools.html ├── home.html ├── layout.html └── program.html /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Reddit_Unlocked: A Reddit-cal Analysis of The Web 2 | A program for analyzing & visualizing data pulled from the Reddit API and scraped from news articles. 3 | 4 | 5 | ### What is Reddit_Unlocked? 6 | 7 | Reddit_Unlocked is a program based primarily on Python that analyzes & visualizes data pulled from the Reddit API and scraped from news articles. Our program implements various libraries, such as Newspaper and PRAW, to gather insight about the popular types of content and trends on Reddit. 
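To make that pipeline concrete, here is a minimal sketch of the PRAW + Newspaper flow (an illustration only, not the project's exact code, which lives in `run.py`; the credential strings are placeholders, and the thread limit is kept small for brevity):

```python
import praw
from newspaper import Article

# Placeholder credentials; register a script app at reddit.com/prefs/apps.
reddit = praw.Reddit(client_id='YOUR_CLIENT_ID',
                     client_secret='YOUR_CLIENT_SECRET',
                     user_agent='reddit_unlocked demo')

# Pull the top threads of the past week, then scrape any linked articles for text.
for thread in reddit.subreddit('news').top('week', limit=10):
    print(thread.title, thread.score, thread.upvote_ratio)
    if thread.is_self or thread.is_video:
        continue  # only external links point at scrapeable news articles
    article = Article(thread.url)
    try:
        article.download()
        article.parse()
        article.nlp()  # populates article.keywords
    except Exception:
        continue  # skip pages Newspaper cannot fetch or parse
    print(article.keywords)
```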
8 | 9 | 10 | ### The Reddit_Unlocked Group 11 | 12 | Isaac - Data Extraction, Web Application (Flask + Frontend) 13 | 14 | Jayam - Data Analysis (Keyword Extraction + Sentiment Analysis) 15 | 16 | Gary - Data Visualization (Comprehensive Visual Overviews of Trend Analyses) 17 | 18 | 19 | ### The Why 20 | 21 | According to Alexa, Reddit is the 5th most visited website in the United States (and 8th in the world). As a social news aggregation, web content rating, and discussion website, Reddit represents a very significant portion of America's internet users. As evidenced by Reddit's longer-than-typical per-user visit time of almost 16 minutes per day (compared to 10 minutes for Facebook and 6 for Twitter), Reddit tends to attract users who are more likely to spend their time engaging in long, thorough discussions to express their opinions and ideas. 22 | 23 | On top of all this, Reddit's voting and commenting features for every discussion "thread" enable each sub-community (subreddit) to aggregate not only news but also its users' sentiments toward and reactions to the wide range of topics in these threads. In short, Reddit is a goldmine for gathering and analyzing data on how the internet thinks and reacts to real-life issues and events. For our project, we wanted to tap into this goldmine to gather insights about the popular trends that permeate the Reddit population and its sub-communities. 24 | 25 | 26 | ### The How 27 | 28 | Thanks to Reddit's voting feature, the best first step to understanding a subreddit's prevalent and popular topics is to look at its most upvoted threads in a given timeframe (day, week, month, year, or all time). For our project, we chose to analyze the top ~50 threads of the past week in a subreddit; after testing other timeframes, we concluded that a week best balances recency and popularity. Reddit_Unlocked therefore pulls data about these top weekly threads from the Reddit API (upvote %, post title, relevant links, etc.). 29 | 30 | The collected thread data serves as the basis for our data analysis. A web-scraping step also follows the news-article links in Reddit posts to gather more text. The keyword extraction and sentiment analysis algorithms then run on the compiled text to produce a list of the most frequently occurring keywords, each with corresponding sentiment values (polarity and subjectivity) attached. Finally, a graph that accounts for all of the resulting data (frequency, sentiment, upvotes, etc.) is generated for the user. 31 | 32 | ![Example of Trend Graph](https://github.com/RedditUnlocked/Reddit_Unlocked/blob/master/example.png?raw=true) 33 | 34 | Each colored bubble can be hovered over to show which keyword it represents. Since the above is only a static image, the interactive version of this example plot can be viewed [here.](https://plot.ly/~reddit_unlocked/100) 35 | -------------------------------------------------------------------------------- /SmartStoplist.txt: -------------------------------------------------------------------------------- 1 | #stop word list from SMART (Salton,1971).
Available at ftp://ftp.cs.cornell.edu/pub/smart/english.stop 2 | a 3 | a's 4 | able 5 | about 6 | above 7 | according 8 | accordingly 9 | across 10 | actually 11 | after 12 | afterwards 13 | again 14 | against 15 | ain't 16 | all 17 | allow 18 | allows 19 | almost 20 | alone 21 | along 22 | already 23 | also 24 | although 25 | always 26 | am 27 | among 28 | amongst 29 | an 30 | and 31 | another 32 | any 33 | anybody 34 | anyhow 35 | anyone 36 | anything 37 | anyway 38 | anyways 39 | anywhere 40 | apart 41 | appear 42 | appreciate 43 | appropriate 44 | are 45 | aren't 46 | around 47 | as 48 | aside 49 | ask 50 | asking 51 | associated 52 | at 53 | available 54 | away 55 | awfully 56 | b 57 | be 58 | became 59 | because 60 | become 61 | becomes 62 | becoming 63 | been 64 | before 65 | beforehand 66 | behind 67 | being 68 | believe 69 | below 70 | beside 71 | besides 72 | best 73 | better 74 | between 75 | beyond 76 | both 77 | brief 78 | but 79 | by 80 | c 81 | c'mon 82 | c's 83 | came 84 | can 85 | can't 86 | cannot 87 | cant 88 | cause 89 | causes 90 | certain 91 | certainly 92 | changes 93 | clearly 94 | co 95 | com 96 | come 97 | comes 98 | concerning 99 | consequently 100 | consider 101 | considering 102 | contain 103 | containing 104 | contains 105 | corresponding 106 | could 107 | couldn't 108 | course 109 | currently 110 | d 111 | definitely 112 | described 113 | despite 114 | did 115 | didn't 116 | different 117 | do 118 | does 119 | doesn't 120 | doing 121 | don't 122 | done 123 | down 124 | downwards 125 | during 126 | e 127 | each 128 | edu 129 | eg 130 | eight 131 | either 132 | else 133 | elsewhere 134 | enough 135 | entirely 136 | especially 137 | et 138 | etc 139 | even 140 | ever 141 | every 142 | everybody 143 | everyone 144 | everything 145 | everywhere 146 | ex 147 | exactly 148 | example 149 | except 150 | f 151 | far 152 | few 153 | fifth 154 | first 155 | five 156 | followed 157 | following 158 | follows 159 | for 160 | former 161 | formerly 162 | forth 163 | four 164 | from 165 | further 166 | furthermore 167 | g 168 | get 169 | gets 170 | getting 171 | given 172 | gives 173 | go 174 | goes 175 | going 176 | gone 177 | got 178 | gotten 179 | greetings 180 | h 181 | had 182 | hadn't 183 | happens 184 | hardly 185 | has 186 | hasn't 187 | have 188 | haven't 189 | having 190 | he 191 | he's 192 | hello 193 | help 194 | hence 195 | her 196 | here 197 | here's 198 | hereafter 199 | hereby 200 | herein 201 | hereupon 202 | hers 203 | herself 204 | hi 205 | him 206 | himself 207 | his 208 | hither 209 | hopefully 210 | how 211 | howbeit 212 | however 213 | i 214 | i'd 215 | i'll 216 | i'm 217 | i've 218 | ie 219 | if 220 | ignored 221 | immediate 222 | in 223 | inasmuch 224 | inc 225 | indeed 226 | indicate 227 | indicated 228 | indicates 229 | inner 230 | insofar 231 | instead 232 | into 233 | inward 234 | is 235 | isn't 236 | it 237 | it'd 238 | it'll 239 | it's 240 | its 241 | itself 242 | j 243 | just 244 | k 245 | keep 246 | keeps 247 | kept 248 | know 249 | knows 250 | known 251 | l 252 | last 253 | lately 254 | later 255 | latter 256 | latterly 257 | least 258 | less 259 | lest 260 | let 261 | let's 262 | like 263 | liked 264 | likely 265 | little 266 | look 267 | looking 268 | looks 269 | ltd 270 | m 271 | mainly 272 | many 273 | may 274 | maybe 275 | me 276 | mean 277 | meanwhile 278 | merely 279 | might 280 | more 281 | moreover 282 | most 283 | mostly 284 | much 285 | must 286 | my 287 | myself 288 | n 289 | name 290 | namely 291 | nd 292 | near 293 | nearly 294 | 
necessary 295 | need 296 | needs 297 | neither 298 | never 299 | nevertheless 300 | new 301 | next 302 | nine 303 | no 304 | nobody 305 | non 306 | none 307 | noone 308 | nor 309 | normally 310 | not 311 | nothing 312 | novel 313 | now 314 | nowhere 315 | o 316 | obviously 317 | of 318 | off 319 | often 320 | oh 321 | ok 322 | okay 323 | old 324 | on 325 | once 326 | one 327 | ones 328 | only 329 | onto 330 | or 331 | other 332 | others 333 | otherwise 334 | ought 335 | our 336 | ours 337 | ourselves 338 | out 339 | outside 340 | over 341 | overall 342 | own 343 | p 344 | particular 345 | particularly 346 | per 347 | perhaps 348 | placed 349 | please 350 | plus 351 | possible 352 | presumably 353 | probably 354 | provides 355 | q 356 | que 357 | quite 358 | qv 359 | r 360 | rather 361 | rd 362 | re 363 | really 364 | reasonably 365 | regarding 366 | regardless 367 | regards 368 | relatively 369 | respectively 370 | right 371 | s 372 | said 373 | same 374 | saw 375 | say 376 | saying 377 | says 378 | second 379 | secondly 380 | see 381 | seeing 382 | seem 383 | seemed 384 | seeming 385 | seems 386 | seen 387 | self 388 | selves 389 | sensible 390 | sent 391 | serious 392 | seriously 393 | seven 394 | several 395 | shall 396 | she 397 | should 398 | shouldn't 399 | since 400 | six 401 | so 402 | some 403 | somebody 404 | somehow 405 | someone 406 | something 407 | sometime 408 | sometimes 409 | somewhat 410 | somewhere 411 | soon 412 | sorry 413 | specified 414 | specify 415 | specifying 416 | still 417 | sub 418 | such 419 | sup 420 | sure 421 | t 422 | t's 423 | take 424 | taken 425 | tell 426 | tends 427 | th 428 | than 429 | thank 430 | thanks 431 | thanx 432 | that 433 | that's 434 | thats 435 | the 436 | their 437 | theirs 438 | them 439 | themselves 440 | then 441 | thence 442 | there 443 | there's 444 | thereafter 445 | thereby 446 | therefore 447 | therein 448 | theres 449 | thereupon 450 | these 451 | they 452 | they'd 453 | they'll 454 | they're 455 | they've 456 | think 457 | third 458 | this 459 | thorough 460 | thoroughly 461 | those 462 | though 463 | three 464 | through 465 | throughout 466 | thru 467 | thus 468 | to 469 | together 470 | too 471 | took 472 | toward 473 | towards 474 | tried 475 | tries 476 | truly 477 | try 478 | trying 479 | twice 480 | two 481 | u 482 | un 483 | under 484 | unfortunately 485 | unless 486 | unlikely 487 | until 488 | unto 489 | up 490 | upon 491 | us 492 | use 493 | used 494 | useful 495 | uses 496 | using 497 | usually 498 | uucp 499 | v 500 | value 501 | various 502 | very 503 | via 504 | viz 505 | vs 506 | w 507 | want 508 | wants 509 | was 510 | wasn't 511 | way 512 | we 513 | we'd 514 | we'll 515 | we're 516 | we've 517 | welcome 518 | well 519 | went 520 | were 521 | weren't 522 | what 523 | what's 524 | whatever 525 | when 526 | whence 527 | whenever 528 | where 529 | where's 530 | whereafter 531 | whereas 532 | whereby 533 | wherein 534 | whereupon 535 | wherever 536 | whether 537 | which 538 | while 539 | whither 540 | who 541 | who's 542 | whoever 543 | whole 544 | whom 545 | whose 546 | why 547 | will 548 | willing 549 | wish 550 | with 551 | within 552 | without 553 | won't 554 | wonder 555 | would 556 | would 557 | wouldn't 558 | x 559 | y 560 | yes 561 | yet 562 | you 563 | you'd 564 | you'll 565 | you're 566 | you've 567 | your 568 | yours 569 | yourself 570 | yourselves 571 | z 572 | zero 573 | -------------------------------------------------------------------------------- /__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/__init__.py -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/example.png -------------------------------------------------------------------------------- /group contributions/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "newspaper"] 2 | path = newspaper 3 | url = https://github.com/codelucas/newspaper.git 4 | [submodule "testFolder/newspaper"] 5 | path = testFolder/newspaper 6 | url = https://github.com/codelucas/newspaper.git 7 | [submodule "testFolder/RAKE-tutorial"] 8 | path = testFolder/RAKE-tutorial 9 | url = https://github.com/zelandiya/RAKE-tutorial.git 10 | [submodule "RAKE-tutorial"] 11 | path = RAKE-tutorial 12 | url = https://github.com/zelandiya/RAKE-tutorial.git 13 | -------------------------------------------------------------------------------- /group contributions/Isaac/DataFrames/RedditData_Nov-06-2017: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/Isaac/DataFrames/RedditData_Nov-06-2017 -------------------------------------------------------------------------------- /group contributions/Isaac/READme.txt: -------------------------------------------------------------------------------- 1 | READme for /Isaac/ 2 | 3 | - PRAW.ipynb is the to-be working/final version of the PRAW portion of our code in the reddit_unlocked project. 4 | 5 | - DataFrames is the folder to which PRAW.ipynb will export pandas dataframes containing (daily) reddit/subreddit threads data using pickle. 6 | 7 | - flaskTest is the directory for testing and learning flask for our website. 8 | 9 | -Isaac Park, keonp2 -------------------------------------------------------------------------------- /group contributions/Isaac/SmartStoplist.txt: -------------------------------------------------------------------------------- 1 | #stop word list from SMART (Salton,1971). 
Available at ftp://ftp.cs.cornell.edu/pub/smart/english.stop 2 | a 3 | a's 4 | able 5 | about 6 | above 7 | according 8 | accordingly 9 | across 10 | actually 11 | after 12 | afterwards 13 | again 14 | against 15 | ain't 16 | all 17 | allow 18 | allows 19 | almost 20 | alone 21 | along 22 | already 23 | also 24 | although 25 | always 26 | am 27 | among 28 | amongst 29 | an 30 | and 31 | another 32 | any 33 | anybody 34 | anyhow 35 | anyone 36 | anything 37 | anyway 38 | anyways 39 | anywhere 40 | apart 41 | appear 42 | appreciate 43 | appropriate 44 | are 45 | aren't 46 | around 47 | as 48 | aside 49 | ask 50 | asking 51 | associated 52 | at 53 | available 54 | away 55 | awfully 56 | b 57 | be 58 | became 59 | because 60 | become 61 | becomes 62 | becoming 63 | been 64 | before 65 | beforehand 66 | behind 67 | being 68 | believe 69 | below 70 | beside 71 | besides 72 | best 73 | better 74 | between 75 | beyond 76 | both 77 | brief 78 | but 79 | by 80 | c 81 | c'mon 82 | c's 83 | came 84 | can 85 | can't 86 | cannot 87 | cant 88 | cause 89 | causes 90 | certain 91 | certainly 92 | changes 93 | clearly 94 | co 95 | com 96 | come 97 | comes 98 | concerning 99 | consequently 100 | consider 101 | considering 102 | contain 103 | containing 104 | contains 105 | corresponding 106 | could 107 | couldn't 108 | course 109 | currently 110 | d 111 | definitely 112 | described 113 | despite 114 | did 115 | didn't 116 | different 117 | do 118 | does 119 | doesn't 120 | doing 121 | don't 122 | done 123 | down 124 | downwards 125 | during 126 | e 127 | each 128 | edu 129 | eg 130 | eight 131 | either 132 | else 133 | elsewhere 134 | enough 135 | entirely 136 | especially 137 | et 138 | etc 139 | even 140 | ever 141 | every 142 | everybody 143 | everyone 144 | everything 145 | everywhere 146 | ex 147 | exactly 148 | example 149 | except 150 | f 151 | far 152 | few 153 | fifth 154 | first 155 | five 156 | followed 157 | following 158 | follows 159 | for 160 | former 161 | formerly 162 | forth 163 | four 164 | from 165 | further 166 | furthermore 167 | g 168 | get 169 | gets 170 | getting 171 | given 172 | gives 173 | go 174 | goes 175 | going 176 | gone 177 | got 178 | gotten 179 | greetings 180 | h 181 | had 182 | hadn't 183 | happens 184 | hardly 185 | has 186 | hasn't 187 | have 188 | haven't 189 | having 190 | he 191 | he's 192 | hello 193 | help 194 | hence 195 | her 196 | here 197 | here's 198 | hereafter 199 | hereby 200 | herein 201 | hereupon 202 | hers 203 | herself 204 | hi 205 | him 206 | himself 207 | his 208 | hither 209 | hopefully 210 | how 211 | howbeit 212 | however 213 | i 214 | i'd 215 | i'll 216 | i'm 217 | i've 218 | ie 219 | if 220 | ignored 221 | immediate 222 | in 223 | inasmuch 224 | inc 225 | indeed 226 | indicate 227 | indicated 228 | indicates 229 | inner 230 | insofar 231 | instead 232 | into 233 | inward 234 | is 235 | isn't 236 | it 237 | it'd 238 | it'll 239 | it's 240 | its 241 | itself 242 | j 243 | just 244 | k 245 | keep 246 | keeps 247 | kept 248 | know 249 | knows 250 | known 251 | l 252 | last 253 | lately 254 | later 255 | latter 256 | latterly 257 | least 258 | less 259 | lest 260 | let 261 | let's 262 | like 263 | liked 264 | likely 265 | little 266 | look 267 | looking 268 | looks 269 | ltd 270 | m 271 | mainly 272 | many 273 | may 274 | maybe 275 | me 276 | mean 277 | meanwhile 278 | merely 279 | might 280 | more 281 | moreover 282 | most 283 | mostly 284 | much 285 | must 286 | my 287 | myself 288 | n 289 | name 290 | namely 291 | nd 292 | near 293 | nearly 294 | 
necessary 295 | need 296 | needs 297 | neither 298 | never 299 | nevertheless 300 | new 301 | next 302 | nine 303 | no 304 | nobody 305 | non 306 | none 307 | noone 308 | nor 309 | normally 310 | not 311 | nothing 312 | novel 313 | now 314 | nowhere 315 | o 316 | obviously 317 | of 318 | off 319 | often 320 | oh 321 | ok 322 | okay 323 | old 324 | on 325 | once 326 | one 327 | ones 328 | only 329 | onto 330 | or 331 | other 332 | others 333 | otherwise 334 | ought 335 | our 336 | ours 337 | ourselves 338 | out 339 | outside 340 | over 341 | overall 342 | own 343 | p 344 | particular 345 | particularly 346 | per 347 | perhaps 348 | placed 349 | please 350 | plus 351 | possible 352 | presumably 353 | probably 354 | provides 355 | q 356 | que 357 | quite 358 | qv 359 | r 360 | rather 361 | rd 362 | re 363 | really 364 | reasonably 365 | regarding 366 | regardless 367 | regards 368 | relatively 369 | respectively 370 | right 371 | s 372 | said 373 | same 374 | saw 375 | say 376 | saying 377 | says 378 | second 379 | secondly 380 | see 381 | seeing 382 | seem 383 | seemed 384 | seeming 385 | seems 386 | seen 387 | self 388 | selves 389 | sensible 390 | sent 391 | serious 392 | seriously 393 | seven 394 | several 395 | shall 396 | she 397 | should 398 | shouldn't 399 | since 400 | six 401 | so 402 | some 403 | somebody 404 | somehow 405 | someone 406 | something 407 | sometime 408 | sometimes 409 | somewhat 410 | somewhere 411 | soon 412 | sorry 413 | specified 414 | specify 415 | specifying 416 | still 417 | sub 418 | such 419 | sup 420 | sure 421 | t 422 | t's 423 | take 424 | taken 425 | tell 426 | tends 427 | th 428 | than 429 | thank 430 | thanks 431 | thanx 432 | that 433 | that's 434 | thats 435 | the 436 | their 437 | theirs 438 | them 439 | themselves 440 | then 441 | thence 442 | there 443 | there's 444 | thereafter 445 | thereby 446 | therefore 447 | therein 448 | theres 449 | thereupon 450 | these 451 | they 452 | they'd 453 | they'll 454 | they're 455 | they've 456 | think 457 | third 458 | this 459 | thorough 460 | thoroughly 461 | those 462 | though 463 | three 464 | through 465 | throughout 466 | thru 467 | thus 468 | to 469 | together 470 | too 471 | took 472 | toward 473 | towards 474 | tried 475 | tries 476 | truly 477 | try 478 | trying 479 | twice 480 | two 481 | u 482 | un 483 | under 484 | unfortunately 485 | unless 486 | unlikely 487 | until 488 | unto 489 | up 490 | upon 491 | us 492 | use 493 | used 494 | useful 495 | uses 496 | using 497 | usually 498 | uucp 499 | v 500 | value 501 | various 502 | very 503 | via 504 | viz 505 | vs 506 | w 507 | want 508 | wants 509 | was 510 | wasn't 511 | way 512 | we 513 | we'd 514 | we'll 515 | we're 516 | we've 517 | welcome 518 | well 519 | went 520 | were 521 | weren't 522 | what 523 | what's 524 | whatever 525 | when 526 | whence 527 | whenever 528 | where 529 | where's 530 | whereafter 531 | whereas 532 | whereby 533 | wherein 534 | whereupon 535 | wherever 536 | whether 537 | which 538 | while 539 | whither 540 | who 541 | who's 542 | whoever 543 | whole 544 | whom 545 | whose 546 | why 547 | will 548 | willing 549 | wish 550 | with 551 | within 552 | without 553 | won't 554 | wonder 555 | would 556 | would 557 | wouldn't 558 | x 559 | y 560 | yes 561 | yet 562 | you 563 | you'd 564 | you'll 565 | you're 566 | you've 567 | your 568 | yours 569 | yourself 570 | yourselves 571 | z 572 | zero 573 | -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/READme.txt: 
-------------------------------------------------------------------------------- 1 | test folder for Flask, but in order to run this project a specific environment/set of packages is required. 2 | 3 | http://flask.pocoo.org/docs/0.12/installation/ 4 | NOTE: Instead of virtualenv, conda can be used to create virtual environments: 5 | https://conda.io/docs/user-guide/tasks/manage-environments.html 6 | https://conda.io/docs/_downloads/conda-cheatsheet.pdf 7 | 8 | Also, there is a flaskTest folder inside of a flaskTest folder 9 | because that's how the Flask tutorial told me to set it up :) 10 | 11 | -Isaac, keonp2 12 | -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/SmartStoplist.txt: -------------------------------------------------------------------------------- 1 | #stop word list from SMART (Salton,1971). Available at ftp://ftp.cs.cornell.edu/pub/smart/english.stop 2 | a 3 | a's 4 | able 5 | about 6 | above 7 | according 8 | accordingly 9 | across 10 | actually 11 | after 12 | afterwards 13 | again 14 | against 15 | ain't 16 | all 17 | allow 18 | allows 19 | almost 20 | alone 21 | along 22 | already 23 | also 24 | although 25 | always 26 | am 27 | among 28 | amongst 29 | an 30 | and 31 | another 32 | any 33 | anybody 34 | anyhow 35 | anyone 36 | anything 37 | anyway 38 | anyways 39 | anywhere 40 | apart 41 | appear 42 | appreciate 43 | appropriate 44 | are 45 | aren't 46 | around 47 | as 48 | aside 49 | ask 50 | asking 51 | associated 52 | at 53 | available 54 | away 55 | awfully 56 | b 57 | be 58 | became 59 | because 60 | become 61 | becomes 62 | becoming 63 | been 64 | before 65 | beforehand 66 | behind 67 | being 68 | believe 69 | below 70 | beside 71 | besides 72 | best 73 | better 74 | between 75 | beyond 76 | both 77 | brief 78 | but 79 | by 80 | c 81 | c'mon 82 | c's 83 | came 84 | can 85 | can't 86 | cannot 87 | cant 88 | cause 89 | causes 90 | certain 91 | certainly 92 | changes 93 | clearly 94 | co 95 | com 96 | come 97 | comes 98 | concerning 99 | consequently 100 | consider 101 | considering 102 | contain 103 | containing 104 | contains 105 | corresponding 106 | could 107 | couldn't 108 | course 109 | currently 110 | d 111 | definitely 112 | described 113 | despite 114 | did 115 | didn't 116 | different 117 | do 118 | does 119 | doesn't 120 | doing 121 | don't 122 | done 123 | down 124 | downwards 125 | during 126 | e 127 | each 128 | edu 129 | eg 130 | eight 131 | either 132 | else 133 | elsewhere 134 | enough 135 | entirely 136 | especially 137 | et 138 | etc 139 | even 140 | ever 141 | every 142 | everybody 143 | everyone 144 | everything 145 | everywhere 146 | ex 147 | exactly 148 | example 149 | except 150 | f 151 | far 152 | few 153 | fifth 154 | first 155 | five 156 | followed 157 | following 158 | follows 159 | for 160 | former 161 | formerly 162 | forth 163 | four 164 | from 165 | further 166 | furthermore 167 | g 168 | get 169 | gets 170 | getting 171 | given 172 | gives 173 | go 174 | goes 175 | going 176 | gone 177 | got 178 | gotten 179 | greetings 180 | h 181 | had 182 | hadn't 183 | happens 184 | hardly 185 | has 186 | hasn't 187 | have 188 | haven't 189 | having 190 | he 191 | he's 192 | hello 193 | help 194 | hence 195 | her 196 | here 197 | here's 198 | hereafter 199 | hereby 200 | herein 201 | hereupon 202 | hers 203 | herself 204 | hi 205 | him 206 | himself 207 | his 208 | hither 209 | hopefully 210 | how 211 | howbeit 212 | however 213 | i 214 | i'd 215 | i'll 216 | i'm 217 | 
i've 218 | ie 219 | if 220 | ignored 221 | immediate 222 | in 223 | inasmuch 224 | inc 225 | indeed 226 | indicate 227 | indicated 228 | indicates 229 | inner 230 | insofar 231 | instead 232 | into 233 | inward 234 | is 235 | isn't 236 | it 237 | it'd 238 | it'll 239 | it's 240 | its 241 | itself 242 | j 243 | just 244 | k 245 | keep 246 | keeps 247 | kept 248 | know 249 | knows 250 | known 251 | l 252 | last 253 | lately 254 | later 255 | latter 256 | latterly 257 | least 258 | less 259 | lest 260 | let 261 | let's 262 | like 263 | liked 264 | likely 265 | little 266 | look 267 | looking 268 | looks 269 | ltd 270 | m 271 | mainly 272 | many 273 | may 274 | maybe 275 | me 276 | mean 277 | meanwhile 278 | merely 279 | might 280 | more 281 | moreover 282 | most 283 | mostly 284 | much 285 | must 286 | my 287 | myself 288 | n 289 | name 290 | namely 291 | nd 292 | near 293 | nearly 294 | necessary 295 | need 296 | needs 297 | neither 298 | never 299 | nevertheless 300 | new 301 | next 302 | nine 303 | no 304 | nobody 305 | non 306 | none 307 | noone 308 | nor 309 | normally 310 | not 311 | nothing 312 | novel 313 | now 314 | nowhere 315 | o 316 | obviously 317 | of 318 | off 319 | often 320 | oh 321 | ok 322 | okay 323 | old 324 | on 325 | once 326 | one 327 | ones 328 | only 329 | onto 330 | or 331 | other 332 | others 333 | otherwise 334 | ought 335 | our 336 | ours 337 | ourselves 338 | out 339 | outside 340 | over 341 | overall 342 | own 343 | p 344 | particular 345 | particularly 346 | per 347 | perhaps 348 | placed 349 | please 350 | plus 351 | possible 352 | presumably 353 | probably 354 | provides 355 | q 356 | que 357 | quite 358 | qv 359 | r 360 | rather 361 | rd 362 | re 363 | really 364 | reasonably 365 | regarding 366 | regardless 367 | regards 368 | relatively 369 | respectively 370 | right 371 | s 372 | said 373 | same 374 | saw 375 | say 376 | saying 377 | says 378 | second 379 | secondly 380 | see 381 | seeing 382 | seem 383 | seemed 384 | seeming 385 | seems 386 | seen 387 | self 388 | selves 389 | sensible 390 | sent 391 | serious 392 | seriously 393 | seven 394 | several 395 | shall 396 | she 397 | should 398 | shouldn't 399 | since 400 | six 401 | so 402 | some 403 | somebody 404 | somehow 405 | someone 406 | something 407 | sometime 408 | sometimes 409 | somewhat 410 | somewhere 411 | soon 412 | sorry 413 | specified 414 | specify 415 | specifying 416 | still 417 | sub 418 | such 419 | sup 420 | sure 421 | t 422 | t's 423 | take 424 | taken 425 | tell 426 | tends 427 | th 428 | than 429 | thank 430 | thanks 431 | thanx 432 | that 433 | that's 434 | thats 435 | the 436 | their 437 | theirs 438 | them 439 | themselves 440 | then 441 | thence 442 | there 443 | there's 444 | thereafter 445 | thereby 446 | therefore 447 | therein 448 | theres 449 | thereupon 450 | these 451 | they 452 | they'd 453 | they'll 454 | they're 455 | they've 456 | think 457 | third 458 | this 459 | thorough 460 | thoroughly 461 | those 462 | though 463 | three 464 | through 465 | throughout 466 | thru 467 | thus 468 | to 469 | together 470 | too 471 | took 472 | toward 473 | towards 474 | tried 475 | tries 476 | truly 477 | try 478 | trying 479 | twice 480 | two 481 | u 482 | un 483 | under 484 | unfortunately 485 | unless 486 | unlikely 487 | until 488 | unto 489 | up 490 | upon 491 | us 492 | use 493 | used 494 | useful 495 | uses 496 | using 497 | usually 498 | uucp 499 | v 500 | value 501 | various 502 | very 503 | via 504 | viz 505 | vs 506 | w 507 | want 508 | wants 509 | was 510 | wasn't 511 
| way 512 | we 513 | we'd 514 | we'll 515 | we're 516 | we've 517 | welcome 518 | well 519 | went 520 | were 521 | weren't 522 | what 523 | what's 524 | whatever 525 | when 526 | whence 527 | whenever 528 | where 529 | where's 530 | whereafter 531 | whereas 532 | whereby 533 | wherein 534 | whereupon 535 | wherever 536 | whether 537 | which 538 | while 539 | whither 540 | who 541 | who's 542 | whoever 543 | whole 544 | whom 545 | whose 546 | why 547 | will 548 | willing 549 | wish 550 | with 551 | within 552 | without 553 | won't 554 | wonder 555 | would 556 | would 557 | wouldn't 558 | x 559 | y 560 | yes 561 | yet 562 | you 563 | you'd 564 | you'll 565 | you're 566 | you've 567 | your 568 | yours 569 | yourself 570 | yourselves 571 | z 572 | zero 573 | -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/Isaac/flaskTest/flaskTest/__init__.py -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/flask_session/606f784a4e50e2c62bd2988e5a5378ea: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/Isaac/flaskTest/flaskTest/flask_session/606f784a4e50e2c62bd2988e5a5378ea -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/flask_session/d149ceaaaf93012427d506eb33b1a8fb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/Isaac/flaskTest/flaskTest/flask_session/d149ceaaaf93012427d506eb33b1a8fb -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/flask_session/dc1127ffee6208bf37499f62ca026a4b: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/Isaac/flaskTest/flaskTest/flask_session/dc1127ffee6208bf37499f62ca026a4b -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/main.py: -------------------------------------------------------------------------------- 1 | # 2 | # -To be used for locally (localhost) testing the web server/website. 3 | # -Installing the flask package on a virtual environment (instead of system-wide) 4 | # is recommended by Flask devs. 
5 | # 6 | # http://flask.pocoo.org/docs/0.12/quickstart/# 7 | # 8 | # -Isaac Park, keonp2 9 | # 10 | 11 | 12 | from flask import Flask, request, render_template, redirect, url_for, session 13 | from flask_session import Session 14 | from run import display_praw, stats_praw, body_to_graph, get_keyword_dict 15 | 16 | 17 | app = Flask(__name__) 18 | app.config['SECRET_KEY'] = 'insert super secret string here' 19 | SESSION_TYPE = 'filesystem' 20 | app.config.from_object(__name__) 21 | Session(app) 22 | 23 | 24 | @app.route('/', methods=['GET', 'POST']) 25 | def index(): 26 | if request.method == 'POST': 27 | if 'basic-url' in request.form: 28 | name = request.form['basic-url'] 29 | info = stats_praw(name) 30 | session['info'] = info 31 | output = display_praw(name) 32 | session['output'] = output 33 | keywords = get_keyword_dict(output) 34 | graph_url = body_to_graph(keywords, name) 35 | session['graph_url'] = graph_url 36 | return redirect(url_for('program', name=name)) 37 | else: 38 | return render_template('home.html') 39 | # TODO: Implement subreddit input validity checking AKA Fix blank input error 40 | else: 41 | return render_template('home.html') 42 | 43 | 44 | @app.route('/docs/<section>') 45 | def docs(section): 46 | if section == "findings": 47 | return render_template("docs_findings.html") 48 | else: 49 | if section == "team": 50 | return render_template("docs_team.html") 51 | else: 52 | if section == "tools": 53 | return render_template("docs_tools.html") 54 | else: 55 | return "This docs page does not exist. Maybe it was a typo? <br><br> -Isaac <br><br> 
Back to Reddit_Unlocked Home" 56 | # TODO: if I have time, implement html template for page DNE message 57 | 58 | 59 | @app.route('/program/<name>') 60 | def program(name): 61 | output = session['output'] 62 | info = session['info'] 63 | graph_url = session['graph_url'] 64 | return render_template('program.html', name=name, output=output, info=info, graph_url=graph_url) 65 | 66 | 67 | @app.route('/examples') 68 | def examples(): 69 | return render_template('examples.html') 70 | 71 | 72 | if __name__ == "__main__": 73 | app.run(debug=True) 74 | 75 | 76 | # Use the url_for method for links in the webpage; url_for generates a URL 77 | # based on the argument it is given (the name of the function mapped to a URL). 78 | # 79 | # Example: 80 | # 81 | # @app.route('/user/<name>') 82 | # def hello_user(name): 83 | # if name == 'admin': 84 | # return redirect(url_for('hello_admin')) 85 | # else: 86 | # return redirect(url_for('hello_guest', guest = name)) 87 | # 88 | -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/run.py: -------------------------------------------------------------------------------- 1 | import praw 2 | import pandas as pd 3 | from datetime import datetime 4 | 5 | 6 | # TODO: Make this program faster! (a lot faster, this is way too slow) 7 | 8 | def display_praw(name): 9 | reddit = praw.Reddit(client_id='Pj5o8QpNXXJY9A', 10 | client_secret='pQKMRBmhp0In48NoNvvktfRo2eA', 11 | password='prawisgreat', 12 | user_agent='Reddit Unlocked CS196 Project @ UIUC', 13 | username='RedditUnlocked196') 14 | 15 | subreddit = reddit.subreddit(name) 16 | 17 | threads_df = pd.DataFrame({ 18 | 'Title': (), 19 | 'URL': (), 20 | 'Upvote Ratio (%)': (), 21 | 'Net Score': (), 22 | '# of Upvotes': (), 23 | '# of Downvotes': (), 24 | 'Post Date': (), 25 | 'Self Post?': (), 26 | 'Video Post?': (), 27 | 'Domain': () 28 | }) 29 | 30 | threads_df = threads_df[['Title', 'URL', 'Upvote Ratio (%)', 'Net Score', '# of Upvotes', '# of Downvotes', 31 | 'Post Date', 'Self Post?', 'Video Post?', 'Domain']] 32 | 33 | for thread in subreddit.top('week', limit=50): # TODO: change limit number when actually deploying program. 15 is the testing number.
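# Note on the vote arithmetic at lines 38-39 below: PRAW exposes only the
# net score (s = ups - downs) and the upvote ratio (r = ups / (ups + downs)).
# Solving those two equations for the raw counts gives
#   ups = r*s / (2r - 1)    and    downs = ups - s,
# which is exactly what actualUps and actualDowns compute. The expression is
# undefined at r = 0.5 (the two equations stop being independent there), and
# the result is an estimate, since Reddit fuzzes the published scores.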
34 | if thread.is_video: 35 | continue 36 | if 'fb' in thread.url: 37 | continue 38 | actualUps = float(thread.upvote_ratio * thread.score) / float(thread.upvote_ratio * 2 - 1) 39 | actualDowns = actualUps - thread.score 40 | gather = pd.Series([thread.title, thread.url, thread.upvote_ratio * 100, thread.score, 41 | actualUps, actualDowns, thread.created_utc, 42 | thread.is_self, thread.is_video, thread.domain], 43 | index=['Title', 'URL', 'Upvote Ratio (%)', 'Net Score', '# of Upvotes', '# of Downvotes', 44 | 'Post Date', 'Self Post?', 'Video Post?', 'Domain']) 45 | 46 | threads_df = threads_df.append(gather, ignore_index=True) 47 | 48 | threads_dict = threads_df.to_dict(orient='records') 49 | 50 | for entry in threads_dict: 51 | if isinstance(str(entry['Post Date']), str): 52 | time = datetime.fromtimestamp(entry['Post Date']) 53 | formatTime = time.strftime('%b %d, %Y') 54 | else: 55 | formatTime = None 56 | 57 | entry['Post Date'] = formatTime 58 | 59 | return threads_dict 60 | 61 | 62 | def stats_praw(name): 63 | reddit = praw.Reddit(client_id='Pj5o8QpNXXJY9A', 64 | client_secret='pQKMRBmhp0In48NoNvvktfRo2eA', 65 | password='prawisgreat', 66 | user_agent='Reddit Unlocked CS196 Project @ UIUC', 67 | username='RedditUnlocked196') 68 | 69 | info = reddit.request('GET', '/r/' + name + '/about.json') 70 | 71 | infoDict = {} 72 | 73 | infoDict['Current Users'] = info['data']['active_user_count'] 74 | infoDict['Creation Date'] = (datetime.fromtimestamp(info['data']['created_utc'])).strftime('%b %d, %Y') 75 | infoDict['Subscriber Count'] = info['data']['subscribers'] 76 | infoDict['Title'] = info['data']['title'] 77 | infoDict['Icon'] = info['data']['icon_img'] 78 | 79 | return infoDict 80 | 81 | import plotly 82 | plotly.tools.set_credentials_file(username='reddit_unlocked', api_key='gfnXKc7JvUKST4HRJyFX') 83 | import plotly.plotly as py 84 | import plotly.graph_objs as go 85 | from plotly.graph_objs import * 86 | 87 | 88 | # takes a dictionary of dictionaries of keywords from body text as input and returns the url for the plotly html embedding of 89 | # scatterplot made from the keywords and their attributes 90 | # 'Keyword','Occurences', 'Upvotes', 'Downvotes', "Score", "Subjectivity", "Polarity", "Domain" 91 | 92 | 93 | def body_to_graph(words = {}, subreddit = str): 94 | """ 95 | :type subreddit: String 96 | """ 97 | frames = [] 98 | #Turns dictionary of dictionaries into list of dataframes 99 | for key, value in words.items(): 100 | frames.append(pd.DataFrame(data = value, columns = [key], index = ['Keyword','Occurences', 'Upvotes', 'Downvotes', 'Score', 'Subjectivity', 'Polarity', 'Domain']).transpose()) 101 | #Concatenates the list of dataframes 102 | data_df = pd.concat(frames) 103 | trace1 = go.Scatter( 104 | y = data_df.Subjectivity, #Subjectivity of the text the keyword was found in on y axis 105 | x = data_df.Occurences * data_df.Score,#Occurrences * Score on x-axis for more spread out data 106 | mode = 'markers', 107 | marker = dict( 108 | size = (data_df.Occurences) * 20, #Occurrences of Keyword for size 109 | color = data_df.Polarity, #Polarity for color of the post (blue is sad, red is happy) 110 | colorscale = 'Portland', 111 | showscale = True 112 | ), 113 | text = "Keyword: " + data_df.Keyword 114 | ) 115 | layout = go.Layout( 116 | annotations=Annotations([ 117 | Annotation( 118 | x=0.5, 119 | y=-0.123, 120 | showarrow=False, 121 | text='(Occurrences * Score)', 122 | xref='paper', 123 | yref='paper' 124 | ), 125 | Annotation( 126 | x=1.055, 127 | y=0.5, 128 | 
showarrow=False, 129 | text='Text Polarity', 130 | textangle=-90, 131 | xref='paper', 132 | yref='paper' 133 | ), 134 | Annotation( 135 | x=.01, 136 | y=1, 137 | showarrow=False, 138 | text='Size = Occurrences', 139 | textangle=0, 140 | xref='paper', 141 | yref='paper', 142 | bordercolor = '#1f77b4', 143 | font=dict( 144 | family='Courier New, monospace', 145 | size=16, 146 | color='#ff7f0e' 147 | ) 148 | ) 149 | ]), 150 | title = 'Stats of top reddit /r/' + subreddit + ' keywords', 151 | yaxis = dict( 152 | title = 'Subjectivity', 153 | ticks = 5, 154 | ), 155 | xaxis = dict( 156 | title = 'popularity', 157 | ticklen = 10, 158 | ) 159 | ) 160 | data = [trace1] 161 | fig = go.Figure(data = data, layout = layout) 162 | url = py.plot(fig, filename='reddit plot', auto_open=False) 163 | return "" + url 164 | 165 | import operator 166 | import rake as rake 167 | rake_object = rake.Rake("SmartStoplist.txt", 1, 2, 1) 168 | from textblob import TextBlob, Word, Blobber 169 | import newspaper 170 | from newspaper import Article 171 | import numpy as np 172 | 173 | def get_keyword_dict(input_dict): 174 | # Transforms dict returned by display_praw into DataFrame for working with 175 | top10news_df = pd.DataFrame.from_dict(input_dict) 176 | 177 | words = {} 178 | 179 | ## NEWSPAPER STUFF HERE ## 180 | 181 | # Get keywords out of all articles 182 | for i in range(len(top10news_df)): 183 | if "self" in top10news_df.iloc[i]["Domain"]: 184 | continue 185 | elif "youtube" in top10news_df.iloc[i]["Domain"]: 186 | continue 187 | elif "imgur" in top10news_df.iloc[i]["Domain"]: 188 | continue 189 | 190 | myArticle = Article(top10news_df.iloc[i]['URL']) 191 | try: 192 | myArticle.download() 193 | myArticle.parse() 194 | except: 195 | continue 196 | myArticle.nlp() 197 | 198 | # Run sentiment analysis on each article, fetch subjectivity and polarity 199 | text = myArticle.text 200 | blob = TextBlob(text) 201 | polarity = blob.sentiment.polarity 202 | subjectivity = blob.sentiment.subjectivity 203 | 204 | # Get associated Reddit post info for each keyword, store in dictionary 205 | for keyword in myArticle.keywords: 206 | 207 | # Don't waste time with numeric keywords, skip them if they contain numbers 208 | if any(char.isdigit() for char in keyword): 209 | continue 210 | 211 | 212 | if keyword not in words: 213 | words[keyword] = [keyword, 1, 214 | top10news_df.iloc[i]['# of Upvotes'], 215 | top10news_df.iloc[i]["# of Downvotes"], 216 | top10news_df.iloc[i]["Net Score"], 217 | subjectivity, polarity, 218 | {(top10news_df.iloc[i]["Domain"]):1}] 219 | else: 220 | words[keyword][1] += 1 221 | words[keyword][2] += top10news_df.iloc[i]['# of Upvotes'] 222 | words[keyword][3] += int(top10news_df.iloc[i]['# of Downvotes']) 223 | words[keyword][4] += int(top10news_df.iloc[i]['Net Score']) 224 | words[keyword][5] = np.mean([subjectivity, words[keyword][5]]) 225 | words[keyword][6] = np.mean([polarity, words[keyword][6]]) 226 | if top10news_df.iloc[i]["Domain"] in words[keyword][7]: 227 | words[keyword][7][(top10news_df.iloc[i]["Domain"])] += 1 228 | else: 229 | words[keyword][7][top10news_df.iloc[i]["Domain"]] = 1 230 | 231 | ## RAKE STUFF HERE ## 232 | 233 | # Pull keywords from title strings 234 | for wordPair in rake_object.run(top10news_df.iloc[i]['Title']): 235 | currentWord = wordPair[0] 236 | 237 | # Don't waste time with numeric keywords, skip them if they contain numbers 238 | if any(char.isdigit() for char in currentWord): 239 | continue 240 | 241 | # Grab associated Reddit post data for each keyword, store in 
dictionary 242 | if currentWord not in words: 243 | words[currentWord] = [currentWord, 1, 244 | top10news_df.iloc[i]['# of Upvotes'], 245 | top10news_df.iloc[i]["# of Downvotes"], 246 | top10news_df.iloc[i]["Net Score"], 247 | subjectivity, polarity, 248 | {(top10news_df.iloc[i]["Domain"]):1}] 249 | else: 250 | words[currentWord][1] += 1 251 | words[currentWord][2] += int(top10news_df.iloc[i]['# of Upvotes']) 252 | words[currentWord][3] += int(top10news_df.iloc[i]['# of Downvotes']) 253 | words[currentWord][4] += int(top10news_df.iloc[i]['Net Score']) 254 | if top10news_df.iloc[i]["Domain"] in words[currentWord][7]: 255 | words[currentWord][7][(top10news_df.iloc[i]["Domain"])] += 1 256 | else: 257 | words[currentWord][7][top10news_df.iloc[i]["Domain"]] = 1 258 | 259 | 260 | ### FOR GARY'S USE ### 261 | # Output dictionary is named 'words' # 262 | # Format is as such: # 263 | # key = keyword # 264 | # value = [Occurences, Upvotes, Downvotes, Score, Subjectivity, Polarity, Domain Dictionary] # 265 | 266 | return words -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/templates/_navbar.html: -------------------------------------------------------------------------------- 1 |
2 | 40 |
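For reference, run.py's get_keyword_dict returns the `words` dictionary documented in the comment at the bottom of that file, and body_to_graph starts by reshaping it into a single pandas DataFrame. A minimal, self-contained sketch of that hand-off follows; the keywords and numbers are invented sample data, and 'Occurences' is spelled as in run.py so the index labels line up:

import pandas as pd

# Sample data in the documented shape:
# keyword -> [Keyword, Occurences, Upvotes, Downvotes, Score, Subjectivity,
#             Polarity, domain-count dict]
words = {
    'senate': ['senate', 3, 41000.0, 2100.0, 38900, 0.41, 0.05,
               {'reuters.com': 2, 'cnn.com': 1}],
    'playoffs': ['playoffs', 2, 25500.0, 900.0, 24600, 0.55, 0.31,
                 {'espn.com': 2}],
}

# The same reshaping body_to_graph performs before plotting: one single-column
# DataFrame per keyword, transposed into a row, then concatenated.
frames = [
    pd.DataFrame(data=value, columns=[key],
                 index=['Keyword', 'Occurences', 'Upvotes', 'Downvotes',
                        'Score', 'Subjectivity', 'Polarity', 'Domain']).transpose()
    for key, value in words.items()
]
data_df = pd.concat(frames)
print(data_df[['Occurences', 'Score', 'Subjectivity', 'Polarity']])

Each keyword becomes one row, which is what lets the Plotly trace map columns directly onto axes, marker size, and color.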
-------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/templates/docs_findings.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | 4 | 5 | {% block body %} 6 |
7 |
8 |

This project has been a wonderful learning experience for us.

9 |
10 |

A Stack Overflow search a day keeps the bugs away.

11 |
12 |

The Why

13 |

According to Alexa, Reddit is the 5th most visited website in the United States (and 8th in the world). As a social news aggregation, web content rating, and discussion website, Reddit represents a very significant portion of America's population of internet users. 14 | And as evidenced by Reddit's higher-than-most typical user visit time of almost 16 minutes per day (compared to 10 minutes for Facebook and 6 for Twitter), Reddit tends to pull in users who are likely to spend more time engaging in longer discussions to express themselves and their opinions. 15 | On top of all this, Reddit's voting and commenting features for every discussion "thread" allow each sub-community (subreddit) to not only aggregate news and comments but also its users' sentiments towards various topics in these threads. 16 | In short, Reddit is essentially a goldmine for gathering and analyzing data on how the internet thinks and reacts to real-life issues and events. For our project, we wanted to tap into this goldmine to gather insights about the various popular trends that were present within the Reddit population and its sub-communities.

17 | As a long-time user of Reddit, I knew what topics were generally popular in the subreddits I frequented, but I was interested in quickly obtaining concrete, accurate, and data-driven overviews of any subreddit of my choosing. 18 | As a result, I conceived and pitched the original idea for the project, and I was very fortunate to get to work with a group of self-driven and professional CS students to make Reddit_Unlocked a reality.

-Isaac

19 |
20 | 21 | {% endblock %} 22 | 23 | -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/templates/docs_team.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | 4 | 5 | {% block body %} 6 |
7 |
8 |

We're a group of Freshman CS majors at UIUC.

9 |
10 |

But our class standings are higher :)

11 |
12 |
13 |
14 |
15 |
16 | 23 |
24 |
25 | 72 |
73 |
74 |
75 | 76 | {% endblock %} -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/templates/docs_tools.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | 4 | 5 | {% block body %} 6 |
7 |
8 |

Reddit_Unlocked is made possible by these amazing libraries.

9 |
10 |

No libraries were harmed in the making of this project.

11 |
12 |
13 |
14 |
15 | 23 |
24 | 102 |
103 |
104 |
105 | {% endblock %} -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/templates/examples.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | {% block body %} 4 | {# deprecated page#} 5 | 6 | {% endblock %} -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/templates/home.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | 4 | 5 | {% block body %} 6 |
7 |
8 |

A Reddit-cal Analysis of The Web

9 |
10 |

...How does the Internet think?

11 |
12 |
13 |
14 | 20 | 50 |
51 |

More Details: The Why

52 |
53 |

According to Alexa, Reddit is the 5th most visited website in the United States (and 8th in the world). As a social news aggregation, web content rating, and discussion website, Reddit represents a very significant portion of America's population of internet users. 54 | As evidenced by Reddit's higher-than-most typical user visit time of almost 16 minutes per day (compared to 10 minutes for Facebook and 6 for Twitter), Reddit tends to pull in users who are more likely to spend their time engaging in longer and thorough discussions to express their opinions and ideas. 55 | On top of all this, Reddit's voting and commenting features for every discussion "thread" enable each sub-community (subreddit) to not only aggregate news but also its users' sentiments toward and reactions to the wide range of topics in these threads. 56 | In conclusion, Reddit is essentially a goldmine for gathering and analyzing data on how the internet thinks and reacts to real-life issues and events. For our project, we wanted to tap into this goldmine to gather insights about the popular trends that permeate throughout the Reddit population and its sub-communities.

57 | As a long-time user of Reddit, I knew what topics were generally popular in the subreddits I frequented, but I was interested in quickly obtaining concrete, accurate, and data-driven overviews of any subreddit of my choosing. 58 | As a result, I conceived and pitched the original idea for the project, and I was very fortunate to get to work with this group of self-driven and professional CS students to make Reddit_Unlocked a reality.

-Isaac Park 59 |

60 |
61 |
62 |
63 |
64 | 65 |
66 | https://www.reddit.com/r/ 67 | 68 | 69 | 70 | 71 |
72 |
73 |
74 | {% endblock %} 75 | -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Reddit_Unlocked 6 | 7 | 8 | 9 | {% include '_navbar.html' %} 10 | {% block body %}{% endblock %} 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /group contributions/Isaac/flaskTest/flaskTest/templates/program.html: -------------------------------------------------------------------------------- 1 | {% extends 'layout.html' %} 2 | 3 | 4 | 5 | 6 | {% block body %} 7 | {% set ellip = "..." %} 8 |
9 |
10 |

You chose to analyze /r/{{ name }}. Here are the results.

11 |
12 |

Have a look at what's popular and trending in this section of the internet!

13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | Background statistics for /r/{{ name }} 21 |
22 |
23 |

{{ info['Title'] }}

24 |

Created: {{ info['Creation Date'] }}

25 |

Subscriber Count: {{ info['Subscriber Count'] }}

26 |

Currently Active Users: {{ info['Current Users'] }}

27 | Visit /r/{{ name }} 28 |
29 |
30 |
31 |
32 | 33 |
34 |
35 |
36 |
37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | {% for entry in output[0:10] %} 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | {% endfor %} 60 | 61 |
The top 10 threads from /r/{{ name }} in the past week, sorted by net score. Click on the URLs to see what made them popular.
#TitleURLUpvote %Net ScoreDate Posted
{{ loop.index }}{{ (entry['Title'][0:30]) + ellip }}{{ (entry['URL'][8:40]) + ellip }}{{ entry['Upvote Ratio (%)'] }}{{ entry['Net Score'] }}{{ entry['Post Date'] }}
62 |
63 |
64 |
65 |

The sentiment analysis graph below displays the most popular keywords from /r/{{ name }} in the past week.

66 |
67 |

It's interactive- you can hover over the bubbles, zoom in, or even view this chart on Plotly.

68 |
69 |
70 |
71 | reddit plot 72 | 73 |
74 | {% endblock %} -------------------------------------------------------------------------------- /group contributions/bvacek2/praw_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 34, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import praw\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import newspaper\n", 14 | "from newspaper import Article\n", 15 | "from datetime import datetime\n", 16 | "import pandas as pd\n", 17 | "import operator" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 46, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "reddit = praw.Reddit(client_id='UHCBgwhLYPSEeg',\n", 29 | "\t\t\tclient_secret='dw7eLPuOBlsQXo5EtWhWiSGgWvk',\n", 30 | "\t\t\tuser_agent='Vcrew192')\n", 31 | "\n", 32 | "sub = 'news'" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 16, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "['http://wtkr.com/2017/07/20/tmz-report-chester-bennington-of-linkin-park-commits-suicide/', 'http://www.reuters.com/article/us-usa-trump-immigration-idUSKBN15E1DE', 'http://abcn.ws/2qPcnnU', 'http://www.prnewswire.com/news-releases/american-icon-and-playboy-founder-hugh-m-hefner-has-died-300527267.html', 'https://lbpost.com/life/pets/2000011459-pet-store-bill-passes-california-senate-38-to-0', 'http://money.cnn.com/2017/02/24/media/cnn-blocked-white-house-gaggle/index.html', 'https://www.independent.co.uk/news/uk/home-news/billy-caldwell-cannabis-oil-boy-seizures-stopped-cured-prescription-medical-marijuana-a7933066.html', 'http://www.independent.co.uk/life-style/gadgets-and-tech/news/youtube-daddyofive-cody-videos-watch-children-custody-latest-prank-parents-a7713376.html', 'http://people.com/movies/carrie-fisher-dies/', 'http://wapo.st/2pPSCIo']\n", 45 | "['soon', 'true', 'park', 'success', 'album', 'suicide', 'dead', 'shinoda', 'wonder', 'remember', 'singer', 'bennington', 'band', 'statement', 'chester', 'linkin', '41']\n", 46 | "['immigration', 'state', 'house', 'trumps', 'white', 'yates', 'trump', 'lawyer', 'order', 'defied', 'united', 'department', 'fired', 'dumps', 'justice', 'states']\n", 47 | "['russia', 'director', 'comey', 'spicer', 'fbi', 'general', 'attorney', 'amid', 'trump', 'confidence', 'president', 'fired', 'white', 'probe']\n", 48 | "['award', 'hefner', 'founder', 'enterprises', 'playboy', 'm', 'honored', 'died', 'icon', 'magazine', 'hugh', 'world', 'american', 'inducted', 'mr']\n", 49 | "['485', 'shelters', 'store', 'passed', 'senate', 'dogs', 'animals', 'passes', 'bill', 'pets', '38', 'california', 'vote', 'pet']\n", 50 | "['house', 'organizations', 'media', 'reporters', 'blocks', 'times', 'trump', 'office', 'cnn', 'press', 'briefing', 'gaggle', 'white']\n", 51 | "['cbd', 'billy', 'uk', 'seizure', 'cannabis', 'day', 'days', 'seizures', '300', 'marijuana', 'oil', 'boy', 'medical', 'prescribed', '100', 'suffered', 'bud', 'caldwell']\n", 52 | "['million', 'youtubers', 'mother', 'videos', 'star', 'real', 'hall', 'prank', 'children', 'video', 'parents', 'daddyofive', 'custody', 'loses', 'featured', 'youtube']\n", 53 | "['dies', 'fisher', 'simon', 'star', '60', 'film', 'way', 'later', 'carrie', 'lourd', 'princess', 'wars', 'fishers']\n", 54 | "['intelligence', 'minister', 'russia', 'highly', 'revealed', 'trump', 'officials', 'united', 
'official', 'security', 'foreign', 'russian', 'president', 'classified', 'information', 'ambassador', 'states']\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "score = []\n", 60 | "for submission in reddit.subreddit('news').top('year', limit = 10):\n", 61 | " score.append(submission.url)\n", 62 | "\n", 63 | "for i in score: \n", 64 | " a = Article(i, language = 'en')\n", 65 | " a.download()\n", 66 | " a.parse()\n", 67 | " a.text[:150]\n", 68 | " a.nlp()\n", 69 | " print(a.keywords)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "" 81 | ] 82 | }, 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "reddit.subreddit('news').hot(limit=10)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 44, 95 | "metadata": { 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "top10news_df = pd.DataFrame({\n", 101 | " 'title': (),\n", 102 | " 'url' : (),\n", 103 | " 'upvote_percentage': (),\n", 104 | " 'year_posted': (),\n", 105 | " 'month_posted': (),\n", 106 | " 'day_posted': (),\n", 107 | " 'is_self': (),\n", 108 | " 'is_video': (),\n", 109 | " 'media': (),\n", 110 | " 'keywords_from_article': (),\n", 111 | "})\n", 112 | "for submission in reddit.subreddit(sub).hot(limit = 10):\n", 113 | " a = Article(submission.url, language = 'en')\n", 114 | " a.download()\n", 115 | " a.parse()\n", 116 | " a.text[:150]\n", 117 | " a.nlp()\n", 118 | " key = a.keywords\n", 119 | " s = pd.Series([submission.title, submission.url, submission.upvote_ratio * 100,\n", 120 | " datetime.utcfromtimestamp(submission.created_utc).year,\n", 121 | " datetime.utcfromtimestamp(submission.created_utc).month,\n", 122 | " datetime.utcfromtimestamp(submission.created_utc).day,\n", 123 | " submission.is_self, submission.is_video, submission.media, key],\n", 124 | " index=['title','url','upvote_percentage', 'year_posted', 'month_posted',\n", 125 | " 'day_posted', 'is_self', 'is_video', 'media', 'keywords_from_article'])\n", 126 | " top10news_df = top10news_df.append(s, ignore_index=True)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 45, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "[('victor', 1),\n", 138 | " ('wearing', 1),\n", 139 | " ('original', 1),\n", 140 | " ('itunes', 1),\n", 141 | " ('wont', 1),\n", 142 | " ('reel', 1),\n", 143 | " ('jidenna', 1),\n", 144 | " ('rule', 1),\n", 145 | " ('julianne', 1),\n", 146 | " ('solompg', 1),\n", 147 | " ('dropped', 1),\n", 148 | " ('youtube', 1),\n", 149 | " ('shins', 1),\n", 150 | " ('howi', 1),\n", 151 | " ('wooten', 1),\n", 152 | " ('gonna', 1),\n", 153 | " ('official', 1),\n", 154 | " ('long', 1),\n", 155 | " ('spelling', 1),\n", 156 | " ('fish', 1),\n", 157 | " ('lebowski', 1),\n", 158 | " ('recordswondaland', 1),\n", 159 | " ('jeff', 1),\n", 160 | " ('woeismebut', 1),\n", 161 | " ('youve', 1),\n", 162 | " ('viaitunes', 1),\n", 163 | " ('videojust', 1),\n", 164 | " ('grace', 1),\n", 165 | " ('shfiftyfive', 1),\n", 166 | " ('caring', 1),\n", 167 | " ('circle', 1),\n", 168 | " ('reddit', 1),\n", 169 | " ('schfiftyfive', 1),\n", 170 | " ('ways', 1),\n", 171 | " ('epic', 1),\n", 172 | " ('sony', 1),\n", 173 | " ('buscemi', 1),\n", 174 | " ('doesnt', 1),\n", 175 | " ('wants', 1),\n", 176 | " ('play', 1),\n", 177 | " ('perfect', 1),\n", 178 | " ('fears', 1),\n", 179 | " ('condition', 1),\n", 
180 | " ('guess', 1),\n", 181 | " ('moore', 1),\n", 182 | " ('videos', 1),\n", 183 | " ('need', 1),\n", 184 | " ('creepy', 1),\n", 185 | " ('worldlyricsi', 1),\n", 186 | " ('mp3', 1),\n", 187 | " ('net', 1),\n", 188 | " ('x', 1),\n", 189 | " ('bass', 1),\n", 190 | " ('ill', 1),\n", 191 | " ('everybody', 1),\n", 192 | " ('doomed', 1),\n", 193 | " ('steve', 1),\n", 194 | " ('bridges', 1),\n", 195 | " ('youre', 1),\n", 196 | " ('goodman', 1),\n", 197 | " ('live', 1),\n", 198 | " ('schfifty', 1),\n", 199 | " ('audio', 1),\n", 200 | " ('sell', 1),\n", 201 | " ('records', 1),\n", 202 | " ('chief', 1),\n", 203 | " ('rmusic', 1),\n", 204 | " ('remember', 1),\n", 205 | " ('dude', 1),\n", 206 | " ('john', 1),\n", 207 | " ('amazing', 1),\n", 208 | " ('tears', 1),\n", 209 | " ('released', 1),\n", 210 | " ('pardon', 1),\n", 211 | " ('incubus', 1),\n", 212 | " ('music', 1),\n", 213 | " ('coen', 1),\n", 214 | " ('presents', 1),\n", 215 | " ('shfifty', 1),\n", 216 | " ('words', 1),\n", 217 | " ('dont', 1),\n", 218 | " ('wondering', 1),\n", 219 | " ('way', 1),\n", 220 | " ('wondaland', 1),\n", 221 | " ('click', 2),\n", 222 | " ('published', 2),\n", 223 | " ('big', 2),\n", 224 | " ('world', 2),\n", 225 | " ('rentedthis', 3),\n", 226 | " ('feature', 3),\n", 227 | " ('try', 3),\n", 228 | " ('later', 3),\n", 229 | " ('rating', 3),\n", 230 | " ('right', 3),\n", 231 | " ('available', 4),\n", 232 | " ('video', 4)]" 233 | ] 234 | }, 235 | "execution_count": 45, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "keywords = []\n", 242 | "for i in top10news_df['keywords_from_article']:\n", 243 | " for j in i:\n", 244 | " keywords.append(j)\n", 245 | "x = dict([x,keywords.count(x)] for x in set(keywords))\n", 246 | "sorted_x = sorted(x.items(), key=operator.itemgetter(1))\n", 247 | "sorted_x" 248 | ] 249 | } 250 | ], 251 | "metadata": { 252 | "kernelspec": { 253 | "display_name": "Python 3", 254 | "language": "python", 255 | "name": "python3" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "name": "ipython", 260 | "version": 3 261 | }, 262 | "file_extension": ".py", 263 | "mimetype": "text/x-python", 264 | "name": "python", 265 | "nbconvert_exporter": "python", 266 | "pygments_lexer": "ipython3", 267 | "version": "3.6.1" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 2 272 | } 273 | -------------------------------------------------------------------------------- /group contributions/bvacek2/praw_test.py: -------------------------------------------------------------------------------- 1 | import praw 2 | import matplotlib.pyplot as plt 3 | 4 | reddit = praw.Reddit(client_id='UHCBgwhLYPSEeg', 5 | client_secret='dw7eLPuOBlsQXo5EtWhWiSGgWvk', 6 | user_agent='Vcrew192') 7 | 8 | for submission in reddit.subreddit('news').hot(limit=10): 9 | print(submission.domain, submission.score) 10 | 11 | plt.hist(submission.score) 12 | 13 | plt.show 14 | -------------------------------------------------------------------------------- /group contributions/bvacek2/reddit.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import json 3 | import requests 4 | import pandas as pd 5 | 6 | subreddit = input('Insert subreddit: ' ) 7 | 8 | site = requests.get('https://reddit.com/r/{}.json'.format(subreddit), headers={'user-agent': 'Mozilla/5.0'} 9 | ) 10 | 11 | reddit_json = json.loads(site.text) 12 | 13 | master_dict = {} 14 | 15 | for i in range(0, 19): 16 | 
master_dict[reddit_json['data']['children'][i]['data']['title']] = reddit_json['data']['children'][i]['data']['score'] 17 | 18 | reddit_df = pd.DataFrame(master_dict, index=[0]) 19 | 20 | print(reddit_df) 21 | -------------------------------------------------------------------------------- /group contributions/bvacek2/sqlTest/reddit.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/bvacek2/sqlTest/reddit.db -------------------------------------------------------------------------------- /group contributions/bvacek2/sqlTest/reddit_data.csv: -------------------------------------------------------------------------------- 1 | Title,URL,Upvote Ratio (%),Net Score,# of Upvotes,# of Downvotes,Year Posted,Month Posted,Day Posted,Self Post?,Video Post? 2 | Japanese firm gives non-smokers extra six days holiday to compensate for cigarette break,http://www.independent.co.uk/news/world/asia/japanese-firm-piala-inc-tokyo-non-smokers-extra-six-days-holiday-cigarette-break-a8028541.html,94.0,124098.0,132559.0,8461.0,2017.0,10.0,31.0,0.0,0.0 3 | Chester Bennington of Linkin Park commits suicide,http://wtkr.com/2017/07/20/tmz-report-chester-bennington-of-linkin-park-commits-suicide/,84.0,123594.0,152674.0,29080.0,2017.0,7.0,20.0,0.0,0.0 4 | U.S. President Donald Trump fired the federal government's top lawyer Sally Yates on Monday after she took the extraordinarily rare step of defying the White House and refused to defend new travel restrictions targeting seven Muslim-majority nations.,http://www.reuters.com/article/us-usa-trump-immigration-idUSKBN15E1DE,81.0,121528.0,158770.0,37242.0,2017.0,1.0,31.0,0.0,0.0 5 | James Comey terminated as Director of FBI,http://abcn.ws/2qPcnnU,88.0,110066.0,127444.0,17378.0,2017.0,5.0,9.0,0.0,0.0 6 | "American Icon and Playboy Founder, Hugh M. Hefner, Has Died",http://www.prnewswire.com/news-releases/american-icon-and-playboy-founder-hugh-m-hefner-has-died-300527267.html,84.0,108129.0,133571.0,25442.0,2017.0,9.0,28.0,0.0,0.0 7 | "Pet Store Bill Passes California Senate 38 to 0. 
It bans the sale of dogs, cats and rabbits sourced from high-volume, commercial breeding facilities, known as mills, in all pet shops throughout the state.",https://lbpost.com/life/pets/2000011459-pet-store-bill-passes-california-senate-38-to-0,92.0,106867.0,117044.0,10177.0,2017.0,9.0,13.0,0.0,0.0 8 | CNN banned from White House press briefing,http://money.cnn.com/2017/02/24/media/cnn-blocked-white-house-gaggle/index.html,84.0,102359.0,126443.0,24084.0,2017.0,2.0,24.0,0.0,0.0 9 | Boy who suffered up to 100 seizures a day has none in 300 days after being prescribed cannabis oil,https://www.independent.co.uk/news/uk/home-news/billy-caldwell-cannabis-oil-boy-seizures-stopped-cured-prescription-medical-marijuana-a7933066.html,83.0,101361.0,127469.0,26108.0,2017.0,9.0,7.0,0.0,0.0 10 | YouTube star Daddyofive loses custody of two children featured in 'prank' video.,http://www.independent.co.uk/life-style/gadgets-and-tech/news/youtube-daddyofive-cody-videos-watch-children-custody-latest-prank-parents-a7713376.html,89.0,100907.0,115137.0,14230.0,2017.0,5.0,2.0,0.0,0.0 11 | Carrie Fisher Dies at 60,http://people.com/movies/carrie-fisher-dies/,81.0,99276.0,129699.0,30423.0,2016.0,12.0,27.0,0.0,0.0 12 | Trump revealed highly classified information to Russian foreign minister and ambassador,http://wapo.st/2pPSCIo,69.0,92225.0,167461.0,75236.0,2017.0,5.0,15.0,0.0,0.0 13 | Hate crime charges filed against 4 in torture of teen,http://abc7chicago.com/news/4-charged-with-hate-crimes-in-torture-of-teen-in-facebook-live-video/1687517/,87.0,90947.0,106924.0,15977.0,2017.0,1.0,5.0,0.0,0.0 14 | Corey Feldman launches campaign to expose Hollywood pedophile ring,http://movieweb.com/amp/corey-feldman-truth-campaign-expose-hollywood-pedophile-ring/,88.0,88335.0,102282.0,13947.0,2017.0,10.0,25.0,0.0,0.0 15 | Comcast asks the FCC to prohibit states from enforcing net neutrality,https://arstechnica.com/tech-policy/2017/11/comcast-asks-the-fcc-to-prohibit-states-from-enforcing-net-neutrality/,92.0,88198.0,96597.0,8399.0,2017.0,11.0,4.0,0.0,0.0 16 | Trump withdrawing US from Paris climate agreement,http://abcnews.go.com/Politics/trump-withdrawing-us-paris-climate-agreement/story?id=47767077,80.0,86998.0,115997.0,28999.0,2017.0,6.0,1.0,0.0,0.0 17 | Record 9 million comments flood FCC on net neutrality,https://www.usatoday.com/story/tech/news/2017/07/19/record-9-million-comments-flood-fcc-net-neutrality/488042001/,91.0,86035.0,95477.0,9442.0,2017.0,7.0,22.0,0.0,0.0 18 | Japanese firm gives non-smokers extra six days holiday to compensate for cigarette break,http://www.independent.co.uk/news/world/asia/japanese-firm-piala-inc-tokyo-non-smokers-extra-six-days-holiday-cigarette-break-a8028541.html,94.0,124095.0,132556.0,8461.0,2017.0,10.0,31.0,0.0,0.0 19 | Chester Bennington of Linkin Park commits suicide,http://wtkr.com/2017/07/20/tmz-report-chester-bennington-of-linkin-park-commits-suicide/,84.0,123597.0,152678.0,29081.0,2017.0,7.0,20.0,0.0,0.0 20 | U.S. President Donald Trump fired the federal government's top lawyer Sally Yates on Monday after she took the extraordinarily rare step of defying the White House and refused to defend new travel restrictions targeting seven Muslim-majority nations.,http://www.reuters.com/article/us-usa-trump-immigration-idUSKBN15E1DE,81.0,121538.0,158783.0,37245.0,2017.0,1.0,31.0,0.0,0.0 21 | James Comey terminated as Director of FBI,http://abcn.ws/2qPcnnU,88.0,110059.0,127436.0,17377.0,2017.0,5.0,9.0,0.0,0.0 22 | "American Icon and Playboy Founder, Hugh M. 
Hefner, Has Died",http://www.prnewswire.com/news-releases/american-icon-and-playboy-founder-hugh-m-hefner-has-died-300527267.html,84.0,108124.0,133564.0,25440.0,2017.0,9.0,28.0,0.0,0.0 23 | "Pet Store Bill Passes California Senate 38 to 0. It bans the sale of dogs, cats and rabbits sourced from high-volume, commercial breeding facilities, known as mills, in all pet shops throughout the state.",https://lbpost.com/life/pets/2000011459-pet-store-bill-passes-california-senate-38-to-0,92.0,106871.0,117049.0,10178.0,2017.0,9.0,13.0,0.0,0.0 24 | CNN banned from White House press briefing,http://money.cnn.com/2017/02/24/media/cnn-blocked-white-house-gaggle/index.html,84.0,102366.0,126452.0,24086.0,2017.0,2.0,24.0,0.0,0.0 25 | Boy who suffered up to 100 seizures a day has none in 300 days after being prescribed cannabis oil,https://www.independent.co.uk/news/uk/home-news/billy-caldwell-cannabis-oil-boy-seizures-stopped-cured-prescription-medical-marijuana-a7933066.html,83.0,101366.0,127475.0,26109.0,2017.0,9.0,7.0,0.0,0.0 26 | YouTube star Daddyofive loses custody of two children featured in 'prank' video.,http://www.independent.co.uk/life-style/gadgets-and-tech/news/youtube-daddyofive-cody-videos-watch-children-custody-latest-prank-parents-a7713376.html,89.0,100904.0,115134.0,14230.0,2017.0,5.0,2.0,0.0,0.0 27 | Carrie Fisher Dies at 60,http://people.com/movies/carrie-fisher-dies/,81.0,99273.0,129695.0,30422.0,2016.0,12.0,27.0,0.0,0.0 28 | Japanese firm gives non-smokers extra six days holiday to compensate for cigarette break,http://www.independent.co.uk/news/world/asia/japanese-firm-piala-inc-tokyo-non-smokers-extra-six-days-holiday-cigarette-break-a8028541.html,94.0,124093.0,132553.0,8460.0,2017.0,10.0,31.0,0.0,0.0 29 | Chester Bennington of Linkin Park commits suicide,http://wtkr.com/2017/07/20/tmz-report-chester-bennington-of-linkin-park-commits-suicide/,84.0,123588.0,152667.0,29079.0,2017.0,7.0,20.0,0.0,0.0 30 | U.S. President Donald Trump fired the federal government's top lawyer Sally Yates on Monday after she took the extraordinarily rare step of defying the White House and refused to defend new travel restrictions targeting seven Muslim-majority nations.,http://www.reuters.com/article/us-usa-trump-immigration-idUSKBN15E1DE,81.0,121536.0,158780.0,37244.0,2017.0,1.0,31.0,0.0,0.0 31 | James Comey terminated as Director of FBI,http://abcn.ws/2qPcnnU,88.0,110068.0,127447.0,17379.0,2017.0,5.0,9.0,0.0,0.0 32 | "American Icon and Playboy Founder, Hugh M. Hefner, Has Died",http://www.prnewswire.com/news-releases/american-icon-and-playboy-founder-hugh-m-hefner-has-died-300527267.html,84.0,108124.0,133564.0,25440.0,2017.0,9.0,28.0,0.0,0.0 33 | "Pet Store Bill Passes California Senate 38 to 0. 
It bans the sale of dogs, cats and rabbits sourced from high-volume, commercial breeding facilities, known as mills, in all pet shops throughout the state.",https://lbpost.com/life/pets/2000011459-pet-store-bill-passes-california-senate-38-to-0,92.0,106872.0,117050.0,10178.0,2017.0,9.0,13.0,0.0,0.0 34 | CNN banned from White House press briefing,http://money.cnn.com/2017/02/24/media/cnn-blocked-white-house-gaggle/index.html,84.0,102363.0,126448.0,24085.0,2017.0,2.0,24.0,0.0,0.0 35 | Boy who suffered up to 100 seizures a day has none in 300 days after being prescribed cannabis oil,https://www.independent.co.uk/news/uk/home-news/billy-caldwell-cannabis-oil-boy-seizures-stopped-cured-prescription-medical-marijuana-a7933066.html,83.0,101359.0,127466.0,26107.0,2017.0,9.0,7.0,0.0,0.0 36 | YouTube star Daddyofive loses custody of two children featured in 'prank' video.,http://www.independent.co.uk/life-style/gadgets-and-tech/news/youtube-daddyofive-cody-videos-watch-children-custody-latest-prank-parents-a7713376.html,89.0,100902.0,115131.0,14229.0,2017.0,5.0,2.0,0.0,0.0 37 | Carrie Fisher Dies at 60,http://people.com/movies/carrie-fisher-dies/,81.0,99278.0,129701.0,30423.0,2016.0,12.0,27.0,0.0,0.0 38 | -------------------------------------------------------------------------------- /group contributions/garyab2/dict_to_graph.py: -------------------------------------------------------------------------------- 1 | import plotly 2 | plotly.tools.set_credentials_file(username='reddit_unlocked', api_key='gfnXKc7JvUKST4HRJyFX') 3 | import plotly.plotly as py 4 | import plotly.graph_objs as go 5 | import pandas as pd 6 | from plotly.graph_objs import * 7 | #Takes a dictionary of dictionaries of keywords from body text as input and returns the URL for the Plotly HTML embedding of a scatterplot made from the keywords and their attributes 8 | #Attribute order: 'Keyword', 'Occurences', 'Upvotes', 'Downvotes', 'Score', 'Subjectivity', 'Polarity', 'Domain' 9 | def body_to_graph(words={}, subreddit=''): 10 | """ 11 | :type subreddit: String 12 | """ 13 | frames = [] 14 | #Turns the dictionary of dictionaries into a list of single-row dataframes 15 | for key, value in words.items(): 16 | frames.append(pd.DataFrame(data = value, columns = [key], index = ['Keyword','Occurences', 'Upvotes', 'Downvotes', 'Score', 'Subjectivity', 'Polarity', 'Domain']).transpose()) 17 | #Concatenates the list of dataframes 18 | data_df = pd.concat(frames) 19 | trace1 = go.Scatter( 20 | y = data_df.Subjectivity, #Subjectivity of the text the keyword was found in on the y-axis 21 | x = data_df.Occurences * data_df.Score, #Occurrences * Score on the x-axis for more spread-out data 22 | mode = 'markers', 23 | marker = dict( 24 | size = (data_df.Occurences) * 20, #Occurrences of the keyword for size 25 | color = data_df.Polarity, #Polarity for color of the post (blue is sad, red is happy) 26 | colorscale = 'Portland', 27 | showscale = True 28 | ), 29 | text = "Keyword: " + data_df.Keyword 30 | ) 31 | layout = go.Layout( 32 | annotations=Annotations([ 33 | Annotation( 34 | x=0.5, 35 | y=-0.123, 36 | showarrow=False, 37 | text='(Occurrences * Score)', 38 | xref='paper', 39 | yref='paper' 40 | ), 41 | Annotation( 42 | x=1.055, 43 | y=0.5, 44 | showarrow=False, 45 | text='Text Polarity', 46 | textangle=-90, 47 | xref='paper', 48 | yref='paper' 49 | ), 50 | Annotation( 51 | x=.01, 52 | y=1, 53 | showarrow=False, 54 | text='Size = Occurrences', 55 | textangle=0, 56 | xref='paper', 57 | yref='paper', 58 | bordercolor = '#1f77b4', 59 | font=dict( 60 | family='Courier New, monospace', 61 | size=16, 62 | 
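# styling for the 'Size = Occurrences' legend note above; '#1f77b4' and '#ff7f0e' are the stock D3/Plotly blue and orange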
color='#ff7f0e' 63 | ) 64 | ) 65 | ]), 66 | title = 'Stats of top reddit/r/' + subreddit + ' keywords', 67 | yaxis = dict( 68 | title = 'Subjectivity', 69 | ticklen = 5, 70 | ), 71 | xaxis = dict( 72 | title = 'Popularity', 73 | ticklen = 10, 74 | ) 75 | ) 76 | data = [trace1] 77 | fig = go.Figure(data = data, layout = layout) 78 | url = py.plot(fig, filename = 'reddit plot') 79 | return url -------------------------------------------------------------------------------- /group contributions/testFolder/Newspaper + RAKE Testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "[('compatibility', 1.0), ('of', 1.0), ('systems', 1.0), ('linear', 1.0), ('constraints', 1.0), ('over', 1.0), ('the', 1.0), ('set', 1.0), ('natural', 1.0), ('numbers', 1.0), ('criteria', 1.0), ('system', 1.0), ('diophantine', 1.0), ('equations', 1.0)]\n", 13 | "[('minimal generating sets', 8.666666666666666), ('linear diophantine equations', 8.5), ('minimal supporting set', 7.666666666666666), ('minimal set', 4.666666666666666), ('linear constraints', 4.5), ('natural numbers', 4.0), ('strict inequations', 4.0), ('nonstrict inequations', 4.0), ('upper bounds', 4.0), ('mixed types', 3.666666666666667), ('considered types', 3.166666666666667), ('set', 2.0), ('types', 1.6666666666666667), ('considered', 1.5), ('compatibility', 1.0), ('systems', 1.0), ('criteria', 1.0), ('system', 1.0), ('components', 1.0), ('solutions', 1.0), ('algorithms', 1.0), ('construction', 1.0), ('constructing', 1.0), ('solving', 1.0)]\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "from newspaper import Article\n", 19 | "import rake\n", 20 | "import operator" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 10, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "Please input an article: http://wtkr.com/2017/07/20/tmz-report-chester-bennington-of-linkin-park-commits-suicide/\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "readUrl = input(\"Please input an article: \")\n", 38 | "#readUrl = '''https://www.huffingtonpost.com/entry/firefighters-gaining-ground-against-californias-deadliest-ever-blazes_us_59e4bd3ce4b03a7be5821e8b?ncid=inblnkushpmg00000009'''\n", 39 | "\n", 40 | "#Creates an Article object that points newspaper to the URL of the webpage and lets you do all the fancy stuff.\n", 41 | "article = Article(readUrl)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 11, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "Downloaded and parsed!\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "#Pulls the webpage from the URL, parses and cleans up all the yucky stuff, and 'reads' the article\n", 59 | "#MUST DO ALL THREE OF THESE BEFORE TRYING TO ACCESS ANY ARTICLE INFO\n", 60 | "article.download()\n", 61 | "article.parse()\n", 62 | "article.nlp()\n", 63 | "print(\"Downloaded and parsed!\")" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 12, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "LOS ANGELES – Chester Bennington, the lead singer of the rock band Linkin Park, was found dead Thursday according to a spokesman for the LA County Coroner.\n", 76 | 
"The coroner’s office was called out to a home in Palos Verdes Estates shortly after 9 a.m. Thursday.\n", 77 | "Bennington’s band found success in 2000 with their album “Hybrid Theory” with Bennington’s heartfelt vocals leading the way.\n", 78 | "“When I was young, getting beaten up and pretty much raped was no fun,” he told The Guardian in 2011.\n", 79 | "If you are depressed and need someone confidentially to talk to, the National Suicide Prevention Lifeline is open 24/7 at 1-800-273-8255.\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "#Not so relevant, but I thought it was cool to see what newspaper pulls from a URL\n", 85 | "print(article.summary)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 13, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "['linkin', 'success', 'bennington', 'suicide', 'remember', 'shinoda', '41', 'band', 'soon', 'album', 'true', 'singer', 'dead', 'wonder', 'park', 'statement', 'chester']\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "#Print array of keywords\n", 103 | "print(article.keywords)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 8, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "United CEO Oscar Munoz doubled down in a letter to employees on Monday evening, claiming that employees \"followed established procedures\" when removing a passenger from a plane because it was overbooked, and calling the passenger \"disruptive and belligerent.\"\n", 116 | "\n", 117 | "\n", 118 | "\n", 119 | "United had to ask several passengers who had already boarded a flight from Chicago to Louisville on Sunday evening to leave, as the airline had sold too many tickets. One man refused to leave, and United called airport officials, who forcibly removed him from the plane.\n", 120 | "\n", 121 | "Video circulated of the incident earlier in the day, showing the man being dragged from the plane and later returning with blood on his face. 
The incident drew scorn on Twitter and other social media, especially when Munoz used the euphemism \"re-accomodate\" in a public statement to describe the customers booted from the flight.\n", 122 | "\n", 123 | "According to the letter, which was obtained by CNBC, when crew members first approached the passenger to tell him to leave, he \"raised his voice and refused to comply,\" and each time they asked again \"he refused and became more and more disruptive and belligerent.\"\n", 124 | "\n", 125 | "Crew members \"were left with no choice but to call Chicago Aviation Security Officers to assist in removing the customer from the flight,\" Munoz wrote, and at one point the passenger \"continued to resist - running back onto the aircraft in defiance of both our crew and security officials.\"\n", 126 | "\n", 127 | "Munoz acknowledged to employees that the company could learn lessons from the incident, but said: \"I emphatically stand behind all of you.\"\n", 128 | "\n", 129 | "Here's the video, which went viral earlier today:\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "#Run this if you want to check the actual text that it gets from the URL\n", 135 | "print(article.text)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 3, 141 | "metadata": { 142 | "collapsed": true 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "txt = \"Americans raise $200k overnight for children of grieving Army widow Trump insulted\"" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 5, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "#Initialize a RAKE object that looks for keywords that have:\n", 158 | "# Minimum 1 letter\n", 159 | "# Maximum 2 words per phrase\n", 160 | "# Appears at least 1 time in the text (needs to be this low for titles)\n", 161 | "rake_object = rake.Rake(\"SmartStoplist.txt\", 1, 2, 1)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 6, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "[('children', 1.0)]\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "keywords = rake_object.run(txt)\n", 179 | "print(keywords)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 3", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.6.2" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 2 213 | } 214 | -------------------------------------------------------------------------------- /group contributions/testFolder/SmartStoplist.txt: -------------------------------------------------------------------------------- 1 | #stop word list from SMART (Salton,1971).
Available at ftp://ftp.cs.cornell.edu/pub/smart/english.stop 2 | a 3 | a's 4 | able 5 | about 6 | above 7 | according 8 | accordingly 9 | across 10 | actually 11 | after 12 | afterwards 13 | again 14 | against 15 | ain't 16 | all 17 | allow 18 | allows 19 | almost 20 | alone 21 | along 22 | already 23 | also 24 | although 25 | always 26 | am 27 | among 28 | amongst 29 | an 30 | and 31 | another 32 | any 33 | anybody 34 | anyhow 35 | anyone 36 | anything 37 | anyway 38 | anyways 39 | anywhere 40 | apart 41 | appear 42 | appreciate 43 | appropriate 44 | are 45 | aren't 46 | around 47 | as 48 | aside 49 | ask 50 | asking 51 | associated 52 | at 53 | available 54 | away 55 | awfully 56 | b 57 | be 58 | became 59 | because 60 | become 61 | becomes 62 | becoming 63 | been 64 | before 65 | beforehand 66 | behind 67 | being 68 | believe 69 | below 70 | beside 71 | besides 72 | best 73 | better 74 | between 75 | beyond 76 | both 77 | brief 78 | but 79 | by 80 | c 81 | c'mon 82 | c's 83 | came 84 | can 85 | can't 86 | cannot 87 | cant 88 | cause 89 | causes 90 | certain 91 | certainly 92 | changes 93 | clearly 94 | co 95 | com 96 | come 97 | comes 98 | concerning 99 | consequently 100 | consider 101 | considering 102 | contain 103 | containing 104 | contains 105 | corresponding 106 | could 107 | couldn't 108 | course 109 | currently 110 | d 111 | definitely 112 | described 113 | despite 114 | did 115 | didn't 116 | different 117 | do 118 | does 119 | doesn't 120 | doing 121 | don't 122 | done 123 | down 124 | downwards 125 | during 126 | e 127 | each 128 | edu 129 | eg 130 | eight 131 | either 132 | else 133 | elsewhere 134 | enough 135 | entirely 136 | especially 137 | et 138 | etc 139 | even 140 | ever 141 | every 142 | everybody 143 | everyone 144 | everything 145 | everywhere 146 | ex 147 | exactly 148 | example 149 | except 150 | f 151 | far 152 | few 153 | fifth 154 | first 155 | five 156 | followed 157 | following 158 | follows 159 | for 160 | former 161 | formerly 162 | forth 163 | four 164 | from 165 | further 166 | furthermore 167 | g 168 | get 169 | gets 170 | getting 171 | given 172 | gives 173 | go 174 | goes 175 | going 176 | gone 177 | got 178 | gotten 179 | greetings 180 | h 181 | had 182 | hadn't 183 | happens 184 | hardly 185 | has 186 | hasn't 187 | have 188 | haven't 189 | having 190 | he 191 | he's 192 | hello 193 | help 194 | hence 195 | her 196 | here 197 | here's 198 | hereafter 199 | hereby 200 | herein 201 | hereupon 202 | hers 203 | herself 204 | hi 205 | him 206 | himself 207 | his 208 | hither 209 | hopefully 210 | how 211 | howbeit 212 | however 213 | i 214 | i'd 215 | i'll 216 | i'm 217 | i've 218 | ie 219 | if 220 | ignored 221 | immediate 222 | in 223 | inasmuch 224 | inc 225 | indeed 226 | indicate 227 | indicated 228 | indicates 229 | inner 230 | insofar 231 | instead 232 | into 233 | inward 234 | is 235 | isn't 236 | it 237 | it'd 238 | it'll 239 | it's 240 | its 241 | itself 242 | j 243 | just 244 | k 245 | keep 246 | keeps 247 | kept 248 | know 249 | knows 250 | known 251 | l 252 | last 253 | lately 254 | later 255 | latter 256 | latterly 257 | least 258 | less 259 | lest 260 | let 261 | let's 262 | like 263 | liked 264 | likely 265 | little 266 | look 267 | looking 268 | looks 269 | ltd 270 | m 271 | mainly 272 | many 273 | may 274 | maybe 275 | me 276 | mean 277 | meanwhile 278 | merely 279 | might 280 | more 281 | moreover 282 | most 283 | mostly 284 | much 285 | must 286 | my 287 | myself 288 | n 289 | name 290 | namely 291 | nd 292 | near 293 | nearly 294 | 
necessary 295 | need 296 | needs 297 | neither 298 | never 299 | nevertheless 300 | new 301 | next 302 | nine 303 | no 304 | nobody 305 | non 306 | none 307 | noone 308 | nor 309 | normally 310 | not 311 | nothing 312 | novel 313 | now 314 | nowhere 315 | o 316 | obviously 317 | of 318 | off 319 | often 320 | oh 321 | ok 322 | okay 323 | old 324 | on 325 | once 326 | one 327 | ones 328 | only 329 | onto 330 | or 331 | other 332 | others 333 | otherwise 334 | ought 335 | our 336 | ours 337 | ourselves 338 | out 339 | outside 340 | over 341 | overall 342 | own 343 | p 344 | particular 345 | particularly 346 | per 347 | perhaps 348 | placed 349 | please 350 | plus 351 | possible 352 | presumably 353 | probably 354 | provides 355 | q 356 | que 357 | quite 358 | qv 359 | r 360 | rather 361 | rd 362 | re 363 | really 364 | reasonably 365 | regarding 366 | regardless 367 | regards 368 | relatively 369 | respectively 370 | right 371 | s 372 | said 373 | same 374 | saw 375 | say 376 | saying 377 | says 378 | second 379 | secondly 380 | see 381 | seeing 382 | seem 383 | seemed 384 | seeming 385 | seems 386 | seen 387 | self 388 | selves 389 | sensible 390 | sent 391 | serious 392 | seriously 393 | seven 394 | several 395 | shall 396 | she 397 | should 398 | shouldn't 399 | since 400 | six 401 | so 402 | some 403 | somebody 404 | somehow 405 | someone 406 | something 407 | sometime 408 | sometimes 409 | somewhat 410 | somewhere 411 | soon 412 | sorry 413 | specified 414 | specify 415 | specifying 416 | still 417 | sub 418 | such 419 | sup 420 | sure 421 | t 422 | t's 423 | take 424 | taken 425 | tell 426 | tends 427 | th 428 | than 429 | thank 430 | thanks 431 | thanx 432 | that 433 | that's 434 | thats 435 | the 436 | their 437 | theirs 438 | them 439 | themselves 440 | then 441 | thence 442 | there 443 | there's 444 | thereafter 445 | thereby 446 | therefore 447 | therein 448 | theres 449 | thereupon 450 | these 451 | they 452 | they'd 453 | they'll 454 | they're 455 | they've 456 | think 457 | third 458 | this 459 | thorough 460 | thoroughly 461 | those 462 | though 463 | three 464 | through 465 | throughout 466 | thru 467 | thus 468 | to 469 | together 470 | too 471 | took 472 | toward 473 | towards 474 | tried 475 | tries 476 | truly 477 | try 478 | trying 479 | twice 480 | two 481 | u 482 | un 483 | under 484 | unfortunately 485 | unless 486 | unlikely 487 | until 488 | unto 489 | up 490 | upon 491 | us 492 | use 493 | used 494 | useful 495 | uses 496 | using 497 | usually 498 | uucp 499 | v 500 | value 501 | various 502 | very 503 | via 504 | viz 505 | vs 506 | w 507 | want 508 | wants 509 | was 510 | wasn't 511 | way 512 | we 513 | we'd 514 | we'll 515 | we're 516 | we've 517 | welcome 518 | well 519 | went 520 | were 521 | weren't 522 | what 523 | what's 524 | whatever 525 | when 526 | whence 527 | whenever 528 | where 529 | where's 530 | whereafter 531 | whereas 532 | whereby 533 | wherein 534 | whereupon 535 | wherever 536 | whether 537 | which 538 | while 539 | whither 540 | who 541 | who's 542 | whoever 543 | whole 544 | whom 545 | whose 546 | why 547 | will 548 | willing 549 | wish 550 | with 551 | within 552 | without 553 | won't 554 | wonder 555 | would 556 | would 557 | wouldn't 558 | x 559 | y 560 | yes 561 | yet 562 | you 563 | you'd 564 | you'll 565 | you're 566 | you've 567 | your 568 | yours 569 | yourself 570 | yourselves 571 | z 572 | zero 573 | -------------------------------------------------------------------------------- /group 
contributions/testFolder/data/docs/fao_test/v9094e.key: -------------------------------------------------------------------------------- 1 | Agriculture 2 | Development policies 3 | Employment 4 | Female labour 5 | Male labour 6 | Namibia 7 | Role of women 8 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/v9094e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/v9094e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/v9570e.key: -------------------------------------------------------------------------------- 1 | Animal diseases 2 | Animal power 3 | Animal training 4 | Behaviour 5 | Ecology 6 | Elephants 7 | Handling 8 | Hauling 9 | Parasitoses 10 | Sri Lanka 11 | Working animals 12 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/v9570e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/v9570e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/v9650e.key: -------------------------------------------------------------------------------- 1 | Agricultural development 2 | Decision making 3 | Employment 4 | Extension activities 5 | Honduras 6 | Labour 7 | Role of women 8 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/v9650e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/v9650e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w1750e.key: -------------------------------------------------------------------------------- 1 | Fisheries development 2 | Fishery data 3 | Fishery management 4 | Fishery resources 5 | Indian ocean 6 | International agreements 7 | International cooperation 8 | International organizations 9 | Resource conservation 10 | Tuna 11 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w1750e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w1750e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w2167e.key: -------------------------------------------------------------------------------- 1 | Food production 2 | Food resources 3 | Food security 4 | Forest resources 5 | Households 6 | National planning 7 | Nonwood forest products 8 | Social consciousness 9 | -------------------------------------------------------------------------------- /group 
contributions/testFolder/data/docs/fao_test/w2167e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w2167e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w3727e.key: -------------------------------------------------------------------------------- 1 | Evaluation 2 | FAO 3 | Foods 4 | Pesticides 5 | Residues 6 | Toxicity 7 | WHO 8 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w3727e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w3727e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w4442e.key: -------------------------------------------------------------------------------- 1 | Arid zones 2 | Case studies 3 | Forest ecology 4 | Forest inventories 5 | Forest management 6 | Forest resources 7 | Silviculture 8 | Silvopastoral systems 9 | Tropical zones 10 | Virgin forests 11 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w4442e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w4442e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w4979e.key: -------------------------------------------------------------------------------- 1 | Buffer stocks 2 | Developing countries 3 | Financing 4 | Food security 5 | Food supply 6 | Grain 7 | Information systems 8 | Management 9 | Marketing 10 | Storage 11 | Trade liberalization 12 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w4979e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w4979e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w5367e.key: -------------------------------------------------------------------------------- 1 | Health protection 2 | Quality controls 3 | Sampling 4 | Wastewater 5 | Wastewater irrigation 6 | Water analysis 7 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w5367e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w5367e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w5631e.key: 
-------------------------------------------------------------------------------- 1 | Budgets 2 | Cooperation 3 | FAO 4 | Financial situation 5 | Government 6 | Regulations 7 | Wfp 8 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w5631e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w5631e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w6206e.key: -------------------------------------------------------------------------------- 1 | Forestry development 2 | Men 3 | National planning 4 | Role of women 5 | Social consciousness 6 | Women 7 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w6864e.key: -------------------------------------------------------------------------------- 1 | Feasibility studies 2 | Financing 3 | Food technology 4 | Fruit 5 | Managers 6 | Market research 7 | Products 8 | Quality controls 9 | Vegetables 10 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w6864e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w6864e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w7295e.key: -------------------------------------------------------------------------------- 1 | Laboratories 2 | Laboratory equipment 3 | Plants 4 | Quality assurance 5 | Soil analysis 6 | Standards 7 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w7295e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w7295e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w7415e.key: -------------------------------------------------------------------------------- 1 | Case studies 2 | Economic analysis 3 | Economic policies 4 | European union 5 | Income 6 | Italy 7 | Milk 8 | Milk products 9 | Netherlands 10 | Price support 11 | Producer prices 12 | United Kingdom 13 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w7415e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w7415e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w7483e.key: -------------------------------------------------------------------------------- 1 | Communal forests 2 | Community forestry 3 | Forest management 4 | Forest resources 5 | Forestry development 6 
| Incentives 7 | Social consciousness 8 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w7483e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w7483e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w7519e.key: -------------------------------------------------------------------------------- 1 | Asia 2 | Data collection 3 | Energy consumption 4 | Forecasting 5 | Fuelwood 6 | Supply balance 7 | Wood 8 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_test/w7519e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_test/w7519e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ab387e.key: -------------------------------------------------------------------------------- 1 | Choice of species 2 | Forest resources 3 | Forestry policies 4 | Genetic resources 5 | Resource conservation 6 | Selection 7 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ab387e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/ab387e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ad692e.key: -------------------------------------------------------------------------------- 1 | Agriculture 2 | Decentralization 3 | Financing 4 | Local government 5 | Public sector 6 | Rural development 7 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ad692e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/ad692e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ae050e.key: -------------------------------------------------------------------------------- 1 | Agricultural development 2 | Development policies 3 | Rural development 4 | Socioeconomic development 5 | Structural change 6 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ae050e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/ae050e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ae190e.key: 
-------------------------------------------------------------------------------- 1 | Case studies 2 | Government 3 | Natural disasters 4 | Weather hazards 5 | Workshops 6 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ae190e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/ae190e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ae385e.key: -------------------------------------------------------------------------------- 1 | Agroindustrial sector 2 | Egg production 3 | Poultry farming 4 | Poultry meat 5 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/ae385e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/ae385e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/faobetf_fb29fe.key: -------------------------------------------------------------------------------- 1 | FISH PONDS 2 | FISH CULTURE 3 | FRESHWATER FISHES 4 | EXTENSION ACTIVITIES 5 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/faobetf_fb29fe.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/faobetf_fb29fe.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/faobetf_fb35be.key: -------------------------------------------------------------------------------- 1 | FISH CULTURE 2 | FISH PONDS 3 | FRESHWATER 4 | WATER SUPPLY 5 | FERTILIZERS 6 | HARVESTING 7 | AGROPISCICULTURE 8 | FISH DISEASES 9 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/faobetf_fb35be.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/faobetf_fb35be.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/foodfirst_ff08ne.key: -------------------------------------------------------------------------------- 1 | FAMINE 2 | RURAL ENVIRONMENT 3 | VILLAGES 4 | FOOD SUPPLY 5 | TENURE 6 | POVERTY 7 | RURAL POPULATION 8 | SOCIAL STRUCTURE 9 | DEVELOPMENT AID 10 | BANGLADESH 11 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/foodfirst_ff08ne.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group 
contributions/testFolder/data/docs/fao_train/foodfirst_ff08ne.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/gtz_g16foe.key: -------------------------------------------------------------------------------- 1 | CERAMICS 2 | CLAY 3 | DESIGN 4 | METHODS 5 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/gtz_g16foe.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/gtz_g16foe.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/gtz_g24ine.key: -------------------------------------------------------------------------------- 1 | ANIMAL POWER 2 | MILLS 3 | DESIGN 4 | CEREALS 5 | DESIGN 6 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/gtz_g24ine.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/gtz_g24ine.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/gtz_g32bie.key: -------------------------------------------------------------------------------- 1 | BIOGAS 2 | DESIGN 3 | ANIMAL PRODUCTION 4 | APPROPRIATE TECHNOLOGY 5 | DESIGN 6 | DIGESTERS 7 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/gtz_g32bie.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/gtz_g32bie.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/nri_nr12me.key: -------------------------------------------------------------------------------- 1 | MARINE FISHERIES 2 | BYCATCH 3 | POSTHARVEST LOSSES 4 | LOSSES 5 | FISH MEAL 6 | SALTED FISH 7 | FISH 8 | PRODUCTION POSSIBILITIES 9 | FISH PROCESSING 10 | INDIA 11 | GUJARAT 12 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/nri_nr12me.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/nri_nr12me.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/t0178e.key: -------------------------------------------------------------------------------- 1 | Agroforestry 2 | Browse plants 3 | Community forestry 4 | Employment 5 | Environment 6 | Food production 7 | Food security 8 | Forest resources 9 | Forestry development 10 | Fuelwood 11 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/t0178e.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/t0178e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/t0512e.key: -------------------------------------------------------------------------------- 1 | Biomass 2 | Charcoal 3 | Engines 4 | Equipment 5 | Fuelwood 6 | Gases 7 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/t0512e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/t0512e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/t0522e.key: -------------------------------------------------------------------------------- 1 | Drying 2 | Equipment 3 | Grain 4 | Postharvest technology 5 | Storage 6 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/t0522e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/t0522e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/t0715e.key: -------------------------------------------------------------------------------- 1 | Agricultural policies 2 | Development policies 3 | Land use 4 | Planning 5 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/t0715e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/t0715e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/v3600e.key: -------------------------------------------------------------------------------- 1 | Adjustment of production 2 | Exchange rate 3 | Surpluses 4 | Taxes 5 | Trade 6 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/v3600e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/v3600e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/v5030e.key: -------------------------------------------------------------------------------- 1 | Deterioration 2 | Factories 3 | Food hygiene 4 | Food technology 5 | Fruit products 6 | Fruits 7 | Preservation 8 | Quality controls 9 | Raw materials 10 | Vegetable products 11 | Vegetables 12 | -------------------------------------------------------------------------------- /group 
contributions/testFolder/data/docs/fao_train/v5030e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/v5030e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/v6718e.key: -------------------------------------------------------------------------------- 1 | Administration 2 | Aquaculture 3 | Artisanal fisheries 4 | Cooperative activities 5 | Credit 6 | Development projects 7 | Ethiopia 8 | Fisheries development 9 | Fishermen 10 | Fishery policies 11 | Government 12 | Inland fisheries 13 | Sectoral planning 14 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/v6718e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/v6718e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/v8047e.key: -------------------------------------------------------------------------------- 1 | Developing countries 2 | Land resources 3 | Land use 4 | Planning 5 | Resource management 6 | Sustainability 7 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/v8047e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/v8047e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/v9909e.key: -------------------------------------------------------------------------------- 1 | Asia 2 | Environmental degradation 3 | Farmland 4 | Land management 5 | Land use 6 | Oceania 7 | Planning 8 | Population density 9 | Social consciousness 10 | Soil conservation 11 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/v9909e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/v9909e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w2227e.key: -------------------------------------------------------------------------------- 1 | Development projects 2 | Economic value 3 | FAO 4 | Forest trees 5 | Forestry development 6 | Genetic resources 7 | International cooperation 8 | National planning 9 | Resource conservation 10 | Species 11 | Sustainability 12 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w2227e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/w2227e.txt 
-------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w3241e.key: -------------------------------------------------------------------------------- 1 | Information systems 2 | Marketing 3 | Rapid rural appraisal 4 | Research 5 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w3241e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/w3241e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w3618e.key: -------------------------------------------------------------------------------- 1 | Africa 2 | Diffusion of information 3 | International cooperation 4 | Radio 5 | Research 6 | Rural areas 7 | Rural development 8 | Training 9 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w4233e.key: -------------------------------------------------------------------------------- 1 | Agricultural development 2 | Europe 3 | Food production 4 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w4233e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/w4233e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w4338e.key: -------------------------------------------------------------------------------- 1 | Africa 2 | Private sector 3 | Public services 4 | Veterinary services 5 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w4338e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/w4338e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w6679e.key: -------------------------------------------------------------------------------- 1 | Costs 2 | Environmental protection 3 | Felling 4 | Forestry operations 5 | Hauling 6 | Logging 7 | Manpower 8 | Planning 9 | Roads 10 | Training 11 | Work study 12 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w6679e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/w6679e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w7407e.key: -------------------------------------------------------------------------------- 1 | Energy consumption 2 | Europe 3 | Wood 4 | 
-------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w7714e.key: -------------------------------------------------------------------------------- 1 | Asia 2 | Economic situation 3 | Environmental policies 4 | Forestry development 5 | Forestry policies 6 | Forests 7 | Multiple use forestry 8 | Nature reserves 9 | Oceania 10 | Services 11 | Socioeconomic environment 12 | South Pacific 13 | Tourism 14 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w7714e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/w7714e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w8302e.key: -------------------------------------------------------------------------------- 1 | Asia 2 | Exports 3 | Forest products industry 4 | Forest resources 5 | Forestry development 6 | Forestry policies 7 | Imports 8 | Mongolia 9 | Socioeconomic environment 10 | Wood industry 11 | Wood products 12 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w8302e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/w8302e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w8594e.key: -------------------------------------------------------------------------------- 1 | Soil chemicophysical properties 2 | Soil classification 3 | Soil morphological features 4 | Soil resources 5 | Soil types 6 | World 7 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w8594e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/w8594e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w9900e.key: -------------------------------------------------------------------------------- 1 | Aquaculture 2 | Demand 3 | Fisheries 4 | Fishery management 5 | Fishery policies 6 | Fishery production 7 | Fishery products 8 | Fishery resources 9 | Inland fisheries 10 | International trade 11 | Supply 12 | Sustainability 13 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/w9900e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/w9900e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/wb_wb01oe.key: -------------------------------------------------------------------------------- 1 | POTTING COMPOSTS 
2 | GROWING MEDIA 3 | PLANT NURSERIES 4 | SEEDLINGS 5 | TREES 6 | CHEMICOPHYSICAL PROPERTIES 7 | COMPOSTING 8 | METHODS 9 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/wb_wb01oe.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/wb_wb01oe.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/wb_wb41me.key: -------------------------------------------------------------------------------- 1 | MARINE RESOURCES 2 | BIOTECHNOLOGY 3 | MARINE ENVIRONMENT 4 | MICROBIOLOGY 5 | RESEARCH 6 | USES 7 | AQUACULTURE 8 | CHEMICAL INDUSTRY 9 | PHARMACOLOGY 10 | INDICATOR ORGANISMS 11 | DEVELOPING COUNTRIES 12 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/wb_wb41me.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/wb_wb41me.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/winrock_wi10ce.key: -------------------------------------------------------------------------------- 1 | CALLIANDRA CALOTHYRSUS 2 | PLANT PRODUCTION 3 | USES 4 | PEST CONTROL 5 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/winrock_wi10ce.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/winrock_wi10ce.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x0176e.key: -------------------------------------------------------------------------------- 1 | Agroindustrial sector 2 | Employment 3 | Female labour 4 | Households 5 | Middle East 6 | Remuneration 7 | Role of women 8 | Rural development 9 | Social change 10 | Social consciousness 11 | Social policies 12 | Women 13 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x0176e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x0176e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x0185e.key: -------------------------------------------------------------------------------- 1 | Adult education 2 | Agricultural workers 3 | China 4 | Social groups 5 | Women 6 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x0185e.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x0185e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x0198e.key: -------------------------------------------------------------------------------- 1 | Africa 2 | Agricultural development 3 | Asia 4 | Caribbean 5 | Europe 6 | Food security 7 | Latin america 8 | Malnutrition 9 | Middle East 10 | Oceania 11 | Population growth 12 | Role of women 13 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x0199e.key: -------------------------------------------------------------------------------- 1 | Charcoal 2 | Consumption 3 | Fuelwood 4 | Households 5 | Roundwood 6 | Social conditions 7 | Socioeconomic environment 8 | Statistical data 9 | Sudan 10 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x0451e.key: -------------------------------------------------------------------------------- 1 | Chemical composition 2 | Chemicophysical properties 3 | Geographical distribution 4 | Nonwood forest products 5 | Palmae 6 | Proximate composition 7 | Uses 8 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x0451e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x0451e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5082e.key: -------------------------------------------------------------------------------- 1 | Animal breeding 2 | Animal diseases 3 | Animal housing 4 | Animal husbandry 5 | Animal nutrition 6 | Animal production 7 | Animal products 8 | Hair 9 | Rabbits 10 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5082e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x5082e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5352e.key: -------------------------------------------------------------------------------- 1 | Consumption 2 | Exports 3 | Imports 4 | International trade 5 | Mine timber 6 | Production data 7 | Pulp 8 | Wood 9 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5352e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x5352e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5644e.key: -------------------------------------------------------------------------------- 1 | Asia 2 | Forest management 3 | Forest products 
industry 4 | Forestry policies 5 | Land use 6 | National planning 7 | Oceania 8 | Sustainability 9 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5644e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x5644e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5668e.key: -------------------------------------------------------------------------------- 1 | Fuelwood 2 | Household consumption 3 | Manual operation 4 | Men 5 | Role of women 6 | Sri Lanka 7 | Trade 8 | Women 9 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5668e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x5668e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5676e.key: -------------------------------------------------------------------------------- 1 | Animal production 2 | Economic analysis 3 | Goats 4 | Interest groups 5 | Nepal 6 | Project evaluation 7 | Social consciousness 8 | Women 9 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5676e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x5676e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5681e.key: -------------------------------------------------------------------------------- 1 | Antigua and Barbuda 2 | Development plans 3 | Forestry development 4 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x5681e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x5681e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x6759e.key: -------------------------------------------------------------------------------- 1 | Data analysis 2 | Data collection 3 | Domestic production 4 | Evaluation 5 | Exports 6 | Imports 7 | Mauritius 8 | Wood 9 | Wood industry 10 | Wood products 11 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x6802e.key: -------------------------------------------------------------------------------- 1 | Data analysis 2 | Data collection 3 | Domestic production 4 | Domestic trade 5 | Energy 6 | Energy consumption 7 | Fuelwood 8 | Human population 9 | Prices 10 | Regeneration 11 | Supply 12 | Zambia 13 | 
-------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/x6802e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/x6802e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/y0500e.key: -------------------------------------------------------------------------------- 1 | Animal husbandry 2 | Animal husbandry methods 3 | Animal production 4 | Appropriate technology 5 | Livestock 6 | Livestock management 7 | Traditional technology 8 | Urban areas 9 | Working animals 10 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/y0500e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/y0500e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/y1128e.key: -------------------------------------------------------------------------------- 1 | Fisheries 2 | Fishery policies 3 | Research 4 | -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/fao_train/y1128e.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/docs/fao_train/y1128e.txt -------------------------------------------------------------------------------- /group contributions/testFolder/data/docs/spanish/text.txt: -------------------------------------------------------------------------------- 1 | Hace millones de años que las flores tiene espinas y hace también millones de años que los 2 | corderos, a pesar de las espinas, se comen las flores. ¿Es que no es cosa seria averiguar por qué las 3 | flores pierden el tiempo fabricando unas espinas que no les sirven para nada? ¿Es que no es importante 4 | la guerra de los corderos y las flores? ¿No es esto más serio e importante que las sumas de un señor 5 | gordo y colorado? Y si yo sé de una flor única en el mundo y que no existe en ninguna parte más que en 6 | mi planeta; si yo sé que un buen día un corderillo puede aniquilarla sin darse cuenta de ello, ¿es que esto 7 | no es importante? 
8 |
--------------------------------------------------------------------------------
/group contributions/testFolder/data/models/readme.txt:
--------------------------------------------------------------------------------
directory for storing models
--------------------------------------------------------------------------------
/group contributions/testFolder/data/vocabulary/agrovoc_en.rdf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Parkkeo1/Reddit_Unlocked/5d68d9f3bf994beed36b91e9eb21ab8911798a9c/group contributions/testFolder/data/vocabulary/agrovoc_en.rdf.gz
--------------------------------------------------------------------------------
/group contributions/testFolder/dict_to_graph.py:
--------------------------------------------------------------------------------
import pandas as pd
import plotly
plotly.tools.set_credentials_file(username='reddit_unlocked', api_key='gfnXKc7JvUKST4HRJyFX')
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.graph_objs import Annotations, Annotation


# takes a dictionary of dictionaries of keywords from body text as input and returns
# the url for the plotly html embedding of the scatter plot made from the keywords
# and their attributes:
# 'Keyword', 'Occurrences', 'Upvotes', 'Downvotes', 'Score', 'Subjectivity', 'Polarity', 'Domain'
def body_to_graph(words=None, subreddit=''):
    """
    :type subreddit: str
    """
    if words is None:
        words = {}
    frames = []
    # turn the dictionary of dictionaries into a list of one-row DataFrames
    for key, value in words.items():
        frames.append(pd.DataFrame(data=value, columns=[key],
                                   index=['Keyword', 'Occurrences', 'Upvotes', 'Downvotes', 'Score',
                                          'Subjectivity', 'Polarity', 'Domain']).transpose())
    # concatenate the list of DataFrames
    data_df = pd.concat(frames)
    trace1 = go.Scatter(
        y=data_df.Subjectivity,                 # subjectivity of the source text on the y-axis
        x=data_df.Occurrences * data_df.Score,  # occurrences * score on the x-axis for more spread-out data
        mode='markers',
        marker=dict(
            size=data_df.Occurrences * 20,  # keyword occurrences set the marker size
            color=data_df.Polarity,         # polarity colors the marker (blue is sad, red is happy)
            colorscale='Portland',
            showscale=True
        ),
        text="Keyword: " + data_df.Keyword
    )
    layout = go.Layout(
        annotations=Annotations([
            Annotation(x=0.5, y=-0.123, showarrow=False,
                       text='(Occurrences * Score)', xref='paper', yref='paper'),
            Annotation(x=1.055, y=0.5, showarrow=False,
                       text='Text Polarity', textangle=-90, xref='paper', yref='paper'),
            Annotation(x=.01, y=1, showarrow=False,
                       text='Size = Occurrences', textangle=0, xref='paper', yref='paper',
                       bordercolor='#1f77b4',
                       font=dict(family='Courier New, monospace', size=16, color='#ff7f0e'))
        ]),
        title='Stats of top reddit /r/' + subreddit + ' keywords',
        yaxis=dict(
            title='Subjectivity',
            nticks=5,  # cap on tick count (Plotly's 'ticks' property expects a string such as 'outside')
        ),
        xaxis=dict(
            title='popularity',
            ticklen=10,
        )
    )
    data = [trace1]
    fig = go.Figure(data=data, layout=layout)
    url = py.plot(fig, filename='reddit plot')
    return url
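A minimal sketch of a body_to_graph call, with an invented two-keyword input (posting the figure requires the Plotly credentials configured above; real input comes from get_keyword_dict in run.py):

# one entry per keyword, values ordered as
# [Keyword, Occurrences, Upvotes, Downvotes, Score, Subjectivity, Polarity, Domain dict]
sample_words = {
    'election': ['election', 3, 1500.0, 200.0, 1300, 0.41, -0.12, {'cnn.com': 2, 'bbc.com': 1}],
    'spacex':   ['spacex',   2,  900.0,  50.0,  850, 0.25,  0.30, {'theverge.com': 2}],
}

plot_url = body_to_graph(sample_words, 'worldnews')
print(plot_url)  # Plotly URL suitable for embedding in an iframe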
--------------------------------------------------------------------------------
/group contributions/testFolder/evaluate_rake.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from six.moves import range
__author__ = 'a_medelyan'
import rake
import test_data
import sys


# reading a directory with test documents
input_dir = sys.argv[1]

# number of top-ranked keywords to evaluate
top = int(sys.argv[2])

test_set = test_data.read_data(input_dir)

# evaluating
rake_object = rake.Rake("SmartStoplist.txt", 5, 3, 4)
total_precision = 0
total_recall = 0

for test_doc in test_set.values():
    print('document', test_doc.name)
    print(len(test_doc.keywords), 'manual keywords: ', test_doc.keywords)

    keywords = rake_object.run(test_doc.text)[:top]
    print('RAKE keywords:', keywords)

    num_manual_keywords = len(test_doc.keywords)

    correct = 0
    for i in range(0, min(top, len(keywords))):
        if keywords[i][0] in set(test_doc.keywords):
            correct += 1
    # precision = correct / extracted; recall = correct / manual
    if keywords:  # guard against dividing by zero when nothing was extracted
        total_precision += correct / float(len(keywords))
    total_recall += correct / float(num_manual_keywords)
    print('correct:', correct, 'out of', num_manual_keywords)

avg_precision = round(total_precision * 100 / float(len(test_set)), 2)
avg_recall = round(total_recall * 100 / float(len(test_set)), 2)

avg_fmeasure = round(2 * avg_precision * avg_recall / (avg_precision + avg_recall), 2)

print("Precision", avg_precision, "Recall", avg_recall, "F-Measure", avg_fmeasure)
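A usage sketch for the evaluation script above, run from the testFolder directory (the first argument is a directory of .txt/.key document pairs, the second is how many top-ranked RAKE keywords to score):

python evaluate_rake.py data/docs/fao_test 10

test_data.read_data, defined below, pairs each .txt document with the manual keywords in its matching .key file.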
--------------------------------------------------------------------------------
/group contributions/testFolder/test.py:
--------------------------------------------------------------------------------
from bs4 import BeautifulSoup
import json
import requests

subreddit = input('Insert subreddit: ')

site = requests.get('https://reddit.com/r/{}.json'.format(subreddit),
                    headers={'user-agent': 'Mozilla/5.0'})

reddit_json = json.loads(site.text)

master_dict = {}

# map each of the first 19 listed posts' domains to their scores
for i in range(0, 19):
    master_dict[reddit_json['data']['children'][i]['data']['domain']] = \
        reddit_json['data']['children'][i]['data']['score']

print(master_dict)

print("Hello, world!")
print("Hey this is Gary's test")
print("practicing pushing to github")
--------------------------------------------------------------------------------
/group contributions/testFolder/testFolder2/thisIsATextFile.txt:
--------------------------------------------------------------------------------
Hey there guys this here is a cool little text file!
--------------------------------------------------------------------------------
/group contributions/testFolder/test_data.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
__author__ = 'a_medelyan'
import os
import io

# class to hold our test instance (document plus its correct manual keywords)
class TestDoc:
    def __init__(self, name):
        self.name = name
        self.text = ''
        self.keywords = []


# reading documents and their keywords from a directory
def read_data(input_dir):

    test_set = {}

    for doc in os.listdir(input_dir):

        # only process the .txt documents; each one's .key partner is read below
        if not doc.endswith(".txt"):
            continue

        file_name = doc[:-4]
        if file_name not in test_set:
            d = TestDoc(file_name)
        else:
            d = test_set[file_name]

        # get document text
        with io.open(os.path.join(input_dir, doc), 'r', encoding="iso-8859-1") as file_reader:
            d.text = file_reader.read()

        # get document keywords, one per line; keep only the part before any tab
        with open(os.path.join(input_dir, file_name + ".key"), 'r') as key_reader:
            manual_keywords = key_reader.read()
        for line in manual_keywords.split('\n'):
            line = line.rstrip().lower()
            if len(line) > 0:
                if '\t' in line:
                    d.keywords.append(line[0:line.find('\t')])
                else:
                    d.keywords.append(line)

        # add document to test set
        test_set[file_name] = d

    return test_set
--------------------------------------------------------------------------------
/group contributions/testFolder/times.csv:
--------------------------------------------------------------------------------
5,0:00:24.910024
10,0:00:47.791920
15,0:01:05.960438
20,0:01:23.476297
25,0:01:45.783548
30,0:01:57.311794
35,0:02:16.769322
40,0:02:56.369686
45,0:03:18.151234
50,0:03:22.456394
55,0:03:57.386750
60,0:04:18.296719
--------------------------------------------------------------------------------
/libraries.md:
--------------------------------------------------------------------------------
### Libraries We Used

##### PRAW - Python Wrapper For The Reddit API
PRAW is the library we used to access the Reddit API, letting us pull threads from any section of a subreddit along with their respective attributes. PRAW works by looping over the top posts of a particular section ("hot" or "top") of a subreddit, so we appended each thread's attributes to a dataframe inside that loop, as sketched below. -Gary
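A minimal sketch of that loop, assuming valid Reddit API credentials (the placeholder strings must be filled in; run.py shows the project's actual configuration):

```python
import praw
import pandas as pd

reddit = praw.Reddit(client_id='...', client_secret='...',
                     username='...', password='...',
                     user_agent='Reddit Unlocked CS196 Project @ UIUC')

rows = []
for thread in reddit.subreddit('news').top('week', limit=10):
    # collect a few of the attributes each submission exposes
    rows.append({'Title': thread.title, 'Net Score': thread.score, 'Domain': thread.domain})

threads_df = pd.DataFrame(rows)
print(threads_df.head())
```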
##### Pandas
With its fast, easy-to-use data structures and data-organization functions, Pandas was an invaluable library for storing and sorting the extensive subreddit, submission, and keyword data we gathered from the Reddit API. Specifically, we opted for Pandas dataframes because of their speed and intuitive structure compared to standard Python dictionaries and lists. -Isaac

##### Newspaper
We used this package to parse the HTML markup of any URL we passed to it. Newspaper is built to handle news articles, so when we detected that a Reddit post linked to an article, we used Newspaper to parse the article text and extract keywords with its built-in keywords method. -Jayam

##### RAKE - Rapid Automatic Keyword Extraction
The Rapid Automatic Keyword Extraction (RAKE) library lets us pass in standalone strings (as opposed to URLs that point to articles) and analyze them for keywords. Its algorithm looks at the frequency of word appearance and co-occurrence with other words inside a text to determine which words describe the text as a whole. The library proved useful for analyzing Reddit post titles, which we obtain directly as strings; see the sketch just after this list of libraries. -Jayam

##### TextBlob
TextBlob is a sentiment analysis library built on NLTK. We use it to compute a polarity score (negative to positive) and a subjectivity score for the text behind each post; these feed the marker color and y-axis of our keyword graph.

##### Plotly
We chose plot.ly primarily for its aesthetically pleasing graphs, its easy conversion from Pandas dataframes to whatever graph we wish to draw, and the interactivity of its output. It was also vital that plot.ly lets graphs be embedded into HTML, with the embedding URL updated for each newly generated graph. -Gary

##### Flask
From the start, we knew we wanted to deploy our program as a dynamic website. After some research and recommendations from upperclassmen, we decided on Flask. Flask let us integrate the front-end HTML and CSS with the back-end Python program. Flask's session utility was especially useful because it allowed saving and transferring data variables across multiple pages of the website. -Isaac

##### Flask-Session
In the final stages of combining the back end with the front-end website using Flask, I ran into a problem: the default client-side 4KB cookie that Flask provides for transferring data across pages could not store all of the Reddit and keyword data we need to run the program. So I opted for Flask-Session, a Flask plugin that let me implement a server-side filesystem session store large enough to hold all of the data. -Isaac

##### Bootstrap
Since this was my first time learning HTML and CSS, I decided to use Bootstrap to ease my introduction to web development. With its built-in utilities and components, I developed a functional front end without having to write CSS from scratch. -Isaac
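The sketch promised above: the RAKE-plus-TextBlob pattern, using the rake.py and SmartStoplist.txt that ship in this repo. The constructor arguments mirror run.py (stoplist path, minimum characters per word, maximum words per phrase, minimum keyword frequency), and the sample title is invented:

```python
import rake
from textblob import TextBlob

rake_object = rake.Rake("SmartStoplist.txt", 1, 2, 1)

title = "SpaceX successfully launches and lands another rocket"  # hypothetical post title
for phrase, score in rake_object.run(title):
    print(phrase, score)  # rake returns (keyword phrase, relevance score) pairs

blob = TextBlob(title)
print(blob.sentiment.polarity)      # -1.0 (negative) to 1.0 (positive)
print(blob.sentiment.subjectivity)  # 0.0 (objective) to 1.0 (subjective)
```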
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
#
# -To be used for locally (localhost) testing the web server/website.
# -Installing the flask package in a virtual environment (instead of system-wide)
#  is recommended by the Flask devs.
#
# http://flask.pocoo.org/docs/0.12/quickstart/#
#
# -Isaac Park, keonp2
#


from flask import Flask, request, render_template, redirect, url_for, session
from flask_session import Session
from run import display_praw, stats_praw, body_to_graph, get_keyword_dict
import webbrowser


app = Flask(__name__)
app.config['SECRET_KEY'] = 'insert super secret string here'
SESSION_TYPE = 'filesystem'
app.config.from_object(__name__)
Session(app)


@app.route('/', methods=['GET', 'POST'])
def index():
    if request.method == 'POST':
        if 'basic-url' in request.form:
            name = request.form['basic-url']
            info = stats_praw(name)
            session['info'] = info
            output = display_praw(name)
            session['output'] = output
            keywords = get_keyword_dict(output)
            graph_url = body_to_graph(keywords, name)
            session['graph_url'] = graph_url
            return redirect(url_for('program', name=name))
        else:
            return render_template('home.html')
        # TODO: Implement subreddit input validity checking, i.e. fix the blank-input error
    else:
        return render_template('home.html')


@app.route('/docs/<section>')
def docs(section):
    if section == "findings":
        return render_template("docs_findings.html")
    elif section == "team":
        return render_template("docs_team.html")
    elif section == "tools":
        return render_template("docs_tools.html")
    else:
        # plain-text fallback; the rendered page linked back to the home page
        return ("This docs page does not exist. Maybe it was a typo? -Isaac "
                "(Back to Reddit_Unlocked Home)")
    # TODO: if I have time, implement an html template for the page-DNE message


@app.route('/program/<name>')
def program(name):
    output = session['output']
    info = session['info']
    graph_url = session['graph_url']
    return render_template('program.html', name=name, output=output, info=info, graph_url=graph_url)


@app.route('/examples')
def examples():
    return render_template('examples.html')


if __name__ == "__main__":
    webbrowser.open_new('http://127.0.0.1:5000')
    app.run(debug=False)


# Use the url_for method for links in the webpage; url_for generates a URL
# based on the argument it is given (the name of the function tied to that URL).
#
# Example:
#
# @app.route('/user/<name>')
# def hello_user(name):
#     if name == 'admin':
#         return redirect(url_for('hello_admin'))
#     else:
#         return redirect(url_for('hello_guest', guest=name))
#
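With the __main__ block above, the whole pipeline can be exercised locally with a single command (assuming Flask, Flask-Session, and the other imported packages are installed in the active environment):

python main.py
(serves on http://127.0.0.1:5000 and opens a browser tab automatically)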
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
import praw
import pandas as pd
from datetime import datetime


# TODO: Make this program faster! (a lot faster, this is way too slow)

def display_praw(name):
    reddit = praw.Reddit(client_id='Pj5o8QpNXXJY9A',
                         client_secret='pQKMRBmhp0In48NoNvvktfRo2eA',
                         password='prawisgreat',
                         user_agent='Reddit Unlocked CS196 Project @ UIUC',
                         username='RedditUnlocked196')

    subreddit = reddit.subreddit(name)

    threads_df = pd.DataFrame({
        'Title': (),
        'URL': (),
        'Upvote Ratio (%)': (),
        'Net Score': (),
        '# of Upvotes': (),
        '# of Downvotes': (),
        'Post Date': (),
        'Self Post?': (),
        'Video Post?': (),
        'Domain': ()
    })

    threads_df = threads_df[['Title', 'URL', 'Upvote Ratio (%)', 'Net Score', '# of Upvotes', '# of Downvotes',
                             'Post Date', 'Self Post?', 'Video Post?', 'Domain']]

    for thread in subreddit.top('week', limit=40):  # TODO: change limit number when actually deploying program. 15 is the testing number.
        # if thread.is_video:
        #     continue
        if 'fb' in thread.url:
            continue
        # Reddit only exposes the net score and the upvote ratio; recover the raw
        # counts from ups - downs = score and ups / (ups + downs) = ratio, which
        # gives ups = ratio * score / (2 * ratio - 1).
        actualUps = float(thread.upvote_ratio * thread.score) / float(thread.upvote_ratio * 2 - 1)
        actualDowns = actualUps - thread.score
        gather = pd.Series([thread.title, thread.url, thread.upvote_ratio * 100, thread.score,
                            actualUps, actualDowns, thread.created_utc,
                            thread.is_self, thread.is_video, thread.domain],
                           index=['Title', 'URL', 'Upvote Ratio (%)', 'Net Score', '# of Upvotes', '# of Downvotes',
                                  'Post Date', 'Self Post?', 'Video Post?', 'Domain'])

        threads_df = threads_df.append(gather, ignore_index=True)

    threads_dict = threads_df.to_dict(orient='records')

    for entry in threads_dict:
        # created_utc is a Unix timestamp stored as a float
        if isinstance(entry['Post Date'], float):
            time = datetime.fromtimestamp(entry['Post Date'])
            formatTime = time.strftime('%b %d, %Y')
        else:
            formatTime = None

        entry['Post Date'] = formatTime

    return threads_dict
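As a sanity check on the upvote reconstruction in display_praw above: a thread with net score 100 and upvote ratio 0.75 gives ups = 0.75 * 100 / (2 * 0.75 - 1) = 150 and downs = 150 - 100 = 50; indeed 150 - 50 = 100 and 150 / 200 = 0.75. Note the formula divides by zero at a ratio of exactly 0.5, where the raw counts are not recoverable from score and ratio alone.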
def stats_praw(name):
    reddit = praw.Reddit(client_id='Pj5o8QpNXXJY9A',
                         client_secret='pQKMRBmhp0In48NoNvvktfRo2eA',
                         password='prawisgreat',
                         user_agent='Reddit Unlocked CS196 Project @ UIUC',
                         username='RedditUnlocked196')

    # pull subreddit metadata straight from the about.json endpoint
    info = reddit.request('GET', '/r/' + name + '/about.json')

    infoDict = {}

    infoDict['Current Users'] = info['data']['active_user_count']
    infoDict['Creation Date'] = (datetime.fromtimestamp(info['data']['created_utc'])).strftime('%b %d, %Y')
    infoDict['Subscriber Count'] = info['data']['subscribers']
    infoDict['Title'] = info['data']['title']
    infoDict['Icon'] = info['data']['icon_img']

    return infoDict

import plotly
plotly.tools.set_credentials_file(username='reddit_unlocked', api_key='gfnXKc7JvUKST4HRJyFX')
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.graph_objs import Annotations, Annotation


# takes a dictionary of dictionaries of keywords from body text as input and returns
# the url for the plotly html embedding of the scatter plot made from the keywords
# and their attributes:
# 'Keyword', 'Occurrences', 'Upvotes', 'Downvotes', 'Score', 'Subjectivity', 'Polarity', 'Domain'


def body_to_graph(words=None, subreddit=''):
    """
    :type subreddit: str
    """
    if words is None:
        words = {}
    frames = []
    # turn the dictionary of dictionaries into a list of one-row DataFrames
    for key, value in words.items():
        frames.append(pd.DataFrame(data=value, columns=[key],
                                   index=['Keyword', 'Occurrences', 'Upvotes', 'Downvotes', 'Score',
                                          'Subjectivity', 'Polarity', 'Domain']).transpose())
    # concatenate the list of DataFrames
    data_df = pd.concat(frames)
    trace1 = go.Scatter(
        y=data_df.Subjectivity,                 # subjectivity of the source text on the y-axis
        x=data_df.Occurrences * data_df.Score,  # occurrences * score on the x-axis for more spread-out data
        mode='markers',
        marker=dict(
            size=data_df.Occurrences * 20,  # keyword occurrences set the marker size
            color=data_df.Polarity,         # polarity colors the marker (blue is sad, red is happy)
            colorscale='Portland',
            showscale=True
        ),
        text="Keyword: " + data_df.Keyword
    )
    layout = go.Layout(
        annotations=Annotations([
            Annotation(x=0.5, y=-0.123, showarrow=False,
                       text='(Occurrences * Score)', xref='paper', yref='paper'),
            Annotation(x=1.055, y=0.5, showarrow=False,
                       text='Text Polarity', textangle=-90, xref='paper', yref='paper'),
            Annotation(x=.01, y=1, showarrow=False,
                       text='Size = Occurrences', textangle=0, xref='paper', yref='paper',
                       bordercolor='#1f77b4',
                       font=dict(family='Courier New, monospace', size=16, color='#ff7f0e'))
        ]),
        title='Stats of top reddit /r/' + subreddit + ' keywords',
        yaxis=dict(
            title='Subjectivity',
            nticks=5,  # cap on tick count (Plotly's 'ticks' property expects a string such as 'outside')
        ),
        xaxis=dict(
            title='popularity',
            ticklen=10,
        )
    )
    data = [trace1]
    fig = go.Figure(data=data, layout=layout)
    url = py.plot(fig, filename='reddit plot', auto_open=False)
    return url

import rake
rake_object = rake.Rake("SmartStoplist.txt", 1, 2, 1)
from textblob import TextBlob
from newspaper import Article
import numpy as np

def get_keyword_dict(input_dict):
    # transform the dict returned by display_praw into a DataFrame to work with
    top10news_df = pd.DataFrame.from_dict(input_dict)

    words = {}

    ## NEWSPAPER STUFF HERE ##

    # get keywords out of all linked articles
    for i in range(len(top10news_df)):
        # skip self, video, and image posts; there is no article to parse
        if "self" in top10news_df.iloc[i]["Domain"]:
            continue
        elif "youtube" in top10news_df.iloc[i]["Domain"]:
            continue
        elif "imgur" in top10news_df.iloc[i]["Domain"]:
            continue

        myArticle = Article(top10news_df.iloc[i]['URL'])
        try:
            myArticle.download()
            myArticle.parse()
        except Exception:
            continue
        myArticle.nlp()

        # run sentiment analysis on each article, fetch subjectivity and polarity
        text = myArticle.text
        blob = TextBlob(text)
        polarity = blob.sentiment.polarity
        subjectivity = blob.sentiment.subjectivity

        # get the associated Reddit post info for each keyword, store it in the dictionary
        for keyword in myArticle.keywords:

            # don't waste time with numeric keywords; skip them if they contain digits
            if any(char.isdigit() for char in keyword):
                continue

            if keyword not in words:
                words[keyword] = [keyword, 1,
                                  top10news_df.iloc[i]['# of Upvotes'],
                                  top10news_df.iloc[i]["# of Downvotes"],
                                  top10news_df.iloc[i]["Net Score"],
                                  subjectivity, polarity,
                                  {top10news_df.iloc[i]["Domain"]: 1}]
            else:
                words[keyword][1] += 1
                words[keyword][2] += top10news_df.iloc[i]['# of Upvotes']
                words[keyword][3] += int(top10news_df.iloc[i]['# of Downvotes'])
                words[keyword][4] += int(top10news_df.iloc[i]['Net Score'])
                words[keyword][5] = np.mean([subjectivity, words[keyword][5]])
                words[keyword][6] = np.mean([polarity, words[keyword][6]])
                if top10news_df.iloc[i]["Domain"] in words[keyword][7]:
                    words[keyword][7][top10news_df.iloc[i]["Domain"]] += 1
                else:
                    words[keyword][7][top10news_df.iloc[i]["Domain"]] = 1

        ## RAKE STUFF HERE ##

        # pull keywords from the title strings
        for wordPair in rake_object.run(top10news_df.iloc[i]['Title']):
            currentWord = wordPair[0]

            # don't waste time with numeric keywords; skip them if they contain digits
            if any(char.isdigit() for char in currentWord):
                continue

            # grab the associated Reddit post data for each keyword, store it in the dictionary
            if currentWord not in words:
                words[currentWord] = [currentWord, 1,
                                      top10news_df.iloc[i]['# of Upvotes'],
                                      top10news_df.iloc[i]["# of Downvotes"],
                                      top10news_df.iloc[i]["Net Score"],
                                      subjectivity, polarity,
                                      {top10news_df.iloc[i]["Domain"]: 1}]
            else:
                words[currentWord][1] += 1
                words[currentWord][2] += int(top10news_df.iloc[i]['# of Upvotes'])
                words[currentWord][3] += int(top10news_df.iloc[i]['# of Downvotes'])
                words[currentWord][4] += int(top10news_df.iloc[i]['Net Score'])
                if top10news_df.iloc[i]["Domain"] in words[currentWord][7]:
                    words[currentWord][7][top10news_df.iloc[i]["Domain"]] += 1
                else:
                    words[currentWord][7][top10news_df.iloc[i]["Domain"]] = 1

    ### FOR GARY'S USE ###
    # Output dictionary is named 'words' #
    # Format is as such: #
    # key = keyword #
    # value = [Keyword, Occurrences, Upvotes, Downvotes, Score, Subjectivity, Polarity, Domain Dictionary] #

    return words
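The functions above chain together exactly as the index() view in main.py uses them; 'politics' is an arbitrary example subreddit:

output = display_praw('politics')             # list of per-thread dicts
info = stats_praw('politics')                 # subreddit metadata dict
words = get_keyword_dict(output)              # keyword -> [keyword, occurrences, ...] stats
graph_url = body_to_graph(words, 'politics')  # URL of the embeddable Plotly chart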
--------------------------------------------------------------------------------
/templates/_navbar.html:
--------------------------------------------------------------------------------
[navbar template; its HTML markup did not survive this extraction]
--------------------------------------------------------------------------------
/templates/docs_team.html:
--------------------------------------------------------------------------------
{% extends 'layout.html' %}

{% block body %}
[markup stripped in extraction; the surviving text:]
We're a group of Freshman CS majors at UIUC.
But our class standings are higher :)
{% endblock %}
--------------------------------------------------------------------------------
/templates/docs_tools.html:
--------------------------------------------------------------------------------
{% extends 'layout.html' %}

{% block body %}
[markup stripped in extraction; the surviving text:]
Reddit_Unlocked is made possible by these amazing libraries.
No libraries were harmed in the making of this project.
{% endblock %}
--------------------------------------------------------------------------------
/templates/home.html:
--------------------------------------------------------------------------------
{% extends 'layout.html' %}

{% block body %}
[markup stripped in extraction; the surviving text:]
A Reddit-cal Analysis of The Web
...How does the Internet think?
https://www.reddit.com/r/
{% endblock %}
--------------------------------------------------------------------------------
/templates/layout.html:
--------------------------------------------------------------------------------
[markup stripped in extraction; the surviving text and Jinja:]
Reddit_Unlocked
{% include '_navbar.html' %}
{% block body %}{% endblock %}
--------------------------------------------------------------------------------
/templates/program.html:
--------------------------------------------------------------------------------
{% extends 'layout.html' %}

{% block body %}
{% set ellip = "..." %}
[markup stripped in extraction; the surviving text and Jinja:]
You chose to analyze /r/{{ name }}. Here are the results.
Have a look at what's popular and trending in this section of the internet!
Background statistics for /r/{{ name }}
{{ info['Title'] }}
Created: {{ info['Creation Date'] }}
Subscriber Count: {{ info['Subscriber Count'] }}
Currently Active Users: {{ info['Current Users'] }}
Visit /r/{{ name }}

The top 10 threads from /r/{{ name }} in the past week, sorted by net score. Click on the URLs to see what made them popular.
# | Title | URL | Upvote % | Net Score | Date Posted
{% for entry in output[0:10] %}
{{ loop.index }} | {{ (entry['Title'][0:30]) + ellip }} | {{ (entry['URL'][8:40]) + ellip }} | {{ entry['Upvote Ratio (%)'] }} | {{ entry['Net Score'] }} | {{ entry['Post Date'] }}
{% endfor %}

The sentiment analysis graph below displays the most popular keywords from /r/{{ name }} in the past week.
It's interactive - you can hover over the bubbles, zoom in, or even view this chart on Plotly.
reddit plot
{% endblock %}
--------------------------------------------------------------------------------