├── .dockerignore ├── .gitignore ├── .gitmodules ├── Dockerfile ├── PRODUCTION.md ├── README.md ├── data ├── IDETECT_test_dataset - NLP.csv.xlsx ├── IDMC_fully_labelled.csv.xlsx ├── cities_to_countries.json ├── idmc_uniteideas_training_dataset.csv └── stop_words_en_long.txt ├── docker-compose-dev.yml ├── docker-compose-spacy.yml ├── docker-compose.yml ├── docker.env ├── docker ├── localdb │ ├── Dockerfile │ ├── docker-entrypoint-initdb.d │ │ ├── id.sh │ │ └── id_test.sh │ └── schema.sql └── nginx │ ├── Dockerfile │ ├── conf.d │ └── node.conf │ └── nginx.conf ├── environment.yml ├── fact_extraction.md ├── images └── internal-displacement-plan.png ├── internal-displacement-web ├── Dockerfile ├── README.md ├── client │ ├── README.md │ ├── package.json │ ├── public │ │ ├── favicon.ico │ │ ├── images │ │ │ └── banner.jpg │ │ ├── index.html │ │ └── themeJS │ │ │ ├── ie │ │ │ ├── backgroundsize.min.htc │ │ │ ├── html5shiv.js │ │ │ └── respond.min.js │ │ │ ├── jquery.min.js │ │ │ ├── jquery.scrollex.min.js │ │ │ ├── jquery.scrolly.min.js │ │ │ ├── main.js │ │ │ ├── skel.min.js │ │ │ └── util.js │ ├── src │ │ ├── Api │ │ │ └── api.js │ │ ├── App.css │ │ ├── App.js │ │ ├── App.test.js │ │ ├── common │ │ │ ├── Footer.js │ │ │ └── Header.js │ │ ├── components │ │ │ ├── HomePage │ │ │ │ └── index.js │ │ │ ├── NotFound │ │ │ │ ├── index.js │ │ │ │ ├── index.spec.js │ │ │ │ └── styles.css │ │ │ └── UrlForm │ │ │ │ └── index.js │ │ ├── containers │ │ │ ├── MapVizContainer │ │ │ │ ├── MaVizPageV2.js │ │ │ │ ├── MapVizExample.js │ │ │ │ ├── MapVizPage.js │ │ │ │ ├── MapVizPage.scatter.js │ │ │ │ ├── actions │ │ │ │ │ └── index.js │ │ │ │ ├── components │ │ │ │ │ ├── map │ │ │ │ │ │ ├── index.js │ │ │ │ │ │ └── mapboxTest.js │ │ │ │ │ └── mapOverlays │ │ │ │ │ │ ├── customScatterOverlay.js │ │ │ │ │ │ ├── displacementHeatmapOverlay.js │ │ │ │ │ │ ├── exampleGeojson.js │ │ │ │ │ │ ├── geojsonDataOverlay.js │ │ │ │ │ │ └── scatterplotOverlay.js │ │ │ │ ├── constants │ │ │ │ │ ├── actionTypes.js │ │ │ │ │ └── mapConstants.js │ │ │ │ ├── mapVis.css │ │ │ │ ├── mapbox-gl.css │ │ │ │ ├── reducers │ │ │ │ │ ├── initialState.js │ │ │ │ │ └── mapReducers.js │ │ │ │ ├── sagas │ │ │ │ │ ├── index.js │ │ │ │ │ ├── mapDataSaga.js │ │ │ │ │ └── watchers.js │ │ │ │ ├── store │ │ │ │ │ └── configureStore.js │ │ │ │ └── vancouver-blocks.json │ │ │ ├── app.js │ │ │ └── home.js │ │ ├── index.css │ │ ├── index.js │ │ ├── layout.js │ │ ├── logo.svg │ │ ├── reducers │ │ │ └── index.js │ │ ├── routes.js │ │ ├── themeCss │ │ │ ├── css │ │ │ │ ├── font-awesome.min.css │ │ │ │ ├── ie8.css │ │ │ │ ├── ie9.css │ │ │ │ ├── images │ │ │ │ │ ├── arrow.svg │ │ │ │ │ ├── banner.jpg │ │ │ │ │ ├── bars.svg │ │ │ │ │ └── close.svg │ │ │ │ └── main.css │ │ │ ├── fonts │ │ │ │ ├── FontAwesome.otf │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.svg │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ └── fontawesome-webfont.woff2 │ │ │ └── images │ │ │ │ ├── Screen Shot 2017-04-27 at 15.11.09 copy.png │ │ │ │ ├── banner.jpg │ │ │ │ ├── banner1.jpg │ │ │ │ ├── d4d-logo-meetup-banner.png │ │ │ │ ├── no02.jpg │ │ │ │ ├── no03.jpg │ │ │ │ ├── pic01.jpg │ │ │ │ ├── pic02.jpg │ │ │ │ ├── pic03.jpg │ │ │ │ ├── pic04.jpg │ │ │ │ └── pic05.jpg │ │ └── utils │ │ │ └── convertDataToGeojson.js │ └── yarn.lock ├── package.json ├── server │ ├── api │ │ ├── reportLocationRequest.js │ │ ├── sampleArticleRequest.js │ │ └── test.js │ ├── index.js │ ├── package.json │ ├── pgDB │ │ └── index.js │ ├── routes.js │ └── 
yarn.lock ├── src │ └── db.js └── yarn.lock ├── internal_displacement ├── __init__.py ├── add_countries.py ├── article.py ├── classifiers │ ├── default_encoder.pkl │ └── readme.txt ├── excerpt_helper.py ├── extracted_report.py ├── fact.py ├── interpreter.py ├── model │ └── model.py ├── pipeline.py ├── scraper.py ├── tests │ ├── __init__.py │ ├── test_ExtractedReport.py │ ├── test_Interpreter.py │ ├── test_Pipeline.py │ ├── test_Scraper.py │ ├── test_coordinates_extraction.py │ ├── test_fact_extraction.py │ ├── test_model.py │ └── test_report.py └── textract_requirements.txt ├── notebooks ├── .ipynb_checkpoints │ ├── newspaper-scrape-tests-checkpoint.ipynb │ ├── sql-concurrent-checkpoint.ipynb │ └── tests-checkpoint.ipynb ├── article_classification │ ├── ArticleTaggingForVisualization.ipynb │ ├── BagOfWordsArticleClassifier.ipynb │ ├── classification-2.ipynb │ ├── classification-data-prep.ipynb │ ├── classification.ipynb │ ├── crowdflower-classification-data.ipynb │ └── lsi_svm_classification-old.ipynb ├── classifiers │ ├── ClassifyArticlesByTitle.ipynb │ └── default_model.pkl ├── information_extraction │ ├── Current_Best_Results.ipynb │ ├── DependencyTreeExperiments.ipynb │ ├── DependencyTreeExperiments2.ipynb │ ├── DependencyTreeExperiments3.ipynb │ ├── DependencyTreeExperiments4-SB.ipynb │ ├── DependencyTreeExperiments5-SB.ipynb │ ├── DependencyTreeExperiments5.ipynb │ ├── DependencyTreeExperiments6.ipynb │ ├── FactExtractionTests.ipynb │ ├── LocationCoordinatesExtractionTest.ipynb │ ├── LocationExtractionTrial.ipynb │ ├── article-text-parsing-attempt1 (refugees project).ipynb │ ├── get_abs_date_test.ipynb │ ├── nlp-spacy-exploration-2.ipynb │ ├── nlp-spacy-exploration.ipynb │ └── test_file_NLP.ipynb ├── scraping_and_db │ ├── AutomateReportGenerationTests.ipynb │ ├── DB-Populate.ipynb │ ├── DatabaseExample.ipynb │ ├── Example_pipeline.ipynb │ ├── Pipeline-2.ipynb │ ├── TestDatabase.ipynb │ ├── newspaper-scrape-tests.ipynb │ ├── scraping_review.ipynb │ └── sql-concurrent.ipynb ├── test.sqlite ├── tests.ipynb └── visualize_tagged_articles.html ├── production-compose.yml ├── production.env ├── requirements.txt ├── sql_db.sqlite └── workplan.md /.dockerignore: -------------------------------------------------------------------------------- 1 | */.idea 2 | *.env -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Notebooks 2 | notebooks/test.ipynb 3 | notebooks/.ipynb_checkpoints/*.ipynb 4 | .ipynb_checkpoints/*.ipynb 5 | id-tests.ipynb 6 | 7 | # Test data 8 | *.sqlite 9 | *.pdf 10 | 11 | # Front end 12 | node_modules 13 | internal-displacement-web/dist 14 | internal_displacement/classifiers/default_model.pkl 15 | npm-debug.log 16 | 17 | # Misc 18 | *.pyc 19 | .idea/ 20 | *.csv 21 | 22 | # Mac 23 | .DS_Store 24 | /internal-displacement-web/server/pgDB/pgConfig.js\ 25 | 26 | # ML Models 27 | /internal-displacement/classifiers/* 28 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/.gitmodules -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | 3 | # python basics 4 | RUN apt-get update 
&& \ 5 | apt-get -y install python3 python3-pip python3-dev 6 | 7 | # cld2-cffi doesn't install properly after the rest of the packages, for some reason 8 | RUN apt-get -y install libffi-dev && \ 9 | pip3 install cld2-cffi 10 | 11 | # install the big packages and the ones with complex dependencies 12 | RUN apt-get -y install libxslt1-dev antiword unrtf poppler-utils pstotext \ 13 | tesseract-ocr flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev \ 14 | postgresql libpq-dev git && \ 15 | pip3 install --upgrade pip && \ 16 | pip3 install NumPy SciPy spacy && \ 17 | pip3 install git+git://github.com/aerkalov/ebooklib.git && \ 18 | pip3 install textract 19 | 20 | # download the spacy model using curl for progress indication 21 | # uncomment the below to include it in the build 22 | #RUN apt-get -y install curl && \ 23 | # mkdir /spacy-data && \ 24 | # curl -L -o "/spacy-data/en_core_web_md-1.2.1.tar.gz" $zflag \ 25 | # https://github.com/explosion/spacy-models/releases/download/en_core_web_md-1.2.1/en_core_web_md-1.2.1.tar.gz 26 | #RUN pip3 install "/spacy-data/en_core_web_md-1.2.1.tar.gz" && \ 27 | # python3 -m spacy link en_core_web_md en_default 28 | 29 | RUN mkdir /internal-displacement 30 | VOLUME /internal-displacement 31 | WORKDIR /internal-displacement 32 | COPY . /internal-displacement 33 | 34 | RUN pip3 install -r /internal-displacement/requirements.txt 35 | 36 | CMD jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks 37 | -------------------------------------------------------------------------------- /PRODUCTION.md: -------------------------------------------------------------------------------- 1 | To run in Production: 2 | 3 | 1. Make sure that the correct DB password appears in `production.env` 4 | 5 | 2. 
```docker-compose -f production-compose.yml up -d``` 6 | -------------------------------------------------------------------------------- /data/IDETECT_test_dataset - NLP.csv.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/data/IDETECT_test_dataset - NLP.csv.xlsx -------------------------------------------------------------------------------- /data/IDMC_fully_labelled.csv.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/data/IDMC_fully_labelled.csv.xlsx -------------------------------------------------------------------------------- /data/stop_words_en_long.txt: -------------------------------------------------------------------------------- 1 | a 2 | able 3 | about 4 | above 5 | abst 6 | accordance 7 | according 8 | accordingly 9 | across 10 | act 11 | actually 12 | added 13 | adj 14 | affected 15 | affecting 16 | affects 17 | after 18 | afterwards 19 | again 20 | against 21 | ah 22 | all 23 | almost 24 | alone 25 | along 26 | already 27 | also 28 | although 29 | always 30 | am 31 | among 32 | amongst 33 | an 34 | and 35 | announce 36 | another 37 | any 38 | anybody 39 | anyhow 40 | anymore 41 | anyone 42 | anything 43 | anyway 44 | anyways 45 | anywhere 46 | apparently 47 | approximately 48 | are 49 | aren 50 | arent 51 | arise 52 | around 53 | as 54 | aside 55 | ask 56 | asking 57 | at 58 | auth 59 | available 60 | away 61 | awfully 62 | b 63 | back 64 | be 65 | became 66 | because 67 | become 68 | becomes 69 | becoming 70 | been 71 | before 72 | beforehand 73 | begin 74 | beginning 75 | beginnings 76 | begins 77 | behind 78 | being 79 | believe 80 | below 81 | beside 82 | besides 83 | between 84 | beyond 85 | biol 86 | both 87 | brief 88 | briefly 89 | but 90 | by 91 | c 92 | ca 93 | came 94 | can 95 | cannot 96 | can't 97 | cause 98 | causes 99 | certain 100 | certainly 101 | co 102 | com 103 | come 104 | comes 105 | contain 106 | containing 107 | contains 108 | could 109 | couldnt 110 | d 111 | date 112 | did 113 | didn't 114 | different 115 | do 116 | does 117 | doesn't 118 | doing 119 | done 120 | don't 121 | down 122 | downwards 123 | due 124 | during 125 | e 126 | each 127 | ed 128 | edu 129 | effect 130 | eg 131 | eight 132 | eighty 133 | either 134 | else 135 | elsewhere 136 | end 137 | ending 138 | enough 139 | especially 140 | et 141 | et-al 142 | etc 143 | even 144 | ever 145 | every 146 | everybody 147 | everyone 148 | everything 149 | everywhere 150 | ex 151 | except 152 | f 153 | far 154 | few 155 | ff 156 | fifth 157 | first 158 | five 159 | fix 160 | followed 161 | following 162 | follows 163 | for 164 | former 165 | formerly 166 | forth 167 | found 168 | four 169 | from 170 | further 171 | furthermore 172 | g 173 | gave 174 | get 175 | gets 176 | getting 177 | give 178 | given 179 | gives 180 | giving 181 | go 182 | goes 183 | gone 184 | got 185 | gotten 186 | h 187 | had 188 | happens 189 | hardly 190 | has 191 | hasn't 192 | have 193 | haven't 194 | having 195 | he 196 | hed 197 | hence 198 | her 199 | here 200 | hereafter 201 | hereby 202 | herein 203 | heres 204 | hereupon 205 | hers 206 | herself 207 | hes 208 | hi 209 | hid 210 | him 211 | himself 212 | his 213 | hither 214 | home 215 | how 216 | howbeit 217 | however 218 | hundred 219 | i 220 | id 221 | ie 222 | if 
223 | i'll 224 | im 225 | immediate 226 | immediately 227 | importance 228 | important 229 | in 230 | inc 231 | indeed 232 | index 233 | information 234 | instead 235 | into 236 | invention 237 | inward 238 | is 239 | isn't 240 | it 241 | itd 242 | it'll 243 | its 244 | itself 245 | i've 246 | j 247 | just 248 | k 249 | keep 250 | keeps 251 | kept 252 | kg 253 | km 254 | know 255 | known 256 | knows 257 | l 258 | largely 259 | last 260 | lately 261 | later 262 | latter 263 | latterly 264 | least 265 | less 266 | lest 267 | let 268 | lets 269 | like 270 | liked 271 | likely 272 | line 273 | little 274 | 'll 275 | look 276 | looking 277 | looks 278 | ltd 279 | m 280 | made 281 | mainly 282 | make 283 | makes 284 | many 285 | may 286 | maybe 287 | me 288 | mean 289 | means 290 | meantime 291 | meanwhile 292 | merely 293 | mg 294 | might 295 | million 296 | miss 297 | ml 298 | more 299 | moreover 300 | most 301 | mostly 302 | mr 303 | mrs 304 | much 305 | mug 306 | must 307 | my 308 | myself 309 | n 310 | na 311 | name 312 | namely 313 | nay 314 | nd 315 | near 316 | nearly 317 | necessarily 318 | necessary 319 | need 320 | needs 321 | neither 322 | never 323 | nevertheless 324 | new 325 | next 326 | nine 327 | ninety 328 | no 329 | nobody 330 | non 331 | none 332 | nonetheless 333 | noone 334 | nor 335 | normally 336 | nos 337 | not 338 | noted 339 | nothing 340 | now 341 | nowhere 342 | o 343 | obtain 344 | obtained 345 | obviously 346 | of 347 | off 348 | often 349 | oh 350 | ok 351 | okay 352 | old 353 | omitted 354 | on 355 | once 356 | one 357 | ones 358 | only 359 | onto 360 | or 361 | ord 362 | other 363 | others 364 | otherwise 365 | ought 366 | our 367 | ours 368 | ourselves 369 | out 370 | outside 371 | over 372 | overall 373 | owing 374 | own 375 | p 376 | page 377 | pages 378 | part 379 | particular 380 | particularly 381 | past 382 | per 383 | perhaps 384 | placed 385 | please 386 | plus 387 | poorly 388 | possible 389 | possibly 390 | potentially 391 | pp 392 | predominantly 393 | present 394 | previously 395 | primarily 396 | probably 397 | promptly 398 | proud 399 | provides 400 | put 401 | q 402 | que 403 | quickly 404 | quite 405 | qv 406 | r 407 | ran 408 | rather 409 | rd 410 | re 411 | readily 412 | really 413 | recent 414 | recently 415 | ref 416 | refs 417 | regarding 418 | regardless 419 | regards 420 | related 421 | relatively 422 | research 423 | respectively 424 | resulted 425 | resulting 426 | results 427 | right 428 | run 429 | s 430 | said 431 | same 432 | saw 433 | say 434 | saying 435 | says 436 | sec 437 | section 438 | see 439 | seeing 440 | seem 441 | seemed 442 | seeming 443 | seems 444 | seen 445 | self 446 | selves 447 | sent 448 | seven 449 | several 450 | shall 451 | she 452 | shed 453 | she'll 454 | shes 455 | should 456 | shouldn't 457 | show 458 | showed 459 | shown 460 | showns 461 | shows 462 | significant 463 | significantly 464 | similar 465 | similarly 466 | since 467 | six 468 | slightly 469 | so 470 | some 471 | somebody 472 | somehow 473 | someone 474 | somethan 475 | something 476 | sometime 477 | sometimes 478 | somewhat 479 | somewhere 480 | soon 481 | sorry 482 | specifically 483 | specified 484 | specify 485 | specifying 486 | still 487 | stop 488 | strongly 489 | sub 490 | substantially 491 | successfully 492 | such 493 | sufficiently 494 | suggest 495 | sup 496 | sure 497 | t 498 | take 499 | taken 500 | taking 501 | tell 502 | tends 503 | th 504 | than 505 | thank 506 | thanks 507 | thanx 508 | that 509 | that'll 510 | thats 511 | 
that've 512 | the 513 | their 514 | theirs 515 | them 516 | themselves 517 | then 518 | thence 519 | there 520 | thereafter 521 | thereby 522 | thered 523 | therefore 524 | therein 525 | there'll 526 | thereof 527 | therere 528 | theres 529 | thereto 530 | thereupon 531 | there've 532 | these 533 | they 534 | theyd 535 | they'll 536 | theyre 537 | they've 538 | think 539 | this 540 | those 541 | thou 542 | though 543 | thoughh 544 | thousand 545 | throug 546 | through 547 | throughout 548 | thru 549 | thus 550 | til 551 | tip 552 | to 553 | together 554 | too 555 | took 556 | toward 557 | towards 558 | tried 559 | tries 560 | truly 561 | try 562 | trying 563 | ts 564 | twice 565 | two 566 | u 567 | un 568 | under 569 | unfortunately 570 | unless 571 | unlike 572 | unlikely 573 | until 574 | unto 575 | up 576 | upon 577 | ups 578 | us 579 | use 580 | used 581 | useful 582 | usefully 583 | usefulness 584 | uses 585 | using 586 | usually 587 | v 588 | value 589 | various 590 | 've 591 | very 592 | via 593 | viz 594 | vol 595 | vols 596 | vs 597 | w 598 | want 599 | wants 600 | was 601 | wasnt 602 | way 603 | we 604 | wed 605 | welcome 606 | we'll 607 | went 608 | were 609 | werent 610 | we've 611 | what 612 | whatever 613 | what'll 614 | whats 615 | when 616 | whence 617 | whenever 618 | where 619 | whereafter 620 | whereas 621 | whereby 622 | wherein 623 | wheres 624 | whereupon 625 | wherever 626 | whether 627 | which 628 | while 629 | whim 630 | whither 631 | who 632 | whod 633 | whoever 634 | whole 635 | who'll 636 | whom 637 | whomever 638 | whos 639 | whose 640 | why 641 | widely 642 | willing 643 | wish 644 | with 645 | within 646 | without 647 | wont 648 | words 649 | world 650 | would 651 | wouldnt 652 | www 653 | x 654 | y 655 | yes 656 | yet 657 | you 658 | youd 659 | you'll 660 | your 661 | youre 662 | yours 663 | yourself 664 | yourselves 665 | you've 666 | z 667 | zero -------------------------------------------------------------------------------- /docker-compose-dev.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | localdb: 4 | build: docker/localdb 5 | image: localdb 6 | jupyter: 7 | build: . 
8 | image: internal-displacement 9 | command: sh -c "jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks" 10 | stdin_open: true 11 | tty: true 12 | volumes: 13 | - .:/internal-displacement 14 | ports: 15 | - "3323:3323" 16 | depends_on: 17 | - localdb 18 | env_file: docker.env 19 | nodejs: 20 | build: internal-displacement-web 21 | image: internal-displacement-web 22 | volumes: 23 | # client 24 | - ./internal-displacement-web/client/public:/internal-displacement-web/client/public 25 | - ./internal-displacement-web/client/src:/internal-displacement-web/client/src 26 | - ./internal-displacement-web/client/package.json:/internal-displacement-web/client/package.json 27 | - ./internal-displacement-web/client/yarn.lock:/internal-displacement-web/client/yarn.lock 28 | # server 29 | - ./internal-displacement-web/server/api:/internal-displacement-web/server/api 30 | - ./internal-displacement-web/server/pgDB:/internal-displacement-web/server/pgDB 31 | - ./internal-displacement-web/server/public:/internal-displacement-web/server/public 32 | - ./internal-displacement-web/server/src:/internal-displacement-web/server/src 33 | - ./internal-displacement-web/server/index.js:/internal-displacement-web/server/index.js 34 | - ./internal-displacement-web/server/package.json:/internal-displacement-web/server/package.json 35 | - ./internal-displacement-web/server/routes.js:/internal-displacement-web/server/routes.js 36 | - ./internal-displacement-web/server/yarn.lock:/internal-displacement-web/server/yarn.lock 37 | # start 38 | - ./internal-displacement-web/package.json:/internal-displacement-web/package.json 39 | - ./internal-displacement-web/yarn.lock:/internal-displacement-web/yarn.lock 40 | ports: 41 | - "3000:3000" 42 | - "3322:3322" 43 | depends_on: 44 | - localdb 45 | env_file: docker.env 46 | -------------------------------------------------------------------------------- /docker-compose-spacy.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | localdb: 4 | build: docker/localdb 5 | image: localdb 6 | jupyter: 7 | image: aneel/internal-displacement-jupyter:spacy 8 | command: sh -c "jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks" 9 | stdin_open: true 10 | tty: true 11 | volumes: 12 | - .:/internal-displacement 13 | ports: 14 | - "3323:3323" 15 | depends_on: 16 | - localdb 17 | env_file: docker.env 18 | nodejs: 19 | build: internal-displacement-web 20 | image: internal-displacement-web 21 | volumes: 22 | # client 23 | - ./internal-displacement-web/client/public:/internal-displacement-web/client/public 24 | - ./internal-displacement-web/client/src:/internal-displacement-web/client/src 25 | - ./internal-displacement-web/client/package.json:/internal-displacement-web/client/package.json 26 | - ./internal-displacement-web/client/yarn.lock:/internal-displacement-web/client/yarn.lock 27 | # server 28 | - ./internal-displacement-web/server/api:/internal-displacement-web/server/api 29 | - ./internal-displacement-web/server/pgDB:/internal-displacement-web/server/pgDB 30 | - ./internal-displacement-web/server/public:/internal-displacement-web/server/public 31 | - ./internal-displacement-web/server/src:/internal-displacement-web/server/src 32 | - ./internal-displacement-web/server/index.js:/internal-displacement-web/server/index.js 33 | - ./internal-displacement-web/server/package.json:/internal-displacement-web/server/package.json 34 | - 
./internal-displacement-web/server/routes.js:/internal-displacement-web/server/routes.js 35 | - ./internal-displacement-web/server/yarn.lock:/internal-displacement-web/server/yarn.lock 36 | # start 37 | - ./internal-displacement-web/package.json:/internal-displacement-web/package.json 38 | - ./internal-displacement-web/yarn.lock:/internal-displacement-web/yarn.lock 39 | ports: 40 | - "3000:3000" 41 | - "3322:3322" 42 | depends_on: 43 | - localdb 44 | env_file: docker.env 45 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | localdb: 4 | build: docker/localdb 5 | image: localdb 6 | jupyter: 7 | image: aneel/internal-displacement-jupyter:no-spacy 8 | command: sh -c "jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks" 9 | stdin_open: true 10 | tty: true 11 | volumes: 12 | - .:/internal-displacement 13 | ports: 14 | - "3323:3323" 15 | depends_on: 16 | - localdb 17 | env_file: docker.env 18 | nodejs: 19 | build: internal-displacement-web 20 | image: internal-displacement-web 21 | volumes: 22 | # client 23 | - ./internal-displacement-web/client/public:/internal-displacement-web/client/public 24 | - ./internal-displacement-web/client/src:/internal-displacement-web/client/src 25 | - ./internal-displacement-web/client/package.json:/internal-displacement-web/client/package.json 26 | - ./internal-displacement-web/client/yarn.lock:/internal-displacement-web/client/yarn.lock 27 | # server 28 | - ./internal-displacement-web/server/api:/internal-displacement-web/server/api 29 | - ./internal-displacement-web/server/pgDB:/internal-displacement-web/server/pgDB 30 | - ./internal-displacement-web/server/public:/internal-displacement-web/server/public 31 | - ./internal-displacement-web/server/src:/internal-displacement-web/server/src 32 | - ./internal-displacement-web/server/index.js:/internal-displacement-web/server/index.js 33 | - ./internal-displacement-web/server/package.json:/internal-displacement-web/server/package.json 34 | - ./internal-displacement-web/server/routes.js:/internal-displacement-web/server/routes.js 35 | - ./internal-displacement-web/server/yarn.lock:/internal-displacement-web/server/yarn.lock 36 | # start 37 | - ./internal-displacement-web/package.json:/internal-displacement-web/package.json 38 | - ./internal-displacement-web/yarn.lock:/internal-displacement-web/yarn.lock 39 | ports: 40 | - "3000:3000" 41 | - "3322:3322" 42 | depends_on: 43 | - localdb 44 | env_file: docker.env 45 | -------------------------------------------------------------------------------- /docker.env: -------------------------------------------------------------------------------- 1 | # Localdb is a database running on your machine in Docker. If you need access to the 2 | # shared DB, please ask @aneel on the Slack for credentials. Please do not commit them to git. 
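# For reference, the variables below combine into a standard Postgres connection
# string of the (illustrative) form: postgresql://$DB_USER:$DB_PASS@$DB_HOST/$DB_NAME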
3 | DB_HOST=localdb 4 | DB_USER=d4d 5 | DB_PASS=democracy 6 | DB_NAME=id 7 | PYTHONPATH=/internal-displacement 8 | -------------------------------------------------------------------------------- /docker/localdb/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres 2 | 3 | COPY docker-entrypoint-initdb.d/* /docker-entrypoint-initdb.d/ 4 | COPY schema.sql /schema.sql -------------------------------------------------------------------------------- /docker/localdb/docker-entrypoint-initdb.d/id.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Set up user and database 5 | psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL 6 | CREATE USER d4d WITH PASSWORD 'democracy'; 7 | CREATE DATABASE id; 8 | GRANT ALL PRIVILEGES ON DATABASE id TO d4d; 9 | EOSQL 10 | 11 | # Create database schema in that database 12 | psql -v ON_ERROR_STOP=1 --username "d4d" id < /schema.sql -------------------------------------------------------------------------------- /docker/localdb/docker-entrypoint-initdb.d/id_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Set up user and database 5 | psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL 6 | CREATE USER tester WITH PASSWORD 'tester'; 7 | CREATE DATABASE id_test; 8 | GRANT ALL PRIVILEGES ON DATABASE id_test TO tester; 9 | EOSQL 10 | 11 | # Create database schema in that database 12 | psql -v ON_ERROR_STOP=1 --username "tester" id_test < /schema.sql -------------------------------------------------------------------------------- /docker/localdb/schema.sql: -------------------------------------------------------------------------------- 1 | DROP TYPE IF EXISTS status CASCADE; 2 | CREATE TYPE status AS ENUM ('new', 'fetching', 'fetched', 3 | 'processing', 'processed', 'fetching failed', 'processing failed'); 4 | 5 | DROP TYPE IF EXISTS category CASCADE; 6 | CREATE TYPE category AS ENUM ('other', 'disaster', 'conflict'); 7 | 8 | DROP TABLE IF EXISTS article CASCADE; 9 | CREATE TABLE article ( 10 | id SERIAL PRIMARY KEY, 11 | url TEXT UNIQUE NOT NULL, 12 | domain TEXT, 13 | status status, 14 | title TEXT, 15 | publication_date TIMESTAMP, 16 | authors TEXT, 17 | language CHAR(2), 18 | relevance BOOL, 19 | reliability DECIMAL 20 | ); 21 | 22 | DROP TABLE IF EXISTS content CASCADE; 23 | CREATE TABLE content ( 24 | article INT PRIMARY KEY REFERENCES article ON DELETE CASCADE, 25 | retrieval_date TIMESTAMP, 26 | content TEXT, 27 | content_type TEXT 28 | ); 29 | 30 | DROP TABLE IF EXISTS article_category CASCADE; 31 | CREATE TABLE article_category ( 32 | article INT REFERENCES article ON DELETE CASCADE, 33 | category category, 34 | PRIMARY KEY (article, category) 35 | ); 36 | 37 | DROP TABLE IF EXISTS country CASCADE; 38 | CREATE TABLE country ( 39 | code CHAR(3) PRIMARY KEY 40 | ); 41 | 42 | DROP TABLE IF EXISTS country_term CASCADE; 43 | CREATE TABLE country_term ( 44 | term TEXT PRIMARY KEY, 45 | country CHAR(3) REFERENCES country ON DELETE CASCADE 46 | ); 47 | 48 | DROP TABLE IF EXISTS location CASCADE; 49 | CREATE TABLE location ( 50 | id SERIAL PRIMARY KEY, 51 | description TEXT, 52 | city TEXT, 53 | subdivision TEXT, 54 | country CHAR(3) REFERENCES country ON DELETE CASCADE, 55 | latlong TEXT 56 | ); 57 | 58 | DROP TABLE IF EXISTS report CASCADE; 59 | CREATE TABLE report ( 60 | id SERIAL PRIMARY KEY, 61 | article INT REFERENCES article ON DELETE 
CASCADE, 62 | event_term TEXT, 63 | subject_term TEXT, 64 | quantity INT, 65 | tag_locations JSON, 66 | accuracy DECIMAL, 67 | analyzer TEXT, 68 | analysis_date TIMESTAMP WITH TIME ZONE 69 | ); 70 | 71 | DROP TABLE IF EXISTS report_location CASCADE; 72 | CREATE TABLE report_location ( 73 | report INT REFERENCES report ON DELETE CASCADE, 74 | location INT REFERENCES location ON DELETE CASCADE, 75 | PRIMARY KEY (report, location) 76 | ); 77 | 78 | DROP TABLE IF EXISTS report_datespan CASCADE; 79 | CREATE TABLE report_datespan ( 80 | id SERIAL PRIMARY KEY, 81 | report INT REFERENCES report ON DELETE CASCADE, 82 | start TIMESTAMP, 83 | finish TIMESTAMP 84 | ); 85 | 86 | 87 | -------------------------------------------------------------------------------- /docker/nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx 2 | ADD conf.d/ /etc/nginx/conf.d 3 | -------------------------------------------------------------------------------- /docker/nginx/conf.d/node.conf: -------------------------------------------------------------------------------- 1 | server { 2 | 3 | listen 80; 4 | server_name internal-displacement.datafordemocracy.org; 5 | access_log /var/log/nginx/node.access.log main; 6 | charset utf-8; 7 | 8 | location / { 9 | proxy_pass http://nodejs:3000; 10 | proxy_set_header Host $host; 11 | proxy_set_header X-Real-IP $remote_addr; 12 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 13 | } 14 | 15 | } -------------------------------------------------------------------------------- /docker/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | 2 | user nginx; 3 | worker_processes 1; 4 | 5 | error_log /var/log/nginx/error.log warn; 6 | pid /var/run/nginx.pid; 7 | 8 | 9 | events { 10 | worker_connections 1024; 11 | } 12 | 13 | 14 | http { 15 | include /etc/nginx/mime.types; 16 | default_type application/octet-stream; 17 | 18 | log_format main '$remote_addr - $remote_user [$time_local] "$request" ' 19 | '$status $body_bytes_sent "$http_referer" ' 20 | '"$http_user_agent" "$http_x_forwarded_for"'; 21 | 22 | access_log /var/log/nginx/access.log main; 23 | 24 | sendfile on; 25 | #tcp_nopush on; 26 | 27 | keepalive_timeout 65; 28 | 29 | #gzip on; 30 | 31 | include /etc/nginx/conf.d/*.conf; 32 | } 33 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: d4d-internal-displacement 2 | channels: 3 | - defaults 4 | dependencies: 5 | - cycler=0.10.0=py36_0 6 | - freetype=2.5.5=2 7 | - icu=54.1=0 8 | - libpng=1.6.27=0 9 | - matplotlib=2.0.0=np111py36_0 10 | - mkl=2017.0.1=0 11 | - numpy=1.11.3=py36_0 12 | - openssl=1.0.2j=0 13 | - pandas=0.19.2=np111py36_1 14 | - pip=9.0.1=py36_1 15 | - pyparsing=2.1.4=py36_0 16 | - pyqt=5.6.0=py36_2 17 | - python=3.6.0=0 18 | - python-dateutil=2.6.0=py36_0 19 | - pytz=2016.10=py36_0 20 | - qt=5.6.2=0 21 | - readline=6.2=2 22 | - scikit-learn=0.18.1=np111py36_1 23 | - scipy=0.18.1=np111py36_1 24 | - seaborn=0.7.1=py36_0 25 | - setuptools=27.2.0=py36_0 26 | - sip=4.18=py36_0 27 | - six=1.10.0=py36_0 28 | - sqlite=3.13.0=0 29 | - tk=8.5.18=0 30 | - wheel=0.29.0=py36_0 31 | - xz=5.2.2=1 32 | - zlib=1.2.8=3 33 | - pip: 34 | - appnope==0.1.0 35 | - beautifulsoup4==4.5.3 36 | - bleach==1.5.0 37 | - cssselect==1.0.1 38 | - decorator==4.0.11 39 | - entrypoints==0.2.2 40 | - feedfinder2==0.0.4 41 | - feedparser==5.2.1 42 | - 
html5lib==0.9999999
43 | - idna==2.2
44 | - ipykernel==4.5.2
45 | - ipython==5.2.2
46 | - ipython-genutils==0.1.0
47 | - ipywidgets==5.2.2
48 | - jieba3k==0.35.1
49 | - jinja2==2.9.5
50 | - jsonschema==2.5.1
51 | - jupyter==1.0.0
52 | - jupyter-client==4.4.0
53 | - jupyter-console==5.1.0
54 | - jupyter-core==4.2.1
55 | - lxml==3.7.2
56 | - markupsafe==0.23
57 | - mistune==0.7.3
58 | - nbconvert==5.1.1
59 | - nbformat==4.2.0
60 | - newspaper3k==0.1.9
61 | - nltk==3.2.2
62 | - notebook==4.3.2
63 | - olefile==0.44
64 | - pandocfilters==1.4.1
65 | - pexpect==4.2.1
66 | - pickleshare==0.7.4
67 | - pillow==4.0.0
68 | - prompt-toolkit==1.0.13
69 | - ptyprocess==0.5.1
70 | - pygments==2.2.0
71 | - pyyaml==3.12
72 | - pyzmq==16.0.2
73 | - qtconsole==4.2.1
74 | - requests==2.13.0
75 | - requests-file==1.4.1
76 | - simplegeneric==0.8.1
77 | - terminado==0.6
78 | - testpath==0.3
79 | - tldextract==2.0.2
80 | - tornado==4.4.2
81 | - traitlets==4.3.1
82 | - wcwidth==0.1.7
83 | - widgetsnbextension==1.2.6
84 | prefix: /Users/joshuaarnold/anaconda/envs/d4d-internal-displacement
85 |
86 |
-------------------------------------------------------------------------------- /fact_extraction.md: --------------------------------------------------------------------------------
1 | ## Approach to Fact Extraction Using Spacy
2 |
3 | This document summarizes the current approach, developed over several weeks of experimentation, to extracting facts and reports from articles using the Spacy library.
4 |
5 | ### Reporting Terms and Units
6 |
7 | As per the competition guidelines, fact extraction is based on a set of core reporting terms and reporting units.
8 | There are two broad categories:
9 |
10 | __Reporting Terms and Units Relating to People:__
11 |
12 | ```
13 | person_reporting_terms = [
14 |     'displaced', 'evacuated', 'forced flee', 'homeless', 'relief camp',
15 |     'sheltered', 'relocated', 'stranded', 'stuck', 'killed', 'dead', 'died'
16 | ]
17 | person_reporting_units = ["families", "person", "people",
18 |     "individuals", "locals", "villagers", "residents", "occupants", "citizens", "households"]
19 | ```
20 |
21 | __Reporting Terms and Units Relating to Structures:__
22 |
23 | ```
24 | structure_reporting_terms = [
25 |     'destroyed', 'damaged', 'swept', 'collapsed', 'flooded', 'washed'
26 | ]
27 | structure_reporting_units = ["home", "house", "hut", "dwelling",
28 |     "building", "shop", "business", "apartment", "flat", "residence"]
29 | ```
30 |
31 | In practice, each of these terms and units is lemmatized for comparison with tokens parsed from the article (see the sketch below).
32 |
33 | These terms and units can be updated as needed to ensure we are maximizing coverage of events referenced in articles.
34 |
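For illustration, a minimal sketch of this lemma-based matching, using the lists defined above (the model name, example sentence and exact matching logic are illustrative assumptions, not the project's verbatim implementation):

```
import spacy

# Assumes an English model is available; the project links its model as
# `en_default`, but `en_core_web_md` is used here for illustration.
nlp = spacy.load('en_core_web_md')

# person_reporting_terms / person_reporting_units as defined above.
# Lemmatize the configured terms and units once, then compare token
# lemmas against them (multi-word terms are lemmatized token by token).
term_lemmas = {t.lemma_ for t in nlp(' '.join(person_reporting_terms))}
unit_lemmas = {t.lemma_ for t in nlp(' '.join(person_reporting_units))}

doc = nlp("Around 2,000 families were displaced by the floods.")
for token in doc:
    if token.lemma_ in term_lemmas:
        print('reporting term:', token.text)   # 'displaced' -> lemma 'displace'
    elif token.lemma_ in unit_lemmas:
        print('reporting unit:', token.text)   # 'families' -> lemma 'family'
```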
35 | ---
36 |
37 | ### High Level Country Extraction
38 |
39 | The competition guidelines require that each article be tagged with ISO 3166 country codes.
40 | This is achieved using:
41 | - The Spacy library for named entity recognition
42 | - Pycountry for mapping country names and subdivisions (states, provinces, etc.) to country codes
43 | - A JSON mapping of cities to country codes, covering all cities with a population over 5,000, extracted from [www.geonames.org](http://www.geonames.org)
44 |
45 | The procedure is:
46 |
47 | 1. Combine the article title and contents and parse them using Spacy to identify geographical named entities
48 | 2. Attempt to match each identified entity to a country code using the following steps, in order:
49 |    - Try a direct match for the entity against country names, common names and official names
50 |    - Try to identify the country by comparing the entity to country subdivisions
51 |    - Try to identify the country by checking whether the entity appears in the cities_to_countries JSON
52 |
53 | ---
54 |
55 | ### Report Extraction
56 |
57 | The possible fields that a Report can have are:
58 | - Referenced locations
59 | - Referenced date
60 | - Reporting term (see above)
61 | - Reporting unit (see above)
62 | - Quantity
63 |
64 | At a minimum, a Reporting Term and a relevant Reporting Unit must be present in order to create a Report (the other fields can be blank / none).
65 |
66 | The high-level procedure is:
67 |
68 | 1. Parse the article contents using Spacy and split them into sentences
69 | 2. Process each sentence and attempt to identify:
70 |    - Locations
71 |    - Date
72 |    - Reporting Term
73 |    - Reporting Unit
74 |    - Quantity
75 | 3. If the necessary reporting elements are correctly extracted, a Report is created
76 | 4. Multiple Reports can be created for a given article
77 |
78 | ---
79 |
80 | #### Location Identification
81 |
82 | Sentence parsing to identify locations follows this procedure:
83 |
84 | - Examine the sentence and identify whether any constituent tokens describe a location (based on Spacy named entity recognition)
85 | - If a root token is specified, only location tokens below the level of this token in the tree are examined.
86 | - If no root is specified, location tokens are drawn from the entirety of the span.
87 |
88 | ***Fallback location:***
89 |
90 | - In many cases the event location is referenced one or more sentences before the sentence containing the reporting term and unit.
91 | - To deal with this, a local variable keeps track of the last extracted location during article processing.
92 | - When a Report is extracted with no specific location, its location can be set to the most recently identified prior location
93 | - If a new location is extracted for a Report, the local fallback location variable is updated
94 |
95 | ---
96 |
97 | #### Date Identification
98 |
99 | Sentence parsing to identify dates follows this procedure:
100 |
101 | - Examine the sentence and identify whether any constituent tokens describe a date (based on Spacy named entity recognition)
102 | - If a root token is specified, only date tokens below the level of this token in the tree are examined.
103 | - If no root is specified, date tokens are drawn from the entirety of the span.
104 |
105 | ***Fallback date:***
106 |
107 | - In many cases the event date is referenced one or more sentences before the sentence containing the reporting term and unit.
108 | - To deal with this, a local variable keeps track of the last extracted date during article processing.
109 | - When a Report is extracted with no specific date, its date can be set to the most recently identified prior date
110 | - If a new date is extracted for a Report, the local fallback date variable is updated
111 |
112 | ---
113 |
114 | #### Reporting Term and Unit Identification
115 |
116 | - Each sentence is split into tokens
117 | - Each token is compared to the reporting terms for both people and structures
118 | - If a given token matches a reporting term:
119 |     + Each branch below the token is examined to search for reporting units and numbers
120 |     + If a reporting unit and a number are identified, a Report is created
121 |     + If only a reporting unit (but no number) is identified, the search continues further up the tree, above the reporting term, to see if a number is present (see the sketch after this list)
122 |
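A minimal sketch of this subtree search (simplified; the model name and the pre-lemmatized term and unit sets are illustrative assumptions, and the real implementation handles more cases):

```
import spacy

nlp = spacy.load('en_core_web_md')  # illustrative model name

term_lemmas = {'displace', 'evacuate', 'kill'}                     # lemmatized reporting terms
unit_lemmas = {'family', 'person', 'people', 'resident', 'home'}   # lemmatized reporting units

def extract_reports(sentence):
    """For each token matching a reporting term, search its subtree for a
    reporting unit and a number; if a unit is found without a number, look
    further up the tree, above the reporting term."""
    reports = []
    for token in sentence:
        if token.lemma_ not in term_lemmas:
            continue
        unit = quantity = None
        for t in token.subtree:                 # branches below the reporting term
            if t.lemma_ in unit_lemmas:
                unit = t
            elif t.like_num:
                quantity = t
        if unit is not None and quantity is None:
            # fall back to searching above the reporting term
            quantity = next((t for t in token.ancestors if t.like_num), None)
        if unit is not None:                    # term + unit is the minimum for a Report
            reports.append((token.lemma_, unit.lemma_, quantity.text if quantity else None))
    return reports

doc = nlp("Floods in the north displaced around 2,000 families last week.")
for sent in doc.sents:
    print(extract_reports(sent))                # e.g. [('displace', 'family', '2,000')]
```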
123 | ***Special Cases:***
124 |
125 | - In addition to this general procedure, some special cases can be identified more simply by looking for specific combinations of Reporting Terms and Reporting Units
126 | - In some cases these 'term-unit phrases' do not have a dependency within the parse tree that the above algorithm would match, e.g. 'families homeless'
127 | - If one of these specific phrases is encountered, similar methods to the above are used for extracting:
128 |     + Location
129 |     + Date
130 |     + Number
131 |
132 | ---
133 |
134 | ### Required Enhancements
135 |
136 | See [Issue #62](https://github.com/Data4Democracy/internal-displacement/issues/62)
-------------------------------------------------------------------------------- /images/internal-displacement-plan.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/images/internal-displacement-plan.png
-------------------------------------------------------------------------------- /internal-displacement-web/Dockerfile: --------------------------------------------------------------------------------
1 | FROM node:latest
2 |
3 | RUN mkdir /internal-displacement-web
4 | WORKDIR /internal-displacement-web
5 |
6 | COPY . 
/internal-displacement-web 7 | 8 | RUN yarn install 9 | RUN cd /internal-displacement-web/client && yarn install 10 | RUN cd /internal-displacement-web/server && yarn install 11 | 12 | CMD npm run start 13 | -------------------------------------------------------------------------------- /internal-displacement-web/client/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "id-web-client", 3 | "version": "0.1.0", 4 | "private": true, 5 | "proxy": "http://localhost:3322/", 6 | "devDependencies": { 7 | "react-scripts": "0.9.5" 8 | }, 9 | "dependencies": { 10 | "babel-polyfill": "^6.23.0", 11 | "bootstrap": "^3.3.7", 12 | "d3": "^4.8.0", 13 | "deck.gl": "^4", 14 | "immutable": "^3.8.1", 15 | "luma.gl": "^3.0.0", 16 | "r-dom": "^2.3.2", 17 | "react": "^15.4.2", 18 | "react-bootstrap": "^0.30.7", 19 | "react-dom": "^15.4.2", 20 | "react-map-gl": "^1.8.2", 21 | "react-map-gl-heatmap-overlay": "^1.1.2", 22 | "react-redux": "^5.0.3", 23 | "react-router": "^2.6.0", 24 | "react-router-redux": "^4.0.8", 25 | "react-scroll": "^1.5.2", 26 | "redux": "^3.6.0", 27 | "redux-saga": "^0.14.6" 28 | }, 29 | "scripts": { 30 | "start": "react-scripts start", 31 | "build": "react-scripts build", 32 | "test": "react-scripts test --env=jsdom", 33 | "eject": "react-scripts eject" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /internal-displacement-web/client/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/public/favicon.ico -------------------------------------------------------------------------------- /internal-displacement-web/client/public/images/banner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/public/images/banner.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 11 | 12 | 13 | 14 | 23 | Internal Displacement 24 | 25 | 26 |
27 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /internal-displacement-web/client/public/themeJS/ie/backgroundsize.min.htc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /internal-displacement-web/client/public/themeJS/ie/html5shiv.js: -------------------------------------------------------------------------------- 1 | /* 2 | HTML5 Shiv v3.6.2 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | (function(l,f){function m(){var a=e.elements;return"string"==typeof a?a.split(" "):a}function i(a){var b=n[a[o]];b||(b={},h++,a[o]=h,n[h]=b);return b}function p(a,b,c){b||(b=f);if(g)return b.createElement(a);c||(c=i(b));b=c.cache[a]?c.cache[a].cloneNode():r.test(a)?(c.cache[a]=c.createElem(a)).cloneNode():c.createElem(a);return b.canHaveChildren&&!s.test(a)?c.frag.appendChild(b):b}function t(a,b){if(!b.cache)b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag(); 5 | a.createElement=function(c){return!e.shivMethods?b.createElem(c):p(c,a,b)};a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+m().join().replace(/\w+/g,function(a){b.createElem(a);b.frag.createElement(a);return'c("'+a+'")'})+");return n}")(e,b.frag)}function q(a){a||(a=f);var b=i(a);if(e.shivCSS&&!j&&!b.hasCSS){var c,d=a;c=d.createElement("p");d=d.getElementsByTagName("head")[0]||d.documentElement;c.innerHTML="x"; 6 | c=d.insertBefore(c.lastChild,d.firstChild);b.hasCSS=!!c}g||t(a,b);return a}var k=l.html5||{},s=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,r=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,j,o="_html5shiv",h=0,n={},g;(function(){try{var a=f.createElement("a");a.innerHTML="";j="hidden"in a;var b;if(!(b=1==a.childNodes.length)){f.createElement("a");var c=f.createDocumentFragment();b="undefined"==typeof c.cloneNode|| 7 | "undefined"==typeof c.createDocumentFragment||"undefined"==typeof c.createElement}g=b}catch(d){g=j=!0}})();var e={elements:k.elements||"abbr article aside audio bdi canvas data datalist details figcaption figure footer header hgroup main mark meter nav output progress section summary time video",version:"3.6.2",shivCSS:!1!==k.shivCSS,supportsUnknownElements:g,shivMethods:!1!==k.shivMethods,type:"default",shivDocument:q,createElement:p,createDocumentFragment:function(a,b){a||(a=f);if(g)return a.createDocumentFragment(); 8 | for(var b=b||i(a),c=b.frag.cloneNode(),d=0,e=m(),h=e.length;d #mq-test-1 { width: 42px; }',c.insertBefore(e,d),b=42===f.offsetWidth,c.removeChild(e),{matches:b,media:a}}}(a.document)}(this),function(a){"use strict";function b(){v(!0)}var c={};a.respond=c,c.update=function(){};var d=[],e=function(){var b=!1;try{b=new a.XMLHttpRequest}catch(c){b=new a.ActiveXObject("Microsoft.XMLHTTP")}return function(){return b}}(),f=function(a,b){var c=e();c&&(c.open("GET",a,!0),c.onreadystatechange=function(){4!==c.readyState||200!==c.status&&304!==c.status||b(c.responseText)},4!==c.readyState&&c.send(null))},g=function(a){return 
a.replace(c.regex.minmaxwh,"").match(c.regex.other)};if(c.ajax=f,c.queue=d,c.unsupportedmq=g,c.regex={media:/@media[^\{]+\{([^\{\}]*\{[^\}\{]*\})+/gi,keyframes:/@(?:\-(?:o|moz|webkit)\-)?keyframes[^\{]+\{(?:[^\{\}]*\{[^\}\{]*\})+[^\}]*\}/gi,comments:/\/\*[^*]*\*+([^/][^*]*\*+)*\//gi,urls:/(url\()['"]?([^\/\)'"][^:\)'"]+)['"]?(\))/g,findStyles:/@media *([^\{]+)\{([\S\s]+?)$/,only:/(only\s+)?([a-zA-Z]+)\s?/,minw:/\(\s*min\-width\s*:\s*(\s*[0-9\.]+)(px|em)\s*\)/,maxw:/\(\s*max\-width\s*:\s*(\s*[0-9\.]+)(px|em)\s*\)/,minmaxwh:/\(\s*m(in|ax)\-(height|width)\s*:\s*(\s*[0-9\.]+)(px|em)\s*\)/gi,other:/\([^\)]*\)/g},c.mediaQueriesSupported=a.matchMedia&&null!==a.matchMedia("only all")&&a.matchMedia("only all").matches,!c.mediaQueriesSupported){var h,i,j,k=a.document,l=k.documentElement,m=[],n=[],o=[],p={},q=30,r=k.getElementsByTagName("head")[0]||l,s=k.getElementsByTagName("base")[0],t=r.getElementsByTagName("link"),u=function(){var a,b=k.createElement("div"),c=k.body,d=l.style.fontSize,e=c&&c.style.fontSize,f=!1;return b.style.cssText="position:absolute;font-size:1em;width:1em",c||(c=f=k.createElement("body"),c.style.background="none"),l.style.fontSize="100%",c.style.fontSize="100%",c.appendChild(b),f&&l.insertBefore(c,l.firstChild),a=b.offsetWidth,f?l.removeChild(c):c.removeChild(b),l.style.fontSize=d,e&&(c.style.fontSize=e),a=j=parseFloat(a)},v=function(b){var c="clientWidth",d=l[c],e="CSS1Compat"===k.compatMode&&d||k.body[c]||d,f={},g=t[t.length-1],p=(new Date).getTime();if(b&&h&&q>p-h)return a.clearTimeout(i),i=a.setTimeout(v,q),void 0;h=p;for(var s in m)if(m.hasOwnProperty(s)){var w=m[s],x=w.minw,y=w.maxw,z=null===x,A=null===y,B="em";x&&(x=parseFloat(x)*(x.indexOf(B)>-1?j||u():1)),y&&(y=parseFloat(y)*(y.indexOf(B)>-1?j||u():1)),w.hasquery&&(z&&A||!(z||e>=x)||!(A||y>=e))||(f[w.media]||(f[w.media]=[]),f[w.media].push(n[w.rules]))}for(var C in o)o.hasOwnProperty(C)&&o[C]&&o[C].parentNode===r&&r.removeChild(o[C]);o.length=0;for(var D in f)if(f.hasOwnProperty(D)){var E=k.createElement("style"),F=f[D].join("\n");E.type="text/css",E.media=D,r.insertBefore(E,g.nextSibling),E.styleSheet?E.styleSheet.cssText=F:E.appendChild(k.createTextNode(F)),o.push(E)}},w=function(a,b,d){var e=a.replace(c.regex.comments,"").replace(c.regex.keyframes,"").match(c.regex.media),f=e&&e.length||0;b=b.substring(0,b.lastIndexOf("/"));var h=function(a){return a.replace(c.regex.urls,"$1"+b+"$2$3")},i=!f&&d;b.length&&(b+="/"),i&&(f=1);for(var j=0;f>j;j++){var k,l,o,p;i?(k=d,n.push(h(a))):(k=e[j].match(c.regex.findStyles)&&RegExp.$1,n.push(RegExp.$2&&h(RegExp.$2))),o=k.split(","),p=o.length;for(var q=0;p>q;q++)l=o[q],g(l)||m.push({media:l.split("(")[0].match(c.regex.only)&&RegExp.$2||"all",rules:n.length-1,hasquery:l.indexOf("(")>-1,minw:l.match(c.regex.minw)&&parseFloat(RegExp.$1)+(RegExp.$2||""),maxw:l.match(c.regex.maxw)&&parseFloat(RegExp.$1)+(RegExp.$2||"")})}v()},x=function(){if(d.length){var b=d.shift();f(b.href,function(c){w(c,b.href,b.media),p[b.href]=!0,a.setTimeout(function(){x()},0)})}},y=function(){for(var b=0;b1){for(var r=0;r=i&&o>=t};break;case"bottom":h=function(t,e,n,i,o){return n>=i&&o>=n};break;case"middle":h=function(t,e,n,i,o){return e>=i&&o>=e};break;case"top-only":h=function(t,e,n,i,o){return i>=t&&n>=i};break;case"bottom-only":h=function(t,e,n,i,o){return n>=o&&o>=t};break;default:case"default":h=function(t,e,n,i,o){return n>=i&&o>=t}}return c=function(t){var 
i,o,l,s,r,a,u=this.state,h=!1,c=this.$element.offset();i=n.height(),o=t+i/2,l=t+i,s=this.$element.outerHeight(),r=c.top+e(this.options.top,s,i),a=c.top+s-e(this.options.bottom,s,i),h=this.test(t,o,l,r,a),h!=u&&(this.state=h,h?this.options.enter&&this.options.enter.apply(this.element):this.options.leave&&this.options.leave.apply(this.element)),this.options.scroll&&this.options.scroll.apply(this.element,[(o-r)/(a-r)])},p={id:a,options:u,test:h,handler:c,state:null,element:this,$element:s,timeoutId:null},o[a]=p,s.data("_scrollexId",p.id),p.options.initialize&&p.options.initialize.apply(this),s},jQuery.fn.unscrollex=function(){var e=t(this);if(0==this.length)return e;if(this.length>1){for(var n=0;n1){for(o=0;o') 68 | .appendTo($body) 69 | .panel({ 70 | delay: 500, 71 | hideOnClick: true, 72 | hideOnSwipe: true, 73 | resetScroll: true, 74 | resetForms: true, 75 | side: 'right', 76 | target: $body, 77 | visibleClass: 'is-menu-visible' 78 | }); 79 | 80 | // Header. 81 | if (skel.vars.IEVersion < 9) 82 | $header.removeClass('alt'); 83 | 84 | if ($banner.length > 0 85 | && $header.hasClass('alt')) { 86 | 87 | $window.on('resize', function() { $window.trigger('scroll'); }); 88 | 89 | $banner.scrollex({ 90 | bottom: $header.outerHeight() + 1, 91 | terminate: function() { $header.removeClass('alt'); }, 92 | enter: function() { $header.addClass('alt'); }, 93 | leave: function() { $header.removeClass('alt'); } 94 | }); 95 | 96 | } 97 | 98 | }); 99 | 100 | })(jQuery); 101 | -------------------------------------------------------------------------------- /internal-displacement-web/client/public/themeJS/skel.min.js: -------------------------------------------------------------------------------- 1 | /* skel.js v3.0.0 | (c) n33 | skel.io | MIT licensed */ 2 | var skel=function(){"use strict";var t={breakpointIds:null,events:{},isInit:!1,obj:{attachments:{},breakpoints:{},head:null,states:{}},sd:"/",state:null,stateHandlers:{},stateId:"",vars:{},DOMReady:null,indexOf:null,isArray:null,iterate:null,matchesMedia:null,extend:function(e,n){t.iterate(n,function(i){t.isArray(n[i])?(t.isArray(e[i])||(e[i]=[]),t.extend(e[i],n[i])):"object"==typeof n[i]?("object"!=typeof e[i]&&(e[i]={}),t.extend(e[i],n[i])):e[i]=n[i]})},newStyle:function(t){var e=document.createElement("style");return e.type="text/css",e.innerHTML=t,e},_canUse:null,canUse:function(e){t._canUse||(t._canUse=document.createElement("div"));var n=t._canUse.style,i=e.charAt(0).toUpperCase()+e.slice(1);return e in n||"Moz"+i in n||"Webkit"+i in n||"O"+i in n||"ms"+i in n},on:function(e,n){var i=e.split(/[\s]+/);return t.iterate(i,function(e){var a=i[e];if(t.isInit){if("init"==a)return void n();if("change"==a)n();else{var r=a.charAt(0);if("+"==r||"!"==r){var o=a.substring(1);if(o in t.obj.breakpoints)if("+"==r&&t.obj.breakpoints[o].active)n();else if("!"==r&&!t.obj.breakpoints[o].active)return void n()}}}t.events[a]||(t.events[a]=[]),t.events[a].push(n)}),t},trigger:function(e){return t.events[e]&&0!=t.events[e].length?(t.iterate(t.events[e],function(n){t.events[e][n]()}),t):void 0},breakpoint:function(e){return t.obj.breakpoints[e]},breakpoints:function(e){function n(t,e){this.name=this.id=t,this.media=e,this.active=!1,this.wasActive=!1}return n.prototype.matches=function(){return t.matchesMedia(this.media)},n.prototype.sync=function(){this.wasActive=this.active,this.active=this.matches()},t.iterate(e,function(i){t.obj.breakpoints[i]=new 
n(i,e[i])}),window.setTimeout(function(){t.poll()},0),t},addStateHandler:function(e,n){t.stateHandlers[e]=n},callStateHandler:function(e){var n=t.stateHandlers[e]();t.iterate(n,function(e){t.state.attachments.push(n[e])})},changeState:function(e){t.iterate(t.obj.breakpoints,function(e){t.obj.breakpoints[e].sync()}),t.vars.lastStateId=t.stateId,t.stateId=e,t.breakpointIds=t.stateId===t.sd?[]:t.stateId.substring(1).split(t.sd),t.obj.states[t.stateId]?t.state=t.obj.states[t.stateId]:(t.obj.states[t.stateId]={attachments:[]},t.state=t.obj.states[t.stateId],t.iterate(t.stateHandlers,t.callStateHandler)),t.detachAll(t.state.attachments),t.attachAll(t.state.attachments),t.vars.stateId=t.stateId,t.vars.state=t.state,t.trigger("change"),t.iterate(t.obj.breakpoints,function(e){t.obj.breakpoints[e].active?t.obj.breakpoints[e].wasActive||t.trigger("+"+e):t.obj.breakpoints[e].wasActive&&t.trigger("-"+e)})},generateStateConfig:function(e,n){var i={};return t.extend(i,e),t.iterate(t.breakpointIds,function(e){t.extend(i,n[t.breakpointIds[e]])}),i},getStateId:function(){var e="";return t.iterate(t.obj.breakpoints,function(n){var i=t.obj.breakpoints[n];i.matches()&&(e+=t.sd+i.id)}),e},poll:function(){var e="";e=t.getStateId(),""===e&&(e=t.sd),e!==t.stateId&&t.changeState(e)},_attach:null,attach:function(e){var n=t.obj.head,i=e.element;return i.parentNode&&i.parentNode.tagName?!1:(t._attach||(t._attach=n.firstChild),n.insertBefore(i,t._attach.nextSibling),e.permanent&&(t._attach=i),!0)},attachAll:function(e){var n=[];t.iterate(e,function(t){n[e[t].priority]||(n[e[t].priority]=[]),n[e[t].priority].push(e[t])}),n.reverse(),t.iterate(n,function(e){t.iterate(n[e],function(i){t.attach(n[e][i])})})},detach:function(t){var e=t.element;return t.permanent||!e.parentNode||e.parentNode&&!e.parentNode.tagName?!1:(e.parentNode.removeChild(e),!0)},detachAll:function(e){var n={};t.iterate(e,function(t){n[e[t].id]=!0}),t.iterate(t.obj.attachments,function(e){e in n||t.detach(t.obj.attachments[e])})},attachment:function(e){return e in t.obj.attachments?t.obj.attachments[e]:null},newAttachment:function(e,n,i,a){return t.obj.attachments[e]={id:e,element:n,priority:i,permanent:a}},init:function(){t.initMethods(),t.initVars(),t.initEvents(),t.obj.head=document.getElementsByTagName("head")[0],t.isInit=!0,t.trigger("init")},initEvents:function(){t.on("resize",function(){t.poll()}),t.on("orientationChange",function(){t.poll()}),t.DOMReady(function(){t.trigger("ready")}),window.onload&&t.on("load",window.onload),window.onload=function(){t.trigger("load")},window.onresize&&t.on("resize",window.onresize),window.onresize=function(){t.trigger("resize")},window.onorientationchange&&t.on("orientationChange",window.onorientationchange),window.onorientationchange=function(){t.trigger("orientationChange")}},initMethods:function(){document.addEventListener?!function(e,n){t.DOMReady=n()}("domready",function(){function t(t){for(r=1;t=n.shift();)t()}var e,n=[],i=document,a="DOMContentLoaded",r=/^loaded|^c/.test(i.readyState);return i.addEventListener(a,e=function(){i.removeEventListener(a,e),t()}),function(t){r?t():n.push(t)}}):!function(e,n){t.DOMReady=n()}("domready",function(t){function e(t){for(h=1;t=i.shift();)t()}var n,i=[],a=!1,r=document,o=r.documentElement,s=o.doScroll,c="DOMContentLoaded",d="addEventListener",u="onreadystatechange",l="readyState",f=s?/^loaded|^c/:/^loaded|c/,h=f.test(r[l]);return 
r[d]&&r[d](c,n=function(){r.removeEventListener(c,n,a),e()},a),s&&r.attachEvent(u,n=function(){/^c/.test(r[l])&&(r.detachEvent(u,n),e())}),t=s?function(e){self!=top?h?e():i.push(e):function(){try{o.doScroll("left")}catch(n){return setTimeout(function(){t(e)},50)}e()}()}:function(t){h?t():i.push(t)}}),Array.prototype.indexOf?t.indexOf=function(t,e){return t.indexOf(e)}:t.indexOf=function(t,e){if("string"==typeof t)return t.indexOf(e);var n,i,a=e?e:0;if(!this)throw new TypeError;if(i=this.length,0===i||a>=i)return-1;for(0>a&&(a=i-Math.abs(a)),n=a;i>n;n++)if(this[n]===t)return n;return-1},Array.isArray?t.isArray=function(t){return Array.isArray(t)}:t.isArray=function(t){return"[object Array]"===Object.prototype.toString.call(t)},Object.keys?t.iterate=function(t,e){if(!t)return[];var n,i=Object.keys(t);for(n=0;i[n]&&e(i[n],t[i[n]])!==!1;n++);}:t.iterate=function(t,e){if(!t)return[];var n;for(n in t)if(Object.prototype.hasOwnProperty.call(t,n)&&e(n,t[n])===!1)break},window.matchMedia?t.matchesMedia=function(t){return""==t?!0:window.matchMedia(t).matches}:window.styleMedia||window.media?t.matchesMedia=function(t){if(""==t)return!0;var e=window.styleMedia||window.media;return e.matchMedium(t||"all")}:window.getComputedStyle?t.matchesMedia=function(t){if(""==t)return!0;var e=document.createElement("style"),n=document.getElementsByTagName("script")[0],i=null;e.type="text/css",e.id="matchmediajs-test",n.parentNode.insertBefore(e,n),i="getComputedStyle"in window&&window.getComputedStyle(e,null)||e.currentStyle;var a="@media "+t+"{ #matchmediajs-test { width: 1px; } }";return e.styleSheet?e.styleSheet.cssText=a:e.textContent=a,"1px"===i.width}:t.matchesMedia=function(t){if(""==t)return!0;var e,n,i,a,r={"min-width":null,"max-width":null},o=!1;for(i=t.split(/\s+and\s+/),e=0;er["max-width"]||null!==r["min-height"]&&cr["max-height"]?!1:!0},navigator.userAgent.match(/MSIE ([0-9]+)/)&&RegExp.$1<9&&(t.newStyle=function(t){var e=document.createElement("span");return e.innerHTML=' ",e})},initVars:function(){var e,n,i,a=navigator.userAgent;e="other",n=0,i=[["firefox",/Firefox\/([0-9\.]+)/],["bb",/BlackBerry.+Version\/([0-9\.]+)/],["bb",/BB[0-9]+.+Version\/([0-9\.]+)/],["opera",/OPR\/([0-9\.]+)/],["opera",/Opera\/([0-9\.]+)/],["edge",/Edge\/([0-9\.]+)/],["safari",/Version\/([0-9\.]+).+Safari/],["chrome",/Chrome\/([0-9\.]+)/],["ie",/MSIE ([0-9]+)/],["ie",/Trident\/.+rv:([0-9]+)/]],t.iterate(i,function(t,i){return a.match(i[1])?(e=i[0],n=parseFloat(RegExp.$1),!1):void 0}),t.vars.browser=e,t.vars.browserVersion=n,e="other",n=0,i=[["ios",/([0-9_]+) like Mac OS X/,function(t){return t.replace("_",".").replace("_","")}],["ios",/CPU like Mac OS X/,function(t){return 0}],["android",/Android ([0-9\.]+)/,null],["mac",/Macintosh.+Mac OS X ([0-9_]+)/,function(t){return t.replace("_",".").replace("_","")}],["wp",/Windows Phone ([0-9\.]+)/,null],["windows",/Windows NT ([0-9\.]+)/,null],["bb",/BlackBerry.+Version\/([0-9\.]+)/,null],["bb",/BB[0-9]+.+Version\/([0-9\.]+)/,null]],t.iterate(i,function(t,i){return a.match(i[1])?(e=i[0],n=parseFloat(i[2]?i[2](RegExp.$1):RegExp.$1),!1):void 0}),t.vars.os=e,t.vars.osVersion=n,t.vars.IEVersion="ie"==t.vars.browser?t.vars.browserVersion:99,t.vars.touch="wp"==t.vars.os?navigator.msMaxTouchPoints>0:!!("ontouchstart"in window),t.vars.mobile="wp"==t.vars.os||"android"==t.vars.os||"ios"==t.vars.os||"bb"==t.vars.os}};return t.init(),t}();!function(t,e){"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?module.exports=e():t.skel=e()}(this,function(){return skel}); 3 | 
-------------------------------------------------------------------------------- /internal-displacement-web/client/src/Api/api.js: -------------------------------------------------------------------------------- 1 | function checkStatus(response) { 2 | if (response.status >= 200 && response.status < 300) { 3 | return response; 4 | } 5 | const error = new Error(`HTTP Error ${response.statusText}`); 6 | error.status = response.statusText; 7 | error.response = response; 8 | console.log(error); 9 | throw error; 10 | } 11 | 12 | function parseJSON(response) { 13 | return response.json(); 14 | } 15 | 16 | 17 | export const dummyMapData = () => { 18 | const dummyMapUrl = 'https://jamesleondufour.carto.com/api/v2/sql?q=select%20count,%20long,%20lat,%20date,%20sampleurl,%20humanname%20from%20public.gdelt_refugee_2016'; 19 | const dummyTestUrl = `${window.location.origin}/api/test`; 20 | return fetch(dummyTestUrl).then(checkStatus).then(parseJSON) 21 | 22 | }; 23 | 24 | export const testDB = () => { 25 | return fetch(`${window.location.origin}/api/testDB`).then(checkStatus).then(parseJSON); 26 | }; 27 | export const reportLocationData = () => { 28 | const reportLocationDataURL = `${window.location.origin}/api/report-location-data`; 29 | return fetch(reportLocationDataURL).then(checkStatus).then(parseJSON) 30 | } 31 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/App.css: -------------------------------------------------------------------------------- 1 | .App { 2 | text-align: center; 3 | } 4 | 5 | .App-logo { 6 | animation: App-logo-spin infinite 20s linear; 7 | height: 80px; 8 | } 9 | 10 | .App-header { 11 | background-color: #222; 12 | height: 150px; 13 | padding: 20px; 14 | color: white; 15 | } 16 | 17 | .App-intro { 18 | font-size: large; 19 | } 20 | 21 | @keyframes App-logo-spin { 22 | from { transform: rotate(0deg); } 23 | to { transform: rotate(360deg); } 24 | } 25 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/App.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import logo from './logo.svg'; 3 | import './App.css'; 4 | import Bootstrap from 'bootstrap/dist/css/bootstrap.css'; 5 | import { render } from 'react-dom'; 6 | import { syncHistoryWithStore } from 'react-router-redux' 7 | 8 | class App extends Component { 9 | render() { 10 | return ( 11 |
<div className="App"> 12 | <div className="App-header"> 13 | <img src={logo} className="App-logo" alt="logo" /> 14 | <h2>Welcome to React</h2> 15 | </div> 16 | <p className="App-intro"> 17 | To get started, edit <code>src/App.js</code> and save to reload. 18 | </p> 19 | </div>
20 | ); 21 | } 22 | } 23 | 24 | export default App; 25 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/App.test.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import App from './App'; 4 | 5 | it('renders without crashing', () => { 6 | const div = document.createElement('div'); 7 | ReactDOM.render(, div); 8 | }); 9 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/common/Footer.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import {render} from 'react-dom'; 3 | 4 | const Footer = () => ( 5 | 26 | ); 27 | 28 | export default Footer; -------------------------------------------------------------------------------- /internal-displacement-web/client/src/common/Header.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Link, IndexLink } from 'react-router'; 3 | import {Navbar, Nav, NavItem} from 'react-bootstrap'; 4 | 5 | const navbarInstance = () => ( 6 | 7 | 8 | 9 | Home 10 | 11 | 12 | 16 | 17 | ); 18 | 19 | export default navbarInstance; 20 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/components/NotFound/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | import styles from './styles.css' 3 | 4 | const NotFound = () => { 5 | return

<div className={styles.root}>Not Found :(</div>

6 | } 7 | 8 | export default NotFound 9 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/components/NotFound/index.spec.js: -------------------------------------------------------------------------------- 1 | import chai, { expect } from 'chai' 2 | import chaiEnzyme from 'chai-enzyme' 3 | import { shallow } from 'enzyme' 4 | import React from 'react' 5 | import sinon from 'sinon' 6 | import sinonChai from 'sinon-chai' 7 | 8 | import NotFound from './' 9 | 10 | chai.use(chaiEnzyme()) 11 | chai.use(sinonChai) 12 | 13 | describe('', () => { 14 | let sut 15 | 16 | beforeEach(() => { 17 | sut = shallow() 18 | }) 19 | 20 | it('should exist', () => { 21 | expect(sut).to.be.present 22 | }) 23 | }) 24 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/components/NotFound/styles.css: -------------------------------------------------------------------------------- 1 | .root { 2 | text-align: center 3 | } 4 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/components/UrlForm/index.js: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/MaVizPageV2.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/containers/MapVizContainer/MaVizPageV2.js -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/MapVizExample.js: -------------------------------------------------------------------------------- 1 | /* global window,document */ 2 | import React, {Component} from 'react'; 3 | import {render} from 'react-dom'; 4 | import MapGL from 'react-map-gl'; 5 | import DeckGLOverlay from './components/mapOverlays/exampleGeojson.js'; 6 | import { MAPBOX_ACCESS_TOKEN } from './constants/mapConstants'; 7 | import customData from './vancouver-blocks.json'; 8 | 9 | import { dummyMapData} from './../../Api/api'; 10 | import {convertArrToGeojsonPoints} from './../../utils/convertDataToGeojson'; 11 | 12 | // Set your mapbox token here 13 | const MAPBOX_TOKEN = MAPBOX_ACCESS_TOKEN; // eslint-disable-line 14 | 15 | const colorScale = r => [r * 255, 140, 200 * (1 - r)]; 16 | 17 | class MapVizPageTest extends Component { 18 | 19 | constructor(props) { 20 | super(props); 21 | this.state = { 22 | viewport: { 23 | ...DeckGLOverlay.defaultViewport, 24 | width: 500, 25 | height: 500 26 | }, 27 | data: null 28 | }; 29 | 30 | 31 | // requestJson('./vancouver-blocks.json', (error, response) => { 32 | // if (!error) { 33 | // this.setState({data: response}); 34 | // } 35 | // }); 36 | } 37 | 38 | componentDidMount() { 39 | window.addEventListener('resize', this._resize.bind(this)); 40 | this._resize(); 41 | let self=this; 42 | if (customData) { 43 | console.log(customData) 44 | let plotData = { 45 | "type": "FeatureCollection", 46 | "features": [] 47 | }; 48 | plotData.features = customData.features.map(d => { 49 | return {"type":"Feature","geometry":{"type":"Point","coordinates": d.geometry.coordinates[0][0]}} 50 | }); 51 | 52 | dummyMapData().then(data => { 53 | 
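// the /api/test endpoint returns the Carto payload as a JSON-encoded string (the server does res.json(body) on the raw body), hence the JSON.parse(data) below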
console.log('data'); 54 | let parsed = JSON.parse(data).rows; 55 | let parsedGeojson = convertArrToGeojsonPoints(parsed, 'long', 'lat') 56 | // this.props.dispatch(loadIDData(data)) 57 | self.setState({data: parsedGeojson}) 58 | }); 59 | this.setState({data: plotData}); 60 | } 61 | } 62 | 63 | _resize() { 64 | this._onChangeViewport({ 65 | width: window.innerWidth, 66 | height: window.innerHeight 67 | }); 68 | } 69 | 70 | _onChangeViewport(viewport) { 71 | this.setState({ 72 | viewport: {...this.state.viewport, ...viewport} 73 | }); 74 | } 75 | 76 | render() { 77 | const {viewport, data} = this.state; 78 | 79 | return ( 80 | 85 | 88 | 89 | ); 90 | } 91 | } 92 | 93 | export default MapVizPageTest 94 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/MapVizPage.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import {render} from 'react-dom'; 3 | 4 | import { dummyMapData, testDB} from './../../Api/api'; 5 | import './mapbox-gl.css'; //importing here since there are issues with webpack building mapbox-gl 6 | import './mapVis.css'; 7 | 8 | import mapboxgl from 'mapbox-gl'; 9 | 10 | import {RenderMap } from './components/map'; 11 | import {loadIDData, updateMap} from './actions'; 12 | import {createStore} from 'react-redux'; 13 | 14 | import "babel-polyfill"; 15 | import MapGL from 'react-map-gl'; 16 | 17 | 18 | import { MAPBOX_ACCESS_TOKEN } from './constants/mapConstants'; 19 | import GeojsonCustomOverlay from './components/mapOverlays/geojsonDataOverlay'; 20 | 21 | class MapVizPage extends Component { 22 | constructor(props) { 23 | super(props); 24 | this.state = { 25 | 26 | data: null, 27 | // mapData: [], 28 | viewport: { 29 | ...GeojsonCustomOverlay.defaultViewport, 30 | startDragLngLat: null, 31 | isDragging: false, 32 | width: window.innerWidth, 33 | height: window.innerHeight, 34 | }, 35 | maxRadius: 20, 36 | radiusAccessor: 'count' 37 | }; 38 | window.addEventListener('resize', () => this.setState({width: window.innerWidth})); 39 | } 40 | 41 | componentDidMount() { 42 | window.addEventListener('resize', this._resize.bind(this)); 43 | console.log(mapboxgl, 'mapbox exists?', window) 44 | this._resize(); 45 | let self = this; 46 | // componentDidMount() { 47 | dummyMapData().then(data => { 48 | console.log('data', self.state, self.setState); 49 | let parsed = JSON.parse(data).rows; 50 | // this.props.dispatch(loadIDData(data)) 51 | self.setState({data: parsed}) 52 | }); 53 | 54 | } 55 | 56 | _resize() { 57 | this._onChangeViewport({ 58 | width: window.innerWidth, 59 | height: window.innerHeight 60 | }); 61 | } 62 | 63 | render() { 64 | const {viewport, data, maxRadius, radiusAccessor} = this.state; 65 | 66 | return ( 67 | 72 | 73 | 79 |
<div className="title-label">Geojson Custom overlay</div> 80 | </MapGL>
81 | ) 82 | 83 | 84 | } 85 | 86 | _onChangeViewport(viewport) { 87 | this.setState({ 88 | viewport: {...this.state.viewport, ...viewport} 89 | }); 90 | } 91 | 92 | } 93 | 94 | MapVizPage.propTypes = { 95 | 96 | }; 97 | MapVizPage.defaultProps = { 98 | 99 | }; 100 | 101 | export default MapVizPage -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/MapVizPage.scatter.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import { dummyMapData, testDB} from './../../Api/api'; 3 | import './mapbox-gl.css'; //importing here since there are issues with webpack building mapbox-gl 4 | import './mapVis.css'; 5 | import {RenderMap } from './components/map'; 6 | import {loadIDData, updateMap} from './actions'; 7 | import {createStore} from 'react-redux'; 8 | import "babel-polyfill"; 9 | import MapGL, {autobind} from 'react-map-gl'; 10 | 11 | import DeckGL, {LineLayer} from 'deck.gl'; 12 | // const store = createStore(mapReducer); 13 | // import MapboxGLMap from 'react-map-gl'; 14 | import { MAPBOX_ACCESS_TOKEN } from './constants/mapConstants'; 15 | import HeatMapOverlayRender from './components/mapOverlays/displacementHeatmapOverlay'; 16 | import {DeckGLOverlay} from './components/mapOverlays/geojsonDataOverlay'; 17 | import ScatterLayer from './components/mapOverlays/scatterplotOverlay'; 18 | class MapVizPageScatter extends Component { 19 | constructor(props) { 20 | super(props); 21 | this.state = { 22 | 23 | data: null, 24 | // mapData: [], 25 | viewport: { 26 | latitude: 0, 27 | longitude: 0, 28 | zoom: 0, 29 | startDragLngLat: null, 30 | isDragging: false, 31 | width: window.innerWidth, 32 | height: window.innerHeight, 33 | } 34 | }; 35 | window.addEventListener('resize', () => this.setState({width: window.innerWidth})); 36 | } 37 | 38 | componentDidMount() { 39 | let self = this; 40 | // componentDidMount() { 41 | dummyMapData().then(data => { 42 | console.log('data', self.state, self.setState); 43 | let parsed = JSON.parse(data).rows; 44 | // this.props.dispatch(loadIDData(data)) 45 | self.setState({data: parsed.map(d => { 46 | return { 47 | position: [d.long, d.lat], 48 | radius: d.count 49 | }})}) 50 | // self.setState({data: parsed}) 51 | }); 52 | 53 | // console.log(RenderMap) 54 | } 55 | 56 | _resize() { 57 | this._onChangeViewport({ 58 | width: window.innerWidth, 59 | height: window.innerHeight 60 | }); 61 | } 62 | 63 | render() { 64 | let mapProps = { 65 | ...this.state.viewport, 66 | // ...this.state.mapData 67 | }; 68 | const {viewport, data} = this.state; 69 | 70 | // return ( 71 | // 76 | // 80 | // 81 | // ); 82 | 83 | return ( 84 | 89 | 93 | 94 | ) 95 | 96 | if ( !this.state.mapData || this.state.mapData.length === 0) { 97 | return ( 98 | 99 | 108 |
<div>Map rendering</div> 109 | </MapGL>
110 | ); 111 | } 112 | 113 | return ( 114 | 115 | 120 | 121 | {HeatMapOverlayRender({...this.state.viewport, mapData: this.state.mapData}) } 122 | 123 | 124 | ); 125 | } 126 | 127 | _onChangeViewport(viewport) { 128 | this.setState({ 129 | viewport: {...this.state.viewport, ...viewport} 130 | }); 131 | } 132 | 133 | } 134 | 135 | 136 | 137 | export default MapVizPageScatter -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/actions/index.js: -------------------------------------------------------------------------------- 1 | 2 | // actions for map 3 | export const updateMap = (mapViewState) => { 4 | return {type: 'UPDATE_MAP', mapViewState}; 5 | }; 6 | 7 | export const loadIDData = (data) => { 8 | return {type: 'LOAD_ID_DATA_SUCCESS', data}; 9 | }; 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/components/map/index.js: -------------------------------------------------------------------------------- 1 | 2 | import React from 'react'; 3 | import MapGL from 'react-map-gl'; 4 | // import MapboxGLMap from 'react-map-gl'; 5 | import { MAPBOX_ACCESS_TOKEN } from './../../constants/mapConstants'; 6 | import HeatMapOverlayRender from './../mapOverlays/displacementHeatmapOverlay'; 7 | 8 | export const renderVisualizationOverlay = (data) => { 9 | // 10 | // const param = { 11 | // props: this.props, 12 | // state: this.state, 13 | // // onWebGLInitialized: this._onWebGLInitialized, 14 | // // effects: this._effects, 15 | // } 16 | 17 | if (data) { 18 | 19 | return ( 20 |
21 | <div> 22 | {HeatMapOverlayRender(data) } 23 | </div> 24 | ) 25 | } else { 26 | return( <div />
) 27 | } 28 | }; 29 | export const RenderMap = (props) => { 30 | console.log(props) 31 | 32 | if ( !props.mapData || props.mapData.length === 0) { 33 | return ( 34 | 35 | 46 |
<div>Map rendering</div> 47 | </MapGL>
48 | ); 49 | } 50 | 51 | return ( 52 | 53 | 64 | {renderVisualizationOverlay(props)} 65 | 66 | // { 75 | // const {latitude, longitude, zoom} = viewport; 76 | // Optionally call `setState` and use the state to update the map. 77 | // }} 78 | // > 79 | // {/*{isActiveOverlay && this._renderVisualizationOverlay()}*/} 80 | // 81 | ); 82 | // return (
<div>Map here!!!</div>
) 83 | }; 84 | 85 | 86 | // export default RenderMap -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/components/map/mapboxTest.js: -------------------------------------------------------------------------------- 1 | 2 | import React, {Component} from 'react'; 3 | import {render} from 'react-dom'; 4 | import mapboxgl from 'mapbox-gl'; 5 | import * as d3 from 'd3'; 6 | import { MAPBOX_ACCESS_TOKEN } from './../../constants/mapConstants'; 7 | 8 | export const renderMap = (data, containerID = 'map', centerLat = 0, centerLng = 0, zoom=0, maxCount, minCount, maxRadius, minRadius) => { 9 | mapboxgl.accessToken = MAPBOX_ACCESS_TOKEN; 10 | let map = new mapboxgl.Map({ 11 | container: containerID, 12 | style: 'mapbox://styles/mapbox/light-v9', 13 | center: [centerLng, centerLat], 14 | zoom: zoom 15 | }); 16 | 17 | map.on('load', () => { 18 | map.addSource('idData', { 19 | 'type': 'geojson', 20 | 'data': data 21 | }); 22 | 23 | map.addLayer({ 24 | 'id': 'idData-circles', 25 | 'type': 'circle', 26 | 'source': 'idData', 27 | 'paint': { 28 | 'circle-color': { 29 | property: 'mag', 30 | stops: [ 31 | [6, '#FCA107'], 32 | [8, '#7F3121'] 33 | ] 34 | }, 35 | 'circle-opacity': 0.75, 36 | 'circle-radius': { 37 | property: 'radius', 38 | "type": "exponential", 39 | "stops": [ 40 | [{ "zoom": 0, "value": 1 }, 10], 41 | [{ "zoom": 0, "value": 10 }, 50], 42 | [{ "zoom": 0, "value": 100 }, 100], 43 | [{ "zoom": 5, "value": 1 }, 20], 44 | [{ "zoom": 5, "value": 10 }, 60], 45 | [{ "zoom": 5, "value": 100 }, 110], 46 | [{ "zoom": 10, "value": 1 }, 30], 47 | [{ "zoom": 10, "value": 10 }, 70], 48 | [{ "zoom": 10, "value": 100 }, 120], 49 | [{ "zoom": 15, "value": 1 }, 40], 50 | [{ "zoom": 15, "value": 10 }, 80], 51 | [{ "zoom": 15, "value": 100 }, 130], 52 | [{ "zoom": 20, "value": 1 }, 50], 53 | [{ "zoom": 20, "value": 10 }, 90], 54 | [{ "zoom": 20, "value": 100 }, 140] 55 | ] 56 | } 57 | } 58 | }); 59 | }); 60 | }; 61 | export const renderVisualizationOverlay = (data) => { 62 | // 63 | // const param = { 64 | // props: this.props, 65 | // state: this.state, 66 | // // onWebGLInitialized: this._onWebGLInitialized, 67 | // // effects: this._effects, 68 | // } 69 | 70 | if (data) { 71 | 72 | return ( 73 |
74 | 75 |
76 | ) 77 | } else { 78 | return( <div />
) 79 | } 80 | }; 81 | export const RenderMap = (props) => { 82 | console.log(props) 83 | 84 | if ( !props.mapData || props.mapData.length === 0) { 85 | return ( 86 | 87 | 98 |
<div>Map rendering</div> 99 | </MapGL>
100 | ); 101 | } 102 | 103 | return ( 104 | 105 | 116 | {renderVisualizationOverlay(props)} 117 | 118 | // { 127 | // const {latitude, longitude, zoom} = viewport; 128 | // Optionally call `setState` and use the state to update the map. 129 | // }} 130 | // > 131 | // {/*{isActiveOverlay && this._renderVisualizationOverlay()}*/} 132 | // 133 | ); 134 | // return (
<div>Map here!!!</div>
) 135 | }; 136 | 137 | 138 | // export default RenderMap -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/customScatterOverlay.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import DeckGL, {ScatterplotLayer} from 'deck.gl'; 3 | 4 | // export default ScaledScatterplotLayer extends ScatterplotLayer { 5 | // 6 | // } -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/displacementHeatmapOverlay.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import Immutable from 'immutable'; 3 | // import DeckGL from 'deck.gl'; 4 | // import HeatmapOverlay from 'react-map-gl-heatmap-overlay'; 5 | import {ScatterplotOverlay} from 'react-map-gl'; 6 | 7 | 8 | 9 | const HeatMapOverlayRender = (param) => { 10 | console.log('heatmapoverlay render', param.mapData.map(d => [d.long, d.lat])) 11 | const idData = Immutable.fromJS(param.mapData); 12 | // const idData = Immutable.fromJS(param.mapData.map(d => [d.long, d.lat])); 13 | const width = param.width; 14 | const height = param.height; 15 | const zoom = param.zoom || 0; 16 | // const { width, height, mapViewState } = param; 17 | return ( 18 | // [data.long, data.lat]} 24 | // /> 25 | [data.get('long'), data.get('lat')]} 40 | /> 41 | ) 42 | 43 | 44 | }; 45 | 46 | export default HeatMapOverlayRender; -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/exampleGeojson.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | 3 | import DeckGL, {GeoJsonLayer} from 'deck.gl'; 4 | 5 | const LIGHT_SETTINGS = { 6 | lightsPosition: [-125, 50.5, 5000, -122.8, 48.5, 8000], 7 | ambientRatio: 0.2, 8 | diffuseRatio: 0.5, 9 | specularRatio: 0.3, 10 | lightsStrength: [1.0, 0.0, 2.0, 0.0], 11 | numberOfLights: 2 12 | }; 13 | 14 | export default class DeckGLOverlay extends Component { 15 | 16 | static get defaultViewport() { 17 | return { 18 | latitude: 49.254, 19 | longitude: -123.13, 20 | zoom: 11, 21 | maxZoom: 16, 22 | pitch: 0, 23 | bearing: 0 24 | }; 25 | } 26 | 27 | _initialize(gl) { 28 | gl.enable(gl.DEPTH_TEST); 29 | gl.depthFunc(gl.LEQUAL); 30 | } 31 | 32 | render() { 33 | const {viewport, data, colorScale} = this.props; 34 | 35 | if (!data) { 36 | return null; 37 | } 38 | 39 | const layer = new GeoJsonLayer({ 40 | id: 'geojson', 41 | data, 42 | opacity: 0.8, 43 | stroked: false, 44 | filled: true, 45 | extruded: true, 46 | wireframe: true, 47 | fp64: true, 48 | //getElevation: f => Math.sqrt(f.properties.valuePerSqm) * 10, 49 | // getFillColor: f => colorScale(f.properties.growth), 50 | // getLineColor: f => [255, 255, 255], 51 | getRadius: d => 2050, 52 | getFillColor: d => [31, 186, 214, 100], 53 | lightSettings: LIGHT_SETTINGS, 54 | pickable: Boolean(this.props.onHover), 55 | onHover: this.props.onHover 56 | }); 57 | 58 | return ( 59 | 60 | ); 61 | } 62 | } -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/geojsonDataOverlay.js: 
-------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import 'babel-polyfill'; 3 | import DeckGL, {GeoJsonLayer} from 'deck.gl'; 4 | import * as d3 from 'd3'; 5 | import {convertArrToGeojsonPoints} from './../../../../utils/convertDataToGeojson'; 6 | 7 | export default class GeojsonCustomOverlay extends Component { 8 | 9 | static get defaultViewport() { 10 | return { 11 | latitude: 0, 12 | longitude: 0, 13 | zoom: 5, 14 | maxZoom: 16, 15 | pitch: 45, 16 | bearing: 0 17 | }; 18 | } 19 | 20 | _initialize(gl) { 21 | gl.enable(gl.DEPTH_TEST); 22 | gl.depthFunc(gl.LEQUAL); 23 | } 24 | 25 | _getRadiusScale(maxRadius, maxDataValue) { 26 | return d3.scaleSqrt().domain([0, maxDataValue]).range([20, maxRadius]) 27 | } 28 | 29 | render() { 30 | const {viewport, data, maxRadius, radiusAccessor} = this.props; 31 | 32 | if (!data) { 33 | return null; 34 | } 35 | 36 | console.log('rendering data') 37 | 38 | let maxRadiusData = d3.max(data, d => d[radiusAccessor]); 39 | let radiusScale = this._getRadiusScale(maxRadius, 50); 40 | // let radiusScale = this._getRadiusScale(maxRadius, maxRadiusData); 41 | data.forEach(d => { 42 | d.radius = 50,//radiusScale(d.count); 43 | d.color = [31, 186, 214, 255] 44 | }); 45 | let geojsonMapData = convertArrToGeojsonPoints(data, 'long', 'lat'); 46 | 47 | let testData = { 48 | "type": "FeatureCollection", 49 | "features": geojsonMapData.features.slice(0,5) 50 | }; 51 | 52 | console.log('geojson', geojsonMapData, JSON.stringify(testData)); 53 | 54 | const layer = new GeoJsonLayer({ 55 | id: 'geojson', 56 | data: geojsonMapData, 57 | opacity: 0.8, 58 | visible: true, 59 | // stroked: false, 60 | filled: true, 61 | getRadius: d => d.properties.radius, 62 | getFillColor: d => [31, 186, 214, 100], 63 | // pickable: true, 64 | // onHover: () => {console.log('on hohver')}//this.props.onHover 65 | }); 66 | 67 | return ( 68 | 69 | ); 70 | } 71 | } -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/scatterplotOverlay.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import DeckGL, {ScatterplotLayer} from 'deck.gl'; 3 | 4 | export default class ScatterLayer extends Component { 5 | 6 | static get defaultViewport() { 7 | return { 8 | longitude: -74, 9 | latitude: 40.7, 10 | zoom: 11, 11 | maxZoom: 16, 12 | pitch: 0, 13 | bearing: 0 14 | }; 15 | } 16 | 17 | _initialize(gl) { 18 | gl.enable(gl.DEPTH_TEST); 19 | gl.depthFunc(gl.LEQUAL); 20 | } 21 | 22 | render() { 23 | console.log('rendering', DeckGL) 24 | const {viewport, data, radius} = this.props; 25 | 26 | if (!data) { 27 | return null; 28 | } 29 | 30 | console.log('layer', data) 31 | const layer = new ScatterplotLayer({ 32 | id: 'scatter-plot', 33 | data, 34 | pickable: true, 35 | //radiusScale: radius, 36 | radiusMinPixels: 2, 37 | radiusMaxPixels: 280, 38 | // radiusMinPixels 39 | getPosition: d => d.position, 40 | getRadius: d => d.radius, 41 | getColor: d => [0,0,0,100] 42 | }); 43 | 44 | return ( 45 | 46 | ); 47 | } 48 | } -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/constants/actionTypes.js: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/containers/MapVizContainer/constants/actionTypes.js -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/constants/mapConstants.js: -------------------------------------------------------------------------------- 1 | export const MAPBOX_ACCESS_TOKEN = 'pk.eyJ1Ijoid3d5bWFrIiwiYSI6IkxEbENMZzgifQ.pxk3bdzd7n8h4pKzc9zozw'; 2 | 3 | 4 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/mapVis.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; 3 | font-size: 16px; 4 | margin:0; 5 | padding:0; 6 | overflow:hidden; 7 | } 8 | 9 | .mapboxgl-canvas { 10 | position: absolute; 11 | left: 0; 12 | top: 0; 13 | } 14 | 15 | .overlay-contol-container { 16 | position: absolute; 17 | bottom: 0; 18 | padding-bottom: 10px; 19 | padding-left: 10px; 20 | padding-top: 10px; 21 | z-index: 99; 22 | width: 250px; 23 | background-color: rgba(0,0,0, 0.2); 24 | } 25 | 26 | #overlay-control { 27 | display: inline-block; 28 | } 29 | 30 | #overlay-map-control { 31 | display: inline-block; 32 | } 33 | 34 | .title-label { 35 | width: 300px; 36 | color: white; 37 | } -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/reducers/initialState.js: -------------------------------------------------------------------------------- 1 | const INITIAL_STATE = { 2 | mapViewState: { 3 | latitude: 0, 4 | longitude: 0, 5 | zoom: 2, 6 | pitch: 0, 7 | bearing: 0 8 | }, 9 | displacementData: null, //location data of displacement reports, 10 | 11 | 12 | }; 13 | 14 | export default INITIAL_STATE 15 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/reducers/mapReducers.js: -------------------------------------------------------------------------------- 1 | import initialState from './initialState'; 2 | 3 | export default function (state = initialState, action) { 4 | switch (action.type) { 5 | case 'UPDATE_MAP': 6 | return {...state, mapViewState: action.mapViewState}; 7 | case 'LOAD_ID_DATA_SUCCESS': 8 | return { ...state, displacementData: action.displacementData }; 9 | default: 10 | return state; 11 | } 12 | } -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/sagas/index.js: -------------------------------------------------------------------------------- 1 | // import { fork } from 'redux-saga/effects'; 2 | // import watchMapData from './watchers'; 3 | // 4 | // // Here, we register our watcher saga(s) and export as a single generator 5 | // // function (startForeman) as our root Saga. 
6 | // export default function* startForman() { 7 | // yield fork(watchMapData); 8 | // } -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/sagas/mapDataSaga.js: -------------------------------------------------------------------------------- 1 | // import { put, call } from 'redux-saga/effects'; 2 | // import { dummyMapData } from './../../../Api/api'; 3 | // 4 | // export function* mapDataSaga({ payload }) { 5 | // try { 6 | // const mapData = yield call(dummyMapData, payload); 7 | // yield [ 8 | // put({ type: 'LOAD_ID_DATA_SUCCESS', mapData }) 9 | // ]; 10 | // } catch (error) { 11 | // yield put({ type: 'LOAD_MAPDATA_ERROR', error }); 12 | // } 13 | // } -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/sagas/watchers.js: -------------------------------------------------------------------------------- 1 | // import { takeLatest } from 'redux-saga/effects'; 2 | // import { mapDataSaga } from './mapDataSaga'; 3 | // 4 | // // Watches for LOAD_ID_DATA action type asynchronously 5 | // export default function* watchMapData() { 6 | // yield takeLatest('LOAD_ID_DATA_SUCCESS', mapDataSaga); 7 | // } -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/MapVizContainer/store/configureStore.js: -------------------------------------------------------------------------------- 1 | // import { createStore, applyMiddleware } from 'redux'; 2 | // import createSagaMiddleware from 'redux-saga'; 3 | // import reducer from './../reducers/mapReducers'; 4 | // import rootSaga from './../sagas'; // TODO: Next step 5 | // 6 | // // Returns the store instance 7 | // // It can also take initialState argument when provided 8 | // const configureStore = () => { 9 | // const sagaMiddleware = createSagaMiddleware(); 10 | // return { 11 | // ...createStore(reducer, 12 | // applyMiddleware(sagaMiddleware)), 13 | // runSaga: sagaMiddleware.run(rootSaga) 14 | // }; 15 | // }; 16 | // 17 | // export default configureStore; -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/app.js: -------------------------------------------------------------------------------- 1 | import React, { Component, PropTypes } from 'react'; 2 | import {createStore} from 'redux'; 3 | import {Provider, connect} from 'react-redux'; 4 | 5 | import Header from '../common/Header'; 6 | const propTypes = { 7 | children: PropTypes.element.isRequired, 8 | }; 9 | 10 | export default class App extends React.Component { 11 | render() { 12 | return ( 13 |
<div> 14 | {this.props.children} 15 | </div>
16 | ) 17 | } 18 | } 19 | 20 | App.propTypes = propTypes; 21 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/containers/home.js: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | 3 | const Home = () => { 4 | return

<div>Home...nothing here yet</div>

5 | 6 | } 7 | 8 | export default Home 9 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/index.css: -------------------------------------------------------------------------------- 1 | /*body {*/ 2 | /*margin: 0;*/ 3 | /*padding: 0;*/ 4 | /*font-family: sans-serif;*/ 5 | /*}*/ 6 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import { Router, browserHistory } from 'react-router'; 4 | 5 | import routes from './routes'; 6 | import Bootstrap from 'bootstrap/dist/css/bootstrap.css'; 7 | import './themeCss/css/main.css'; 8 | import 'bootstrap/dist/css/bootstrap-theme.css'; 9 | import { Provider } from 'react-redux'; 10 | import {createStore} from 'redux'; 11 | import mapReducer from './containers/MapVizContainer/reducers/mapReducers'; 12 | import Layout from './layout'; 13 | 14 | 15 | const store = createStore(mapReducer); 16 | 17 | // We require the routes and render to the DOM using ReactDOM API 18 | ReactDOM.render( 19 | 20 | 21 | 22 | 23 | , 24 | document.getElementById('root') 25 | 26 | ); 27 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/layout.js: -------------------------------------------------------------------------------- 1 | import Header from './common/Header'; 2 | import Footer from './common/Footer'; 3 | import React, {Component} from 'react'; 4 | 5 | class Layout extends Component { 6 | render() { 7 | return ( 8 |
<div> 9 | {this.props.children} 10 | <Footer /> 11 | </div>
12 | ) 13 | } 14 | }; 15 | 16 | export default Layout; -------------------------------------------------------------------------------- /internal-displacement-web/client/src/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/reducers/index.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/reducers/index.js -------------------------------------------------------------------------------- /internal-displacement-web/client/src/routes.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import {Route, IndexRoute }from 'react-router'; 3 | import App from './containers/app'; 4 | import HomePage from './components/HomePage'; 5 | import MapVizPage from './containers/MapVizContainer/MapVizPage'; 6 | import MapVizPageTest from './containers/MapVizContainer/MapVizExample'; 7 | import MapVizPageScatter from './containers/MapVizContainer/MapVizPage.scatter'; 8 | 9 | let routes = ( 10 | 11 | 12 | 13 | 14 | 15 | 16 | ); 17 | 18 | 19 | export default ( 20 | 21 | 22 | 23 | 24 | 25 | 26 | ) -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/css/ie8.css: -------------------------------------------------------------------------------- 1 | /* 2 | Spectral by HTML5 UP 3 | html5up.net | @n33co 4 | Free for personal and commercial use under the CCA 3.0 license (html5up.net/license) 5 | */ 6 | /* Icon */ 7 | .icon.major { 8 | border: none; } 9 | .icon.major:before { 10 | font-size: 3em; } 11 | 12 | /* Form */ 13 | label { 14 | color: #2E3842; } 15 | 16 | input[type="text"], 17 | input[type="password"], 18 | input[type="email"], 19 | select, 20 | textarea { 21 | border: solid 1px #dfdfdf; } 22 | 23 | /* Button */ 24 | input[type="submit"], 25 | input[type="reset"], 26 | input[type="button"], 27 | button, 28 | .button { 29 | border: solid 2px #dfdfdf; } 30 | input[type="submit"].special, 31 | input[type="reset"].special, 32 | input[type="button"].special, 33 | button.special, 34 | .button.special { 35 | border: 0 !important; } 36 | 37 | /* Page Wrapper + Menu */ 38 | #menu { 39 | display: none; } 40 | 41 | body.is-menu-visible #menu { 42 | display: block; } 43 | 44 | /* Header */ 45 | #header nav > ul > li > a.menuToggle:after { 46 | display: none; } 47 | 48 | /* Banner + Wrapper (style4) */ 49 | #banner, 50 | .wrapper.style4 { 51 | -ms-behavior: url("js/ie/backgroundsize.min.htc"); } 52 | #banner:before, 53 | .wrapper.style4:before { 54 | display: none; } 55 | 56 | /* Banner */ 57 | #banner .more { 58 | height: 4em; } 59 | #banner .more:after { 60 | display: none; } 61 | 62 | /* Main */ 63 | #main > header { 64 | -ms-behavior: url("js/ie/backgroundsize.min.htc"); } 65 | #main > header:before { 66 | display: none; } 67 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/css/ie9.css: -------------------------------------------------------------------------------- 1 | /* 2 | Spectral by HTML5 UP 3 | html5up.net | @n33co 4 | Free for personal and commercial use under the CCA 3.0 license (html5up.net/license) 5 | */ 6 | /* Spotlight */ 
7 | .spotlight { 8 | display: block; } 9 | .spotlight .image { 10 | display: inline-block; 11 | vertical-align: top; } 12 | .spotlight .content { 13 | padding: 4em 4em 2em 4em ; 14 | display: inline-block; } 15 | .spotlight:after { 16 | clear: both; 17 | content: ''; 18 | display: block; } 19 | 20 | /* Features */ 21 | .features { 22 | display: block; } 23 | .features li { 24 | float: left; } 25 | .features:after { 26 | content: ''; 27 | display: block; 28 | clear: both; } 29 | 30 | /* Banner + Wrapper (style4) */ 31 | #banner, 32 | .wrapper.style4 { 33 | background-image: url("../../images/banner.jpg"); 34 | background-position: center center; 35 | background-repeat: no-repeat; 36 | background-size: cover; 37 | position: relative; } 38 | #banner:before, 39 | .wrapper.style4:before { 40 | background: #000000; 41 | content: ''; 42 | height: 100%; 43 | left: 0; 44 | opacity: 0.5; 45 | position: absolute; 46 | top: 0; 47 | width: 100%; } 48 | #banner .inner, 49 | .wrapper.style4 .inner { 50 | position: relative; 51 | z-index: 1; } 52 | 53 | /* Banner */ 54 | #banner { 55 | padding: 14em 0 12em 0 ; 56 | height: auto; } 57 | #banner:after { 58 | display: none; } 59 | 60 | /* CTA */ 61 | #cta .inner header { 62 | float: left; } 63 | #cta .inner .actions { 64 | float: left; } 65 | #cta .inner:after { 66 | clear: both; 67 | content: ''; 68 | display: block; } 69 | 70 | /* Main */ 71 | #main > header { 72 | background-image: url("../../images/banner.jpg"); 73 | background-position: center center; 74 | background-repeat: no-repeat; 75 | background-size: cover; 76 | position: relative; } 77 | #main > header:before { 78 | background: #000000; 79 | content: ''; 80 | height: 100%; 81 | left: 0; 82 | opacity: 0.5; 83 | position: absolute; 84 | top: 0; 85 | width: 100%; } 86 | #main > header > * { 87 | position: relative; 88 | z-index: 1; } 89 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/css/images/arrow.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/css/images/banner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/css/images/banner.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/css/images/bars.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/css/images/close.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/FontAwesome.otf -------------------------------------------------------------------------------- 
/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/Screen Shot 2017-04-27 at 15.11.09 copy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/Screen Shot 2017-04-27 at 15.11.09 copy.png -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/banner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/banner.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/banner1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/banner1.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/d4d-logo-meetup-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/d4d-logo-meetup-banner.png -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/no02.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/no02.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/no03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/no03.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/pic01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic01.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/pic02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic02.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/pic03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic03.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/pic04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic04.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/themeCss/images/pic05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic05.jpg -------------------------------------------------------------------------------- /internal-displacement-web/client/src/utils/convertDataToGeojson.js: -------------------------------------------------------------------------------- 1 | export const convertArrToGeojsonPoints = (dataArr, lngAccessor, latAccessor) => { 2 | let outFeatures = dataArr.map(d => { 3 | return { 4 | type: 'Feature', 5 | // properties: d, 6 | geometry: {"type":"Point","coordinates":[d[lngAccessor], d[latAccessor]]} 7 | } 8 | }); 9 | 10 | return { 11 | "type":"FeatureCollection", 12 | "features":outFeatures 13 | } 14 | 15 | }; 16 | -------------------------------------------------------------------------------- /internal-displacement-web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "id-web", 3 | "version": "1.0.0", 4 | "description": "", 5 
| "private": true, 6 | "dependencies": { 7 | "babel-cli": "^6.24.0", 8 | "babel-core": "^6.24.0" 9 | 10 | }, 11 | "scripts": { 12 | "start": "concurrently \"nodemon server\" \"cd client && npm start\"", 13 | "start-localDB": "NODE_DB=LOCAL concurrently \"nodemon server\" \"cd client && npm start\"" 14 | }, 15 | "devDependencies": { 16 | "concurrently": "^3.4.0", 17 | "nodemon": "latest" 18 | }, 19 | "author": "", 20 | "license": "ISC" 21 | } 22 | -------------------------------------------------------------------------------- /internal-displacement-web/server/api/reportLocationRequest.js: -------------------------------------------------------------------------------- 1 | const db = require('./../pgDB'); 2 | 3 | module.exports = function (req, res) { 4 | db.any("select * from ( (select * from report_location LEFT JOIN location on report_location.location = location.id) t1 inner join (select id, quantity from report where quantity is not null) t2 on t1.report= t2.id) t3 where t3.latlong is not null", [true]) 5 | .then(data => { 6 | console.log(data); 7 | //todo need to check if data needs JSON.stringify 8 | res.json(data); 9 | }) 10 | .catch(error => { 11 | console.log(error) 12 | res.status(500).json({error: error, message: 'query error'}); 13 | }); 14 | }; 15 | -------------------------------------------------------------------------------- /internal-displacement-web/server/api/sampleArticleRequest.js: -------------------------------------------------------------------------------- 1 | const db = require('./../pgDB'); 2 | 3 | module.exports = function (req, res) { 4 | db.any("select * from article limit 1", [true]) 5 | .then(data => { 6 | console.log(data); 7 | //todo need to check if data needs JSON.stringify 8 | res.json(data); 9 | }) 10 | .catch(error => { 11 | console.log(error) 12 | res.status(500).json({error: error, message: 'query error'}); 13 | }); 14 | }; -------------------------------------------------------------------------------- /internal-displacement-web/server/api/test.js: -------------------------------------------------------------------------------- 1 | const request = require('request'); 2 | module.exports = function (req, res) { 3 | console.log('trying ot get test data'); 4 | const dummyMapUrl = 'https://jamesleondufour.carto.com/api/v2/sql?q=select%20count,%20long,%20lat,%20date%20from%20public.gdelt_refugee_2016'; 5 | request.get(dummyMapUrl, (err, resp, body) =>{ 6 | if(err) { 7 | console.log(err); 8 | resp.status(500).json({error: 'internal error'}) 9 | return 10 | } 11 | 12 | if (resp.statusCode == 200) { 13 | res.json(body); 14 | } else { 15 | res.status(404).json({error: 'not foubd'}); 16 | } 17 | }); 18 | 19 | }; 20 | 21 | -------------------------------------------------------------------------------- /internal-displacement-web/server/index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const express = require("express"); 4 | const http = require('http'); 5 | const env = process.env.NODE_ENV || 'development'; 6 | // Setup server 7 | const app = express(); 8 | const server = http.createServer(app); 9 | require('./routes')(app); 10 | 11 | app.set('port', (process.env.PORT || 3322)); 12 | 13 | if (process.env.NODE_ENV === 'production') { 14 | app.use(express.static('client/build')); 15 | } 16 | 17 | function startServer() { 18 | server.listen(app.get('port'), () => { 19 | console.log('Express server listening on %d, in %s mode', app.get('port'), env); 20 | }); 21 | } 22 | 23 | 
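// defer the listen() call to the next event-loop turn, so all synchronous module setup (route registration, port config) has completed first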
setImmediate(startServer); 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /internal-displacement-web/server/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "id-web-server", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "body-parser": "^1.17.1", 13 | "express": "^4.15.2", 14 | "pg": "^6.1.5", 15 | "pg-promise": "^5.6.4", 16 | "request": "^2.81.0" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /internal-displacement-web/server/pgDB/index.js: -------------------------------------------------------------------------------- 1 | const pgp = require('pg-promise')(); 2 | let connectionObj; 3 | //if not using docker 4 | //create a pgConfig.js file in the same directory and put your credentials there 5 | if (process.env.NODE_DB === 'LOCAL') { 6 | connectionObj = require('./pgConfig'); 7 | } else { 8 | connectionObj = { 9 | user: process.env.DB_USER, 10 | database: process.env.DB_NAME, 11 | password: process.env.DB_PASS, 12 | host: process.env.DB_HOST 13 | }; 14 | } 15 | 16 | //export db instance to be shared 17 | module.exports = pgp(connectionObj); -------------------------------------------------------------------------------- /internal-displacement-web/server/routes.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | /** 4 | * Main application routes 5 | */ 6 | 7 | const path = require('path'); 8 | const bodyParser = require('body-parser'); 9 | 10 | module.exports = function (app) { 11 | 12 | app.use(bodyParser.json()); // for parsing application/json 13 | app.use(bodyParser.urlencoded({ extended: true })); // for parsing application/x-www-form-urlencoded 14 | 15 | // Insert routes below 16 | app.use('/api/test', require('./api/test')); 17 | app.use('/api/testDB', require('./api/sampleArticleRequest')); 18 | app.use('/api/report-location-data', require('./api/reportLocationRequest')); 19 | 20 | // All other routes should redirect to the index.html 21 | app.route('/') 22 | .get((req, res) => { 23 | res.sendFile(path.resolve(app.get('appPath') + '/index.html')); 24 | }); 25 | 26 | app.use(function (req, res, next) { 27 | res.setHeader('Access-Control-Allow-Origin', '*'); 28 | res.setHeader('Access-Control-Allow-Methods', 'GET, POST'); 29 | res.setHeader('Access-Control-Allow-Headers', 'X-Requested-With,content-type, Authorization'); 30 | next(); 31 | }); 32 | }; 33 | -------------------------------------------------------------------------------- /internal-displacement-web/src/db.js: -------------------------------------------------------------------------------- 1 | var pg = require('pg'); 2 | 3 | // create a config to configure both pooling behavior 4 | // and client options 5 | // note: these environment variables are passed into the nodejs Docker container from docker.env 6 | var config = { 7 | user: process.env.DB_USER, 8 | database: process.env.DB_NAME, 9 | password: process.env.DB_PASS, 10 | host: process.env.DB_HOST, 11 | max: 10, // max number of clients in the pool 12 | idleTimeoutMillis: 30000, // how long a client is allowed to remain idle before being closed 13 | }; 14 | 15 | 16 | //this initializes a connection pool 17 | //it will keep idle connections open for 30 seconds 18 | 
//and set a limit of maximum 10 idle clients
19 | var pool = new pg.Pool(config);
20 |
21 | // to run a query we can acquire a client from the pool,
22 | // run a query on the client, and then return the client to the pool
23 | pool.connect(function(err, client, done) {
24 |   if(err) {
25 |     return console.error('error fetching client from pool', err);
26 |   }
27 |   console.log('connected');
28 |   client.query('SELECT $1::int AS number', ['1'], function(err, result) {
29 |     //call `done(err)` to release the client back to the pool (or destroy it if there is an error)
30 |     done(err);
31 |
32 |     if(err) {
33 |       return console.error('error running query', err);
34 |     }
35 |     console.log(result.rows[0].number);
36 |     //output: 1
37 |   });
38 | });
39 |
40 | pool.on('error', function (err, client) {
41 |   // if an error is encountered by a client while it sits idle in the pool
42 |   // the pool itself will emit an error event with both the error and
43 |   // the client which emitted the original error
44 |   // this is a rare occurrence but can happen if there is a network partition
45 |   // between your application and the database, the database restarts, etc.
46 |   // and so you might want to handle it and at least log it out
47 |   console.error('idle client error', err.message, err.stack);
48 | });
-------------------------------------------------------------------------------- /internal_displacement/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal_displacement/__init__.py -------------------------------------------------------------------------------- /internal_displacement/add_countries.py: --------------------------------------------------------------------------------
1 | from internal_displacement.model.model import Country, CountryTerm, Session
2 | import pycountry
3 | from sqlalchemy import create_engine
4 | import sqlalchemy
5 |
6 |
7 | def load_countries(session):
8 |
9 |     for c in pycountry.countries:
10 |         country = Country(code=c.alpha_3)
11 |         session.add(country)
12 |         session.commit()
13 |         country_name = CountryTerm(term=c.name, country=country)
14 |         session.add(country_name)
15 |         session.commit()
16 |         try:
17 |             off_name = c.official_name
18 |             if off_name != c.name:
19 |                 official_name = CountryTerm(
20 |                     term=c.official_name, country=country)
21 |                 session.add(official_name)
22 |                 session.commit()
23 |         except AttributeError:
24 |             pass
25 |         except sqlalchemy.exc.IntegrityError:
26 |             # roll back the failed insert so the session remains usable
27 |             session.rollback()
28 |         try:
29 |             common_name = CountryTerm(term=c.common_name, country=country)
30 |             session.add(common_name)
31 |             session.commit()
32 |         except AttributeError:
33 |             pass
34 |         except sqlalchemy.exc.IntegrityError:
35 |             session.rollback()
36 |     session.commit()
37 |
38 |
39 | def delete_countries(session):
40 |
41 |     session.execute("TRUNCATE TABLE country CASCADE;")
42 |     session.commit()
43 |
-------------------------------------------------------------------------------- /internal_displacement/article.py: --------------------------------------------------------------------------------
1 | import datetime
2 |
3 |
4 | def date_time_converter(dt):
5 |     if isinstance(dt, datetime.datetime):
6 |         return str(dt)
7 |     else:
8 |         return "Invalid datetime"
9 |         # raise ValueError("{} is not a valid datetime object")
10 |
11 | def span_overlap(span1, span2):
12 |     '''Return True if two token spans share any positions, else False.'''
13 |     set1 = set(span1)
14 |     return len(set1.intersection(span2)) > 0
15 |
16 |
17 | class Article(object):
18 |     """Contains article text, date, extracted information and tags
19 |     Parameters
20 |     ----------
21 |     content: the text from the article:String
22 |     publication_date: the date of publication:datetime.datetime
23 |     title: the title:String
24 |     authors: the authors:list[String]
25 |     domain: the domain:String
26 |     content_type: the type of content (text,image,video etc):String
27 |     url: the url of the article:String
28 |     language: the two-letter language code of the article:String
29 |         see https://cloud.google.com/translate/docs/languages
30 |     country_codes: a list of ISO 3166 country codes:List
31 |     reports: a list of extracted reports
32 |     relevance: relevance of article to IDPs:Boolean
33 |
34 |     """
35 |
36 |     def __init__(self, content, pub_date, title, content_type, authors, domain, url, language="EN", country_codes=None, reports=None, relevance=False):
37 |         self.content = content
38 |         self.publication_date = pub_date
39 |         self.title = title
40 |         self.authors = authors
41 |         self.domain = domain
42 |         self.content_type = content_type
43 |         self.url = url
44 |         self.language = language
45 |         # use fresh lists instead of shared mutable defaults, and make sure
46 |         # reports/country_codes are actually stored on the instance
47 |         self.country_codes = country_codes if country_codes is not None else []
48 |         self.reports = reports if reports is not None else []
49 |         self.relevance = relevance
50 |
51 |     def change_language(self, language):
52 |         self.language = language
53 |
54 |     def get_unique_tag_spans(self):
55 |         '''Get a list of unique token spans
56 |         for visualizing a complete article along
57 |         with all extracted facts.
58 |         Each extracted report has its own list of spans
59 |         which may in some cases overlap, particularly
60 |         for date and location tags.
61 |         '''
62 |         ### need to deal with overlapping spans
63 |         all_spans = []
64 |         for report in self.reports:
65 |             all_spans.extend(report.tag_spans)
66 |         unique_spans = list({v['start']: v for v in all_spans}.values())
67 |         unique_spans = sorted(unique_spans, key=lambda k: k['start'])
68 |         ### Merge any overlapping spans
69 |         non_overlapping_spans = []
70 |         current_end = -1
71 |         for span in unique_spans:
72 |             if span['start'] > current_end:
73 |                 non_overlapping_spans.append(span)
74 |                 current_end = span['end']
75 |             else:
76 |                 # Create a new merged span and add it to the end of the result
77 |                 current_last_span = non_overlapping_spans[-1]
78 |                 new_span = {}
79 |                 new_span['type'] = ", ".join([current_last_span['type'], span['type']])
80 |                 new_span['start'] = current_last_span['start']
81 |                 new_span['end'] = max(current_last_span['end'], span['end'])
82 |                 non_overlapping_spans[-1] = new_span
83 |                 current_end = new_span['end']
84 |
85 |         return non_overlapping_spans
86 |
87 |     def tag(self, tag):
88 |         """Use interpreter to tag article
89 |         """
90 |         # store under a separate attribute so this method is not shadowed
91 |         self.tag_ = tag
92 |
93 |     def parse(self):
94 |         """Use interpreter to parse article
95 |         """
96 |         pass
97 |
98 |     def get_pub_date_string(self):
99 |         return date_time_converter(self.publication_date)
100 |
-------------------------------------------------------------------------------- /internal_displacement/classifiers/default_encoder.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal_displacement/classifiers/default_encoder.pkl -------------------------------------------------------------------------------- /internal_displacement/classifiers/readme.txt: --------------------------------------------------------------------------------
1 | Directory for pre-trained classification models.
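A minimal sketch of how a pickled model from this directory might be loaded and applied (an illustration only: it assumes the .pkl files are ordinary pickles of scikit-learn-style objects; model, features and the inverse_transform call are hypothetical):

import pickle

# load the label encoder shipped in this directory
with open('internal_displacement/classifiers/default_encoder.pkl', 'rb') as f:
    encoder = pickle.load(f)

# a trained classifier saved alongside it would be loaded the same way, then:
# labels = encoder.inverse_transform(model.predict(features))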
-------------------------------------------------------------------------------- /internal_displacement/extracted_report.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import re 3 | from spacy.tokens import Token, Span 4 | from datetime import datetime 5 | 6 | 7 | def convert_tokens_to_strings(value): 8 | if isinstance(value, Token): 9 | return value.text 10 | if isinstance(value, Span): 11 | return value.text 12 | else: 13 | return str(value) 14 | 15 | 16 | def convert_quantity(value): 17 | '''Convert an extracted quantity to an integer. 18 | Solution forked from 19 | https://github.com/ghewgill/text2num/blob/master/text2num.py 20 | and enhanced with numerical and array input 21 | ''' 22 | value = value.replace(",", "") 23 | Small = { 24 | 'zero': 0, 25 | 'one': 1, 26 | 'two': 2, 27 | 'three': 3, 28 | 'four': 4, 29 | 'five': 5, 30 | 'six': 6, 31 | 'seven': 7, 32 | 'eight': 8, 33 | 'nine': 9, 34 | 'ten': 10, 35 | 'eleven': 11, 36 | 'twelve': 12, 37 | 'thirteen': 13, 38 | 'fourteen': 14, 39 | 'fifteen': 15, 40 | 'sixteen': 16, 41 | 'seventeen': 17, 42 | 'eighteen': 18, 43 | 'nineteen': 19, 44 | 'twenty': 20, 45 | 'thirty': 30, 46 | 'forty': 40, 47 | 'fifty': 50, 48 | 'sixty': 60, 49 | 'seventy': 70, 50 | 'eighty': 80, 51 | 'ninety': 90 } 52 | 53 | Magnitude = { 54 | 'thousand': 1000, 55 | 'million': 1000000, 56 | 'billion': 1000000000, 57 | 'trillion': 1000000000000, 58 | 'quadrillion': 1000000000000000, 59 | 'quintillion': 1000000000000000000, 60 | 'sextillion': 1000000000000000000000, 61 | 'septillion': 1000000000000000000000000, 62 | 'octillion': 1000000000000000000000000000, 63 | 'nonillion': 1000000000000000000000000000000, 64 | 'decillion': 1000000000000000000000000000000000, 65 | } 66 | 67 | Vague = { 68 | 'numbers': 5, 69 | 'dozens': 55, 70 | 'tens': 55, 71 | 'hundreds': 550, 72 | 'thousands': 5500, 73 | 'millions': 5500000, 74 | 'billions': 5500000000, 75 | 'trillions': 5500000000000, 76 | 'quadrillions': 5500000000000000, 77 | 'quintillions': 5500000000000000000, 78 | 'sextillions': 5500000000000000000000, 79 | 'septillions': 5500000000000000000000000, 80 | 'octillions': 5500000000000000000000000000, 81 | 'nonillions': 5500000000000000000000000000000, 82 | 'decillions': 5500000000000000000000000000000000, 83 | } 84 | 85 | a = [] 86 | if not type(value) is list: 87 | value = [value] 88 | for s_item in value: 89 | a += re.split(r"[\s-]+", str(s_item)) 90 | n = 0 91 | g = 0 92 | vague_of = False 93 | for w in a: 94 | try: 95 | x = int(w) 96 | g += x 97 | except: 98 | if w.lower() == 'of': 99 | vague_of = True 100 | continue 101 | 102 | if vague_of: 103 | if w[-1:] != 's': 104 | w = w + 's' 105 | if w == 'hundreds' or w == 'hundred': 106 | g *= 100 107 | elif w[:-1] in Magnitude: 108 | g *= Magnitude[w[:-1]] 109 | continue 110 | 111 | if w in Small: 112 | g += Small[w] 113 | elif w == "hundred" and g != 0: 114 | g *= 100 115 | elif w in Magnitude: 116 | n += g * Magnitude[w] 117 | g = 0 118 | elif w in Vague: 119 | g = Vague[w] 120 | else: 121 | return None 122 | 123 | vague_of = False 124 | return n + g 125 | 126 | 127 | class ExtractedReport: 128 | 129 | def __init__(self, locations, event_term, subject_term, quantity, story, tag_spans=[]): 130 | if locations: 131 | self.locations = [convert_tokens_to_strings(l) for l in locations] 132 | else: 133 | self.locations = [] 134 | self.event_term = convert_tokens_to_strings(event_term) 135 | self.subject_term = convert_tokens_to_strings(subject_term) 136 | self.quantity = 
convert_quantity(convert_tokens_to_strings(quantity))
137 |         self.story = story
138 |
139 |     def display(self):
140 |         print("Location: {} EventTerm: {} SubjectTerm: {} Quantity: {}"
141 |               .format(self.locations, self.event_term, self.subject_term, self.quantity))
142 |
143 |     def __eq__(self, other):
144 |         if isinstance(other, ExtractedReport):
145 |             return ((self.locations == other.locations) and
146 |                     (self.event_term == other.event_term) and
147 |                     (self.subject_term == other.subject_term) and
148 |                     (self.quantity == other.quantity)
149 |                     )
150 |         else:
151 |             return False
152 |
153 |     def __ne__(self, other):
154 |         return (not self.__eq__(other))
155 |
156 |     def __repr__(self):
157 |         locations = ",".join(self.locations)
158 |         rep = "Locations:{} Verb:{} Noun:{} Quantity:{}".format(
159 |             locations, self.event_term, self.subject_term, self.quantity)
160 |         return rep
161 |
162 |     def __hash__(self):
163 |         return hash(self.__repr__())
164 |
165 |     def to_json(self):
166 |         d = {}
167 |         d['Location'] = self.locations
168 |         d['EventTerm'] = self.event_term
169 |         d['SubjectTerm'] = self.subject_term
170 |         d['Quantity'] = self.quantity
171 |         return d
172 |
173 |
174 | class Fact(object):
175 |     '''Wrapper for individual facts found within articles
176 |     '''
177 |
178 |     def __init__(self, token, full_span=None, lemma_=None, fact_type=None, start_offset=0):
179 |         self.token = token
180 |         self.type_ = fact_type
181 |         if full_span:
182 |             self.text = full_span.text
183 |         elif token:
184 |             self.text = token.text
185 |         else:
186 |             self.text = ''
187 |         self.lemma_ = lemma_
188 |         # Set the start index
189 |         if isinstance(token, Token):
190 |             self.start_idx = token.idx + start_offset
191 |         elif isinstance(token, Span):
192 |             self.start_idx = token[0].idx + start_offset
193 |         else:
194 |             self.start_idx = 0
195 |         # Set the end index
196 |         token_length = len(self.text)
197 |         self.end_idx = self.start_idx + token_length
198 |
199 |     def __str__(self):
200 |         return self.text
201 |
-------------------------------------------------------------------------------- /internal_displacement/fact.py: --------------------------------------------------------------------------------
1 | from spacy.tokens import Token, Span
2 |
3 | class Fact(object):
4 |     '''Wrapper for individual facts found within articles
5 |     '''
6 |
7 |     def __init__(self, token, full_span=None, lemma_=None, fact_type=None, start_offset=0):
8 |         self.token = token
9 |         self.type_ = fact_type
10 |         if full_span:
11 |             self.text = full_span.text
12 |         elif token:
13 |             self.text = token.text
14 |         else:
15 |             self.text = ''
16 |         self.lemma_ = lemma_
17 |         # Set the start index
18 |         if isinstance(token, Token):
19 |             self.start_idx = token.idx + start_offset
20 |         elif isinstance(token, Span):
21 |             self.start_idx = token[0].idx + start_offset
22 |         else:
23 |             self.start_idx = 0
24 |         # Set the end index
25 |         token_length = len(self.text)
26 |         self.end_idx = self.start_idx + token_length
27 |
28 |     def __str__(self):
29 |         return self.text
-------------------------------------------------------------------------------- /internal_displacement/model/model.py: --------------------------------------------------------------------------------
1 | import os
2 |
3 | from sqlalchemy import Table, text
4 | from sqlalchemy import create_engine
5 | from sqlalchemy.ext.declarative import declarative_base
6 | from sqlalchemy import Column, Integer, String, ForeignKey, DateTime, Boolean, Numeric
7 | from sqlalchemy.orm import sessionmaker, relationship, object_session
8 |
9 | Base = declarative_base()
10 | Session =
sessionmaker() 11 | 12 | 13 | class Status: 14 | NEW = 'new' 15 | FETCHING = 'fetching' 16 | FETCHED = 'fetched' 17 | PROCESSING = 'processing' 18 | PROCESSED = 'processed' 19 | FETCHING_FAILED = 'fetching failed' 20 | PROCESSING_FAILED = 'processing failed' 21 | 22 | 23 | class Category: 24 | OTHER = 'other' 25 | DISASTER = 'disaster' 26 | CONFLICT = 'conflict' 27 | 28 | 29 | class UnexpectedArticleStatusException(Exception): 30 | def __init__(self, article, expected, actual): 31 | super(UnexpectedArticleStatusException, self).__init__( 32 | "Expected article {id} to be in state {expected}, but was in state {actual}".format( 33 | id=article.id, expected=expected, actual=actual 34 | )) 35 | self.expected = expected 36 | self.actual = actual 37 | 38 | 39 | class Article(Base): 40 | __tablename__ = 'article' 41 | 42 | id = Column(Integer, primary_key=True) 43 | url = Column(String) 44 | domain = Column(String) 45 | status = Column(String) 46 | title = Column(String) 47 | publication_date = Column(DateTime) 48 | authors = Column(String) 49 | language = Column(String(2)) 50 | relevance = Column(Boolean) 51 | reliability = Column(Numeric) 52 | content = relationship('Content', uselist=False, back_populates='article', cascade="all, delete-orphan") 53 | reports = relationship('Report', back_populates='article', cascade="all, delete-orphan") 54 | categories = relationship('ArticleCategory', cascade="all, delete-orphan") 55 | 56 | def update_status(self, new_status): 57 | """ 58 | Atomically Update the status of this Article from to new_status. 59 | If something changed the status of this article since it was loaded, raise. 60 | """ 61 | session = object_session(self) 62 | if not session: 63 | raise RuntimeError("Object has not been persisted in a session.") 64 | 65 | expected_status = self.status 66 | result = session.query(Article).filter(Article.id == self.id, Article.status == self.status).update({ 67 | Article.status: new_status 68 | }) 69 | if result != 1: 70 | updated = session.query(Article).filter(Article.id == self.id).one() 71 | raise UnexpectedArticleStatusException(self, expected_status, updated.status) 72 | 73 | 74 | class ArticleCategory(Base): 75 | __tablename__ = 'article_category' 76 | 77 | article_id = Column('article', Integer, ForeignKey('article.id'), primary_key=True) 78 | category = Column('category', String, primary_key=True) 79 | article = relationship('Article', back_populates='categories') 80 | 81 | 82 | class Content(Base): 83 | __tablename__ = 'content' 84 | 85 | article_id = Column('article', Integer, ForeignKey('article.id'), primary_key=True) 86 | article = relationship('Article', back_populates='content') 87 | retrieval_date = Column(DateTime) 88 | content = Column(String) 89 | content_type = Column(String) 90 | 91 | 92 | class Country(Base): 93 | __tablename__ = 'country' 94 | 95 | code = Column(String(3), primary_key=True) 96 | terms = relationship('CountryTerm', back_populates='country', cascade="all, delete-orphan") 97 | locations = relationship('Location', back_populates='country', cascade="all, delete-orphan") 98 | 99 | @classmethod 100 | def lookup(cls, session, code): 101 | return session.query(cls).filter_by(code=code).one() 102 | 103 | 104 | class CountryTerm(Base): 105 | __tablename__ = 'country_term' 106 | 107 | term = Column(String, primary_key=True) 108 | code = Column('country', String(3), ForeignKey('country.code')) 109 | country = relationship('Country', back_populates='terms') 110 | 111 | 112 | report_location = Table( 113 | 'report_location', 
Base.metadata, 114 | Column('report', ForeignKey('report.id'), primary_key=True), 115 | Column('location', ForeignKey('location.id'), primary_key=True) 116 | ) 117 | 118 | 119 | class Location(Base): 120 | __tablename__ = 'location' 121 | 122 | id = Column(Integer, primary_key=True) 123 | description = Column(String) 124 | city = Column(String) 125 | subdivision = Column(String) 126 | code = Column('country', String(3), ForeignKey('country.code')) 127 | country = relationship('Country', back_populates='locations') 128 | latlong = Column(String) # Not tackling PostGIS right now 129 | reports = relationship('Report', secondary=report_location, back_populates='locations') 130 | 131 | 132 | class Report(Base): 133 | __tablename__ = 'report' 134 | 135 | id = Column(Integer, primary_key=True, autoincrement=True) 136 | article_id = Column('article', Integer, ForeignKey('article.id'), primary_key=True) 137 | article = relationship('Article', back_populates='reports') 138 | event_term = Column(String) 139 | subject_term = Column(String) 140 | quantity = Column(Integer) 141 | tag_locations = Column(String) 142 | accuracy = Column(Numeric) 143 | analyzer = Column(String) 144 | analysis_date = Column(DateTime) 145 | locations = relationship('Location', secondary=report_location, back_populates='reports') 146 | datespans = relationship('ReportDateSpan', back_populates='report', cascade="all, delete-orphan") 147 | 148 | 149 | class ReportDateSpan(Base): 150 | __tablename__ = 'report_datespan' 151 | 152 | id = Column(Integer, primary_key=True) 153 | report_id = Column('report', Integer, ForeignKey('report.id')) 154 | report = relationship('Report', back_populates='datespans') 155 | start = Column(DateTime) 156 | finish = Column(DateTime) 157 | -------------------------------------------------------------------------------- /internal_displacement/scraper.py: -------------------------------------------------------------------------------- 1 | import newspaper 2 | import csv 3 | import urllib 4 | from urllib import request 5 | from urllib.parse import urlparse 6 | import textract 7 | import os 8 | from collections import OrderedDict 9 | import datetime 10 | from bs4 import BeautifulSoup 11 | import re 12 | 13 | # PDF helper functions 14 | 15 | 16 | def is_pdf_simple_tests(url): 17 | '''Test a url to see if it is a pdf by looking at url and content headers 18 | If so, return the relevant pdf url for parsing 19 | ''' 20 | # Simple url-based test 21 | if re.search(r'\.pdf$', url): 22 | return url 23 | 24 | # Test based on headers 25 | try: 26 | page = request.urlopen(url) 27 | content_type = page.getheader('Content-Type') 28 | if content_type == 'application/pdf': 29 | return url 30 | except (urllib.error.HTTPError, urllib.error.URLError, UnicodeEncodeError, ValueError): 31 | pass 32 | 33 | 34 | def is_pdf_iframe_test(url): 35 | '''Test a url to see if the page contains an iframe 36 | and if the iframe content is pdf or not; if True, return the pdf url 37 | ''' 38 | try: 39 | page = request.urlopen(url) 40 | soup = BeautifulSoup(page, "html.parser") 41 | iframes = soup.find_all('iframe') 42 | if len(iframes) > 0: 43 | for frame in iframes: 44 | if 'src' in frame.attrs.keys(): 45 | src = frame.attrs['src'] 46 | # should probably replace with something more robust 47 | if 'http' in src: 48 | if is_pdf_simple_tests(src): 49 | return src 50 | except (urllib.error.HTTPError, urllib.error.URLError, UnicodeEncodeError, ValueError): 51 | pass 52 | 53 | 54 | def is_pdf_consolidated_test(url): 55 | '''Run a series of 
tests to determine if it is a pdf 56 | If True, return the relevant url 57 | ''' 58 | 59 | # Carry out simple tests based upon url and content type 60 | pdf_attempt_1 = is_pdf_simple_tests(url) 61 | if pdf_attempt_1: 62 | return pdf_attempt_1 63 | 64 | # Carry out additional test based by looking for iframe 65 | pdf_attempt_2 = is_pdf_iframe_test(url) 66 | if pdf_attempt_2: 67 | return pdf_attempt_2 68 | 69 | return False 70 | 71 | 72 | def remove_newline(text): 73 | ''' Removes new line and   characters. 74 | ''' 75 | text = text.replace('\n', ' ') 76 | text = text.replace('\xa0', ' ') 77 | return text 78 | 79 | 80 | def format_date(date_string): 81 | '''Formats date string from http headers 82 | Returns standardized date format as string 83 | ''' 84 | try: 85 | dt = datetime.datetime.strptime( 86 | date_string, "%a, %d %b %Y %H:%M:%S %Z") 87 | formatted_date = dt.strftime("%Y-%m-%d %H:%M:%S") 88 | except (ValueError, TypeError, AttributeError): 89 | formatted_date = None 90 | return formatted_date 91 | 92 | 93 | class Scraper(object): 94 | 95 | def __init__(self): 96 | pass 97 | 98 | def html_article(self, url): 99 | """Downloads and extracts content plus metadata for html page 100 | Parameters 101 | ---------- 102 | url: url of page to be scraped 103 | 104 | Returns 105 | ------- 106 | article: An object of class Article containing the content and metadata. 107 | """ 108 | 109 | a = newspaper.Article(url) 110 | a.download() 111 | if a.is_downloaded: 112 | a.parse() 113 | article_domain = a.source_url 114 | article_title = a.title 115 | article_authors = a.authors 116 | article_pub_date = a.publish_date 117 | article_text = remove_newline(a.text) 118 | # tag the type of article 119 | # currently default to text but should be able to determine img/video 120 | # etc 121 | article_content_type = 'text' 122 | return article_text, article_pub_date, article_title, article_content_type, article_authors, article_domain 123 | else: # Temporary fix to deal with https://github.com/codelucas/newspaper/issues/280 124 | return "retrieval_failed", None, "", datetime.datetime.now(), "", "" 125 | 126 | def get_pdf(self, url): 127 | ''' Takes a pdf url, downloads it and saves it locally.''' 128 | try: 129 | response = request.urlopen(url) # not sure if this is needed? 130 | publish_date = response.getheader('Last-Modified') 131 | pdf_file = open('file_to_convert.pdf', 'wb') 132 | pdf_file.write(response.read()) 133 | pdf_file.close() 134 | return os.path.join('./', 'file_to_convert.pdf'), publish_date 135 | except (urllib.error.HTTPError, urllib.error.URLError, UnicodeEncodeError, ValueError) as e: 136 | return '', '' 137 | 138 | def get_body_text(self, url): 139 | ''' This function will extract all text from the url passed in 140 | ''' 141 | filepath, publish_date = self.get_pdf(url) 142 | if filepath == '': 143 | return '', None 144 | else: 145 | text = str(textract.process(filepath, method='pdfminer'), 'utf-8') 146 | text = text.replace('\n', ' ') # can replace with a call to 147 | text = text.replace('\xa0', ' ') # the helper function. 148 | publish_date = format_date(publish_date) 149 | return text, publish_date 150 | 151 | def remove_pdf(self, filepath): 152 | ''' Deletes pdf from disk 153 | Not currently in use as pdfs downloads overwrite self, but may come in 154 | useful later if pdfs are downloaded and stored under different names. 
155 | ''' 156 | os.remove(filepath) 157 | 158 | def pdf_article(self, url): 159 | try: 160 | article_text, article_pub_date = self.get_body_text(url) 161 | if article_text == '': 162 | return "retrieval_failed", None, "", datetime.datetime.now(), "", "" 163 | else: 164 | article_domain = urlparse(url).hostname 165 | article_content_type = 'pdf' 166 | # improve parsing of pdfs to extract these? 167 | article_title = '' 168 | article_authors = '' 169 | return article_text, article_pub_date, article_title, article_content_type, article_authors, article_domain 170 | except: 171 | return "retrieval_failed", None, "", datetime.datetime.now(), "", "" 172 | 173 | def scrape(self, url, scrape_pdfs=True): 174 | """ 175 | Scrapes content and metadata from an url 176 | Parameters 177 | ---------- 178 | url: the url to be scraped 179 | scrape_pdfs: determines whether pdf files will be scraped or not 180 | default: True 181 | 182 | Returns 183 | ------- 184 | article: An article object prepared by scraping the url. 185 | 186 | 187 | """ 188 | pdf_check = is_pdf_consolidated_test(url) 189 | if pdf_check and scrape_pdfs: 190 | article = self.pdf_article(pdf_check) 191 | return article 192 | elif not pdf_check: 193 | article = self.html_article(url) 194 | return article 195 | else: 196 | pass 197 | -------------------------------------------------------------------------------- /internal_displacement/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal_displacement/tests/__init__.py -------------------------------------------------------------------------------- /internal_displacement/tests/test_ExtractedReport.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from internal_displacement.extracted_report import * 3 | 4 | class TestExtractedReport(TestCase): 5 | 6 | def test_convert_quantity(self): 7 | self.assertEqual(convert_quantity("twelve"), 12) 8 | self.assertEqual(convert_quantity("seventy five"), 75) 9 | self.assertEqual(convert_quantity("3 hundred"), 300) 10 | self.assertEqual(convert_quantity("twelve hundred"), 1200) 11 | self.assertEqual(convert_quantity("seven million"), 7000000) 12 | self.assertEqual(convert_quantity("twelve thousand three hundred four"), 12304) 13 | self.assertEqual(convert_quantity("32 thousand"), 32000) 14 | self.assertEqual(convert_quantity(["one", "million"]), 1000000) 15 | self.assertEqual(convert_quantity("hundreds of millions"), 550000000) 16 | self.assertEqual(convert_quantity("tens of thousands"), 55000) 17 | self.assertEqual(convert_quantity("tens of thousand"), 55000) 18 | self.assertEqual(convert_quantity("dozens of people"), 55) 19 | -------------------------------------------------------------------------------- /internal_displacement/tests/test_Interpreter.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from internal_displacement.interpreter import strip_words, Interpreter 3 | from internal_displacement.article import Article 4 | from internal_displacement.model.model import Category 5 | from langdetect import detect 6 | import pycountry 7 | import spacy 8 | import datetime 9 | 10 | nlp = spacy.load("en") 11 | person_reporting_terms = [ 12 | 'displaced', 'evacuated', 'forced', 'flee', 'homeless', 'relief camp', 13 | 'sheltered', 'relocated', 'stranded', 
'stuck', 'stranded', "killed", "dead", "died", "drown" 14 | ] 15 | 16 | structure_reporting_terms = [ 17 | 'destroyed', 'damaged', 'swept', 'collapsed', 18 | 'flooded', 'washed', 'inundated', 'evacuate' 19 | ] 20 | 21 | person_reporting_units = ["families", "person", "people", "individuals", "locals", "villagers", "residents", 22 | "occupants", "citizens", "households"] 23 | 24 | structure_reporting_units = ["home", "house", "hut", "dwelling", "building", "shop", "business", "apartment", 25 | "flat", "residence"] 26 | 27 | relevant_article_terms = ['Rainstorm', 'hurricane', 28 | 'tornado', 'rain', 'storm', 'earthquake'] 29 | relevant_article_lemmas = [t.lemma_ for t in nlp( 30 | " ".join(relevant_article_terms))] 31 | 32 | 33 | class TestInterpreter(TestCase): 34 | 35 | def setUp(self): 36 | 37 | self.interpreter = Interpreter(nlp, person_reporting_terms, structure_reporting_terms, 38 | person_reporting_units, structure_reporting_units, relevant_article_lemmas, 'data/') 39 | self.date = datetime.datetime.now() 40 | 41 | def tearDown(self): 42 | pass 43 | 44 | def test_check_language(self): 45 | test_article = Article("A decent amount of test content which will be used for extracting the language", 46 | self.date, "test_title", "test_content_type", [ 47 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters") 48 | language = self.interpreter.check_language(test_article.content) 49 | self.assertEqual(language, "en") 50 | 51 | def test_strip_words(self): 52 | test_place_name = 'the province county district city' 53 | self.assertEqual(strip_words(test_place_name), '') 54 | test_place_name = 'the United States' 55 | self.assertEqual(strip_words(test_place_name), 'United States') 56 | 57 | def test_extract_countries(self): 58 | test_article = Article("The United Kingdom plus Afghanistan plus Sichuan Province, as well as Toronto, Cuba and Bosnia", 59 | self.date, "test_title", "test_content_type", [ 60 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters") 61 | countries = self.interpreter.extract_countries(test_article.content) 62 | self.assertIsInstance(countries, list) 63 | self.assertEqual(len(countries), 6) 64 | self.assertIn('GBR', countries) 65 | self.assertIn('AFG', countries) 66 | self.assertIn('CHN', countries) 67 | self.assertIn('CAN', countries) 68 | self.assertIn('CUB', countries) 69 | self.assertIn('BIH', countries) 70 | test_article = Article("No countries mentioned", 71 | self.date, "test_title", "test_content_type", [ 72 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters") 73 | countries = self.interpreter.extract_countries(test_article) 74 | self.assertIsInstance(countries, list) 75 | self.assertEqual(len(countries), 0) 76 | 77 | def test_classify_category(self): 78 | disaster_article = Article("Afghanistan – Flash Floods in Faryab and Baghlan Leave 8 Dead", self.date, "test_title", "test_content_type", [ 79 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters") 80 | conflict_article = Article("INSIGHT-India-Pakistan clashes escalate into a humanitarian tragedy", self.date, "test_title", "test_content_type", [ 81 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters") 82 | disaster = self.interpreter.classify_category(disaster_article) 83 | conflict = self.interpreter.classify_category(conflict_article) 84 | self.assertEqual(disaster, Category.DISASTER) 85 | self.assertEqual(conflict, Category.CONFLICT) 86 | 
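The tests above double as a usage reference for the Interpreter; a condensed sketch of driving it directly (assumes the term lists defined at the top of this test module are in scope, that spaCy's English model is installed, and the outputs shown are illustrative):

import spacy
from internal_displacement.interpreter import Interpreter

nlp = spacy.load("en")
interpreter = Interpreter(nlp, person_reporting_terms, structure_reporting_terms,
                          person_reporting_units, structure_reporting_units,
                          relevant_article_lemmas, 'data/')

text = "Flash floods in Afghanistan left 160 dead and hundreds of villagers stranded"
interpreter.check_language(text)     # -> 'en'
interpreter.extract_countries(text)  # -> ['AFG']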
-------------------------------------------------------------------------------- /internal_displacement/tests/test_Pipeline.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from internal_displacement.interpreter import strip_words, Interpreter 3 | from internal_displacement.scraper import Scraper 4 | from sqlalchemy import create_engine 5 | 6 | from internal_displacement.model.model import Status, Session, Category, Article, Content, Country, CountryTerm, \ 7 | Location, Report, ReportDateSpan, ArticleCategory, UnexpectedArticleStatusException 8 | from internal_displacement.pipeline import Pipeline 9 | import spacy 10 | import os 11 | 12 | 13 | nlp = spacy.load("en") 14 | person_reporting_terms = [ 15 | 'displaced', 'evacuated', 'forced', 'flee', 'homeless', 'relief camp', 16 | 'sheltered', 'relocated', 'stranded', 'stuck', 'stranded', "killed", "dead", "died", "drown" 17 | ] 18 | 19 | structure_reporting_terms = [ 20 | 'destroyed', 'damaged', 'swept', 'collapsed', 21 | 'flooded', 'washed', 'inundated', 'evacuate' 22 | ] 23 | 24 | person_reporting_units = ["families", "person", "people", "individuals", "locals", "villagers", "residents", 25 | "occupants", "citizens", "households"] 26 | 27 | structure_reporting_units = ["home", "house", "hut", "dwelling", "building", "shop", "business", "apartment", 28 | "flat", "residence"] 29 | 30 | relevant_article_terms = ['Rainstorm', 'hurricane', 31 | 'tornado', 'rain', 'storm', 'earthquake'] 32 | relevant_article_lemmas = [t.lemma_ for t in nlp( 33 | " ".join(relevant_article_terms))] 34 | 35 | test_urls = [ 36 | 'http://www.independent.co.uk/news/somefakenewsstory', 37 | 'http://www.eluniversal.com.mx/articulo/nacion/politica/2017/03/13/manifestantes-obligan-acortar-evento-de-amlo-en-ny', 38 | 'http://www.bbc.com/news/world-europe-39258436', 39 | 'http://www.independent.co.uk/news/world/asia/160-killed-and-hundreds-left-stranded-by-flooding-across-afghanistan-and-pakistan-8746566.html' 40 | ] 41 | 42 | 43 | class TestPipeline(TestCase): 44 | 45 | def setUp(self): 46 | db_host = os.environ.get('DB_HOST') 47 | db_url = 'postgresql://{user}:{password}@{db_host}/{db}'.format( 48 | user='tester', password='tester', db_host=db_host, db='id_test') 49 | engine = create_engine(db_url) 50 | Session.configure(bind=engine) 51 | session = Session() 52 | scraper = Scraper() 53 | interpreter = Interpreter(nlp, person_reporting_terms, structure_reporting_terms, 54 | person_reporting_units, structure_reporting_units, relevant_article_lemmas, 'data/') 55 | self.pipeline = Pipeline(session, scraper, interpreter) 56 | self.session = session 57 | # Add two countries 58 | for c in ['AFG', 'PAK']: 59 | country = Country(code=c) 60 | self.session.add(country) 61 | self.session.commit() 62 | 63 | def tearDown(self): 64 | self.session.rollback() 65 | for url in test_urls: 66 | self.session.query(Article).filter_by(url=url).delete() 67 | self.session.commit() 68 | for c in ['AFG', 'PAK']: 69 | self.session.query(Country).filter_by(code=c).delete() 70 | self.session.commit() 71 | 72 | def test_bad_url(self): 73 | url = test_urls[0] 74 | response = self.pipeline.process_url(url) 75 | self.assertEqual(response, 'fetching failed') 76 | article = self.session.query(Article).filter_by(url=url).first() 77 | self.assertIsNone(article.content) 78 | 79 | def test_non_english_url(self): 80 | url = test_urls[1] 81 | response = self.pipeline.process_url(url) 82 | self.assertEqual(response, 'Processed: Not in 
English') 83 | article = self.session.query(Article).filter_by(url=url).first() 84 | self.assertEqual(len(article.reports), 0) 85 | self.assertEqual(article.status, Status.PROCESSED) 86 | 87 | def test_irrelevant(self): 88 | url = test_urls[2] 89 | response = self.pipeline.process_url(url) 90 | self.assertEqual(response, 'Processed: Not relevant') 91 | article = self.session.query(Article).filter_by(url=url).first() 92 | self.assertEqual(len(article.reports), 0) 93 | self.assertEqual(article.status, Status.PROCESSED) 94 | 95 | def test_good_url(self): 96 | url = test_urls[3] 97 | response = self.pipeline.process_url(url) 98 | self.assertEqual(response, 'processed') 99 | article = self.session.query(Article).filter_by(url=url).first() 100 | self.assertEqual(len(article.reports), 9) 101 | self.assertEqual(article.status, Status.PROCESSED) 102 | country_codes = set([ 103 | location.country.code for report in article.reports for location in report.locations]) 104 | self.assertIn('AFG', country_codes) 105 | self.assertIn('PAK', country_codes) 106 | terms = [report.event_term for report in article.reports] 107 | self.assertIn('collapse', terms) 108 | self.assertIn('strand', terms) 109 | units = [report.subject_term for report in article.reports] 110 | self.assertIn('villager', units) 111 | self.assertIn('house', units) 112 | 113 | def test_existing_location(self): 114 | article = Article(url='test-url') 115 | self.session.add(article) 116 | report = Report(article_id=article.id) 117 | self.session.add(report) 118 | location = Location(code='AFG', description='somelocation') 119 | self.session.add(location) 120 | self.session.commit() 121 | original_id = location.id 122 | self.pipeline.process_location(report, 'somelocation') 123 | self.assertEqual(original_id, report.locations[0].id) 124 | -------------------------------------------------------------------------------- /internal_displacement/tests/test_Scraper.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | 4 | from sqlalchemy import create_engine 5 | 6 | from internal_displacement.model.model import Session, Article 7 | from internal_displacement.scraper import is_pdf_simple_tests, is_pdf_iframe_test, format_date, html_article 8 | 9 | 10 | class TestScraper(TestCase): 11 | 12 | def setUp(self): 13 | pass 14 | 15 | def tearDown(self): 16 | pass 17 | 18 | def test_is_pdf_simple_tests(self): 19 | url = "http://www.securitycouncilreport.org/atf/cf/%7B65BFCF9B-6D27-4E9C-8CD3-CF6E4FF96FF9%7D/S_2015_302.pdf" 20 | pdf_test = is_pdf_simple_tests(url) 21 | self.assertEqual(pdf_test, url) 22 | url = "http://www.independent.co.uk/news/world/asia/160-killed-and-hundreds-left-stranded-by-flooding-across-afghanistan-and-pakistan-8746566.html" 23 | self.assertFalse(is_pdf_simple_tests(url)) 24 | 25 | def test_is_pdf_iframe_test(self): 26 | url = "http://erccportal.jrc.ec.europa.eu/getdailymap/docId/1125" 27 | pdf_test = is_pdf_iframe_test(url) 28 | self.assertEqual( 29 | pdf_test, "http://erccportal.jrc.ec.europa.eu/ERCmaps/ECDM_20150415_Natural_Disasters_Afghanistan_v02.pdf") 30 | url = "http://html.com/tags/iframe/" 31 | self.assertFalse(is_pdf_simple_tests(url)) 32 | 33 | def test_format_date(self): 34 | date_string = 'Mon, 01 Jun 2015 16:25:25 GMT' 35 | formatted_date = format_date(date_string) 36 | self.assertEqual(formatted_date, '2015-06-01 16:25:25') 37 | date_string = '16:25:25 GMT' 38 | formatted_date = format_date(date_string) 39 | 
self.assertIsNone(formatted_date)
40 |         date_string = None
41 |         formatted_date = format_date(date_string)
42 |         self.assertIsNone(formatted_date)
43 |
44 |
45 | class TestFetch(TestCase):
46 |
47 |     def setUp(self):
48 |         DB_URL = os.environ.get('DB_URL')
49 |         if not DB_URL.endswith('/id_test'):
50 |             raise RuntimeError('Refusing to run tests against non-test database')
51 |         engine = create_engine(DB_URL)
52 |         Session.configure(bind=engine)
53 |         self.session = Session()
54 |
55 |     def tearDown(self):
56 |         # self.session.rollback()
57 |         pass
58 |
59 |
60 |     def test_html(self):
61 |         old = self.session.query(Article)\
62 |             .filter_by(url='http://www.independent.co.uk/news/world/asia/160-killed-and-hundreds-left-stranded-by-flooding-across-afghanistan-and-pakistan-8746566.html')\
63 |             .one_or_none()
64 |         if old:
65 |             self.session.delete(old)
66 |             self.session.commit()
67 |         article = html_article(
68 |             self.session,
69 |             'http://www.independent.co.uk/news/world/asia/160-killed-and-hundreds-left-stranded-by-flooding-across-afghanistan-and-pakistan-8746566.html')
70 |         self.assertEqual(article.domain, 'http://www.independent.co.uk')
71 |         self.assertRegex(article.content.content, 'Flash flood')
72 |
-------------------------------------------------------------------------------- /internal_displacement/tests/test_coordinates_extraction.py: --------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from internal_displacement.pipeline import get_coordinates_mapzen
3 |
4 | class TestCoordinatesExtraction(TestCase):
5 |
6 |     def test_get_coordinates_mapzen(self):
7 |         res = get_coordinates_mapzen(country="Austria")
8 |         self.assertEqual(res['coordinates'], "14.143702,47.522617")
9 |         res = get_coordinates_mapzen("Austria")  # free-text query; best match is a city
10 |         self.assertEqual(res['coordinates'], "-93.33167,16.43083")
11 |         self.assertEqual(res['flag'], "multiple-results")
12 |         res = get_coordinates_mapzen("Vienna")
13 |         self.assertEqual(res['coordinates'], "16.37208,48.20849")
14 |         self.assertEqual(res['flag'], "multiple-results")
15 |         res = get_coordinates_mapzen(city="Vienna", country="Austria")
16 |         self.assertEqual(res['coordinates'], "16.37208,48.20849")
17 |         self.assertEqual(res['flag'], "single-result")
18 |         res = get_coordinates_mapzen(city="Vienna", country="United States")
19 |         self.assertEqual(res['coordinates'], "-77.260053,38.898599")
20 |         res = get_coordinates_mapzen(city="Vienna", subdivision="Maryland", country="United States")
21 |         self.assertEqual(res['coordinates'], "-75.833966,38.483475")
22 |         self.assertEqual(res['flag'], "single-result")
23 |         res = get_coordinates_mapzen("Vienna", hints=['Turkey','Indonesia', 'Austria', 'France'])
24 |         self.assertEqual(res['coordinates'], "16.37208,48.20849")
25 |         self.assertEqual(res['flag'], "multiple-results")
26 |         res = get_coordinates_mapzen("Vienna", hints=['Turkey','Germany', 'Australia', 'United States'])
27 |         self.assertEqual(res['coordinates'], "-77.260053,38.898599")
28 |         self.assertEqual(res['flag'], "multiple-results")
29 |         res = get_coordinates_mapzen("Vienna", hints=['Turkey','Germany', 'Australia', 'United States', 'Georgia'])
30 |         self.assertEqual(res['coordinates'], "-83.79545,32.09156")
31 |         self.assertEqual(res['flag'], "multiple-results")
-------------------------------------------------------------------------------- /internal_displacement/tests/test_model.py: --------------------------------------------------------------------------------
1 | import os
2 | from datetime import
datetime 3 | from unittest import TestCase 4 | 5 | from sqlalchemy import create_engine 6 | 7 | from internal_displacement.model.model import Status, Session, Category, Article, Content, Country, CountryTerm, \ 8 | Location, Report, ReportDateSpan, ArticleCategory, UnexpectedArticleStatusException 9 | 10 | 11 | class TestModel(TestCase): 12 | def setUp(self): 13 | db_host = os.environ.get('DB_HOST') 14 | db_url = 'postgresql://{user}:{passwd}@{db_host}/{db}'.format( 15 | user='tester', passwd='tester', db_host=db_host, db='id_test') 16 | engine = create_engine(db_url) 17 | Session.configure(bind=engine) 18 | self.session = Session() 19 | 20 | def tearDown(self): 21 | self.session.rollback() 22 | self.session.query(Article).filter_by(domain='example.com').delete() 23 | self.session.commit() 24 | 25 | def test_article(self): 26 | article = Article(url='http://example.com', 27 | domain='example.com', 28 | status=Status.NEW) 29 | content = Content(article=article, 30 | retrieval_date=datetime.now(), 31 | content="La la la") 32 | ArticleCategory(article=article, category=Category.DISASTER) 33 | ArticleCategory(article=article, category=Category.OTHER) 34 | self.session.add(article) 35 | 36 | article2 = self.session.query(Article).filter_by(status=Status.NEW).one() 37 | self.assertEqual(article2.domain, 'example.com') 38 | self.assertEqual(article2.content.content, "La la la") 39 | self.assertCountEqual([c.category for c in article2.categories], ['disaster', 'other']) 40 | 41 | article3 = self.session.query(Article).filter_by(status=Status.NEW).one() 42 | self.assertEqual(article3.domain, 'example.com') 43 | 44 | def test_delete_article(self): 45 | article = None 46 | try: 47 | article = Article(url='http://example.com', 48 | domain='example.com', 49 | status=Status.NEW) 50 | content = Content(article=article, 51 | retrieval_date=datetime.now(), 52 | content="La la la") 53 | ArticleCategory(article=article, category=Category.DISASTER) 54 | ArticleCategory(article=article, category=Category.OTHER) 55 | self.session.add(article) 56 | self.session.commit() 57 | self.session.delete(article) 58 | finally: 59 | self.session.rollback() 60 | if article: 61 | self.session.delete(article) 62 | self.session.commit() 63 | 64 | def test_country_term(self): 65 | mmr = self.session.query(Country).filter_by(code="MMR").one_or_none() or Country(code="MMR") 66 | myanmar = CountryTerm(term="Myanmar", country=mmr) 67 | burma = CountryTerm(term="Burma", country=mmr) 68 | self.session.add(mmr) 69 | 70 | myanmar = self.session.query(Country).join(CountryTerm).filter_by(term='Myanmar').one() 71 | burma = self.session.query(Country).join(CountryTerm).filter_by(term='Burma').one() 72 | self.assertEqual(myanmar, burma) 73 | 74 | def test_location(self): 75 | mmr = self.session.query(Country).filter_by(code="MMR").one_or_none() or Country(code="MMR") 76 | naypyidaw = Location(description="Nay Pyi Taw", country=mmr, latlong='19°45′N 96°6′E') 77 | self.session.add(mmr) 78 | self.assertIn(naypyidaw, self.session.query(Location).filter_by(country=mmr)) 79 | 80 | def test_report(self): 81 | article = None 82 | report = None 83 | mmr = self.session.query(Country).filter_by(code="MMR").one_or_none() or Country(code="MMR") 84 | bgd = self.session.query(Country).filter_by(code="BGD").one_or_none() or Country(code="BGD") 85 | try: 86 | article = Article(url='http://example.com', 87 | domain='example.com', 88 | status=Status.NEW) 89 | report = Report(article=article, 90 | accuracy=0.55, 91 | event_term='evacuation', 92 | 
subject_term='family', 93 | quantity='72') 94 | self.session.add(report) 95 | self.session.commit() # have to commit here to get the ID set 96 | 97 | naypyidaw = Location(description="Nay Pyi Taw", country=mmr, latlong='19°45′N 96°6′E') 98 | report.locations.append(naypyidaw) 99 | dhaka = Location(description="Dhaka", country=bgd) 100 | report.locations.append(dhaka) 101 | now = datetime.now() 102 | when = ReportDateSpan(report=report, start=datetime.today(), finish=now) 103 | 104 | article2 = self.session.query(Article).filter_by(domain='example.com').first() 105 | self.assertEqual(len(article2.reports), 1) 106 | 107 | article3 = self.session.query(Article).join(Report).filter(Report.locations.contains(dhaka)).first() 108 | self.assertEqual(len(article3.reports), 1) 109 | finally: 110 | self.session.rollback() 111 | if report: 112 | self.session.delete(report) 113 | if article: 114 | self.session.delete(article) 115 | self.session.commit() 116 | 117 | def test_report_delete(self): 118 | article = None 119 | report = None 120 | mmr = self.session.query(Country).filter_by(code="MMR").one_or_none() or Country(code="MMR") 121 | bgd = self.session.query(Country).filter_by(code="BGD").one_or_none() or Country(code="BGD") 122 | try: 123 | article = Article(url='http://example.com', 124 | domain='example.com', 125 | status=Status.NEW) 126 | report = Report(article=article, 127 | accuracy=0.55, 128 | event_term='evacuation', 129 | subject_term='family', 130 | quantity='72') 131 | self.session.add(report) 132 | 133 | naypyidaw = Location(description="Nay Pyi Taw", country=mmr, latlong='19°45′N 96°6′E') 134 | report.locations.append(naypyidaw) 135 | dhaka = Location(description="Dhaka", country=bgd) 136 | report.locations.append(dhaka) 137 | now = datetime.now() 138 | when = ReportDateSpan(report=report, start=datetime.today(), finish=now) 139 | 140 | self.session.commit() 141 | report_id = report.id 142 | self.session.query(Report).filter_by(article=article).delete() 143 | report = None 144 | self.session.commit() 145 | self.assertEqual(self.session.query(ReportDateSpan).filter_by(report_id=report_id).all(), []) 146 | finally: 147 | self.session.rollback() 148 | if report: 149 | self.session.delete(report) 150 | if article: 151 | self.session.delete(article) 152 | self.session.commit() 153 | 154 | def test_status_update(self): 155 | article = Article(url='http://example.com', 156 | domain='example.com', 157 | status=Status.NEW) 158 | self.session.add(article) 159 | self.session.commit() 160 | 161 | article.update_status(Status.FETCHING) 162 | self.session.commit() 163 | self.assertEqual(article.status, Status.FETCHING) 164 | 165 | # meanwhile, some other process changed the status of this... 
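# (update_status() issues a guarded UPDATE that matches both the id and the
# status read earlier; when a concurrent writer has already changed the row,
# zero rows match and UnexpectedArticleStatusException is raised rather than
# silently overwriting the newer status)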
166 | self.session.execute("UPDATE article SET status = :status WHERE id = :id", 167 | { 'status': Status.FETCHING_FAILED, 'id': article.id}) 168 | 169 | with self.assertRaises(UnexpectedArticleStatusException): 170 | article.update_status(Status.FETCHED) -------------------------------------------------------------------------------- /internal_displacement/tests/test_report.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from internal_displacement.report import Report 3 | 4 | class TestReport(TestCase): 5 | 6 | 7 | def setUp(self): 8 | pass 9 | def tearDown(self): 10 | pass 11 | 12 | 13 | def test_equality(self): 14 | test_report_1 = Report(["Some Place"],["Yesterday"],"destroyed","house",12,"Yesterday 12 houses were destroyed.") 15 | test_report_2 = Report(["Some Place"],["Yesterday"],"destroyed","house",12,"Yesterday 12 houses were destroyed.") 16 | test_report_3 = Report(["Some Place"],["Yesterday"],"destroyed","house",13,"Yesterday 13 houses were destroyed.") 17 | self.assertEqual(test_report_1,test_report_2) 18 | self.assertNotEqual(test_report_1,test_report_3) -------------------------------------------------------------------------------- /internal_displacement/textract_requirements.txt: -------------------------------------------------------------------------------- 1 | This file contains a list of packages required to get textract to run. 2 | Documentation: 3 | https://textract.readthedocs.io/en/stable/ 4 | https://github.com/deanmalmgren/textract 5 | Requirements (from github page): 6 | # This file contains all python dependencies that are required by the 7 | # textract package in order for it to properly work 8 | argcomplete 9 | chardet 10 | python-pptx>=0.5.1 11 | #pdfminer.six <-- go back to this after the shebang fix is released (see https://github.com/goulu/pdfminer/issues/27) 12 | https://github.com/goulu/pdfminer/zipball/e6ad15af79a26c31f4e384d8427b375c93b03533#egg=pdfminer.six 13 | docx2txt 14 | beautifulsoup4 15 | xlrd 16 | EbookLib 17 | SpeechRecognition>=3.1.0 18 | https://github.com/mattgwwalker/msg-extractor/zipball/master 19 | six 20 | 21 | For what it's worth, following the recommended installation procedure should be 22 | all you need to do. I found that I had to install pdfminer.six separately, and 23 | that did the trick. 
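With these requirements installed, the extraction call that scraper.py makes can be exercised on its own; a minimal sketch (assumes some PDF has been saved locally as file_to_convert.pdf, the working filename the scraper itself uses):

import textract

# same call and cleanup as Scraper.get_body_text()
text = str(textract.process('file_to_convert.pdf', method='pdfminer'), 'utf-8')
text = text.replace('\n', ' ').replace('\xa0', ' ')
print(text[:200])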
24 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/newspaper-scrape-tests-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import newspaper\n", 12 | "import csv\n", 13 | "import pandas as pd" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 3, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "file = 'data/training_dataset.csv'" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "def urls_from_csv(csv_file, column=None):\n", 36 | " '''\n", 37 | " Takes csv directory and returns list of URLs\n", 38 | " '''\n", 39 | " with open(csv_file, 'r') as f:\n", 40 | " reader = csv.reader(f)\n", 41 | " contents = list(reader)\n", 42 | " \n", 43 | " urls = [line[1] for line in contents[1:]]\n", 44 | " return urls, contents" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 5, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "def urls_to_df(csv_file, column=None):\n", 56 | " '''\n", 57 | " Takes csv directory and returns list of URLs\n", 58 | " '''\n", 59 | " df = pd.read_csv(csv_file)\n", 60 | " df.columns = [x.lower() for x in df.columns]\n", 61 | " urls = list(df['url'])\n", 62 | " return urls, df" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 6, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "urls, contents = urls_from_csv(file)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 7, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "def remove_newline(text):\n", 85 | " ''' Removes new line and   characters.\n", 86 | " '''\n", 87 | " text = text.replace('\\n', ' ')\n", 88 | " text = text.replace('\\xa0', ' ')\n", 89 | " return text" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 8, 95 | "metadata": { 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "def html_report(link, nlp=False):\n", 101 | " report = {}\n", 102 | " a = newspaper.Article(link)\n", 103 | " a.download()\n", 104 | " a.parse()\n", 105 | " report['domain'] = a.source_url\n", 106 | " report['title'] = a.title\n", 107 | " report['authors'] = a.authors\n", 108 | " report['date_pub'] = a.publish_date\n", 109 | " report['text'] = remove_newline(a.text)\n", 110 | " # tag the type of article\n", 111 | " ## currently default to text but should be able to determine img/video etc\n", 112 | " report['type'] = 'text'\n", 113 | " return report" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 9, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "urls, df = urls_to_df(file)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 10, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "def scrape_from_urls(urls):\n", 136 | " reports = []\n", 137 | " for url in urls:\n", 138 | " if url[-3:] == 'pdf':\n", 139 | " continue\n", 140 | " else:\n", 141 | " report = html_report(url)\n", 142 | " 
reports.append(report)\n", 143 | " \n", 144 | " return reports" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 11, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "url = urls[1]" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 12, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "a = newspaper.Article(url)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 13, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "a.download()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 14, 183 | "metadata": { 184 | "collapsed": false 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "a.parse()" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [], 198 | "source": [] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 15, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [ 207 | { 208 | "ename": "NameError", 209 | "evalue": "name 'report' is not defined", 210 | "output_type": "error", 211 | "traceback": [ 212 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 213 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 214 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mkeys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreport\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdict_writer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcsv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDictWriter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfieldnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mdict_writer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwriteheader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdict_writer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwriterows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreport\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 215 | "\u001b[0;31mNameError\u001b[0m: name 'report' is not defined" 216 | ] 217 | } 218 | ], 219 | "source": [ 220 | "keys = report[0].keys()\n", 221 | "with open('data.csv', 'w') as f:\n", 222 | " dict_writer = csv.DictWriter(f, fieldnames=keys)\n", 223 | " dict_writer.writeheader()\n", 224 | " dict_writer.writerows(report)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "collapsed": true 232 | }, 233 | "outputs": [], 234 | "source": [] 235 | } 236 | ], 237 | "metadata": { 238 | "anaconda-cloud": {}, 239 | "kernelspec": { 240 | "display_name": "Python [d4d-internal-displacement]", 241 | "language": "python", 
242 | "name": "Python [d4d-internal-displacement]" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.6.0" 255 | } 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 0 259 | } 260 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/sql-concurrent-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /notebooks/information_extraction/get_abs_date_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "import datetime\n", 14 | "import parsedatetime\n", 15 | "from functools import reduce" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": true, 23 | "deletable": true, 24 | "editable": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "def get_absolute_date(relative_date_string, publication_date=None):\n", 29 | " \"\"\"\n", 30 | " Turn relative dates into absolute datetimes.\n", 31 | " Currently uses API of parsedatetime\n", 32 | " https://bear.im/code/parsedatetime/docs/index.html\n", 33 | "\n", 34 | " Parameters:\n", 35 | " -----------\n", 36 | " relative_date_string the relative date in an article (e.g. 
'Last week'): String\n", 37 | " publication_date the publication_date of the article: datetime\n", 38 | " \n", 39 | " Returns:\n", 40 | " --------\n", 41 | " One of: \n", 42 | " - a datetime that represents the absolute date of the relative date based on \n", 43 | " the publication_date\n", 44 | " - None, if parse is not successful\n", 45 | " \"\"\"\n", 46 | "\n", 47 | " cal = parsedatetime.Calendar()\n", 48 | " parsed_result = cal.nlp(relative_date_string, publication_date)\n", 49 | " if parsed_result is not None:\n", 50 | " # Parse is successful\n", 51 | " parsed_absolute_date = parsed_result[0][0]\n", 52 | "\n", 53 | " # Assumption: input date string is in the past\n", 54 | " # If parsed date is in the future (relative to publication_date), \n", 55 | " # we roll it back to the past\n", 56 | " \n", 57 | " if publication_date and parsed_absolute_date > publication_date:\n", 58 | " # parsedatetime returns a date in the future\n", 59 | " # likely because year isn't specified or date_string is relative\n", 60 | " \n", 61 | " # Check a specific date is included\n", 62 | " # TODO: Smarter way or regex to check if relative_date_string \n", 63 | " # contains a month name?\n", 64 | " months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', \n", 65 | " 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']\n", 66 | " contains_month = reduce( \n", 67 | " lambda result, month: result or relative_date_string.lower().find(month) != -1, \n", 68 | " months, False)\n", 69 | " \n", 70 | " if contains_month:\n", 71 | " # TODO: Is it enough to just check for month names to determine if a \n", 72 | " # date_string specifies a particular date?\n", 73 | "\n", 74 | " # If date is specified explicity, and year is not\n", 75 | " # roll back 1 year\n", 76 | " return datetime.datetime(parsed_absolute_date.year-1, \n", 77 | " parsed_absolute_date.month, parsed_absolute_date.day)\n", 78 | " else:\n", 79 | " # Use the relative datetime delta and roll back\n", 80 | " delta = parsed_absolute_date - publication_date\n", 81 | " num_weeks = int(delta.days/7)\n", 82 | " and_num_days_after = 7 if delta.days%7 == 0 else delta.days%7\n", 83 | " return publication_date - datetime.timedelta(weeks=num_weeks) - \\\n", 84 | " datetime.timedelta(7-and_num_days_after)\n", 85 | " else:\n", 86 | " # Return if date is in the past already or no publication_date is provided\n", 87 | " return parsed_absolute_date\n", 88 | " else:\n", 89 | " # Parse unsucessful\n", 90 | " return None" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": { 96 | "deletable": true, 97 | "editable": true 98 | }, 99 | "source": [ 100 | "## Year is not specified" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": false, 108 | "deletable": true, 109 | "editable": true 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | " publication_date = datetime.datetime(2016, 10, 30, 18, 0)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": false, 121 | "deletable": true, 122 | "editable": true 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "# Before publication_date\n", 127 | "get_absolute_date('28th December', publication_date)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false, 135 | "deletable": true, 136 | "editable": true 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "# After publication date\n", 141 | "get_absolute_date('26th October', 
publication_date)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "collapsed": false, 149 | "deletable": true, 150 | "editable": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "get_absolute_date('1 January', publication_date)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": { 160 | "deletable": true, 161 | "editable": true 162 | }, 163 | "source": [ 164 | "## Relative date string" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "collapsed": false, 172 | "deletable": true, 173 | "editable": true 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "get_absolute_date('2 weeks ago', publication_date)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": false, 185 | "deletable": true, 186 | "editable": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "get_absolute_date('3:30pm', publication_date)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "deletable": true, 197 | "editable": true 198 | }, 199 | "source": [ 200 | "## Year is specified" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false, 208 | "deletable": true, 209 | "editable": true 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "get_absolute_date('March 3 2014', publication_date)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": { 219 | "deletable": true, 220 | "editable": true 221 | }, 222 | "source": [ 223 | "This is considered **invalid** for now, since we assume articles only contain dates in the past (future enhancement)." 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "collapsed": false, 231 | "deletable": true, 232 | "editable": true 233 | }, 234 | "outputs": [], 235 | "source": [ 236 | "get_absolute_date('March 3 2018', publication_date)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": true, 244 | "deletable": true, 245 | "editable": true 246 | }, 247 | "outputs": [], 248 | "source": [] 249 | } 250 | ], 251 | "metadata": { 252 | "kernelspec": { 253 | "display_name": "Python 3", 254 | "language": "python", 255 | "name": "python3" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "name": "ipython", 260 | "version": 3 261 | }, 262 | "file_extension": ".py", 263 | "mimetype": "text/x-python", 264 | "name": "python", 265 | "nbconvert_exporter": "python", 266 | "pygments_lexer": "ipython3", 267 | "version": "3.4.4" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 2 272 | } 273 | -------------------------------------------------------------------------------- /notebooks/scraping_and_db/DatabaseExample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Example of database usage\n", 8 | "## Create a session using the values in `docker.env`" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": { 15 | "collapsed": true, 16 | "deletable": true, 17 | "editable": true 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import os\n", 22 | "from sqlalchemy import create_engine\n", 23 | "from 
internal_displacement.model.model import Session\n", 24 | "\n", 25 | "db_url = 'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}/{DB_NAME}'.format(**os.environ)\n", 26 | "engine = create_engine(db_url)\n", 27 | "Session.configure(bind=engine)\n", 28 | "session = Session()" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Query for all Articles in the DB" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": false, 43 | "deletable": true, 44 | "editable": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "from internal_displacement.model.model import Article, Status\n", 49 | "\n", 50 | "session.query(Article).all()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## Add an Article" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": true, 65 | "deletable": true, 66 | "editable": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "article = Article(url='http://example.com',\n", 71 | " domain='example.com',\n", 72 | " status=Status.NEW)\n", 73 | "session.add(article)\n", 74 | "session.commit()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## See that the article was persisted" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": false, 89 | "deletable": true, 90 | "editable": true 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "session.query(Article).all()" 95 | ] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python 3", 101 | "language": "python", 102 | "name": "python3" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 3 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython3", 114 | "version": "3.5.2" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 2 119 | } 120 | -------------------------------------------------------------------------------- /notebooks/scraping_and_db/TestDatabase.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "# Run DB Unit tests" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "collapsed": false, 18 | "deletable": true, 19 | "editable": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import unittest\n", 24 | "from internal_displacement.tests.test_model import TestModel\n", 25 | "suite = unittest.TestLoader().loadTestsFromTestCase(TestModel)\n", 26 | "unittest.TextTestRunner(verbosity=3).run(suite)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": true, 34 | "deletable": true, 35 | "editable": true 36 | }, 37 | "outputs": [], 38 | "source": [] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | 
"pygments_lexer": "ipython3", 57 | "version": "3.5.2" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 0 62 | } 63 | -------------------------------------------------------------------------------- /notebooks/scraping_and_db/sql-concurrent.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 27, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import csv\n", 12 | "import concurrent\n", 13 | "import sqlite3\n", 14 | "import pandas as pd" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 29, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "sql_connection = sqlite3.connect('test.sqlite', isolation_level=None)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 30, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "sql_cursor = sql_connection.cursor()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 31, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "" 50 | ] 51 | }, 52 | "execution_count": 31, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "sql_cursor.execute(\"CREATE TABLE IF NOT EXISTS Articles (title TEXT, url TEXT,author TEXT,datetime TEXT,domain TEXT, content TEXT, content_type TEXT)\")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 32, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "" 72 | ] 73 | }, 74 | "execution_count": 32, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "sql_cursor.execute(\"CREATE TABLE IF NOT EXISTS Labels (url TEXT,category TEXT)\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 33, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "url = 'www.theguardian.co.uk'\n", 92 | "authors = 'GR'\n", 93 | "pub_date = 'Jan'\n", 94 | "domain = 'guardian'\n", 95 | "content = 'Some stuff happened'\n", 96 | "content_type = 'article'\n", 97 | "title = 'The stuff'" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 34, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "" 111 | ] 112 | }, 113 | "execution_count": 34, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "sql_cursor.execute(\"INSERT INTO Articles VALUES (?,?,?,?,?,?,?)\", (title, url, authors, pub_date, domain, content, content_type))" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 35, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "sql_connection.commit()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 40, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "['www.theguardian.co.uk']" 144 | ] 145 | }, 146 | "execution_count": 40, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "[r[0] for r in sql_cursor.execute(\"SELECT url FROM Articles\")]" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | 
"execution_count": 42, 158 | "metadata": { 159 | "collapsed": true 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "df = pd.read_sql_query(\"SELECT * from Articles\", sql_connection)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 50, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "from urllib.parse import urlparse" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 57, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "dave = urlparse('http://www.google.com/some-stuff-about-things').hostname" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 58, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "'www.google.com'" 199 | ] 200 | }, 201 | "execution_count": 58, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [ 207 | "dave" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "collapsed": true 215 | }, 216 | "outputs": [], 217 | "source": [] 218 | } 219 | ], 220 | "metadata": { 221 | "kernelspec": { 222 | "display_name": "Python [d4d-internal-displacement]", 223 | "language": "python", 224 | "name": "Python [d4d-internal-displacement]" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.6.0" 237 | } 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 0 241 | } 242 | -------------------------------------------------------------------------------- /notebooks/test.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/notebooks/test.sqlite -------------------------------------------------------------------------------- /notebooks/visualize_tagged_articles.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 81 | 82 |
83 |

Article 1

84 |

85 | Flash flooding across Afghanistan and Pakistan has left more than 160 dead and dozens stranded in one of South Asia's worst natural disasters this year, say officials. The flooding, caused by unusually heavy rain, has left villagers stuck in remote areas without shelter, food or power. Mountainous Afghanistan was the worst hit, with 61 people killed and approximately 500 traditional mud-brick homes washed away in more than a dozen villages in Sarobi, a rural district less than an hour from Kabul, officials said. Floods left a village devastated in the remote eastern Afghan province of Nuristan. At least 60 homes were destroyed across three districts, said provincial spokesman Mohammad Yusufi. No one was killed. Authorities have been unable to deliver aid to some badly affected villages by land as roads in the area are controlled by the Taliban, Yusufi added. “We have asked the national government for help as have an overwhelming number of locals asking for assistance, but this is a Taliban-ridden area,” Yusufi said. At least 24 people also died in two other eastern border provinces, Khost and Nangarhar, according to local officials. More than fifty homes and shops were destroyed and thousands of acres of farmland flooded. In Pakistan monsoon rains claimed more than 80 lives, local media reported. Houses collapsing, drowning and electrocution all pushed up the death toll, said Sindh Information Minister Sharjeel Inam Memon. In Karachi, the commercial capital and a southern port city that is home to 18 million people, poor neighborhoods were submerged waist-deep in water and many precincts suffered long power outages. Deaths were also reported in the north and west of the country. 86 |

87 |
88 | 89 |
90 |

Article 2

91 |

92 | Afghanistan state news agency, Bakhtar News Agency (BNA), report that at least 7 people have been killed in flash floods in Faryab Province in the north of the country. Flash floods in Baghlan Province have killed 1 person and injured around 10 others. Flash floods struck on 08 May 2015 in Faryab Province after a period of heavy rainfall. The districts of Garyzan, Pashtunkot and Belcheragh were worst affected. BNA report that at least 7 people were killed and over 1,500 homes damaged. The Faizabad-Takhar highway has been closed to traffic and wide areas of crops and orchards have suffered damage. Kuwaiti News Agency (KUNA) also report that flooding struck in the Baghlan-i-Markazi district of Baghlan province, where 1 person was killed and several injured early on Saturday 09 May 2015. “There was heavy rain in Baghlan-e-Markazi district Friday evening and the people left their houses to safer areas. It was early Saturday when a flash flood hit the area and washed away more than 500 houses,” district Governor Gohar Khan Babri told reporters in provincial capital Pul-e-Khumri, 160 km north of Kabul. 93 |

94 |
95 | 96 | 97 | -------------------------------------------------------------------------------- /production-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | jupyter: 4 | image: aneel/internal-displacement-jupyter:spacy 5 | command: sh -c "jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks" 6 | stdin_open: true 7 | tty: true 8 | ports: 9 | - "3323:3323" 10 | env_file: production.env 11 | nodejs: 12 | build: internal-displacement-web 13 | image: internal-displacement-web 14 | volumes: 15 | - ./internal-displacement-web/src:/internal-displacement-web/src 16 | - ./internal-displacement-web/package.json:/internal-displacement-web/package.json 17 | - ./internal-displacement-web/index.html:/internal-displacement-web/index.html 18 | ports: 19 | - "3322:3322" 20 | env_file: production.env 21 | nginx: 22 | build: docker/nginx 23 | image: nginx 24 | ports: 25 | - "80:80" 26 | depends_on: 27 | - nodejs 28 | -------------------------------------------------------------------------------- /production.env: -------------------------------------------------------------------------------- 1 | DB_HOST=internal-displacement.cf1y5y4ffeey.us-west-2.rds.amazonaws.com 2 | DB_USER=d4d 3 | DB_PASS= 4 | DB_NAME=id 5 | PYTHONPATH=/internal-displacement 6 | NODE_ENV=production 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appnope==0.1.0 2 | beautifulsoup4==4.5.3 3 | bleach==1.5.0 4 | cssselect==1.0.1 5 | decorator==4.0.11 6 | entrypoints==0.2.2 7 | feedfinder2==0.0.4 8 | feedparser==5.2.1 9 | html5lib==0.9999999 10 | idna==2.2 11 | ipykernel==4.5.2 12 | ipython==5.2.2 13 | ipython-genutils==0.1.0 14 | ipywidgets==5.2.2 15 | jieba3k==0.35.1 16 | Jinja2==2.9.5 17 | jsonschema==2.5.1 18 | jupyter==1.0.0 19 | jupyter-client==4.4.0 20 | jupyter-console==5.1.0 21 | jupyter-core==4.2.1 22 | langdetect==1.0.7 23 | lxml==3.7.2 24 | MarkupSafe==0.23 25 | mistune==0.7.3 26 | nbconvert==5.1.1 27 | nbformat==4.2.0 28 | newspaper3k==0.1.9 29 | nltk==3.2.2 30 | nose==1.3.7 31 | notebook==4.3.2 32 | numpy==1.12.1 33 | olefile==0.44 34 | pandas==0.19.2 35 | pandocfilters==1.4.1 36 | parsedatetime==2.3 37 | pexpect==4.2.1 38 | pickleshare==0.7.4 39 | Pillow==4.0.0 40 | prompt-toolkit==1.0.13 41 | psycopg2==2.6.2 42 | ptyprocess==0.5.1 43 | pycountry==17.1.8 44 | Pygments==2.2.0 45 | python-dateutil==2.6.0 46 | pytz==2016.10 47 | PyYAML==3.12 48 | pyzmq==16.0.2 49 | qtconsole==4.2.1 50 | records==0.5.0 51 | requests==2.13.0 52 | requests-file==1.4.1 53 | simplegeneric==0.8.1 54 | six==1.10.0 55 | spacy>=1.7.5 56 | SQLAlchemy==1.1.5 57 | terminado==0.6 58 | testpath==0.3 59 | textacy==0.3.3 60 | tldextract==2.0.2 61 | tornado==4.4.2 62 | traitlets==4.3.1 63 | wcwidth==0.1.7 64 | widgetsnbextension==1.2.6 65 | -------------------------------------------------------------------------------- /sql_db.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/sql_db.sqlite -------------------------------------------------------------------------------- /workplan.md: -------------------------------------------------------------------------------- 1 | 2 | ### Raw Data 3 | We already have certain datasets containing URLs. 
4 | 5 | __Activities:__ 6 | - *Do we want to obtain more URLs for our own dev / testing?* 7 | 8 | ### Scraping 9 | To be moved to the `info-sources` repo. Interested parties can contribute there. 10 | 11 | Also covers the following two points from Filtering: 12 | - Identify language of a document (English vs. not English) 13 | - Broken URLs 14 | 15 | __Activities:__ 16 | - Maintain some sort of link to `info-sources` to understand how these tools can be used / fit into the `internal-displacement` pipeline, 17 | i.e., function arguments, what they return, API-type stuff. 18 | 19 | ### Pipeline 20 | Controls the input and output of data with respect to scraping and interpreting articles. 21 | 22 | __Activities:__ 23 | - Define how to export / store articles (discussion): 24 | + CSVs for now, but likely some sort of database will be necessary to facilitate the online / interactive tool for modeling and analysis 25 | - Take new data and extract only the URLs, converting them into a list 26 | - Check and keep a log of all previously scraped URLs 27 | - Check whether a URL is from a domain on an RSS whitelist (future) 28 | - Manage output from the scraper and either append to the original database or create a new one 29 | 30 | (please see [#86](https://github.com/Data4Democracy/internal-displacement/issues/86)) 31 | 32 | ### Filtering (Interpreter) 33 | The third filtering requirement is more specific to the `internal-displacement` domain: 34 | 35 | - Filtering out documents not reporting on human mobility (binary classification) 36 | 37 | __Activities:__ 38 | - Implement filtering out of documents not reporting on human mobility (modeling) 39 | 40 | > @milanoleonardo: *'this can be done by looking at the dependency trees of the sentences in the text to make sure there is a link between a “reporting term” and a “reporting unit” (see challenge for details). This would definitely remove all documents reporting on “hip displacement” or sentences like “displaced the body of people” etc.'* 41 | 42 | *How to test this functionality? Build some hand-crafted examples of things that shouldn't be included?* 43 | 44 | ### Tagging (Interpreter) 45 | 46 | - Classification of documents into 3 classes (Disasters, Conflict & Violence, Other) 47 | 48 | __Activities:__ 49 | - Select an approach / model that will allow for online learning or re-training in the future with new datasets. (discussion) 50 | - Build and train a classifier for classifying into the 3 required classes. (modeling) 51 | 52 | > @milanoleonardo: *'the best would be to set a fixed threshold on the probability distribution and assign a tag based on the content of the document.'* 53 | 54 | ### NLP 55 | 56 | "Fact extraction" from documents: 57 | - Publication date, location (ISO 3166 country codes), reporting term, reporting units etc. (see the sketch below) 
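A minimal sketch of what fact extraction could look like, using `pycountry` and `parsedatetime` (both pinned in `requirements.txt`). The term and unit vocabularies below are illustrative placeholders rather than the project's actual lists, and `extract_facts` is a hypothetical helper, not an existing function:

```python
# Illustrative sketch only; the real logic belongs in internal_displacement/interpreter.py.
import parsedatetime
import pycountry

REPORTING_TERMS = {'displaced', 'evacuated', 'fled'}          # placeholder vocabulary
REPORTING_UNITS = {'people', 'persons', 'families', 'homes'}  # placeholder vocabulary

def extract_facts(text, publication_date=None):
    """Return naive fact candidates found in `text`."""
    tokens = set(text.lower().split())
    facts = {
        'reporting_terms': sorted(REPORTING_TERMS & tokens),
        'reporting_units': sorted(REPORTING_UNITS & tokens),
        # ISO 3166 alpha-3 codes for any country name mentioned verbatim
        'countries': [c.alpha_3 for c in pycountry.countries
                      if c.name.lower() in text.lower()],
    }
    # Same parsedatetime approach as notebooks/information_extraction/get_abs_date_test.ipynb
    parsed = parsedatetime.Calendar().nlp(text, publication_date)
    facts['dates'] = [match[0] for match in parsed] if parsed else []
    return facts

print(extract_facts('Flash floods displaced 500 families in Afghanistan last week.'))
```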
58 | 59 | __Activities:__ 60 | - Select NLP tool or framework (discussion) 61 | - Build and test a working tool for extracting facts (modeling) 62 | 63 | ### Article Class 64 | 65 | __Activities:__ 66 | - Define the properties each Article needs to have and fill out code for instantiating new Articles (beginner friendly) 67 | - Create / fill out functions that update article properties by calling and using return values from Scraper and Interpreter functions (beginner friendly) 68 | - Fill out function for saving articles along with relevant properties (beginner friendly) 69 | 70 | ### Visualization 71 | 72 | Including but not limited to an interactive map and histograms 73 | 74 | __Activities:__ 75 | - Design of visualizations (data-viz) 76 | - Selection of tool for online visualizations (e.g. D3) (discussion) 77 | - Create visualization functions that take in data in a standard format and produce the desired interactive visualizations (data-viz) 78 | 79 | ### Quantitative Analysis 80 | 81 | Online tool that allows analysts to interact directly with data, choose what they visualize and how, etc. 82 | 83 | __Activities:__ 84 | - Design / build front-end page(s) for analysts 85 | - Create back-end functionality for connecting to the database and returning necessary data, facts etc. 86 | 87 | 88 | ### Data Engineering 89 | 90 | We will need to construct a data pipeline / workflow to manage the end-to-end process, both for batch processing of files as well as (potentially) real-time processing of individual URLs: 91 | 92 | + Data collection from various sources, e.g. existing CSV files, new files provided by analysts, etc. 93 | + Data pre-processing - applying the filtering tools created to exclude broken, irrelevant and non-English articles (a sketch of this step appears at the end of this file) 94 | + Article classification - applying the pre-trained classifier, or training a new classifier 95 | + Fact Extraction - using NLP tools for extracting the key facts from the articles 96 | + Data storage - saving the article along with relevant tags and extracted facts 97 | + API for enabling data analysts to interact with the data 98 | 99 | 100 | ### All Deliverables: 101 | 102 | - URL to working version of the tool 103 | - Source code repo 104 | - Analysis of the test dataset 105 | - User guide 106 | - Admin guide 107 | 108 | __Activities:__ 109 | - Create, maintain and update user guide (documentation) 110 | - Create, maintain and update admin guide (documentation) 111 | 112 | 113 | ### Possible Libraries 114 | 115 | ___NLP:___ 116 | - nltk 117 | - TensorFlow 118 | - spaCy 119 | 120 | ___Text parsing and fact extraction:___ 121 | - mordecai - Geoparsing (extracting relevant country) 122 | - Newspaper module (Python 3) 123 | - goose-extractor - Text + meta-data extraction (Python 2 only) 124 | 125 | 126 | --------------------------------------------------------------------------------
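To close out the Data Engineering section of `workplan.md`, here is a minimal sketch of the batch pre-processing step, filtering out broken URLs and non-English documents with `requests` and `langdetect` (both in `requirements.txt`). The function name and example URL are illustrative assumptions, not existing project code:

```python
# Sketch of the pre-processing step from workplan.md: drop broken URLs
# and non-English documents before classification and fact extraction.
import requests
from langdetect import detect

def preprocess(urls):
    """Yield (url, text) pairs for reachable, English-language documents."""
    for url in urls:
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # treat 4xx/5xx responses as broken URLs
        except requests.RequestException:
            continue
        try:
            if detect(response.text) != 'en':  # keep English documents only
                continue
        except Exception:  # langdetect raises if the text has no detectable features
            continue
        yield url, response.text

# Hypothetical usage:
# for url, text in preprocess(['http://example.com/article']):
#     ...hand off to the classifier and fact extraction...
```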