├── .dockerignore
├── .gitignore
├── .gitmodules
├── Dockerfile
├── PRODUCTION.md
├── README.md
├── data
│   ├── IDETECT_test_dataset - NLP.csv.xlsx
│   ├── IDMC_fully_labelled.csv.xlsx
│   ├── cities_to_countries.json
│   ├── idmc_uniteideas_training_dataset.csv
│   └── stop_words_en_long.txt
├── docker-compose-dev.yml
├── docker-compose-spacy.yml
├── docker-compose.yml
├── docker.env
├── docker
│   ├── localdb
│   │   ├── Dockerfile
│   │   ├── docker-entrypoint-initdb.d
│   │   │   ├── id.sh
│   │   │   └── id_test.sh
│   │   └── schema.sql
│   └── nginx
│       ├── Dockerfile
│       ├── conf.d
│       │   └── node.conf
│       └── nginx.conf
├── environment.yml
├── fact_extraction.md
├── images
│   └── internal-displacement-plan.png
├── internal-displacement-web
│   ├── Dockerfile
│   ├── README.md
│   ├── client
│   │   ├── README.md
│   │   ├── package.json
│   │   ├── public
│   │   │   ├── favicon.ico
│   │   │   ├── images
│   │   │   │   └── banner.jpg
│   │   │   ├── index.html
│   │   │   └── themeJS
│   │   │       ├── ie
│   │   │       │   ├── backgroundsize.min.htc
│   │   │       │   ├── html5shiv.js
│   │   │       │   └── respond.min.js
│   │   │       ├── jquery.min.js
│   │   │       ├── jquery.scrollex.min.js
│   │   │       ├── jquery.scrolly.min.js
│   │   │       ├── main.js
│   │   │       ├── skel.min.js
│   │   │       └── util.js
│   │   ├── src
│   │   │   ├── Api
│   │   │   │   └── api.js
│   │   │   ├── App.css
│   │   │   ├── App.js
│   │   │   ├── App.test.js
│   │   │   ├── common
│   │   │   │   ├── Footer.js
│   │   │   │   └── Header.js
│   │   │   ├── components
│   │   │   │   ├── HomePage
│   │   │   │   │   └── index.js
│   │   │   │   ├── NotFound
│   │   │   │   │   ├── index.js
│   │   │   │   │   ├── index.spec.js
│   │   │   │   │   └── styles.css
│   │   │   │   └── UrlForm
│   │   │   │       └── index.js
│   │   │   ├── containers
│   │   │   │   ├── MapVizContainer
│   │   │   │   │   ├── MaVizPageV2.js
│   │   │   │   │   ├── MapVizExample.js
│   │   │   │   │   ├── MapVizPage.js
│   │   │   │   │   ├── MapVizPage.scatter.js
│   │   │   │   │   ├── actions
│   │   │   │   │   │   └── index.js
│   │   │   │   │   ├── components
│   │   │   │   │   │   ├── map
│   │   │   │   │   │   │   ├── index.js
│   │   │   │   │   │   │   └── mapboxTest.js
│   │   │   │   │   │   └── mapOverlays
│   │   │   │   │   │       ├── customScatterOverlay.js
│   │   │   │   │   │       ├── displacementHeatmapOverlay.js
│   │   │   │   │   │       ├── exampleGeojson.js
│   │   │   │   │   │       ├── geojsonDataOverlay.js
│   │   │   │   │   │       └── scatterplotOverlay.js
│   │   │   │   │   ├── constants
│   │   │   │   │   │   ├── actionTypes.js
│   │   │   │   │   │   └── mapConstants.js
│   │   │   │   │   ├── mapVis.css
│   │   │   │   │   ├── mapbox-gl.css
│   │   │   │   │   ├── reducers
│   │   │   │   │   │   ├── initialState.js
│   │   │   │   │   │   └── mapReducers.js
│   │   │   │   │   ├── sagas
│   │   │   │   │   │   ├── index.js
│   │   │   │   │   │   ├── mapDataSaga.js
│   │   │   │   │   │   └── watchers.js
│   │   │   │   │   ├── store
│   │   │   │   │   │   └── configureStore.js
│   │   │   │   │   └── vancouver-blocks.json
│   │   │   │   ├── app.js
│   │   │   │   └── home.js
│   │   │   ├── index.css
│   │   │   ├── index.js
│   │   │   ├── layout.js
│   │   │   ├── logo.svg
│   │   │   ├── reducers
│   │   │   │   └── index.js
│   │   │   ├── routes.js
│   │   │   ├── themeCss
│   │   │   │   ├── css
│   │   │   │   │   ├── font-awesome.min.css
│   │   │   │   │   ├── ie8.css
│   │   │   │   │   ├── ie9.css
│   │   │   │   │   ├── images
│   │   │   │   │   │   ├── arrow.svg
│   │   │   │   │   │   ├── banner.jpg
│   │   │   │   │   │   ├── bars.svg
│   │   │   │   │   │   └── close.svg
│   │   │   │   │   └── main.css
│   │   │   │   ├── fonts
│   │   │   │   │   ├── FontAwesome.otf
│   │   │   │   │   ├── fontawesome-webfont.eot
│   │   │   │   │   ├── fontawesome-webfont.svg
│   │   │   │   │   ├── fontawesome-webfont.ttf
│   │   │   │   │   ├── fontawesome-webfont.woff
│   │   │   │   │   └── fontawesome-webfont.woff2
│   │   │   │   └── images
│   │   │   │       ├── Screen Shot 2017-04-27 at 15.11.09 copy.png
│   │   │   │       ├── banner.jpg
│   │   │   │       ├── banner1.jpg
│   │   │   │       ├── d4d-logo-meetup-banner.png
│   │   │   │       ├── no02.jpg
│   │   │   │       ├── no03.jpg
│   │   │   │       ├── pic01.jpg
│   │   │   │       ├── pic02.jpg
│   │   │   │       ├── pic03.jpg
│   │   │   │       ├── pic04.jpg
│   │   │   │       └── pic05.jpg
│   │   │   └── utils
│   │   │       └── convertDataToGeojson.js
│   │   └── yarn.lock
│   ├── package.json
│   ├── server
│   │   ├── api
│   │   │   ├── reportLocationRequest.js
│   │   │   ├── sampleArticleRequest.js
│   │   │   └── test.js
│   │   ├── index.js
│   │   ├── package.json
│   │   ├── pgDB
│   │   │   └── index.js
│   │   ├── routes.js
│   │   └── yarn.lock
│   ├── src
│   │   └── db.js
│   └── yarn.lock
├── internal_displacement
│   ├── __init__.py
│   ├── add_countries.py
│   ├── article.py
│   ├── classifiers
│   │   ├── default_encoder.pkl
│   │   └── readme.txt
│   ├── excerpt_helper.py
│   ├── extracted_report.py
│   ├── fact.py
│   ├── interpreter.py
│   ├── model
│   │   └── model.py
│   ├── pipeline.py
│   ├── scraper.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_ExtractedReport.py
│   │   ├── test_Interpreter.py
│   │   ├── test_Pipeline.py
│   │   ├── test_Scraper.py
│   │   ├── test_coordinates_extraction.py
│   │   ├── test_fact_extraction.py
│   │   ├── test_model.py
│   │   └── test_report.py
│   └── textract_requirements.txt
├── notebooks
│   ├── .ipynb_checkpoints
│   │   ├── newspaper-scrape-tests-checkpoint.ipynb
│   │   ├── sql-concurrent-checkpoint.ipynb
│   │   └── tests-checkpoint.ipynb
│   ├── article_classification
│   │   ├── ArticleTaggingForVisualization.ipynb
│   │   ├── BagOfWordsArticleClassifier.ipynb
│   │   ├── classification-2.ipynb
│   │   ├── classification-data-prep.ipynb
│   │   ├── classification.ipynb
│   │   ├── crowdflower-classification-data.ipynb
│   │   └── lsi_svm_classification-old.ipynb
│   ├── classifiers
│   │   ├── ClassifyArticlesByTitle.ipynb
│   │   └── default_model.pkl
│   ├── information_extraction
│   │   ├── Current_Best_Results.ipynb
│   │   ├── DependencyTreeExperiments.ipynb
│   │   ├── DependencyTreeExperiments2.ipynb
│   │   ├── DependencyTreeExperiments3.ipynb
│   │   ├── DependencyTreeExperiments4-SB.ipynb
│   │   ├── DependencyTreeExperiments5-SB.ipynb
│   │   ├── DependencyTreeExperiments5.ipynb
│   │   ├── DependencyTreeExperiments6.ipynb
│   │   ├── FactExtractionTests.ipynb
│   │   ├── LocationCoordinatesExtractionTest.ipynb
│   │   ├── LocationExtractionTrial.ipynb
│   │   ├── article-text-parsing-attempt1 (refugees project).ipynb
│   │   ├── get_abs_date_test.ipynb
│   │   ├── nlp-spacy-exploration-2.ipynb
│   │   ├── nlp-spacy-exploration.ipynb
│   │   └── test_file_NLP.ipynb
│   ├── scraping_and_db
│   │   ├── AutomateReportGenerationTests.ipynb
│   │   ├── DB-Populate.ipynb
│   │   ├── DatabaseExample.ipynb
│   │   ├── Example_pipeline.ipynb
│   │   ├── Pipeline-2.ipynb
│   │   ├── TestDatabase.ipynb
│   │   ├── newspaper-scrape-tests.ipynb
│   │   ├── scraping_review.ipynb
│   │   └── sql-concurrent.ipynb
│   ├── test.sqlite
│   ├── tests.ipynb
│   └── visualize_tagged_articles.html
├── production-compose.yml
├── production.env
├── requirements.txt
├── sql_db.sqlite
└── workplan.md
/.dockerignore:
--------------------------------------------------------------------------------
1 | */.idea
2 | *.env
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Notebooks
2 | notebooks/test.ipynb
3 | notebooks/.ipynb_checkpoints/*.ipynb
4 | .ipynb_checkpoints/*.ipynb
5 | id-tests.ipynb
6 |
7 | # Test data
8 | *.sqlite
9 | *.pdf
10 |
11 | # Front end
12 | node_modules
13 | internal-displacement-web/dist
14 | internal_displacement/classifiers/default_model.pkl
15 | npm-debug.log
16 |
17 | # Misc
18 | *.pyc
19 | .idea/
20 | *.csv
21 |
22 | # Mac
23 | .DS_Store
24 | /internal-displacement-web/server/pgDB/pgConfig.js
25 |
26 | # ML Models
27 | /internal_displacement/classifiers/*
28 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/.gitmodules
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:latest
2 |
3 | # python basics
4 | RUN apt-get update && \
5 | apt-get -y install python3 python3-pip python3-dev
6 |
7 | # cld2-cffi doesn't install properly after the rest of the packages, for some reason
8 | RUN apt-get -y install libffi-dev && \
9 | pip3 install cld2-cffi
10 |
11 | # install the big packages and the ones with complex dependencies
12 | RUN apt-get -y install libxslt1-dev antiword unrtf poppler-utils pstotext \
13 | tesseract-ocr flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev \
14 | postgresql libpq-dev git && \
15 | pip3 install --upgrade pip && \
16 | pip3 install NumPy SciPy spacy && \
17 | pip3 install git+git://github.com/aerkalov/ebooklib.git && \
18 | pip3 install textract
19 |
20 | # download the spacy model using curl for progress indication
21 | # uncomment the below to include it in the build
22 | #RUN apt-get -y install curl && \
23 | # mkdir /spacy-data && \
24 | # curl -L -o "/spacy-data/en_core_web_md-1.2.1.tar.gz" $zflag \
25 | # https://github.com/explosion/spacy-models/releases/download/en_core_web_md-1.2.1/en_core_web_md-1.2.1.tar.gz
26 | #RUN pip3 install "/spacy-data/en_core_web_md-1.2.1.tar.gz" && \
27 | # python3 -m spacy link en_core_web_md en_default
28 |
29 | RUN mkdir /internal-displacement
30 | VOLUME /internal-displacement
31 | WORKDIR /internal-displacement
32 | COPY . /internal-displacement
33 |
34 | RUN pip3 install -r /internal-displacement/requirements.txt
35 |
36 | CMD jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks
37 |
--------------------------------------------------------------------------------
/PRODUCTION.md:
--------------------------------------------------------------------------------
1 | To run in Production:
2 |
3 | 1. Make sure that the correct DB password appears in `production.env`
4 |
5 | 2. ```docker-compose -f production-compose.yml up -d```
6 |
--------------------------------------------------------------------------------
/data/IDETECT_test_dataset - NLP.csv.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/data/IDETECT_test_dataset - NLP.csv.xlsx
--------------------------------------------------------------------------------
/data/IDMC_fully_labelled.csv.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/data/IDMC_fully_labelled.csv.xlsx
--------------------------------------------------------------------------------
/data/stop_words_en_long.txt:
--------------------------------------------------------------------------------
1 | a
2 | able
3 | about
4 | above
5 | abst
6 | accordance
7 | according
8 | accordingly
9 | across
10 | act
11 | actually
12 | added
13 | adj
14 | affected
15 | affecting
16 | affects
17 | after
18 | afterwards
19 | again
20 | against
21 | ah
22 | all
23 | almost
24 | alone
25 | along
26 | already
27 | also
28 | although
29 | always
30 | am
31 | among
32 | amongst
33 | an
34 | and
35 | announce
36 | another
37 | any
38 | anybody
39 | anyhow
40 | anymore
41 | anyone
42 | anything
43 | anyway
44 | anyways
45 | anywhere
46 | apparently
47 | approximately
48 | are
49 | aren
50 | arent
51 | arise
52 | around
53 | as
54 | aside
55 | ask
56 | asking
57 | at
58 | auth
59 | available
60 | away
61 | awfully
62 | b
63 | back
64 | be
65 | became
66 | because
67 | become
68 | becomes
69 | becoming
70 | been
71 | before
72 | beforehand
73 | begin
74 | beginning
75 | beginnings
76 | begins
77 | behind
78 | being
79 | believe
80 | below
81 | beside
82 | besides
83 | between
84 | beyond
85 | biol
86 | both
87 | brief
88 | briefly
89 | but
90 | by
91 | c
92 | ca
93 | came
94 | can
95 | cannot
96 | can't
97 | cause
98 | causes
99 | certain
100 | certainly
101 | co
102 | com
103 | come
104 | comes
105 | contain
106 | containing
107 | contains
108 | could
109 | couldnt
110 | d
111 | date
112 | did
113 | didn't
114 | different
115 | do
116 | does
117 | doesn't
118 | doing
119 | done
120 | don't
121 | down
122 | downwards
123 | due
124 | during
125 | e
126 | each
127 | ed
128 | edu
129 | effect
130 | eg
131 | eight
132 | eighty
133 | either
134 | else
135 | elsewhere
136 | end
137 | ending
138 | enough
139 | especially
140 | et
141 | et-al
142 | etc
143 | even
144 | ever
145 | every
146 | everybody
147 | everyone
148 | everything
149 | everywhere
150 | ex
151 | except
152 | f
153 | far
154 | few
155 | ff
156 | fifth
157 | first
158 | five
159 | fix
160 | followed
161 | following
162 | follows
163 | for
164 | former
165 | formerly
166 | forth
167 | found
168 | four
169 | from
170 | further
171 | furthermore
172 | g
173 | gave
174 | get
175 | gets
176 | getting
177 | give
178 | given
179 | gives
180 | giving
181 | go
182 | goes
183 | gone
184 | got
185 | gotten
186 | h
187 | had
188 | happens
189 | hardly
190 | has
191 | hasn't
192 | have
193 | haven't
194 | having
195 | he
196 | hed
197 | hence
198 | her
199 | here
200 | hereafter
201 | hereby
202 | herein
203 | heres
204 | hereupon
205 | hers
206 | herself
207 | hes
208 | hi
209 | hid
210 | him
211 | himself
212 | his
213 | hither
214 | home
215 | how
216 | howbeit
217 | however
218 | hundred
219 | i
220 | id
221 | ie
222 | if
223 | i'll
224 | im
225 | immediate
226 | immediately
227 | importance
228 | important
229 | in
230 | inc
231 | indeed
232 | index
233 | information
234 | instead
235 | into
236 | invention
237 | inward
238 | is
239 | isn't
240 | it
241 | itd
242 | it'll
243 | its
244 | itself
245 | i've
246 | j
247 | just
248 | k
249 | keep
250 | keeps
251 | kept
252 | kg
253 | km
254 | know
255 | known
256 | knows
257 | l
258 | largely
259 | last
260 | lately
261 | later
262 | latter
263 | latterly
264 | least
265 | less
266 | lest
267 | let
268 | lets
269 | like
270 | liked
271 | likely
272 | line
273 | little
274 | 'll
275 | look
276 | looking
277 | looks
278 | ltd
279 | m
280 | made
281 | mainly
282 | make
283 | makes
284 | many
285 | may
286 | maybe
287 | me
288 | mean
289 | means
290 | meantime
291 | meanwhile
292 | merely
293 | mg
294 | might
295 | million
296 | miss
297 | ml
298 | more
299 | moreover
300 | most
301 | mostly
302 | mr
303 | mrs
304 | much
305 | mug
306 | must
307 | my
308 | myself
309 | n
310 | na
311 | name
312 | namely
313 | nay
314 | nd
315 | near
316 | nearly
317 | necessarily
318 | necessary
319 | need
320 | needs
321 | neither
322 | never
323 | nevertheless
324 | new
325 | next
326 | nine
327 | ninety
328 | no
329 | nobody
330 | non
331 | none
332 | nonetheless
333 | noone
334 | nor
335 | normally
336 | nos
337 | not
338 | noted
339 | nothing
340 | now
341 | nowhere
342 | o
343 | obtain
344 | obtained
345 | obviously
346 | of
347 | off
348 | often
349 | oh
350 | ok
351 | okay
352 | old
353 | omitted
354 | on
355 | once
356 | one
357 | ones
358 | only
359 | onto
360 | or
361 | ord
362 | other
363 | others
364 | otherwise
365 | ought
366 | our
367 | ours
368 | ourselves
369 | out
370 | outside
371 | over
372 | overall
373 | owing
374 | own
375 | p
376 | page
377 | pages
378 | part
379 | particular
380 | particularly
381 | past
382 | per
383 | perhaps
384 | placed
385 | please
386 | plus
387 | poorly
388 | possible
389 | possibly
390 | potentially
391 | pp
392 | predominantly
393 | present
394 | previously
395 | primarily
396 | probably
397 | promptly
398 | proud
399 | provides
400 | put
401 | q
402 | que
403 | quickly
404 | quite
405 | qv
406 | r
407 | ran
408 | rather
409 | rd
410 | re
411 | readily
412 | really
413 | recent
414 | recently
415 | ref
416 | refs
417 | regarding
418 | regardless
419 | regards
420 | related
421 | relatively
422 | research
423 | respectively
424 | resulted
425 | resulting
426 | results
427 | right
428 | run
429 | s
430 | said
431 | same
432 | saw
433 | say
434 | saying
435 | says
436 | sec
437 | section
438 | see
439 | seeing
440 | seem
441 | seemed
442 | seeming
443 | seems
444 | seen
445 | self
446 | selves
447 | sent
448 | seven
449 | several
450 | shall
451 | she
452 | shed
453 | she'll
454 | shes
455 | should
456 | shouldn't
457 | show
458 | showed
459 | shown
460 | showns
461 | shows
462 | significant
463 | significantly
464 | similar
465 | similarly
466 | since
467 | six
468 | slightly
469 | so
470 | some
471 | somebody
472 | somehow
473 | someone
474 | somethan
475 | something
476 | sometime
477 | sometimes
478 | somewhat
479 | somewhere
480 | soon
481 | sorry
482 | specifically
483 | specified
484 | specify
485 | specifying
486 | still
487 | stop
488 | strongly
489 | sub
490 | substantially
491 | successfully
492 | such
493 | sufficiently
494 | suggest
495 | sup
496 | sure
497 | t
498 | take
499 | taken
500 | taking
501 | tell
502 | tends
503 | th
504 | than
505 | thank
506 | thanks
507 | thanx
508 | that
509 | that'll
510 | thats
511 | that've
512 | the
513 | their
514 | theirs
515 | them
516 | themselves
517 | then
518 | thence
519 | there
520 | thereafter
521 | thereby
522 | thered
523 | therefore
524 | therein
525 | there'll
526 | thereof
527 | therere
528 | theres
529 | thereto
530 | thereupon
531 | there've
532 | these
533 | they
534 | theyd
535 | they'll
536 | theyre
537 | they've
538 | think
539 | this
540 | those
541 | thou
542 | though
543 | thoughh
544 | thousand
545 | throug
546 | through
547 | throughout
548 | thru
549 | thus
550 | til
551 | tip
552 | to
553 | together
554 | too
555 | took
556 | toward
557 | towards
558 | tried
559 | tries
560 | truly
561 | try
562 | trying
563 | ts
564 | twice
565 | two
566 | u
567 | un
568 | under
569 | unfortunately
570 | unless
571 | unlike
572 | unlikely
573 | until
574 | unto
575 | up
576 | upon
577 | ups
578 | us
579 | use
580 | used
581 | useful
582 | usefully
583 | usefulness
584 | uses
585 | using
586 | usually
587 | v
588 | value
589 | various
590 | 've
591 | very
592 | via
593 | viz
594 | vol
595 | vols
596 | vs
597 | w
598 | want
599 | wants
600 | was
601 | wasnt
602 | way
603 | we
604 | wed
605 | welcome
606 | we'll
607 | went
608 | were
609 | werent
610 | we've
611 | what
612 | whatever
613 | what'll
614 | whats
615 | when
616 | whence
617 | whenever
618 | where
619 | whereafter
620 | whereas
621 | whereby
622 | wherein
623 | wheres
624 | whereupon
625 | wherever
626 | whether
627 | which
628 | while
629 | whim
630 | whither
631 | who
632 | whod
633 | whoever
634 | whole
635 | who'll
636 | whom
637 | whomever
638 | whos
639 | whose
640 | why
641 | widely
642 | willing
643 | wish
644 | with
645 | within
646 | without
647 | wont
648 | words
649 | world
650 | would
651 | wouldnt
652 | www
653 | x
654 | y
655 | yes
656 | yet
657 | you
658 | youd
659 | you'll
660 | your
661 | youre
662 | yours
663 | yourself
664 | yourselves
665 | you've
666 | z
667 | zero
--------------------------------------------------------------------------------
/docker-compose-dev.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | localdb:
4 | build: docker/localdb
5 | image: localdb
6 | jupyter:
7 | build: .
8 | image: internal-displacement
9 | command: sh -c "jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks"
10 | stdin_open: true
11 | tty: true
12 | volumes:
13 | - .:/internal-displacement
14 | ports:
15 | - "3323:3323"
16 | depends_on:
17 | - localdb
18 | env_file: docker.env
19 | nodejs:
20 | build: internal-displacement-web
21 | image: internal-displacement-web
22 | volumes:
23 | # client
24 | - ./internal-displacement-web/client/public:/internal-displacement-web/client/public
25 | - ./internal-displacement-web/client/src:/internal-displacement-web/client/src
26 | - ./internal-displacement-web/client/package.json:/internal-displacement-web/client/package.json
27 | - ./internal-displacement-web/client/yarn.lock:/internal-displacement-web/client/yarn.lock
28 | # server
29 | - ./internal-displacement-web/server/api:/internal-displacement-web/server/api
30 | - ./internal-displacement-web/server/pgDB:/internal-displacement-web/server/pgDB
31 | - ./internal-displacement-web/server/public:/internal-displacement-web/server/public
32 | - ./internal-displacement-web/server/src:/internal-displacement-web/server/src
33 | - ./internal-displacement-web/server/index.js:/internal-displacement-web/server/index.js
34 | - ./internal-displacement-web/server/package.json:/internal-displacement-web/server/package.json
35 | - ./internal-displacement-web/server/routes.js:/internal-displacement-web/server/routes.js
36 | - ./internal-displacement-web/server/yarn.lock:/internal-displacement-web/server/yarn.lock
37 | # start
38 | - ./internal-displacement-web/package.json:/internal-displacement-web/package.json
39 | - ./internal-displacement-web/yarn.lock:/internal-displacement-web/yarn.lock
40 | ports:
41 | - "3000:3000"
42 | - "3322:3322"
43 | depends_on:
44 | - localdb
45 | env_file: docker.env
46 |
--------------------------------------------------------------------------------
/docker-compose-spacy.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | localdb:
4 | build: docker/localdb
5 | image: localdb
6 | jupyter:
7 | image: aneel/internal-displacement-jupyter:spacy
8 | command: sh -c "jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks"
9 | stdin_open: true
10 | tty: true
11 | volumes:
12 | - .:/internal-displacement
13 | ports:
14 | - "3323:3323"
15 | depends_on:
16 | - localdb
17 | env_file: docker.env
18 | nodejs:
19 | build: internal-displacement-web
20 | image: internal-displacement-web
21 | volumes:
22 | # client
23 | - ./internal-displacement-web/client/public:/internal-displacement-web/client/public
24 | - ./internal-displacement-web/client/src:/internal-displacement-web/client/src
25 | - ./internal-displacement-web/client/package.json:/internal-displacement-web/client/package.json
26 | - ./internal-displacement-web/client/yarn.lock:/internal-displacement-web/client/yarn.lock
27 | # server
28 | - ./internal-displacement-web/server/api:/internal-displacement-web/server/api
29 | - ./internal-displacement-web/server/pgDB:/internal-displacement-web/server/pgDB
30 | - ./internal-displacement-web/server/public:/internal-displacement-web/server/public
31 | - ./internal-displacement-web/server/src:/internal-displacement-web/server/src
32 | - ./internal-displacement-web/server/index.js:/internal-displacement-web/server/index.js
33 | - ./internal-displacement-web/server/package.json:/internal-displacement-web/server/package.json
34 | - ./internal-displacement-web/server/routes.js:/internal-displacement-web/server/routes.js
35 | - ./internal-displacement-web/server/yarn.lock:/internal-displacement-web/server/yarn.lock
36 | # start
37 | - ./internal-displacement-web/package.json:/internal-displacement-web/package.json
38 | - ./internal-displacement-web/yarn.lock:/internal-displacement-web/yarn.lock
39 | ports:
40 | - "3000:3000"
41 | - "3322:3322"
42 | depends_on:
43 | - localdb
44 | env_file: docker.env
45 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | localdb:
4 | build: docker/localdb
5 | image: localdb
6 | jupyter:
7 | image: aneel/internal-displacement-jupyter:no-spacy
8 | command: sh -c "jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks"
9 | stdin_open: true
10 | tty: true
11 | volumes:
12 | - .:/internal-displacement
13 | ports:
14 | - "3323:3323"
15 | depends_on:
16 | - localdb
17 | env_file: docker.env
18 | nodejs:
19 | build: internal-displacement-web
20 | image: internal-displacement-web
21 | volumes:
22 | # client
23 | - ./internal-displacement-web/client/public:/internal-displacement-web/client/public
24 | - ./internal-displacement-web/client/src:/internal-displacement-web/client/src
25 | - ./internal-displacement-web/client/package.json:/internal-displacement-web/client/package.json
26 | - ./internal-displacement-web/client/yarn.lock:/internal-displacement-web/client/yarn.lock
27 | # server
28 | - ./internal-displacement-web/server/api:/internal-displacement-web/server/api
29 | - ./internal-displacement-web/server/pgDB:/internal-displacement-web/server/pgDB
30 | - ./internal-displacement-web/server/public:/internal-displacement-web/server/public
31 | - ./internal-displacement-web/server/src:/internal-displacement-web/server/src
32 | - ./internal-displacement-web/server/index.js:/internal-displacement-web/server/index.js
33 | - ./internal-displacement-web/server/package.json:/internal-displacement-web/server/package.json
34 | - ./internal-displacement-web/server/routes.js:/internal-displacement-web/server/routes.js
35 | - ./internal-displacement-web/server/yarn.lock:/internal-displacement-web/server/yarn.lock
36 | # start
37 | - ./internal-displacement-web/package.json:/internal-displacement-web/package.json
38 | - ./internal-displacement-web/yarn.lock:/internal-displacement-web/yarn.lock
39 | ports:
40 | - "3000:3000"
41 | - "3322:3322"
42 | depends_on:
43 | - localdb
44 | env_file: docker.env
45 |
--------------------------------------------------------------------------------
/docker.env:
--------------------------------------------------------------------------------
1 | # Localdb is a database running on your machine in Docker. If you need access to the
2 | # shared DB, please ask @aneel on the Slack for credentials. Please do not commit them to git.
3 | DB_HOST=localdb
4 | DB_USER=d4d
5 | DB_PASS=democracy
6 | DB_NAME=id
7 | PYTHONPATH=/internal-displacement
8 |
--------------------------------------------------------------------------------
/docker/localdb/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM postgres
2 |
3 | COPY docker-entrypoint-initdb.d/* /docker-entrypoint-initdb.d/
4 | COPY schema.sql /schema.sql
--------------------------------------------------------------------------------
/docker/localdb/docker-entrypoint-initdb.d/id.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | # Set up user and database
5 | psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL
6 | CREATE USER d4d WITH PASSWORD 'democracy';
7 | CREATE DATABASE id;
8 | GRANT ALL PRIVILEGES ON DATABASE id TO d4d;
9 | EOSQL
10 |
11 | # Create database schema in that database
12 | psql -v ON_ERROR_STOP=1 --username "d4d" id < /schema.sql
--------------------------------------------------------------------------------
/docker/localdb/docker-entrypoint-initdb.d/id_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | # Set up user and database
5 | psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL
6 | CREATE USER tester WITH PASSWORD 'tester';
7 | CREATE DATABASE id_test;
8 | GRANT ALL PRIVILEGES ON DATABASE id_test TO tester;
9 | EOSQL
10 |
11 | # Create database schema in that database
12 | psql -v ON_ERROR_STOP=1 --username "tester" id_test < /schema.sql
--------------------------------------------------------------------------------
/docker/localdb/schema.sql:
--------------------------------------------------------------------------------
1 | DROP TYPE IF EXISTS status CASCADE;
2 | CREATE TYPE status AS ENUM ('new', 'fetching', 'fetched',
3 | 'processing', 'processed', 'fetching failed', 'processing failed');
4 |
5 | DROP TYPE IF EXISTS category CASCADE;
6 | CREATE TYPE category AS ENUM ('other', 'disaster', 'conflict');
7 |
8 | DROP TABLE IF EXISTS article CASCADE;
9 | CREATE TABLE article (
10 | id SERIAL PRIMARY KEY,
11 | url TEXT UNIQUE NOT NULL,
12 | domain TEXT,
13 | status status,
14 | title TEXT,
15 | publication_date TIMESTAMP,
16 | authors TEXT,
17 | language CHAR(2),
18 | relevance BOOL,
19 | reliability DECIMAL
20 | );
21 |
22 | DROP TABLE IF EXISTS content CASCADE;
23 | CREATE TABLE content (
24 | article INT PRIMARY KEY REFERENCES article ON DELETE CASCADE,
25 | retrieval_date TIMESTAMP,
26 | content TEXT,
27 | content_type TEXT
28 | );
29 |
30 | DROP TABLE IF EXISTS article_category CASCADE;
31 | CREATE TABLE article_category (
32 | article INT REFERENCES article ON DELETE CASCADE,
33 | category category,
34 | PRIMARY KEY (article, category)
35 | );
36 |
37 | DROP TABLE IF EXISTS country CASCADE;
38 | CREATE TABLE country (
39 | code CHAR(3) PRIMARY KEY
40 | );
41 |
42 | DROP TABLE IF EXISTS country_term CASCADE;
43 | CREATE TABLE country_term (
44 | term TEXT PRIMARY KEY,
45 | country CHAR(3) REFERENCES country ON DELETE CASCADE
46 | );
47 |
48 | DROP TABLE IF EXISTS location CASCADE;
49 | CREATE TABLE location (
50 | id SERIAL PRIMARY KEY,
51 | description TEXT,
52 | city TEXT,
53 | subdivision TEXT,
54 | country CHAR(3) REFERENCES country ON DELETE CASCADE,
55 | latlong TEXT
56 | );
57 |
58 | DROP TABLE IF EXISTS report CASCADE;
59 | CREATE TABLE report (
60 | id SERIAL PRIMARY KEY,
61 | article INT REFERENCES article ON DELETE CASCADE,
62 | event_term TEXT,
63 | subject_term TEXT,
64 | quantity INT,
65 | tag_locations JSON,
66 | accuracy DECIMAL,
67 | analyzer TEXT,
68 | analysis_date TIMESTAMP WITH TIME ZONE
69 | );
70 |
71 | DROP TABLE IF EXISTS report_location CASCADE;
72 | CREATE TABLE report_location (
73 | report INT REFERENCES report ON DELETE CASCADE,
74 | location INT REFERENCES location ON DELETE CASCADE,
75 | PRIMARY KEY (report, location)
76 | );
77 |
78 | DROP TABLE IF EXISTS report_datespan CASCADE;
79 | CREATE TABLE report_datespan (
80 | id SERIAL PRIMARY KEY,
81 | report INT REFERENCES report ON DELETE CASCADE,
82 | start TIMESTAMP,
83 | finish TIMESTAMP
84 | );
85 |
86 |
87 |
--------------------------------------------------------------------------------
/docker/nginx/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nginx
2 | ADD conf.d/ /etc/nginx/conf.d
3 |
--------------------------------------------------------------------------------
/docker/nginx/conf.d/node.conf:
--------------------------------------------------------------------------------
1 | server {
2 |
3 | listen 80;
4 | server_name internal-displacement.datafordemocracy.org;
5 | access_log /var/log/nginx/node.access.log main;
6 | charset utf-8;
7 |
8 | location / {
9 | proxy_pass http://nodejs:3000;
10 | proxy_set_header Host $host;
11 | proxy_set_header X-Real-IP $remote_addr;
12 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
13 | }
14 |
15 | }
--------------------------------------------------------------------------------
/docker/nginx/nginx.conf:
--------------------------------------------------------------------------------
1 |
2 | user nginx;
3 | worker_processes 1;
4 |
5 | error_log /var/log/nginx/error.log warn;
6 | pid /var/run/nginx.pid;
7 |
8 |
9 | events {
10 | worker_connections 1024;
11 | }
12 |
13 |
14 | http {
15 | include /etc/nginx/mime.types;
16 | default_type application/octet-stream;
17 |
18 | log_format main '$remote_addr - $remote_user [$time_local] "$request" '
19 | '$status $body_bytes_sent "$http_referer" '
20 | '"$http_user_agent" "$http_x_forwarded_for"';
21 |
22 | access_log /var/log/nginx/access.log main;
23 |
24 | sendfile on;
25 | #tcp_nopush on;
26 |
27 | keepalive_timeout 65;
28 |
29 | #gzip on;
30 |
31 | include /etc/nginx/conf.d/*.conf;
32 | }
33 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: d4d-internal-displacement
2 | channels:
3 | - defaults
4 | dependencies:
5 | - cycler=0.10.0=py36_0
6 | - freetype=2.5.5=2
7 | - icu=54.1=0
8 | - libpng=1.6.27=0
9 | - matplotlib=2.0.0=np111py36_0
10 | - mkl=2017.0.1=0
11 | - numpy=1.11.3=py36_0
12 | - openssl=1.0.2j=0
13 | - pandas=0.19.2=np111py36_1
14 | - pip=9.0.1=py36_1
15 | - pyparsing=2.1.4=py36_0
16 | - pyqt=5.6.0=py36_2
17 | - python=3.6.0=0
18 | - python-dateutil=2.6.0=py36_0
19 | - pytz=2016.10=py36_0
20 | - qt=5.6.2=0
21 | - readline=6.2=2
22 | - scikit-learn=0.18.1=np111py36_1
23 | - scipy=0.18.1=np111py36_1
24 | - seaborn=0.7.1=py36_0
25 | - setuptools=27.2.0=py36_0
26 | - sip=4.18=py36_0
27 | - six=1.10.0=py36_0
28 | - sqlite=3.13.0=0
29 | - tk=8.5.18=0
30 | - wheel=0.29.0=py36_0
31 | - xz=5.2.2=1
32 | - zlib=1.2.8=3
33 | - pip:
34 | - appnope==0.1.0
35 | - beautifulsoup4==4.5.3
36 | - bleach==1.5.0
37 | - cssselect==1.0.1
38 | - decorator==4.0.11
39 | - entrypoints==0.2.2
40 | - feedfinder2==0.0.4
41 | - feedparser==5.2.1
42 | - html5lib==0.9999999
43 | - idna==2.2
44 | - ipykernel==4.5.2
45 | - ipython==5.2.2
46 | - ipython-genutils==0.1.0
47 | - ipywidgets==5.2.2
48 | - jieba3k==0.35.1
49 | - jinja2==2.9.5
50 | - jsonschema==2.5.1
51 | - jupyter==1.0.0
52 | - jupyter-client==4.4.0
53 | - jupyter-console==5.1.0
54 | - jupyter-core==4.2.1
55 | - lxml==3.7.2
56 | - markupsafe==0.23
57 | - mistune==0.7.3
58 | - nbconvert==5.1.1
59 | - nbformat==4.2.0
60 | - newspaper3k==0.1.9
61 | - nltk==3.2.2
62 | - notebook==4.3.2
63 | - olefile==0.44
64 | - pandocfilters==1.4.1
65 | - pexpect==4.2.1
66 | - pickleshare==0.7.4
67 | - pillow==4.0.0
68 | - prompt-toolkit==1.0.13
69 | - ptyprocess==0.5.1
70 | - pygments==2.2.0
71 | - pyyaml==3.12
72 | - pyzmq==16.0.2
73 | - qtconsole==4.2.1
74 | - requests==2.13.0
75 | - requests-file==1.4.1
76 | - simplegeneric==0.8.1
77 | - terminado==0.6
78 | - testpath==0.3
79 | - tldextract==2.0.2
80 | - tornado==4.4.2
81 | - traitlets==4.3.1
82 | - wcwidth==0.1.7
83 | - widgetsnbextension==1.2.6
84 | prefix: /Users/joshuaarnold/anaconda/envs/d4d-internal-displacement
85 |
86 |
--------------------------------------------------------------------------------
/fact_extraction.md:
--------------------------------------------------------------------------------
1 | ## Approach to Fact Extraction Using spaCy
2 |
3 | Following several weeks of experimentation, this document summarizes the current approach to extracting facts and reports from articles using the spaCy library.
4 |
5 | ### Reporting Terms and Units
6 |
7 | As per the competition guidelines, fact extraction is based upon a set of core reporting terms and reporting units.
8 | There are two broad categories:
9 |
10 | __Reporting Terms and Units Relating to People:__
11 |
12 | ```
13 | person_reporting_terms = [
14 | 'displaced', 'evacuated', 'forced flee', 'homeless', 'relief camp',
15 | 'sheltered', 'relocated', 'stranded', 'stuck', 'killed', 'dead', 'died'
16 | ]
17 | person_reporting_units = ["families", "person", "people",
18 | "individuals", "locals", "villagers", "residents", "occupants", "citizens", "households"]
19 | ```
20 |
21 | __Reporting Terms and Units Relating to Structures:__
22 |
23 | ```
24 | structure_reporting_terms = [
25 | 'destroyed', 'damaged', 'swept', 'collapsed', 'flooded', 'washed'
26 | ]
27 | structure_reporting_units = ["home", "house", "hut", "dwelling",
28 | "building", "shop", "business", "apartment", "flat", "residence"]
29 | ```
30 |
31 | In practice, each of these terms and units is lemmatized for comparison with tokens parsed from the article.
32 |
33 | These terms and units can be updated as needed to ensure we are maximizing coverage of events referenced in articles.
34 |
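As an illustration, here is a minimal sketch of lemma-based matching, assuming an installed spaCy English model (the model name varies by spaCy version; `term_lemmas` and `matching_tokens` are illustrative names, not the project's actual API):

```
import spacy

nlp = spacy.load('en_core_web_md')  # assumes this model has been downloaded

person_reporting_terms = ['displaced', 'evacuated', 'homeless', 'killed']

# Lemmatize the reporting terms once, up front
term_lemmas = {tok.lemma_ for term in person_reporting_terms for tok in nlp(term)}

def matching_tokens(sentence):
    """Return tokens whose lemma matches a lemmatized reporting term."""
    return [tok for tok in nlp(sentence) if tok.lemma_ in term_lemmas]

print(matching_tokens("Hundreds of people were evacuated from the village."))
```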
35 | ---
36 |
37 | ### High Level Country Extraction
38 |
39 | The competition guidelines require that each article be tagged with ISO 3166 country codes.
40 | This is achieved using:
41 | - spaCy library for named entity recognition
42 | - Pycountry for mapping country names and subdivisions (states, provinces etc) to country codes
43 | - JSON of cities -> country code for all cities with a population > 5,000, extracted from [www.geonames.org](http://www.geonames.org).
44 |
45 | The procedure is:
46 |
47 | 1. Combine article title and contents and parse using spaCy to identify geographical named entities
48 | 2. Attempt to match the identified entity to a country code using the following steps in order:
49 | - Try a direct match for the entity against country names, common names and official names
50 | - Try to identify the country by comparing the entity to country subdivisions
51 | - Try to identify the country by seeing if the entity appears in the cities_to_countries JSON
52 |
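A minimal sketch of this matching cascade, assuming `pycountry` and the `cities_to_countries.json` mapping described above (`match_country_code` is an illustrative helper, not the project's actual function):

```
import json
import pycountry

with open('data/cities_to_countries.json') as f:
    cities_to_countries = json.load(f)

def match_country_code(entity):
    # 1. Direct match against country names, common and official names
    try:
        return pycountry.countries.lookup(entity).alpha_3
    except LookupError:
        pass
    # 2. Compare the entity to country subdivisions (states, provinces, etc.)
    for sub in pycountry.subdivisions:
        if sub.name.lower() == entity.lower():
            return pycountry.countries.get(alpha_2=sub.country_code).alpha_3
    # 3. See if the entity appears in the cities -> country code mapping
    return cities_to_countries.get(entity)
```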
53 | ---
54 |
55 | ### Report Extraction
56 |
57 | The possible fields that a Report can have are:
58 | - Referenced locations
59 | - Referenced Date
60 | - Reporting term (see above)
61 | - Reporting unit (see above)
62 | - Quantity
63 |
64 | At a minimum, a Reporting Term and relevant Reporting Unit must be present in order to create a Report (the other fields can be blank / none).
65 |
66 | The high-level procedure is:
67 |
68 | 1. Parse the article contents using spaCy and split into sentences
69 | 2. Process each sentence and attempt to identify:
70 | - Locations
71 | - Date
72 | - Reporting Term
73 | - Reporting Unit
74 | - Quantity
75 | 3. If the necessary reporting elements are correctly extracted, a Report is created
76 | 4. Multiple reports can be created for a given article
77 |
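In simplified form, the loop might look as follows; the `Report` constructor and the `find_*` helpers are illustrative placeholders, not the project's actual code:

```
def extract_reports(doc):
    """doc is a spaCy-parsed article; returns zero or more Reports."""
    reports = []
    for sent in doc.sents:                     # 1. split into sentences
        term = find_reporting_term(sent)       # 2. attempt each field
        unit = find_reporting_unit(sent)
        if term and unit:                      # 3. minimum requirement met
            reports.append(Report(
                locations=find_locations(sent),
                date=find_date(sent),
                term=term,
                unit=unit,
                quantity=find_quantity(sent)))
    return reports                             # 4. multiple reports per article
```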
78 | ---
79 |
80 | #### Location Identification
81 |
82 | Sentence parsing to identify locations is based upon the following procedure:
83 |
84 | - Examine the sentence and identify whether any constituent tokens describe a location (based on spaCy named entity recognition)
85 | - If a root token is specified, only location tokens below the level of this token in the tree will be examined.
86 | - If no root is specified, location tokens will be drawn from the entirety of the span.
87 |
88 | ***Fallback location:***
89 |
90 | - In many cases the event location may be referenced one or more sentences prior to the sentence containing the reporting term and unit.
91 | - In order to deal with this, during article processing, a local variable is maintained for keeping track of the last extracted location.
92 | - When a Report is extracted, if it has no specific location, then its location can be set to be the most recently identified prior location
93 | - If a new location is extracted for a Report, then the local fallback location variable is updated
94 |
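A sketch of this bookkeeping (names are illustrative); the same pattern applies to the fallback date below:

```
fallback_location = None

for report in sentence_reports:        # reports in article order
    if report.locations:
        # A newly extracted location becomes the fallback for later sentences
        fallback_location = report.locations
    elif fallback_location is not None:
        # No location in this sentence: inherit the most recent prior one
        report.locations = fallback_location
```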
95 | ---
96 |
97 | #### Date Identification
98 |
99 | Sentence parsing to identify dates is based upon the following procedure:
100 |
101 | - Examine the sentence and identify whether any constituent tokens describe a date (based on spaCy named entity recognition)
102 | - If a root token is specified, only date tokens below the level of this token in the tree will be examined.
103 | - If no root is specified, date tokens will be drawn from the entirety of the span.
104 |
105 | ***Fallback date:***
106 |
107 | - In many cases the event date may be referenced one or more sentences prior to the sentence containing the reporting term and unit.
108 | - In order to deal with this, during article processing, a local variable is maintained for keeping track of the last extracted date.
109 | - When a Report is extracted, if it has no specific date, then its date can be set to be the most recently identified prior date
110 | - If a new date is extracted for a Report, then the local fallback date variable is updated
111 |
112 | ---
113 |
114 | #### Reporting Term and Unit Identification
115 |
116 | - Each sentence is split into tokens
117 | - Each token is compared to reporting terms for both people and structures
118 | - If a given token matches a reporting term:
119 | + Each branch below the token is examined to search for reporting units and numbers
120 | + If a reporting unit and number are identified, then a Report is created
121 | + If only a reporting unit (but no number) is identified, then look further up the tree above the reporting term to see if a number is present
122 |
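A minimal sketch of this subtree search, assuming `term_token` is a spaCy `Token` whose lemma matched a reporting term (helper and variable names are illustrative):

```
def find_unit_and_number(term_token, unit_lemmas):
    unit, number = None, None
    # Examine each branch below the matched term for units and numbers
    for tok in term_token.subtree:
        if tok.lemma_ in unit_lemmas:
            unit = tok
        if tok.like_num:
            number = tok
    if unit is not None and number is None:
        # Unit but no number: look further up the tree, above the reporting term
        for ancestor in term_token.ancestors:
            number = next((t for t in ancestor.children if t.like_num), None)
            if number is not None:
                break
    return unit, number
```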
123 | ***Special Cases:***
124 |
125 | - In addition to this general procedure, there are also some special cases that can be more simply identified by looking at specific combinations of Reporting Terms and Reporting Units
126 | - In some cases, these 'term-unit phrases' do not have a dependency within the parse tree that will be matched by the above algorithm, e.g. 'families homeless'
127 | - Should a specific phrase be encountered, methods similar to those above are used for extracting:
128 | + Location
129 | + Date
130 | + Number
131 |
132 | ---
133 |
134 | ### Required Enhancements
135 |
136 | See [Issue #62](https://github.com/Data4Democracy/internal-displacement/issues/62)
--------------------------------------------------------------------------------
/images/internal-displacement-plan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/images/internal-displacement-plan.png
--------------------------------------------------------------------------------
/internal-displacement-web/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:latest
2 |
3 | RUN mkdir /internal-displacement-web
4 | WORKDIR /internal-displacement-web
5 |
6 | COPY . /internal-displacement-web
7 |
8 | RUN yarn install
9 | RUN cd /internal-displacement-web/client && yarn install
10 | RUN cd /internal-displacement-web/server && yarn install
11 |
12 | CMD npm run start
13 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "id-web-client",
3 | "version": "0.1.0",
4 | "private": true,
5 | "proxy": "http://localhost:3322/",
6 | "devDependencies": {
7 | "react-scripts": "0.9.5"
8 | },
9 | "dependencies": {
10 | "babel-polyfill": "^6.23.0",
11 | "bootstrap": "^3.3.7",
12 | "d3": "^4.8.0",
13 | "deck.gl": "^4",
14 | "immutable": "^3.8.1",
15 | "luma.gl": "^3.0.0",
16 | "r-dom": "^2.3.2",
17 | "react": "^15.4.2",
18 | "react-bootstrap": "^0.30.7",
19 | "react-dom": "^15.4.2",
20 | "react-map-gl": "^1.8.2",
21 | "react-map-gl-heatmap-overlay": "^1.1.2",
22 | "react-redux": "^5.0.3",
23 | "react-router": "^2.6.0",
24 | "react-router-redux": "^4.0.8",
25 | "react-scroll": "^1.5.2",
26 | "redux": "^3.6.0",
27 | "redux-saga": "^0.14.6"
28 | },
29 | "scripts": {
30 | "start": "react-scripts start",
31 | "build": "react-scripts build",
32 | "test": "react-scripts test --env=jsdom",
33 | "eject": "react-scripts eject"
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/public/favicon.ico
--------------------------------------------------------------------------------
/internal-displacement-web/client/public/images/banner.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/public/images/banner.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
11 |
12 |
13 |
14 |
23 | Internal Displacement
24 |
25 |
26 |
27 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/public/themeJS/ie/backgroundsize.min.htc:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/public/themeJS/ie/html5shiv.js:
--------------------------------------------------------------------------------
1 | /*
2 | HTML5 Shiv v3.6.2 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
3 | */
4 | (function(l,f){function m(){var a=e.elements;return"string"==typeof a?a.split(" "):a}function i(a){var b=n[a[o]];b||(b={},h++,a[o]=h,n[h]=b);return b}function p(a,b,c){b||(b=f);if(g)return b.createElement(a);c||(c=i(b));b=c.cache[a]?c.cache[a].cloneNode():r.test(a)?(c.cache[a]=c.createElem(a)).cloneNode():c.createElem(a);return b.canHaveChildren&&!s.test(a)?c.frag.appendChild(b):b}function t(a,b){if(!b.cache)b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag();
5 | a.createElement=function(c){return!e.shivMethods?b.createElem(c):p(c,a,b)};a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+m().join().replace(/\w+/g,function(a){b.createElem(a);b.frag.createElement(a);return'c("'+a+'")'})+");return n}")(e,b.frag)}function q(a){a||(a=f);var b=i(a);if(e.shivCSS&&!j&&!b.hasCSS){var c,d=a;c=d.createElement("p");d=d.getElementsByTagName("head")[0]||d.documentElement;c.innerHTML="x";
6 | c=d.insertBefore(c.lastChild,d.firstChild);b.hasCSS=!!c}g||t(a,b);return a}var k=l.html5||{},s=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,r=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,j,o="_html5shiv",h=0,n={},g;(function(){try{var a=f.createElement("a");a.innerHTML=" ";j="hidden"in a;var b;if(!(b=1==a.childNodes.length)){f.createElement("a");var c=f.createDocumentFragment();b="undefined"==typeof c.cloneNode||
7 | "undefined"==typeof c.createDocumentFragment||"undefined"==typeof c.createElement}g=b}catch(d){g=j=!0}})();var e={elements:k.elements||"abbr article aside audio bdi canvas data datalist details figcaption figure footer header hgroup main mark meter nav output progress section summary time video",version:"3.6.2",shivCSS:!1!==k.shivCSS,supportsUnknownElements:g,shivMethods:!1!==k.shivMethods,type:"default",shivDocument:q,createElement:p,createDocumentFragment:function(a,b){a||(a=f);if(g)return a.createDocumentFragment();
8 | for(var b=b||i(a),c=b.frag.cloneNode(),d=0,e=m(),h=e.length;d #mq-test-1 { width: 42px; }',c.insertBefore(e,d),b=42===f.offsetWidth,c.removeChild(e),{matches:b,media:a}}}(a.document)}(this),function(a){"use strict";function b(){v(!0)}var c={};a.respond=c,c.update=function(){};var d=[],e=function(){var b=!1;try{b=new a.XMLHttpRequest}catch(c){b=new a.ActiveXObject("Microsoft.XMLHTTP")}return function(){return b}}(),f=function(a,b){var c=e();c&&(c.open("GET",a,!0),c.onreadystatechange=function(){4!==c.readyState||200!==c.status&&304!==c.status||b(c.responseText)},4!==c.readyState&&c.send(null))},g=function(a){return a.replace(c.regex.minmaxwh,"").match(c.regex.other)};if(c.ajax=f,c.queue=d,c.unsupportedmq=g,c.regex={media:/@media[^\{]+\{([^\{\}]*\{[^\}\{]*\})+/gi,keyframes:/@(?:\-(?:o|moz|webkit)\-)?keyframes[^\{]+\{(?:[^\{\}]*\{[^\}\{]*\})+[^\}]*\}/gi,comments:/\/\*[^*]*\*+([^/][^*]*\*+)*\//gi,urls:/(url\()['"]?([^\/\)'"][^:\)'"]+)['"]?(\))/g,findStyles:/@media *([^\{]+)\{([\S\s]+?)$/,only:/(only\s+)?([a-zA-Z]+)\s?/,minw:/\(\s*min\-width\s*:\s*(\s*[0-9\.]+)(px|em)\s*\)/,maxw:/\(\s*max\-width\s*:\s*(\s*[0-9\.]+)(px|em)\s*\)/,minmaxwh:/\(\s*m(in|ax)\-(height|width)\s*:\s*(\s*[0-9\.]+)(px|em)\s*\)/gi,other:/\([^\)]*\)/g},c.mediaQueriesSupported=a.matchMedia&&null!==a.matchMedia("only all")&&a.matchMedia("only all").matches,!c.mediaQueriesSupported){var h,i,j,k=a.document,l=k.documentElement,m=[],n=[],o=[],p={},q=30,r=k.getElementsByTagName("head")[0]||l,s=k.getElementsByTagName("base")[0],t=r.getElementsByTagName("link"),u=function(){var a,b=k.createElement("div"),c=k.body,d=l.style.fontSize,e=c&&c.style.fontSize,f=!1;return b.style.cssText="position:absolute;font-size:1em;width:1em",c||(c=f=k.createElement("body"),c.style.background="none"),l.style.fontSize="100%",c.style.fontSize="100%",c.appendChild(b),f&&l.insertBefore(c,l.firstChild),a=b.offsetWidth,f?l.removeChild(c):c.removeChild(b),l.style.fontSize=d,e&&(c.style.fontSize=e),a=j=parseFloat(a)},v=function(b){var c="clientWidth",d=l[c],e="CSS1Compat"===k.compatMode&&d||k.body[c]||d,f={},g=t[t.length-1],p=(new Date).getTime();if(b&&h&&q>p-h)return a.clearTimeout(i),i=a.setTimeout(v,q),void 0;h=p;for(var s in m)if(m.hasOwnProperty(s)){var w=m[s],x=w.minw,y=w.maxw,z=null===x,A=null===y,B="em";x&&(x=parseFloat(x)*(x.indexOf(B)>-1?j||u():1)),y&&(y=parseFloat(y)*(y.indexOf(B)>-1?j||u():1)),w.hasquery&&(z&&A||!(z||e>=x)||!(A||y>=e))||(f[w.media]||(f[w.media]=[]),f[w.media].push(n[w.rules]))}for(var C in o)o.hasOwnProperty(C)&&o[C]&&o[C].parentNode===r&&r.removeChild(o[C]);o.length=0;for(var D in f)if(f.hasOwnProperty(D)){var E=k.createElement("style"),F=f[D].join("\n");E.type="text/css",E.media=D,r.insertBefore(E,g.nextSibling),E.styleSheet?E.styleSheet.cssText=F:E.appendChild(k.createTextNode(F)),o.push(E)}},w=function(a,b,d){var e=a.replace(c.regex.comments,"").replace(c.regex.keyframes,"").match(c.regex.media),f=e&&e.length||0;b=b.substring(0,b.lastIndexOf("/"));var h=function(a){return a.replace(c.regex.urls,"$1"+b+"$2$3")},i=!f&&d;b.length&&(b+="/"),i&&(f=1);for(var j=0;f>j;j++){var k,l,o,p;i?(k=d,n.push(h(a))):(k=e[j].match(c.regex.findStyles)&&RegExp.$1,n.push(RegExp.$2&&h(RegExp.$2))),o=k.split(","),p=o.length;for(var q=0;p>q;q++)l=o[q],g(l)||m.push({media:l.split("(")[0].match(c.regex.only)&&RegExp.$2||"all",rules:n.length-1,hasquery:l.indexOf("(")>-1,minw:l.match(c.regex.minw)&&parseFloat(RegExp.$1)+(RegExp.$2||""),maxw:l.match(c.regex.maxw)&&parseFloat(RegExp.$1)+(RegExp.$2||"")})}v()},x=function(){if(d.length){var 
b=d.shift();f(b.href,function(c){w(c,b.href,b.media),p[b.href]=!0,a.setTimeout(function(){x()},0)})}},y=function(){for(var b=0;b1){for(var r=0;r=i&&o>=t};break;case"bottom":h=function(t,e,n,i,o){return n>=i&&o>=n};break;case"middle":h=function(t,e,n,i,o){return e>=i&&o>=e};break;case"top-only":h=function(t,e,n,i,o){return i>=t&&n>=i};break;case"bottom-only":h=function(t,e,n,i,o){return n>=o&&o>=t};break;default:case"default":h=function(t,e,n,i,o){return n>=i&&o>=t}}return c=function(t){var i,o,l,s,r,a,u=this.state,h=!1,c=this.$element.offset();i=n.height(),o=t+i/2,l=t+i,s=this.$element.outerHeight(),r=c.top+e(this.options.top,s,i),a=c.top+s-e(this.options.bottom,s,i),h=this.test(t,o,l,r,a),h!=u&&(this.state=h,h?this.options.enter&&this.options.enter.apply(this.element):this.options.leave&&this.options.leave.apply(this.element)),this.options.scroll&&this.options.scroll.apply(this.element,[(o-r)/(a-r)])},p={id:a,options:u,test:h,handler:c,state:null,element:this,$element:s,timeoutId:null},o[a]=p,s.data("_scrollexId",p.id),p.options.initialize&&p.options.initialize.apply(this),s},jQuery.fn.unscrollex=function(){var e=t(this);if(0==this.length)return e;if(this.length>1){for(var n=0;n1){for(o=0;o')
68 | .appendTo($body)
69 | .panel({
70 | delay: 500,
71 | hideOnClick: true,
72 | hideOnSwipe: true,
73 | resetScroll: true,
74 | resetForms: true,
75 | side: 'right',
76 | target: $body,
77 | visibleClass: 'is-menu-visible'
78 | });
79 |
80 | // Header.
81 | if (skel.vars.IEVersion < 9)
82 | $header.removeClass('alt');
83 |
84 | if ($banner.length > 0
85 | && $header.hasClass('alt')) {
86 |
87 | $window.on('resize', function() { $window.trigger('scroll'); });
88 |
89 | $banner.scrollex({
90 | bottom: $header.outerHeight() + 1,
91 | terminate: function() { $header.removeClass('alt'); },
92 | enter: function() { $header.addClass('alt'); },
93 | leave: function() { $header.removeClass('alt'); }
94 | });
95 |
96 | }
97 |
98 | });
99 |
100 | })(jQuery);
101 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/public/themeJS/skel.min.js:
--------------------------------------------------------------------------------
1 | /* skel.js v3.0.0 | (c) n33 | skel.io | MIT licensed */
2 | [minified skel.js source omitted; see https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/public/themeJS/skel.min.js]
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/Api/api.js:
--------------------------------------------------------------------------------
1 | function checkStatus(response) {
2 | if (response.status >= 200 && response.status < 300) {
3 | return response;
4 | }
5 | const error = new Error(`HTTP Error ${response.statusText}`);
6 | error.status = response.status; // numeric status code; statusText already appears in the message
7 | error.response = response;
8 | console.log(error);
9 | throw error;
10 | }
11 |
12 | function parseJSON(response) {
13 | return response.json();
14 | }
15 |
16 |
17 | export const dummyMapData = () => {
18 | const dummyMapUrl = 'https://jamesleondufour.carto.com/api/v2/sql?q=select%20count,%20long,%20lat,%20date,%20sampleurl,%20humanname%20from%20public.gdelt_refugee_2016'; // unused here; kept for reference -- the server proxies a similar Carto query at /api/test
19 | const dummyTestUrl = `${window.location.origin}/api/test`;
20 | return fetch(dummyTestUrl).then(checkStatus).then(parseJSON)
21 |
22 | };
23 |
24 | export const testDB = () => {
25 | return fetch(`${window.location.origin}/api/testDB`).then(checkStatus).then(parseJSON);
26 | };
27 | export const reportLocationData = () => {
28 | const reportLocationDataURL = `${window.location.origin}/api/report-location-data`;
29 | return fetch(reportLocationDataURL).then(checkStatus).then(parseJSON)
30 | }
31 |
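These helpers implement the usual fetch pattern: validate the HTTP status, then parse the JSON body. A minimal consumer sketch (the variable names and logging are illustrative, not from the repo):

    import { reportLocationData } from './Api/api';

    reportLocationData()
        .then(rows => {
            // rows is the parsed JSON body from /api/report-location-data
            console.log('report locations', rows);
        })
        .catch(err => {
            // checkStatus rejects non-2xx responses with an Error carrying .status and .response
            console.error('request failed', err.status, err);
        });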
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/App.css:
--------------------------------------------------------------------------------
1 | .App {
2 | text-align: center;
3 | }
4 |
5 | .App-logo {
6 | animation: App-logo-spin infinite 20s linear;
7 | height: 80px;
8 | }
9 |
10 | .App-header {
11 | background-color: #222;
12 | height: 150px;
13 | padding: 20px;
14 | color: white;
15 | }
16 |
17 | .App-intro {
18 | font-size: large;
19 | }
20 |
21 | @keyframes App-logo-spin {
22 | from { transform: rotate(0deg); }
23 | to { transform: rotate(360deg); }
24 | }
25 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/App.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from 'react';
2 | import logo from './logo.svg';
3 | import './App.css';
4 | import Bootstrap from 'bootstrap/dist/css/bootstrap.css';
5 | import { render } from 'react-dom';
6 | import { syncHistoryWithStore } from 'react-router-redux'
7 |
8 | class App extends Component {
9 |   render() {
10 |     return (
11 |       <div className="App">
12 |         <div className="App-header">
13 |           <img src={logo} className="App-logo" alt="logo" />
14 |           <h2>Welcome to React</h2>
15 |         </div>
16 |         <p className="App-intro">
17 |           To get started, edit <code>src/App.js</code> and save to reload.
18 |         </p>
19 |       </div>
20 |     );
21 |   }
22 | }
23 |
24 | export default App;
25 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/App.test.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import App from './App';
4 |
5 | it('renders without crashing', () => {
6 | const div = document.createElement('div');
7 |   ReactDOM.render(<App />, div);
8 | });
9 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/common/Footer.js:
--------------------------------------------------------------------------------
1 | import React, {Component} from 'react';
2 | import {render} from 'react-dom';
3 |
4 | const Footer = () => (
5 |
26 | );
27 |
28 | export default Footer;
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/common/Header.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { Link, IndexLink } from 'react-router';
3 | import {Navbar, Nav, NavItem} from 'react-bootstrap';
4 |
5 | const navbarInstance = () => (
6 |   <Navbar>
7 |     <Navbar.Header>
8 |       <Navbar.Brand>
9 |         <a href="#">Home</a>
10 |       </Navbar.Brand>
11 |     </Navbar.Header>
12 |     <Nav>
13 |       <NavItem eventKey={1} href="#">Link</NavItem>
14 |       <NavItem eventKey={2} href="#">Link</NavItem>
15 |     </Nav>
16 |   </Navbar>
17 | );
18 |
19 | export default navbarInstance;
20 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/components/NotFound/index.js:
--------------------------------------------------------------------------------
1 | import React from 'react'
2 | import styles from './styles.css'
3 |
4 | const NotFound = () => {
5 |   return <div className={styles.root}>Not Found :(</div>
6 | }
7 |
8 | export default NotFound
9 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/components/NotFound/index.spec.js:
--------------------------------------------------------------------------------
1 | import chai, { expect } from 'chai'
2 | import chaiEnzyme from 'chai-enzyme'
3 | import { shallow } from 'enzyme'
4 | import React from 'react'
5 | import sinon from 'sinon'
6 | import sinonChai from 'sinon-chai'
7 |
8 | import NotFound from './'
9 |
10 | chai.use(chaiEnzyme())
11 | chai.use(sinonChai)
12 |
13 | describe('<NotFound />', () => {
14 | let sut
15 |
16 | beforeEach(() => {
17 |     sut = shallow(<NotFound />)
18 | })
19 |
20 | it('should exist', () => {
21 | expect(sut).to.be.present
22 | })
23 | })
24 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/components/NotFound/styles.css:
--------------------------------------------------------------------------------
1 | .root {
2 | text-align: center
3 | }
4 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/components/UrlForm/index.js:
--------------------------------------------------------------------------------
[file contents not inlined; see https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/components/UrlForm/index.js]
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/MaVizPageV2.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/containers/MapVizContainer/MaVizPageV2.js
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/MapVizExample.js:
--------------------------------------------------------------------------------
1 | /* global window,document */
2 | import React, {Component} from 'react';
3 | import {render} from 'react-dom';
4 | import MapGL from 'react-map-gl';
5 | import DeckGLOverlay from './components/mapOverlays/exampleGeojson.js';
6 | import { MAPBOX_ACCESS_TOKEN } from './constants/mapConstants';
7 | import customData from './vancouver-blocks.json';
8 |
9 | import { dummyMapData} from './../../Api/api';
10 | import {convertArrToGeojsonPoints} from './../../utils/convertDataToGeojson';
11 |
12 | // Set your mapbox token here
13 | const MAPBOX_TOKEN = MAPBOX_ACCESS_TOKEN; // eslint-disable-line
14 |
15 | const colorScale = r => [r * 255, 140, 200 * (1 - r)];
16 |
17 | class MapVizPageTest extends Component {
18 |
19 | constructor(props) {
20 | super(props);
21 | this.state = {
22 | viewport: {
23 | ...DeckGLOverlay.defaultViewport,
24 | width: 500,
25 | height: 500
26 | },
27 | data: null
28 | };
29 |
30 |
31 | // requestJson('./vancouver-blocks.json', (error, response) => {
32 | // if (!error) {
33 | // this.setState({data: response});
34 | // }
35 | // });
36 | }
37 |
38 | componentDidMount() {
39 | window.addEventListener('resize', this._resize.bind(this));
40 | this._resize();
41 | let self=this;
42 | if (customData) {
43 | console.log(customData)
44 | let plotData = {
45 | "type": "FeatureCollection",
46 | "features": []
47 | };
48 | plotData.features = customData.features.map(d => {
49 | return {"type":"Feature","geometry":{"type":"Point","coordinates": d.geometry.coordinates[0][0]}}
50 | });
51 |
52 | dummyMapData().then(data => {
53 | console.log('data');
54 | let parsed = JSON.parse(data).rows;
55 | let parsedGeojson = convertArrToGeojsonPoints(parsed, 'long', 'lat')
56 | // this.props.dispatch(loadIDData(data))
57 | self.setState({data: parsedGeojson})
58 | });
59 | this.setState({data: plotData});
60 | }
61 | }
62 |
63 | _resize() {
64 | this._onChangeViewport({
65 | width: window.innerWidth,
66 | height: window.innerHeight
67 | });
68 | }
69 |
70 | _onChangeViewport(viewport) {
71 | this.setState({
72 | viewport: {...this.state.viewport, ...viewport}
73 | });
74 | }
75 |
76 | render() {
77 | const {viewport, data} = this.state;
78 |
79 | return (
80 |       <MapGL
81 |         {...viewport}
82 |         mapboxApiAccessToken={MAPBOX_TOKEN}
83 |         perspectiveEnabled={true}
84 |         onChangeViewport={this._onChangeViewport.bind(this)}>
85 |         <DeckGLOverlay viewport={viewport}
86 |           data={data}
87 |           colorScale={colorScale} />
88 |       </MapGL>
89 | );
90 | }
91 | }
92 |
93 | export default MapVizPageTest
94 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/MapVizPage.js:
--------------------------------------------------------------------------------
1 | import React, {Component} from 'react';
2 | import {render} from 'react-dom';
3 |
4 | import { dummyMapData, testDB} from './../../Api/api';
5 | import './mapbox-gl.css'; //importing here since there are issues with webpack building mapbox-gl
6 | import './mapVis.css';
7 |
8 | import mapboxgl from 'mapbox-gl';
9 |
10 | import {RenderMap } from './components/map';
11 | import {loadIDData, updateMap} from './actions';
12 | import {createStore} from 'react-redux';
13 |
14 | import "babel-polyfill";
15 | import MapGL from 'react-map-gl';
16 |
17 |
18 | import { MAPBOX_ACCESS_TOKEN } from './constants/mapConstants';
19 | import GeojsonCustomOverlay from './components/mapOverlays/geojsonDataOverlay';
20 |
21 | class MapVizPage extends Component {
22 | constructor(props) {
23 | super(props);
24 | this.state = {
25 |
26 | data: null,
27 | // mapData: [],
28 | viewport: {
29 | ...GeojsonCustomOverlay.defaultViewport,
30 | startDragLngLat: null,
31 | isDragging: false,
32 | width: window.innerWidth,
33 | height: window.innerHeight,
34 | },
35 | maxRadius: 20,
36 | radiusAccessor: 'count'
37 | };
38 | window.addEventListener('resize', () => this.setState({width: window.innerWidth}));
39 | }
40 |
41 | componentDidMount() {
42 | window.addEventListener('resize', this._resize.bind(this));
43 | console.log(mapboxgl, 'mapbox exists?', window)
44 | this._resize();
45 | let self = this;
46 | // componentDidMount() {
47 | dummyMapData().then(data => {
48 | console.log('data', self.state, self.setState);
49 | let parsed = JSON.parse(data).rows;
50 | // this.props.dispatch(loadIDData(data))
51 | self.setState({data: parsed})
52 | });
53 |
54 | }
55 |
56 | _resize() {
57 | this._onChangeViewport({
58 | width: window.innerWidth,
59 | height: window.innerHeight
60 | });
61 | }
62 |
63 | render() {
64 | const {viewport, data, maxRadius, radiusAccessor} = this.state;
65 |
66 | return (
67 |       <MapGL
68 |         {...viewport}
69 |         mapboxApiAccessToken={MAPBOX_ACCESS_TOKEN}
70 |         perspectiveEnabled={true}
71 |         onChangeViewport={this._onChangeViewport.bind(this)}>
72 |
73 |         <GeojsonCustomOverlay
74 |           viewport={viewport}
75 |           data={data}
76 |           maxRadius={maxRadius}
77 |           radiusAccessor={radiusAccessor}
78 |         />
79 |         <div className="title-label">Geojson Custom overlay</div>
80 |       </MapGL>
81 | )
82 |
83 |
84 | }
85 |
86 | _onChangeViewport(viewport) {
87 | this.setState({
88 | viewport: {...this.state.viewport, ...viewport}
89 | });
90 | }
91 |
92 | }
93 |
94 | MapVizPage.propTypes = {
95 |
96 | };
97 | MapVizPage.defaultProps = {
98 |
99 | };
100 |
101 | export default MapVizPage
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/MapVizPage.scatter.js:
--------------------------------------------------------------------------------
1 | import React, {Component} from 'react';
2 | import { dummyMapData, testDB} from './../../Api/api';
3 | import './mapbox-gl.css'; //importing here since there are issues with webpack building mapbox-gl
4 | import './mapVis.css';
5 | import {RenderMap } from './components/map';
6 | import {loadIDData, updateMap} from './actions';
7 | import {createStore} from 'react-redux';
8 | import "babel-polyfill";
9 | import MapGL, {autobind} from 'react-map-gl';
10 |
11 | import DeckGL, {LineLayer} from 'deck.gl';
12 | // const store = createStore(mapReducer);
13 | // import MapboxGLMap from 'react-map-gl';
14 | import { MAPBOX_ACCESS_TOKEN } from './constants/mapConstants';
15 | import HeatMapOverlayRender from './components/mapOverlays/displacementHeatmapOverlay';
16 | import {DeckGLOverlay} from './components/mapOverlays/geojsonDataOverlay';
17 | import ScatterLayer from './components/mapOverlays/scatterplotOverlay';
18 | class MapVizPageScatter extends Component {
19 | constructor(props) {
20 | super(props);
21 | this.state = {
22 |
23 | data: null,
24 | // mapData: [],
25 | viewport: {
26 | latitude: 0,
27 | longitude: 0,
28 | zoom: 0,
29 | startDragLngLat: null,
30 | isDragging: false,
31 | width: window.innerWidth,
32 | height: window.innerHeight,
33 | }
34 | };
35 | window.addEventListener('resize', () => this.setState({width: window.innerWidth}));
36 | }
37 |
38 | componentDidMount() {
39 | let self = this;
40 | // componentDidMount() {
41 | dummyMapData().then(data => {
42 | console.log('data', self.state, self.setState);
43 | let parsed = JSON.parse(data).rows;
44 | // this.props.dispatch(loadIDData(data))
45 | self.setState({data: parsed.map(d => {
46 | return {
47 | position: [d.long, d.lat],
48 | radius: d.count
49 | }})})
50 | // self.setState({data: parsed})
51 | });
52 |
53 | // console.log(RenderMap)
54 | }
55 |
56 | _resize() {
57 | this._onChangeViewport({
58 | width: window.innerWidth,
59 | height: window.innerHeight
60 | });
61 | }
62 |
63 | render() {
64 | let mapProps = {
65 | ...this.state.viewport,
66 | // ...this.state.mapData
67 | };
68 | const {viewport, data} = this.state;
69 |
70 | // return (
71 | //
76 | //
80 | //
81 | // );
82 |
83 | return (
84 |       <MapGL
85 |         {...viewport}
86 |         mapboxApiAccessToken={MAPBOX_ACCESS_TOKEN}
87 |         perspectiveEnabled={true}
88 |         onChangeViewport={this._onChangeViewport.bind(this)}>
89 |         <ScatterLayer
90 |           viewport={viewport}
91 |           data={data}
92 |           radius={30} />
93 |       </MapGL>
94 | )
95 |
96 | if ( !this.state.mapData || this.state.mapData.length === 0) {
97 | return (
98 |
99 |
108 | Map rendering
109 |
110 | );
111 | }
112 |
113 | return (
114 |
115 |
120 |
121 | {HeatMapOverlayRender({...this.state.viewport, mapData: this.state.mapData}) }
122 |
123 |
124 | );
125 | }
126 |
127 | _onChangeViewport(viewport) {
128 | this.setState({
129 | viewport: {...this.state.viewport, ...viewport}
130 | });
131 | }
132 |
133 | }
134 |
135 |
136 |
137 | export default MapVizPageScatter
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/actions/index.js:
--------------------------------------------------------------------------------
1 |
2 | // actions for map
3 | export const updateMap = (mapViewState) => {
4 | return {type: 'UPDATE_MAP', mapViewState};
5 | };
6 |
7 | export const loadIDData = (data) => {
8 | return {type: 'LOAD_ID_DATA_SUCCESS', data};
9 | };
10 |
11 |
12 |
13 |
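These are plain action creators; once wired to the store created in client/src/index.js they are dispatched directly. An illustrative sketch (`store` and `rowsFromApi` are assumptions standing in for the real store instance and fetched data):

    // hypothetical dispatches; store/rowsFromApi are not defined in this file
    store.dispatch(updateMap({ latitude: 0, longitude: 0, zoom: 2 }));
    store.dispatch(loadIDData(rowsFromApi)); // the reducer stores this under displacementData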
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/components/map/index.js:
--------------------------------------------------------------------------------
1 |
2 | import React from 'react';
3 | import MapGL from 'react-map-gl';
4 | // import MapboxGLMap from 'react-map-gl';
5 | import { MAPBOX_ACCESS_TOKEN } from './../../constants/mapConstants';
6 | import HeatMapOverlayRender from './../mapOverlays/displacementHeatmapOverlay';
7 |
8 | export const renderVisualizationOverlay = (data) => {
9 | //
10 | // const param = {
11 | // props: this.props,
12 | // state: this.state,
13 | // // onWebGLInitialized: this._onWebGLInitialized,
14 | // // effects: this._effects,
15 | // }
16 |
17 | if (data) {
18 |
19 | return (
20 |
21 |
22 | {HeatMapOverlayRender(data) }
23 |
24 | )
25 | } else {
26 |         return (<div />)
27 | }
28 | };
29 | export const RenderMap = (props) => {
30 | console.log(props)
31 |
32 | if ( !props.mapData || props.mapData.length === 0) {
33 | return (
34 |
35 |
46 | Map rendering
47 |
48 | );
49 | }
50 |
51 | return (
52 |
53 |
64 | {renderVisualizationOverlay(props)}
65 |
66 | // {
75 | // const {latitude, longitude, zoom} = viewport;
76 | // Optionally call `setState` and use the state to update the map.
77 | // }}
78 | // >
79 | // {/*{isActiveOverlay && this._renderVisualizationOverlay()}*/}
80 | //
81 | );
82 |     // return (Maop here!!!)
83 | };
84 |
85 |
86 | // export default RenderMap
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/components/map/mapboxTest.js:
--------------------------------------------------------------------------------
1 |
2 | import React, {Component} from 'react';
3 | import {render} from 'react-dom';
4 | import mapboxgl from 'mapbox-gl';
5 | import * as d3 from 'd3';
6 | import { MAPBOX_ACCESS_TOKEN } from './../../constants/mapConstants';
7 |
8 | export const renderMap = (data, containerID = 'map', centerLat = 0, centerLng = 0, zoom=0, maxCount, minCount, maxRadius, minRadius) => {
9 | mapboxgl.accessToken = MAPBOX_ACCESS_TOKEN;
10 | let map = new mapboxgl.Map({
11 | container: containerID,
12 | style: 'mapbox://styles/mapbox/light-v9',
13 | center: [centerLng, centerLat],
14 | zoom: zoom
15 | });
16 |
17 | map.on('load', () => {
18 | map.addSource('idData', {
19 | 'type': 'geojson',
20 | 'data': data
21 | });
22 |
23 | map.addLayer({
24 | 'id': 'idData-circles',
25 | 'type': 'circle',
26 | 'source': 'idData',
27 | 'paint': {
28 | 'circle-color': {
29 | property: 'mag',
30 | stops: [
31 | [6, '#FCA107'],
32 | [8, '#7F3121']
33 | ]
34 | },
35 | 'circle-opacity': 0.75,
36 | 'circle-radius': {
37 | property: 'radius',
38 | "type": "exponential",
39 | "stops": [
40 | [{ "zoom": 0, "value": 1 }, 10],
41 | [{ "zoom": 0, "value": 10 }, 50],
42 | [{ "zoom": 0, "value": 100 }, 100],
43 | [{ "zoom": 5, "value": 1 }, 20],
44 | [{ "zoom": 5, "value": 10 }, 60],
45 | [{ "zoom": 5, "value": 100 }, 110],
46 | [{ "zoom": 10, "value": 1 }, 30],
47 | [{ "zoom": 10, "value": 10 }, 70],
48 | [{ "zoom": 10, "value": 100 }, 120],
49 | [{ "zoom": 15, "value": 1 }, 40],
50 | [{ "zoom": 15, "value": 10 }, 80],
51 | [{ "zoom": 15, "value": 100 }, 130],
52 | [{ "zoom": 20, "value": 1 }, 50],
53 | [{ "zoom": 20, "value": 10 }, 90],
54 | [{ "zoom": 20, "value": 100 }, 140]
55 | ]
56 | }
57 | }
58 | });
59 | });
60 | };
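The 'circle-radius' block above is Mapbox GL data-driven styling: each stop maps a {zoom, value} input pair to an output radius in pixels, and the renderer interpolates between stops, so a feature whose radius property is 10 draws at roughly 50px at zoom 0 and 90px at zoom 20. A minimal two-stop sketch of the same idea (assumes the same map instance and 'idData' source as above; the layer id is hypothetical):

    map.addLayer({
        id: 'idData-circles-minimal', // hypothetical layer id
        type: 'circle',
        source: 'idData',
        paint: {
            'circle-radius': {
                property: 'radius',
                type: 'exponential',
                stops: [
                    [{ zoom: 0, value: 1 }, 10],    // zoomed out, small value -> 10px circle
                    [{ zoom: 20, value: 100 }, 140] // zoomed in, large value -> 140px circle
                ]
            }
        }
    });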
61 | export const renderVisualizationOverlay = (data) => {
62 | //
63 | // const param = {
64 | // props: this.props,
65 | // state: this.state,
66 | // // onWebGLInitialized: this._onWebGLInitialized,
67 | // // effects: this._effects,
68 | // }
69 |
70 | if (data) {
71 |
72 | return (
73 |
74 |
75 |
76 | )
77 | } else {
78 |         return (<div />)
79 | }
80 | };
81 | export const RenderMap = (props) => {
82 | console.log(props)
83 |
84 | if ( !props.mapData || props.mapData.length === 0) {
85 | return (
86 |
87 |
98 | Map rendering
99 |
100 | );
101 | }
102 |
103 | return (
104 |
105 |
116 | {renderVisualizationOverlay(props)}
117 |
118 | // {
127 | // const {latitude, longitude, zoom} = viewport;
128 | // Optionally call `setState` and use the state to update the map.
129 | // }}
130 | // >
131 | // {/*{isActiveOverlay && this._renderVisualizationOverlay()}*/}
132 | //
133 | );
134 |     // return (Maop here!!!)
135 | };
136 |
137 |
138 | // export default RenderMap
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/customScatterOverlay.js:
--------------------------------------------------------------------------------
1 | import React, {Component} from 'react';
2 | import DeckGL, {ScatterplotLayer} from 'deck.gl';
3 |
4 | // export default ScaledScatterplotLayer extends ScatterplotLayer {
5 | //
6 | // }
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/displacementHeatmapOverlay.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from 'react';
2 | import Immutable from 'immutable';
3 | // import DeckGL from 'deck.gl';
4 | // import HeatmapOverlay from 'react-map-gl-heatmap-overlay';
5 | import {ScatterplotOverlay} from 'react-map-gl';
6 |
7 |
8 |
9 | const HeatMapOverlayRender = (param) => {
10 | console.log('heatmapoverlay render', param.mapData.map(d => [d.long, d.lat]))
11 | const idData = Immutable.fromJS(param.mapData);
12 | // const idData = Immutable.fromJS(param.mapData.map(d => [d.long, d.lat]));
13 | const width = param.width;
14 | const height = param.height;
15 | const zoom = param.zoom || 0;
16 | // const { width, height, mapViewState } = param;
17 | return (
18 | // [data.long, data.lat]}
24 | // />
25 | [data.get('long'), data.get('lat')]}
40 | />
41 | )
42 |
43 |
44 | };
45 |
46 | export default HeatMapOverlayRender;
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/exampleGeojson.js:
--------------------------------------------------------------------------------
1 | import React, {Component} from 'react';
2 |
3 | import DeckGL, {GeoJsonLayer} from 'deck.gl';
4 |
5 | const LIGHT_SETTINGS = {
6 | lightsPosition: [-125, 50.5, 5000, -122.8, 48.5, 8000],
7 | ambientRatio: 0.2,
8 | diffuseRatio: 0.5,
9 | specularRatio: 0.3,
10 | lightsStrength: [1.0, 0.0, 2.0, 0.0],
11 | numberOfLights: 2
12 | };
13 |
14 | export default class DeckGLOverlay extends Component {
15 |
16 | static get defaultViewport() {
17 | return {
18 | latitude: 49.254,
19 | longitude: -123.13,
20 | zoom: 11,
21 | maxZoom: 16,
22 | pitch: 0,
23 | bearing: 0
24 | };
25 | }
26 |
27 | _initialize(gl) {
28 | gl.enable(gl.DEPTH_TEST);
29 | gl.depthFunc(gl.LEQUAL);
30 | }
31 |
32 | render() {
33 | const {viewport, data, colorScale} = this.props;
34 |
35 | if (!data) {
36 | return null;
37 | }
38 |
39 | const layer = new GeoJsonLayer({
40 | id: 'geojson',
41 | data,
42 | opacity: 0.8,
43 | stroked: false,
44 | filled: true,
45 | extruded: true,
46 | wireframe: true,
47 | fp64: true,
48 | //getElevation: f => Math.sqrt(f.properties.valuePerSqm) * 10,
49 | // getFillColor: f => colorScale(f.properties.growth),
50 | // getLineColor: f => [255, 255, 255],
51 | getRadius: d => 2050,
52 | getFillColor: d => [31, 186, 214, 100],
53 | lightSettings: LIGHT_SETTINGS,
54 | pickable: Boolean(this.props.onHover),
55 | onHover: this.props.onHover
56 | });
57 |
58 | return (
59 |       <DeckGL {...viewport} layers={[layer]} onWebGLInitialized={this._initialize} />
60 | );
61 | }
62 | }
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/geojsonDataOverlay.js:
--------------------------------------------------------------------------------
1 | import React, {Component} from 'react';
2 | import 'babel-polyfill';
3 | import DeckGL, {GeoJsonLayer} from 'deck.gl';
4 | import * as d3 from 'd3';
5 | import {convertArrToGeojsonPoints} from './../../../../utils/convertDataToGeojson';
6 |
7 | export default class GeojsonCustomOverlay extends Component {
8 |
9 | static get defaultViewport() {
10 | return {
11 | latitude: 0,
12 | longitude: 0,
13 | zoom: 5,
14 | maxZoom: 16,
15 | pitch: 45,
16 | bearing: 0
17 | };
18 | }
19 |
20 | _initialize(gl) {
21 | gl.enable(gl.DEPTH_TEST);
22 | gl.depthFunc(gl.LEQUAL);
23 | }
24 |
25 | _getRadiusScale(maxRadius, maxDataValue) {
26 | return d3.scaleSqrt().domain([0, maxDataValue]).range([20, maxRadius])
27 | }
28 |
29 | render() {
30 | const {viewport, data, maxRadius, radiusAccessor} = this.props;
31 |
32 | if (!data) {
33 | return null;
34 | }
35 |
36 | console.log('rendering data')
37 |
38 | let maxRadiusData = d3.max(data, d => d[radiusAccessor]);
39 | let radiusScale = this._getRadiusScale(maxRadius, 50);
40 | // let radiusScale = this._getRadiusScale(maxRadius, maxRadiusData);
41 | data.forEach(d => {
42 |             d.radius = 50; // radiusScale(d.count);
43 | d.color = [31, 186, 214, 255]
44 | });
45 | let geojsonMapData = convertArrToGeojsonPoints(data, 'long', 'lat');
46 |
47 | let testData = {
48 | "type": "FeatureCollection",
49 | "features": geojsonMapData.features.slice(0,5)
50 | };
51 |
52 | console.log('geojson', geojsonMapData, JSON.stringify(testData));
53 |
54 | const layer = new GeoJsonLayer({
55 | id: 'geojson',
56 | data: geojsonMapData,
57 | opacity: 0.8,
58 | visible: true,
59 | // stroked: false,
60 | filled: true,
61 | getRadius: d => d.properties.radius,
62 | getFillColor: d => [31, 186, 214, 100],
63 | // pickable: true,
64 |             // onHover: () => { console.log('on hover') } // this.props.onHover
65 | });
66 |
67 | return (
68 |       <DeckGL {...viewport} layers={[layer]} onWebGLInitialized={this._initialize} />
69 | );
70 | }
71 | }
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/components/mapOverlays/scatterplotOverlay.js:
--------------------------------------------------------------------------------
1 | import React, {Component} from 'react';
2 | import DeckGL, {ScatterplotLayer} from 'deck.gl';
3 |
4 | export default class ScatterLayer extends Component {
5 |
6 | static get defaultViewport() {
7 | return {
8 | longitude: -74,
9 | latitude: 40.7,
10 | zoom: 11,
11 | maxZoom: 16,
12 | pitch: 0,
13 | bearing: 0
14 | };
15 | }
16 |
17 | _initialize(gl) {
18 | gl.enable(gl.DEPTH_TEST);
19 | gl.depthFunc(gl.LEQUAL);
20 | }
21 |
22 | render() {
23 | console.log('rendering', DeckGL)
24 | const {viewport, data, radius} = this.props;
25 |
26 | if (!data) {
27 | return null;
28 | }
29 |
30 | console.log('layer', data)
31 | const layer = new ScatterplotLayer({
32 | id: 'scatter-plot',
33 | data,
34 | pickable: true,
35 | //radiusScale: radius,
36 | radiusMinPixels: 2,
37 | radiusMaxPixels: 280,
38 | // radiusMinPixels
39 | getPosition: d => d.position,
40 | getRadius: d => d.radius,
41 | getColor: d => [0,0,0,100]
42 | });
43 |
44 | return (
45 |       <DeckGL {...viewport} layers={[layer]} onWebGLInitialized={this._initialize} />
46 | );
47 | }
48 | }
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/constants/actionTypes.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/containers/MapVizContainer/constants/actionTypes.js
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/constants/mapConstants.js:
--------------------------------------------------------------------------------
1 | export const MAPBOX_ACCESS_TOKEN = 'pk.eyJ1Ijoid3d5bWFrIiwiYSI6IkxEbENMZzgifQ.pxk3bdzd7n8h4pKzc9zozw';
2 |
3 |
4 |
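Committing a live Mapbox token to source control is risky; since the client is a create-react-app build, the token could instead come from the build environment (CRA inlines any variable prefixed REACT_APP_ at build time). A sketch, assuming a .env entry named REACT_APP_MAPBOX_TOKEN:

    // sketch: read the token from the CRA build environment instead of hard-coding it
    export const MAPBOX_ACCESS_TOKEN = process.env.REACT_APP_MAPBOX_TOKEN || '';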
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/mapVis.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
3 | font-size: 16px;
4 | margin:0;
5 | padding:0;
6 | overflow:hidden;
7 | }
8 |
9 | .mapboxgl-canvas {
10 | position: absolute;
11 | left: 0;
12 | top: 0;
13 | }
14 |
15 | .overlay-contol-container {
16 | position: absolute;
17 | bottom: 0;
18 | padding-bottom: 10px;
19 | padding-left: 10px;
20 | padding-top: 10px;
21 | z-index: 99;
22 | width: 250px;
23 | background-color: rgba(0,0,0, 0.2);
24 | }
25 |
26 | #overlay-control {
27 | display: inline-block;
28 | }
29 |
30 | #overlay-map-control {
31 | display: inline-block;
32 | }
33 |
34 | .title-label {
35 | width: 300px;
36 | color: white;
37 | }
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/reducers/initialState.js:
--------------------------------------------------------------------------------
1 | const INITIAL_STATE = {
2 | mapViewState: {
3 | latitude: 0,
4 | longitude: 0,
5 | zoom: 2,
6 | pitch: 0,
7 | bearing: 0
8 | },
9 | displacementData: null, //location data of displacement reports,
10 |
11 |
12 | };
13 |
14 | export default INITIAL_STATE
15 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/reducers/mapReducers.js:
--------------------------------------------------------------------------------
1 | import initialState from './initialState';
2 |
3 | export default function (state = initialState, action) {
4 | switch (action.type) {
5 | case 'UPDATE_MAP':
6 | return {...state, mapViewState: action.mapViewState};
7 | case 'LOAD_ID_DATA_SUCCESS':
8 |             return { ...state, displacementData: action.data }; // loadIDData puts the payload on action.data
9 | default:
10 | return state;
11 | }
12 | }
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/sagas/index.js:
--------------------------------------------------------------------------------
1 | // import { fork } from 'redux-saga/effects';
2 | // import watchMapData from './watchers';
3 | //
4 | // // Here, we register our watcher saga(s) and export as a single generator
5 | // // function (startForeman) as our root Saga.
6 | // export default function* startForman() {
7 | // yield fork(watchMapData);
8 | // }
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/sagas/mapDataSaga.js:
--------------------------------------------------------------------------------
1 | // import { put, call } from 'redux-saga/effects';
2 | // import { dummyMapData } from './../../../Api/api';
3 | //
4 | // export function* mapDataSaga({ payload }) {
5 | // try {
6 | // const mapData = yield call(dummyMapData, payload);
7 | // yield [
8 | // put({ type: 'LOAD_ID_DATA_SUCCESS', mapData })
9 | // ];
10 | // } catch (error) {
11 | // yield put({ type: 'LOAD_MAPDATA_ERROR', error });
12 | // }
13 | // }
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/sagas/watchers.js:
--------------------------------------------------------------------------------
1 | // import { takeLatest } from 'redux-saga/effects';
2 | // import { mapDataSaga } from './mapDataSaga';
3 | //
4 | // // Watches for LOAD_ID_DATA action type asynchronously
5 | // export default function* watchMapData() {
6 | // yield takeLatest('LOAD_ID_DATA_SUCCESS', mapDataSaga);
7 | // }
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/MapVizContainer/store/configureStore.js:
--------------------------------------------------------------------------------
1 | // import { createStore, applyMiddleware } from 'redux';
2 | // import createSagaMiddleware from 'redux-saga';
3 | // import reducer from './../reducers/mapReducers';
4 | // import rootSaga from './../sagas'; // TODO: Next step
5 | //
6 | // // Returns the store instance
7 | // // It can also take initialState argument when provided
8 | // const configureStore = () => {
9 | // const sagaMiddleware = createSagaMiddleware();
10 | // return {
11 | // ...createStore(reducer,
12 | // applyMiddleware(sagaMiddleware)),
13 | // runSaga: sagaMiddleware.run(rootSaga)
14 | // };
15 | // };
16 | //
17 | // export default configureStore;
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/app.js:
--------------------------------------------------------------------------------
1 | import React, { Component, PropTypes } from 'react';
2 | import {createStore} from 'redux';
3 | import {Provider, connect} from 'react-redux';
4 |
5 | import Header from '../common/Header';
6 | const propTypes = {
7 | children: PropTypes.element.isRequired,
8 | };
9 |
10 | export default class App extends React.Component {
11 | render() {
12 | return (
13 |         <div><Header />
14 |             {this.props.children}
15 |         </div>
16 | )
17 | }
18 | }
19 |
20 | App.propTypes = propTypes;
21 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/containers/home.js:
--------------------------------------------------------------------------------
1 | import React from 'react'
2 |
3 | const Home = () => {
4 |     return <div>Home...nothing here yet</div>
5 |
6 | }
7 |
8 | export default Home
9 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/index.css:
--------------------------------------------------------------------------------
1 | /*body {*/
2 | /*margin: 0;*/
3 | /*padding: 0;*/
4 | /*font-family: sans-serif;*/
5 | /*}*/
6 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/index.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import { Router, browserHistory } from 'react-router';
4 |
5 | import routes from './routes';
6 | import Bootstrap from 'bootstrap/dist/css/bootstrap.css';
7 | import './themeCss/css/main.css';
8 | import 'bootstrap/dist/css/bootstrap-theme.css';
9 | import { Provider } from 'react-redux';
10 | import {createStore} from 'redux';
11 | import mapReducer from './containers/MapVizContainer/reducers/mapReducers';
12 | import Layout from './layout';
13 |
14 |
15 | const store = createStore(mapReducer);
16 |
17 | // We require the routes and render to the DOM using ReactDOM API
18 | ReactDOM.render(
19 |     <Provider store={store}>
20 |         <Layout>
21 |             <Router routes={routes} history={browserHistory} />
22 |         </Layout>
23 |     </Provider>,
24 | document.getElementById('root')
25 |
26 | );
27 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/layout.js:
--------------------------------------------------------------------------------
1 | import Header from './common/Header';
2 | import Footer from './common/Footer';
3 | import React, {Component} from 'react';
4 |
5 | class Layout extends Component {
6 | render() {
7 | return (
8 |             <div>
9 |                 <Header />{this.props.children}<Footer />
10 |             </div>
11 |
12 |         )
13 | }
14 | };
15 |
16 | export default Layout;
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/logo.svg:
--------------------------------------------------------------------------------
[inline SVG omitted; see https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/logo.svg]
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/reducers/index.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/reducers/index.js
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/routes.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import {Route, IndexRoute }from 'react-router';
3 | import App from './containers/app';
4 | import HomePage from './components/HomePage';
5 | import MapVizPage from './containers/MapVizContainer/MapVizPage';
6 | import MapVizPageTest from './containers/MapVizContainer/MapVizExample';
7 | import MapVizPageScatter from './containers/MapVizContainer/MapVizPage.scatter';
8 |
9 | let routes = (
10 |
11 |
12 |
13 |
14 |
15 |
16 | );
17 |
18 |
19 | export default (
20 |
21 |
22 |
23 |
24 |
25 |
26 | )
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/css/ie8.css:
--------------------------------------------------------------------------------
1 | /*
2 | Spectral by HTML5 UP
3 | html5up.net | @n33co
4 | Free for personal and commercial use under the CCA 3.0 license (html5up.net/license)
5 | */
6 | /* Icon */
7 | .icon.major {
8 | border: none; }
9 | .icon.major:before {
10 | font-size: 3em; }
11 |
12 | /* Form */
13 | label {
14 | color: #2E3842; }
15 |
16 | input[type="text"],
17 | input[type="password"],
18 | input[type="email"],
19 | select,
20 | textarea {
21 | border: solid 1px #dfdfdf; }
22 |
23 | /* Button */
24 | input[type="submit"],
25 | input[type="reset"],
26 | input[type="button"],
27 | button,
28 | .button {
29 | border: solid 2px #dfdfdf; }
30 | input[type="submit"].special,
31 | input[type="reset"].special,
32 | input[type="button"].special,
33 | button.special,
34 | .button.special {
35 | border: 0 !important; }
36 |
37 | /* Page Wrapper + Menu */
38 | #menu {
39 | display: none; }
40 |
41 | body.is-menu-visible #menu {
42 | display: block; }
43 |
44 | /* Header */
45 | #header nav > ul > li > a.menuToggle:after {
46 | display: none; }
47 |
48 | /* Banner + Wrapper (style4) */
49 | #banner,
50 | .wrapper.style4 {
51 | -ms-behavior: url("js/ie/backgroundsize.min.htc"); }
52 | #banner:before,
53 | .wrapper.style4:before {
54 | display: none; }
55 |
56 | /* Banner */
57 | #banner .more {
58 | height: 4em; }
59 | #banner .more:after {
60 | display: none; }
61 |
62 | /* Main */
63 | #main > header {
64 | -ms-behavior: url("js/ie/backgroundsize.min.htc"); }
65 | #main > header:before {
66 | display: none; }
67 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/css/ie9.css:
--------------------------------------------------------------------------------
1 | /*
2 | Spectral by HTML5 UP
3 | html5up.net | @n33co
4 | Free for personal and commercial use under the CCA 3.0 license (html5up.net/license)
5 | */
6 | /* Spotlight */
7 | .spotlight {
8 | display: block; }
9 | .spotlight .image {
10 | display: inline-block;
11 | vertical-align: top; }
12 | .spotlight .content {
13 | padding: 4em 4em 2em 4em ;
14 | display: inline-block; }
15 | .spotlight:after {
16 | clear: both;
17 | content: '';
18 | display: block; }
19 |
20 | /* Features */
21 | .features {
22 | display: block; }
23 | .features li {
24 | float: left; }
25 | .features:after {
26 | content: '';
27 | display: block;
28 | clear: both; }
29 |
30 | /* Banner + Wrapper (style4) */
31 | #banner,
32 | .wrapper.style4 {
33 | background-image: url("../../images/banner.jpg");
34 | background-position: center center;
35 | background-repeat: no-repeat;
36 | background-size: cover;
37 | position: relative; }
38 | #banner:before,
39 | .wrapper.style4:before {
40 | background: #000000;
41 | content: '';
42 | height: 100%;
43 | left: 0;
44 | opacity: 0.5;
45 | position: absolute;
46 | top: 0;
47 | width: 100%; }
48 | #banner .inner,
49 | .wrapper.style4 .inner {
50 | position: relative;
51 | z-index: 1; }
52 |
53 | /* Banner */
54 | #banner {
55 | padding: 14em 0 12em 0 ;
56 | height: auto; }
57 | #banner:after {
58 | display: none; }
59 |
60 | /* CTA */
61 | #cta .inner header {
62 | float: left; }
63 | #cta .inner .actions {
64 | float: left; }
65 | #cta .inner:after {
66 | clear: both;
67 | content: '';
68 | display: block; }
69 |
70 | /* Main */
71 | #main > header {
72 | background-image: url("../../images/banner.jpg");
73 | background-position: center center;
74 | background-repeat: no-repeat;
75 | background-size: cover;
76 | position: relative; }
77 | #main > header:before {
78 | background: #000000;
79 | content: '';
80 | height: 100%;
81 | left: 0;
82 | opacity: 0.5;
83 | position: absolute;
84 | top: 0;
85 | width: 100%; }
86 | #main > header > * {
87 | position: relative;
88 | z-index: 1; }
89 |
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/css/images/arrow.svg:
--------------------------------------------------------------------------------
[inline SVG omitted; see https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/css/images/arrow.svg]
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/css/images/banner.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/css/images/banner.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/css/images/bars.svg:
--------------------------------------------------------------------------------
[inline SVG omitted; see https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/css/images/bars.svg]
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/css/images/close.svg:
--------------------------------------------------------------------------------
[inline SVG omitted; see https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/css/images/close.svg]
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/fonts/FontAwesome.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/FontAwesome.otf
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/fonts/fontawesome-webfont.woff2
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/Screen Shot 2017-04-27 at 15.11.09 copy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/Screen Shot 2017-04-27 at 15.11.09 copy.png
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/banner.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/banner.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/banner1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/banner1.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/d4d-logo-meetup-banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/d4d-logo-meetup-banner.png
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/no02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/no02.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/no03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/no03.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/pic01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic01.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/pic02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic02.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/pic03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic03.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/pic04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic04.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/themeCss/images/pic05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal-displacement-web/client/src/themeCss/images/pic05.jpg
--------------------------------------------------------------------------------
/internal-displacement-web/client/src/utils/convertDataToGeojson.js:
--------------------------------------------------------------------------------
1 | export const convertArrToGeojsonPoints = (dataArr, lngAccessor, latAccessor) => {
2 | let outFeatures = dataArr.map(d => {
3 | return {
4 | type: 'Feature',
5 | // properties: d,
6 | geometry: {"type":"Point","coordinates":[d[lngAccessor], d[latAccessor]]}
7 | }
8 | });
9 |
10 | return {
11 | "type":"FeatureCollection",
12 | "features":outFeatures
13 | }
14 |
15 | };
16 |
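For reference, a round trip through this helper (input values invented for illustration; note that properties is commented out above, so the output features carry geometry only):

    // illustrative input/output for convertArrToGeojsonPoints
    const rows = [{ long: -123.1, lat: 49.25, count: 7 }];
    const fc = convertArrToGeojsonPoints(rows, 'long', 'lat');
    // fc => { type: 'FeatureCollection',
    //         features: [{ type: 'Feature',
    //                      geometry: { type: 'Point', coordinates: [-123.1, 49.25] } }] }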
--------------------------------------------------------------------------------
/internal-displacement-web/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "id-web",
3 | "version": "1.0.0",
4 | "description": "",
5 | "private": true,
6 | "dependencies": {
7 | "babel-cli": "^6.24.0",
8 | "babel-core": "^6.24.0"
9 |
10 | },
11 | "scripts": {
12 | "start": "concurrently \"nodemon server\" \"cd client && npm start\"",
13 | "start-localDB": "NODE_DB=LOCAL concurrently \"nodemon server\" \"cd client && npm start\""
14 | },
15 | "devDependencies": {
16 | "concurrently": "^3.4.0",
17 | "nodemon": "latest"
18 | },
19 | "author": "",
20 | "license": "ISC"
21 | }
22 |
--------------------------------------------------------------------------------
/internal-displacement-web/server/api/reportLocationRequest.js:
--------------------------------------------------------------------------------
1 | const db = require('./../pgDB');
2 |
3 | module.exports = function (req, res) {
4 | db.any("select * from ( (select * from report_location LEFT JOIN location on report_location.location = location.id) t1 inner join (select id, quantity from report where quantity is not null) t2 on t1.report= t2.id) t3 where t3.latlong is not null", [true])
5 | .then(data => {
6 | console.log(data);
7 | //todo need to check if data needs JSON.stringify
8 | res.json(data);
9 | })
10 | .catch(error => {
11 | console.log(error)
12 | res.status(500).json({error: error, message: 'query error'});
13 | });
14 | };
15 |
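The single-line SQL above is hard to scan; the same query, unchanged in content, reformatted:

    -- same query as above, reformatted for readability
    select *
    from (
        (select * from report_location
            left join location on report_location.location = location.id) t1
        inner join
        (select id, quantity from report where quantity is not null) t2
            on t1.report = t2.id
    ) t3
    where t3.latlong is not null;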
--------------------------------------------------------------------------------
/internal-displacement-web/server/api/sampleArticleRequest.js:
--------------------------------------------------------------------------------
1 | const db = require('./../pgDB');
2 |
3 | module.exports = function (req, res) {
4 | db.any("select * from article limit 1", [true])
5 | .then(data => {
6 | console.log(data);
7 | //todo need to check if data needs JSON.stringify
8 | res.json(data);
9 | })
10 | .catch(error => {
11 | console.log(error)
12 | res.status(500).json({error: error, message: 'query error'});
13 | });
14 | };
--------------------------------------------------------------------------------
/internal-displacement-web/server/api/test.js:
--------------------------------------------------------------------------------
1 | const request = require('request');
2 | module.exports = function (req, res) {
3 | console.log('trying to get test data');
4 | const dummyMapUrl = 'https://jamesleondufour.carto.com/api/v2/sql?q=select%20count,%20long,%20lat,%20date%20from%20public.gdelt_refugee_2016';
5 | request.get(dummyMapUrl, (err, resp, body) =>{
6 | if(err) {
7 | console.log(err);
8 | res.status(500).json({error: 'internal error'}) // send the error via the Express res, not the upstream resp
9 | return
10 | }
11 |
12 | if (resp.statusCode == 200) {
13 | res.json(body);
14 | } else {
15 | res.status(404).json({error: 'not found'});
16 | }
17 | });
18 |
19 | };
20 |
21 |
--------------------------------------------------------------------------------
/internal-displacement-web/server/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const express = require("express");
4 | const http = require('http');
5 | const env = process.env.NODE_ENV || 'development';
6 | // Setup server
7 | const app = express();
8 | const server = http.createServer(app);
9 | require('./routes')(app);
10 |
11 | app.set('port', (process.env.PORT || 3322));
12 |
13 | if (process.env.NODE_ENV === 'production') {
14 | app.use(express.static('client/build'));
15 | }
16 |
17 | function startServer() {
18 | server.listen(app.get('port'), () => {
19 | console.log('Express server listening on %d, in %s mode', app.get('port'), env);
20 | });
21 | }
22 |
23 | setImmediate(startServer);
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/internal-displacement-web/server/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "id-web-server",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "author": "",
10 | "license": "ISC",
11 | "dependencies": {
12 | "body-parser": "^1.17.1",
13 | "express": "^4.15.2",
14 | "pg": "^6.1.5",
15 | "pg-promise": "^5.6.4",
16 | "request": "^2.81.0"
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/internal-displacement-web/server/pgDB/index.js:
--------------------------------------------------------------------------------
1 | const pgp = require('pg-promise')();
2 | let connectionObj;
3 | //if not using docker
4 | //create a pgConfig.js file in the same directory and put your credentials there
5 | if (process.env.NODE_DB === 'LOCAL') {
6 | connectionObj = require('./pgConfig');
7 | } else {
8 | connectionObj = {
9 | user: process.env.DB_USER,
10 | database: process.env.DB_NAME,
11 | password: process.env.DB_PASS,
12 | host: process.env.DB_HOST
13 | };
14 | }
15 |
16 | //export db instance to be shared
17 | module.exports = pgp(connectionObj);
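The pgConfig.js referenced above is a local, git-ignored module you create yourself when running without Docker. A minimal sketch of its shape (all values are placeholders, not real credentials):

    // pgConfig.js -- local-only; do not commit real credentials
    module.exports = {
        user: 'postgres',
        database: 'id',
        password: 'CHANGE_ME',
        host: 'localhost',
        port: 5432
    };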
--------------------------------------------------------------------------------
/internal-displacement-web/server/routes.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | /**
4 | * Main application routes
5 | */
6 |
7 | const path = require('path');
8 | const bodyParser = require('body-parser');
9 |
10 | module.exports = function (app) {
11 |
12 | app.use(bodyParser.json()); // for parsing application/json
13 | app.use(bodyParser.urlencoded({ extended: true })); // for parsing application/x-www-form-urlencoded
14 |
15 | // Insert routes below
16 | app.use('/api/test', require('./api/test'));
17 | app.use('/api/testDB', require('./api/sampleArticleRequest'));
18 | app.use('/api/report-location-data', require('./api/reportLocationRequest'));
19 |
20 | // Serve the client index.html at the root route
21 | app.route('/')
22 | .get((req, res) => {
23 | res.sendFile(path.resolve(app.get('appPath') + '/index.html'));
24 | });
25 |
26 | app.use(function (req, res, next) {
27 | res.setHeader('Access-Control-Allow-Origin', '*');
28 | res.setHeader('Access-Control-Allow-Methods', 'GET, POST');
29 | res.setHeader('Access-Control-Allow-Headers', 'X-Requested-With,content-type, Authorization');
30 | next();
31 | });
32 | };
33 |
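One caveat: Express applies middleware in registration order, so the Access-Control-Allow-* headers above only run for requests that fall through every earlier route. If cross-origin access to the /api routes is the goal, the header middleware needs to come first; a sketch of the reordering:

    module.exports = function (app) {
        // register CORS headers before the routes so they apply to /api/* responses
        app.use(function (req, res, next) {
            res.setHeader('Access-Control-Allow-Origin', '*');
            res.setHeader('Access-Control-Allow-Methods', 'GET, POST');
            res.setHeader('Access-Control-Allow-Headers', 'X-Requested-With,content-type, Authorization');
            next();
        });
        app.use('/api/test', require('./api/test'));
        // ...remaining routes registered as above
    };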
--------------------------------------------------------------------------------
/internal-displacement-web/src/db.js:
--------------------------------------------------------------------------------
1 | var pg = require('pg');
2 |
3 | // create a config to configure both pooling behavior
4 | // and client options
5 | // note: these environment variables are passed into the nodejs Docker container from docker.env
6 | var config = {
7 | user: process.env.DB_USER,
8 | database: process.env.DB_NAME,
9 | password: process.env.DB_PASS,
10 | host: process.env.DB_HOST,
11 | max: 10, // max number of clients in the pool
12 | idleTimeoutMillis: 30000, // how long a client is allowed to remain idle before being closed
13 | };
14 |
15 |
16 | //this initializes a connection pool
17 | //it will keep idle connections open for 30 seconds
18 | //and cap the pool at a maximum of 10 clients in total
19 | var pool = new pg.Pool(config);
20 |
21 | // to run a query we can acquire a client from the pool,
22 | // run a query on the client, and then return the client to the pool
23 | pool.connect(function(err, client, done) {
24 | if(err) {
25 | return console.error('error fetching client from pool', err);
26 | }
27 | console.log('connected')
28 | client.query('SELECT $1::int AS number', ['1'], function(err, result) {
29 | //call `done(err)` to release the client back to the pool (or destroy it if there is an error)
30 | done(err);
31 |
32 | if(err) {
33 | return console.error('error running query', err);
34 | }
35 | console.log(result.rows[0].number);
36 | //output: 1
37 | });
38 | });
39 |
40 | pool.on('error', function (err, client) {
41 | // if an error is encountered by a client while it sits idle in the pool
42 | // the pool itself will emit an error event with both the error and
43 | // the client which emitted the original error
44 | // this is a rare occurrence but can happen if there is a network partition
45 | // between your application and the database, the database restarts, etc.
46 | // and so you might want to handle it and at least log it out
47 | console.error('idle client error', err.message, err.stack)
48 | })
--------------------------------------------------------------------------------
/internal_displacement/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal_displacement/__init__.py
--------------------------------------------------------------------------------
/internal_displacement/add_countries.py:
--------------------------------------------------------------------------------
1 | from internal_displacement.model.model import Country, CountryTerm, Session
2 | import pycountry
3 | from sqlalchemy import create_engine
4 | import sqlalchemy
5 |
6 |
7 | def load_countries(session):
8 |
9 | for c in pycountry.countries:
10 | country = Country(code=c.alpha_3)
11 | session.add(country)
12 | session.commit()
13 | country_name = CountryTerm(term=c.name, country=country)
14 | session.add(country_name)
15 | session.commit()
16 | try:
17 | off_name = c.official_name
18 | if off_name != c.name:
19 | official_name = CountryTerm(
20 | term=c.official_name, country=country)
21 | session.add(official_name)
22 | session.commit()
23 |         except AttributeError:
24 |             pass  # this country has no official_name
25 |         except sqlalchemy.exc.IntegrityError:
26 |             session.rollback()  # duplicate term; reset the failed transaction
27 |         try:
28 |             common_name = CountryTerm(term=c.common_name, country=country)
29 |             session.add(common_name)
30 |             session.commit()
31 |         except AttributeError:
32 |             pass  # this country has no common_name
33 |         except sqlalchemy.exc.IntegrityError:
34 |             session.rollback()
35 |
36 |
37 | def delete_countries(session):
38 |
39 |     session.execute("TRUNCATE TABLE country CASCADE;")
40 |     session.commit()
41 |
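A minimal sketch of driving load_countries, following the Session.configure pattern used in the test suite (the postgres URL here is an assumption):

    from sqlalchemy import create_engine
    from internal_displacement.model.model import Session
    from internal_displacement.add_countries import load_countries

    engine = create_engine('postgresql://user:password@localhost/id')  # assumed URL
    Session.configure(bind=engine)
    session = Session()
    load_countries(session)  # one Country per pycountry entry, plus its name terms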
--------------------------------------------------------------------------------
/internal_displacement/article.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 |
4 | def date_time_converter(dt):
5 |     if isinstance(dt, datetime.datetime):
6 |         return str(dt)
7 |     else:
8 |         return "Invalid datetime"
9 |
10 |
11 | def span_overlap(span1, span2):
12 |     '''Return True if the two spans share any elements, else False.'''
13 |     set1 = set(span1)
14 |     return len(set1.intersection(span2)) > 0
15 |
16 |
17 | class Article(object):
18 | """Contains article text, date, extracted information and tag
19 | Parameters
20 | ----------
21 | content: the text from the article:String
22 | publication_date: the date of publication:datetime.datetime
23 | title: the title:String
24 | authors: the authors:list[String]
25 | domain: the domain:String
26 | content_type: the type of content (text,image,video etc):String
27 | url: the url of the article:String
28 | language: the two-letter language code of the article:String
29 | see https://cloud.google.com/translate/docs/languages
30 | country_codes: a list of ISO 3166 country codes:List
31 | reports: a list of extracted reports
32 | relevance: relevance of article to IDPs:Boolean
33 |
34 | """
35 |
36 |     def __init__(self, content, pub_date, title, content_type, authors, domain, url, language="EN", country_codes=None, reports=None, relevance=False):
37 |         self.content = content
38 |         self.publication_date = pub_date
39 |         self.title = title
40 |         self.authors = authors
41 |         self.domain = domain
42 |         self.content_type = content_type
43 |         self.url = url
44 |         self.language = language
45 |         # None defaults avoid the shared-mutable-default pitfall; both values were
46 |         # previously dropped even though get_unique_tag_spans relies on self.reports
47 |         self.country_codes = country_codes or []
48 |         self.reports = reports or []
49 |         self.relevance = relevance
50 |
51 |     def change_language(self, language):
52 |         self.language = language
53 |
54 |     def get_unique_tag_spans(self):
55 |         '''Get a list of unique token spans
56 |         for visualizing a complete article along
57 |         with all extracted facts.
58 |         Each extracted report has its own list of spans
59 |         which may in some cases overlap, particularly
60 |         for date and location tags.
61 |         '''
62 |         # Collect all spans, de-duplicate by start index and sort
63 |         all_spans = []
64 |         for report in self.reports:
65 |             all_spans.extend(report.tag_spans)
66 |         unique_spans = list({v['start']: v for v in all_spans}.values())
67 |         unique_spans = sorted(unique_spans, key=lambda k: k['start'])
68 |         # Merge any spans that still overlap
69 |         non_overlapping_spans = []
70 |         current_end = -1
71 |         for span in unique_spans:
72 |             if span['start'] > current_end:
73 |                 non_overlapping_spans.append(span)
74 |                 current_end = span['end']
75 |             else:
76 |                 # Merge the overlapping span into the previous one
77 |                 current_last_span = non_overlapping_spans[-1]
78 |                 new_span = {}
79 |                 new_span['type'] = ", ".join([current_last_span['type'], span['type']])
80 |                 new_span['start'] = current_last_span['start']
81 |                 new_span['end'] = max(current_last_span['end'], span['end'])
82 |                 non_overlapping_spans[-1] = new_span
83 |                 current_end = new_span['end']
84 |
85 |         return non_overlapping_spans
86 |
87 |     def tag(self, tag):
88 |         """Store the tag produced by the interpreter.
89 |         (Assigning to self.tag would shadow this method, so the value
90 |         is kept on self.tag_ instead.)
91 |         """
92 |         self.tag_ = tag
93 |
94 |     def parse(self):
95 |         """Use interpreter to parse article
96 |         """
97 |         pass
98 |
99 |     def get_pub_date_string(self):
100 |         return date_time_converter(self.publication_date)
101 |
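A small illustration of how get_unique_tag_spans merges overlapping spans; the spans and the FakeReport stand-in (anything with a tag_spans attribute, mimicking an extracted report) are hypothetical:

    import datetime
    from collections import namedtuple
    from internal_displacement.article import Article

    FakeReport = namedtuple('FakeReport', ['tag_spans'])

    article = Article("content", datetime.datetime.now(), "title", "text",
                      [], "example.com", "http://example.com/a",
                      reports=[
                          FakeReport([{'type': 'loc', 'start': 27, 'end': 32}]),
                          FakeReport([{'type': 'date', 'start': 30, 'end': 39}]),
                      ])
    print(article.get_unique_tag_spans())
    # -> [{'type': 'loc, date', 'start': 27, 'end': 39}]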
--------------------------------------------------------------------------------
/internal_displacement/classifiers/default_encoder.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal_displacement/classifiers/default_encoder.pkl
--------------------------------------------------------------------------------
/internal_displacement/classifiers/readme.txt:
--------------------------------------------------------------------------------
1 | Directory for pre-trained classification models.
--------------------------------------------------------------------------------
/internal_displacement/extracted_report.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import re
3 | from spacy.tokens import Token, Span
4 | from datetime import datetime
5 |
6 |
7 | def convert_tokens_to_strings(value):
8 |     '''Return the text of a spaCy Token or Span,
9 |     or str(value) for anything else.
10 |     '''
11 |     if isinstance(value, (Token, Span)):
12 |         return value.text
13 |     return str(value)
14 |
15 |
16 | def convert_quantity(value):
17 | '''Convert an extracted quantity to an integer.
18 | Solution forked from
19 | https://github.com/ghewgill/text2num/blob/master/text2num.py
20 | and enhanced with numerical and array input
21 | '''
22 |     # commas are stripped per item below, once the input is normalized to a list
23 | Small = {
24 | 'zero': 0,
25 | 'one': 1,
26 | 'two': 2,
27 | 'three': 3,
28 | 'four': 4,
29 | 'five': 5,
30 | 'six': 6,
31 | 'seven': 7,
32 | 'eight': 8,
33 | 'nine': 9,
34 | 'ten': 10,
35 | 'eleven': 11,
36 | 'twelve': 12,
37 | 'thirteen': 13,
38 | 'fourteen': 14,
39 | 'fifteen': 15,
40 | 'sixteen': 16,
41 | 'seventeen': 17,
42 | 'eighteen': 18,
43 | 'nineteen': 19,
44 | 'twenty': 20,
45 | 'thirty': 30,
46 | 'forty': 40,
47 | 'fifty': 50,
48 | 'sixty': 60,
49 | 'seventy': 70,
50 | 'eighty': 80,
51 | 'ninety': 90 }
52 |
53 | Magnitude = {
54 | 'thousand': 1000,
55 | 'million': 1000000,
56 | 'billion': 1000000000,
57 | 'trillion': 1000000000000,
58 | 'quadrillion': 1000000000000000,
59 | 'quintillion': 1000000000000000000,
60 | 'sextillion': 1000000000000000000000,
61 | 'septillion': 1000000000000000000000000,
62 | 'octillion': 1000000000000000000000000000,
63 | 'nonillion': 1000000000000000000000000000000,
64 | 'decillion': 1000000000000000000000000000000000,
65 | }
66 |
67 | Vague = {
68 | 'numbers': 5,
69 | 'dozens': 55,
70 | 'tens': 55,
71 | 'hundreds': 550,
72 | 'thousands': 5500,
73 | 'millions': 5500000,
74 | 'billions': 5500000000,
75 | 'trillions': 5500000000000,
76 | 'quadrillions': 5500000000000000,
77 | 'quintillions': 5500000000000000000,
78 | 'sextillions': 5500000000000000000000,
79 | 'septillions': 5500000000000000000000000,
80 | 'octillions': 5500000000000000000000000000,
81 | 'nonillions': 5500000000000000000000000000000,
82 | 'decillions': 5500000000000000000000000000000000,
83 | }
84 |
85 |     a = []
86 |     if not isinstance(value, list):
87 |         value = [value]
88 |     for s_item in value:
89 |         a += re.split(r"[\s-]+", str(s_item).replace(",", ""))
90 | n = 0
91 | g = 0
92 | vague_of = False
93 | for w in a:
94 | try:
95 | x = int(w)
96 | g += x
97 |         except ValueError:
98 | if w.lower() == 'of':
99 | vague_of = True
100 | continue
101 |
102 | if vague_of:
103 | if w[-1:] != 's':
104 | w = w + 's'
105 | if w == 'hundreds' or w == 'hundred':
106 | g *= 100
107 | elif w[:-1] in Magnitude:
108 | g *= Magnitude[w[:-1]]
109 | continue
110 |
111 | if w in Small:
112 | g += Small[w]
113 | elif w == "hundred" and g != 0:
114 | g *= 100
115 | elif w in Magnitude:
116 | n += g * Magnitude[w]
117 | g = 0
118 | elif w in Vague:
119 | g = Vague[w]
120 | else:
121 | return None
122 |
123 | vague_of = False
124 | return n + g
125 |
126 |
127 | class ExtractedReport:
128 |
129 |     def __init__(self, locations, event_term, subject_term, quantity, story, tag_spans=None):
130 |         if locations:
131 |             self.locations = [convert_tokens_to_strings(l) for l in locations]
132 |         else:
133 |             self.locations = []
134 |         self.event_term = convert_tokens_to_strings(event_term)
135 |         self.subject_term = convert_tokens_to_strings(subject_term)
136 |         self.quantity = convert_quantity(convert_tokens_to_strings(quantity))
137 |         self.story = story
138 |         self.tag_spans = tag_spans or []  # was silently dropped; Article.get_unique_tag_spans reads it
139 |
140 |     def display(self):
141 |         print("Location: {} EventTerm: {} SubjectTerm: {} Quantity: {}".format(self.locations, self.event_term, self.subject_term, self.quantity))
142 |
143 | def __eq__(self, other):
144 | if isinstance(other, ExtractedReport):
145 | return ((self.locations == other.locations) and
146 | (self.event_term == other.event_term) and
147 | (self.subject_term == other.subject_term) and
148 | (self.quantity == other.quantity)
149 | )
150 | else:
151 | return False
152 |
153 | def __ne__(self, other):
154 | return (not self.__eq__(other))
155 |
156 | def __repr__(self):
157 | locations = ",".join(self.locations)
158 | rep = "Locations:{} Verb:{} Noun:{} Quantity:{}".format(
159 | locations, self.event_term, self.subject_term, self.quantity)
160 | return rep
161 |
162 | def __hash__(self):
163 | return hash(self.__repr__())
164 |
165 | def to_json(self):
166 | d = {}
167 | d['Location'] = self.locations
168 | d['EventTerm'] = self.event_term
169 | d['SubjectTerm'] = self.subject_term
170 | d['Quantity'] = self.quantity
171 | return d
172 |
173 |
174 | class Fact(object):
175 | '''Wrapper for individual facts found within articles
176 | '''
177 |
178 | def __init__(self, token, full_span=None, lemma_=None, fact_type=None, start_offset=0):
179 | self.token = token
180 | self.type_ = fact_type
181 | if full_span:
182 | self.text = full_span.text
183 | elif token:
184 | self.text = token.text
185 | else:
186 | self.text = ''
187 | self.lemma_ = lemma_
188 | # Set the start index
189 | if isinstance(token, Token):
190 | self.start_idx = token.idx + start_offset
191 | elif isinstance(token, Span):
192 | self.start_idx = token[0].idx + start_offset
193 | else:
194 | self.start_idx = 0
195 | # Set the end index
196 | token_length = len(self.text)
197 | self.end_idx = self.start_idx + token_length
198 |
199 | def __str__(self):
200 | return self.text
201 |
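convert_quantity accepts numerals, number words, and vague quantities (the Vague table maps e.g. 'hundreds' to a midpoint guess of 550). A few illustrative calls, consistent with the unit tests in internal_displacement/tests/test_ExtractedReport.py:

    from internal_displacement.extracted_report import convert_quantity

    convert_quantity("3,500")              # -> 3500
    convert_quantity("twelve hundred")     # -> 1200
    convert_quantity(["one", "million"])   # -> 1000000
    convert_quantity("tens of thousands")  # -> 55000
    convert_quantity("unknown words")      # -> None (unparseable)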
--------------------------------------------------------------------------------
/internal_displacement/fact.py:
--------------------------------------------------------------------------------
1 | from spacy.tokens import Token,Span
2 |
3 | class Fact(object):
4 | '''Wrapper for individual facts found within articles
5 | '''
6 |
7 | def __init__(self, token, full_span=None, lemma_=None, fact_type=None, start_offset=0):
8 | self.token = token
9 | self.type_ = fact_type
10 |         if full_span:
11 |             self.text = full_span.text
12 |         elif token:
13 |             self.text = token.text  # fall back to the token's own text
14 |         else:
15 |             self.text = ''
16 |         self.lemma_ = lemma_
17 |         # Set the start index
18 |         if isinstance(token, Token):
19 |             self.start_idx = token.idx + start_offset
20 |         elif isinstance(token, Span):
21 |             self.start_idx = token[0].idx + start_offset
22 |         else:
23 |             self.start_idx = 0
24 |         # Set the end index
25 |         token_length = len(self.text)
26 |         self.end_idx = self.start_idx + token_length
27 |
28 |     def __str__(self):
29 |         return self.text
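A sketch of the character-offset bookkeeping, using the same spacy 1.x "en" model loaded elsewhere in this repo:

    import spacy
    from internal_displacement.fact import Fact

    nlp = spacy.load("en")
    doc = nlp("Ten families were displaced in Kabul.")
    fact = Fact(doc[0], lemma_=doc[0].lemma_, fact_type='quantity')
    print(fact.text, fact.start_idx, fact.end_idx)  # Ten 0 3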
--------------------------------------------------------------------------------
/internal_displacement/model/model.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from sqlalchemy import Table, text
4 | from sqlalchemy import create_engine
5 | from sqlalchemy.ext.declarative import declarative_base
6 | from sqlalchemy import Column, Integer, String, ForeignKey, DateTime, Boolean, Numeric
7 | from sqlalchemy.orm import sessionmaker, relationship, object_session
8 |
9 | Base = declarative_base()
10 | Session = sessionmaker()
11 |
12 |
13 | class Status:
14 | NEW = 'new'
15 | FETCHING = 'fetching'
16 | FETCHED = 'fetched'
17 | PROCESSING = 'processing'
18 | PROCESSED = 'processed'
19 | FETCHING_FAILED = 'fetching failed'
20 | PROCESSING_FAILED = 'processing failed'
21 |
22 |
23 | class Category:
24 | OTHER = 'other'
25 | DISASTER = 'disaster'
26 | CONFLICT = 'conflict'
27 |
28 |
29 | class UnexpectedArticleStatusException(Exception):
30 | def __init__(self, article, expected, actual):
31 | super(UnexpectedArticleStatusException, self).__init__(
32 | "Expected article {id} to be in state {expected}, but was in state {actual}".format(
33 | id=article.id, expected=expected, actual=actual
34 | ))
35 | self.expected = expected
36 | self.actual = actual
37 |
38 |
39 | class Article(Base):
40 | __tablename__ = 'article'
41 |
42 | id = Column(Integer, primary_key=True)
43 | url = Column(String)
44 | domain = Column(String)
45 | status = Column(String)
46 | title = Column(String)
47 | publication_date = Column(DateTime)
48 | authors = Column(String)
49 | language = Column(String(2))
50 | relevance = Column(Boolean)
51 | reliability = Column(Numeric)
52 | content = relationship('Content', uselist=False, back_populates='article', cascade="all, delete-orphan")
53 | reports = relationship('Report', back_populates='article', cascade="all, delete-orphan")
54 | categories = relationship('ArticleCategory', cascade="all, delete-orphan")
55 |
56 | def update_status(self, new_status):
57 | """
58 |         Atomically update the status of this Article to new_status.
59 |         If something else changed the status since it was loaded, raise UnexpectedArticleStatusException.
60 | """
61 | session = object_session(self)
62 | if not session:
63 | raise RuntimeError("Object has not been persisted in a session.")
64 |
65 | expected_status = self.status
66 | result = session.query(Article).filter(Article.id == self.id, Article.status == self.status).update({
67 | Article.status: new_status
68 | })
69 | if result != 1:
70 | updated = session.query(Article).filter(Article.id == self.id).one()
71 | raise UnexpectedArticleStatusException(self, expected_status, updated.status)
72 |
73 |
74 | class ArticleCategory(Base):
75 | __tablename__ = 'article_category'
76 |
77 | article_id = Column('article', Integer, ForeignKey('article.id'), primary_key=True)
78 | category = Column('category', String, primary_key=True)
79 | article = relationship('Article', back_populates='categories')
80 |
81 |
82 | class Content(Base):
83 | __tablename__ = 'content'
84 |
85 | article_id = Column('article', Integer, ForeignKey('article.id'), primary_key=True)
86 | article = relationship('Article', back_populates='content')
87 | retrieval_date = Column(DateTime)
88 | content = Column(String)
89 | content_type = Column(String)
90 |
91 |
92 | class Country(Base):
93 | __tablename__ = 'country'
94 |
95 | code = Column(String(3), primary_key=True)
96 | terms = relationship('CountryTerm', back_populates='country', cascade="all, delete-orphan")
97 | locations = relationship('Location', back_populates='country', cascade="all, delete-orphan")
98 |
99 | @classmethod
100 | def lookup(cls, session, code):
101 | return session.query(cls).filter_by(code=code).one()
102 |
103 |
104 | class CountryTerm(Base):
105 | __tablename__ = 'country_term'
106 |
107 | term = Column(String, primary_key=True)
108 | code = Column('country', String(3), ForeignKey('country.code'))
109 | country = relationship('Country', back_populates='terms')
110 |
111 |
112 | report_location = Table(
113 | 'report_location', Base.metadata,
114 | Column('report', ForeignKey('report.id'), primary_key=True),
115 | Column('location', ForeignKey('location.id'), primary_key=True)
116 | )
117 |
118 |
119 | class Location(Base):
120 | __tablename__ = 'location'
121 |
122 | id = Column(Integer, primary_key=True)
123 | description = Column(String)
124 | city = Column(String)
125 | subdivision = Column(String)
126 | code = Column('country', String(3), ForeignKey('country.code'))
127 | country = relationship('Country', back_populates='locations')
128 | latlong = Column(String) # Not tackling PostGIS right now
129 | reports = relationship('Report', secondary=report_location, back_populates='locations')
130 |
131 |
132 | class Report(Base):
133 | __tablename__ = 'report'
134 |
135 | id = Column(Integer, primary_key=True, autoincrement=True)
136 | article_id = Column('article', Integer, ForeignKey('article.id'), primary_key=True)
137 | article = relationship('Article', back_populates='reports')
138 | event_term = Column(String)
139 | subject_term = Column(String)
140 | quantity = Column(Integer)
141 | tag_locations = Column(String)
142 | accuracy = Column(Numeric)
143 | analyzer = Column(String)
144 | analysis_date = Column(DateTime)
145 | locations = relationship('Location', secondary=report_location, back_populates='reports')
146 | datespans = relationship('ReportDateSpan', back_populates='report', cascade="all, delete-orphan")
147 |
148 |
149 | class ReportDateSpan(Base):
150 | __tablename__ = 'report_datespan'
151 |
152 | id = Column(Integer, primary_key=True)
153 | report_id = Column('report', Integer, ForeignKey('report.id'))
154 | report = relationship('Report', back_populates='datespans')
155 | start = Column(DateTime)
156 | finish = Column(DateTime)
157 |
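update_status implements optimistic locking: the UPDATE is filtered on the status the object was loaded with, so if another worker changed it in the meantime the row count is 0 and an exception is raised. A minimal sketch (the postgres URL is an assumption):

    from sqlalchemy import create_engine
    from internal_displacement.model.model import (
        Session, Article, Status, UnexpectedArticleStatusException)

    engine = create_engine('postgresql://user:password@localhost/id')  # assumed URL
    Session.configure(bind=engine)
    session = Session()

    article = session.query(Article).filter_by(status=Status.NEW).first()
    try:
        article.update_status(Status.FETCHING)
        session.commit()
    except UnexpectedArticleStatusException:
        session.rollback()  # another worker claimed this article; skip it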
--------------------------------------------------------------------------------
/internal_displacement/scraper.py:
--------------------------------------------------------------------------------
1 | import newspaper
2 | import csv
3 | import urllib
4 | from urllib import request
5 | from urllib.parse import urlparse
6 | import textract
7 | import os
8 | from collections import OrderedDict
9 | import datetime
10 | from bs4 import BeautifulSoup
11 | import re
12 |
13 | # PDF helper functions
14 |
15 |
16 | def is_pdf_simple_tests(url):
17 | '''Test a url to see if it is a pdf by looking at url and content headers
18 | If so, return the relevant pdf url for parsing
19 | '''
20 | # Simple url-based test
21 | if re.search(r'\.pdf$', url):
22 | return url
23 |
24 | # Test based on headers
25 | try:
26 | page = request.urlopen(url)
27 | content_type = page.getheader('Content-Type')
28 | if content_type == 'application/pdf':
29 | return url
30 | except (urllib.error.HTTPError, urllib.error.URLError, UnicodeEncodeError, ValueError):
31 | pass
32 |
33 |
34 | def is_pdf_iframe_test(url):
35 | '''Test a url to see if the page contains an iframe
36 | and if the iframe content is pdf or not; if True, return the pdf url
37 | '''
38 | try:
39 | page = request.urlopen(url)
40 | soup = BeautifulSoup(page, "html.parser")
41 | iframes = soup.find_all('iframe')
42 | if len(iframes) > 0:
43 | for frame in iframes:
44 | if 'src' in frame.attrs.keys():
45 | src = frame.attrs['src']
46 | # should probably replace with something more robust
47 | if 'http' in src:
48 | if is_pdf_simple_tests(src):
49 | return src
50 | except (urllib.error.HTTPError, urllib.error.URLError, UnicodeEncodeError, ValueError):
51 | pass
52 |
53 |
54 | def is_pdf_consolidated_test(url):
55 | '''Run a series of tests to determine if it is a pdf
56 | If True, return the relevant url
57 | '''
58 |
59 | # Carry out simple tests based upon url and content type
60 | pdf_attempt_1 = is_pdf_simple_tests(url)
61 | if pdf_attempt_1:
62 | return pdf_attempt_1
63 |
64 | # Carry out additional test based by looking for iframe
65 | pdf_attempt_2 = is_pdf_iframe_test(url)
66 | if pdf_attempt_2:
67 | return pdf_attempt_2
68 |
69 | return False
70 |
71 |
72 | def remove_newline(text):
73 | ''' Removes new line and   characters.
74 | '''
75 | text = text.replace('\n', ' ')
76 | text = text.replace('\xa0', ' ')
77 | return text
78 |
79 |
80 | def format_date(date_string):
81 | '''Formats date string from http headers
82 | Returns standardized date format as string
83 | '''
84 | try:
85 | dt = datetime.datetime.strptime(
86 | date_string, "%a, %d %b %Y %H:%M:%S %Z")
87 | formatted_date = dt.strftime("%Y-%m-%d %H:%M:%S")
88 | except (ValueError, TypeError, AttributeError):
89 | formatted_date = None
90 | return formatted_date
91 |
92 |
93 | class Scraper(object):
94 |
95 | def __init__(self):
96 | pass
97 |
98 | def html_article(self, url):
99 | """Downloads and extracts content plus metadata for html page
100 | Parameters
101 | ----------
102 | url: url of page to be scraped
103 |
104 | Returns
105 | -------
106 |         A tuple of (text, publication date, title, content type, authors, domain).
107 | """
108 |
109 | a = newspaper.Article(url)
110 | a.download()
111 | if a.is_downloaded:
112 | a.parse()
113 | article_domain = a.source_url
114 | article_title = a.title
115 | article_authors = a.authors
116 | article_pub_date = a.publish_date
117 | article_text = remove_newline(a.text)
118 | # tag the type of article
119 | # currently default to text but should be able to determine img/video
120 | # etc
121 | article_content_type = 'text'
122 | return article_text, article_pub_date, article_title, article_content_type, article_authors, article_domain
123 |         else:  # Temporary fix to deal with https://github.com/codelucas/newspaper/issues/280
124 |             return "retrieval_failed", None, "", "", [], ""  # same field order as the success tuple
125 |
126 | def get_pdf(self, url):
127 | ''' Takes a pdf url, downloads it and saves it locally.'''
128 | try:
129 |             response = request.urlopen(url)  # needed for both the body and the Last-Modified header
130 | publish_date = response.getheader('Last-Modified')
131 | pdf_file = open('file_to_convert.pdf', 'wb')
132 | pdf_file.write(response.read())
133 | pdf_file.close()
134 | return os.path.join('./', 'file_to_convert.pdf'), publish_date
135 | except (urllib.error.HTTPError, urllib.error.URLError, UnicodeEncodeError, ValueError) as e:
136 | return '', ''
137 |
138 | def get_body_text(self, url):
139 | ''' This function will extract all text from the url passed in
140 | '''
141 | filepath, publish_date = self.get_pdf(url)
142 | if filepath == '':
143 | return '', None
144 | else:
145 | text = str(textract.process(filepath, method='pdfminer'), 'utf-8')
146 |             text = text.replace('\n', ' ')    # equivalent to calling the
147 |             text = text.replace('\xa0', ' ')  # remove_newline helper above
148 | publish_date = format_date(publish_date)
149 | return text, publish_date
150 |
151 | def remove_pdf(self, filepath):
152 | ''' Deletes pdf from disk
153 | Not currently in use as pdfs downloads overwrite self, but may come in
154 | useful later if pdfs are downloaded and stored under different names.
155 | '''
156 | os.remove(filepath)
157 |
158 | def pdf_article(self, url):
159 | try:
160 | article_text, article_pub_date = self.get_body_text(url)
161 | if article_text == '':
162 | return "retrieval_failed", None, "", datetime.datetime.now(), "", ""
163 | else:
164 | article_domain = urlparse(url).hostname
165 | article_content_type = 'pdf'
166 | # improve parsing of pdfs to extract these?
167 | article_title = ''
168 | article_authors = ''
169 | return article_text, article_pub_date, article_title, article_content_type, article_authors, article_domain
170 |         except Exception:  # download or extraction failure
171 |             return "retrieval_failed", None, "", "", [], ""
172 |
173 | def scrape(self, url, scrape_pdfs=True):
174 | """
175 | Scrapes content and metadata from an url
176 | Parameters
177 | ----------
178 | url: the url to be scraped
179 | scrape_pdfs: determines whether pdf files will be scraped or not
180 | default: True
181 |
182 | Returns
183 | -------
184 |         A tuple of scraped fields (see html_article / pdf_article), or None for a pdf when scrape_pdfs is False.
185 |
186 |
187 | """
188 | pdf_check = is_pdf_consolidated_test(url)
189 | if pdf_check and scrape_pdfs:
190 | article = self.pdf_article(pdf_check)
191 | return article
192 | elif not pdf_check:
193 | article = self.html_article(url)
194 | return article
195 |         else:  # pdf detected but scrape_pdfs is False
196 |             return None
197 |
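Note that scrape returns the raw field tuple rather than an Article object; a minimal usage sketch:

    from internal_displacement.scraper import Scraper

    scraper = Scraper()
    result = scraper.scrape('http://www.bbc.com/news/world-europe-39258436')
    if result is None:
        print('pdf skipped')
    else:
        text, pub_date, title, content_type, authors, domain = result
        if text == "retrieval_failed":
            print('could not fetch article')
        else:
            print(domain, title)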
--------------------------------------------------------------------------------
/internal_displacement/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/internal_displacement/tests/__init__.py
--------------------------------------------------------------------------------
/internal_displacement/tests/test_ExtractedReport.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from internal_displacement.extracted_report import *
3 |
4 | class TestExtractedReport(TestCase):
5 |
6 | def test_convert_quantity(self):
7 | self.assertEqual(convert_quantity("twelve"), 12)
8 | self.assertEqual(convert_quantity("seventy five"), 75)
9 | self.assertEqual(convert_quantity("3 hundred"), 300)
10 | self.assertEqual(convert_quantity("twelve hundred"), 1200)
11 | self.assertEqual(convert_quantity("seven million"), 7000000)
12 | self.assertEqual(convert_quantity("twelve thousand three hundred four"), 12304)
13 | self.assertEqual(convert_quantity("32 thousand"), 32000)
14 | self.assertEqual(convert_quantity(["one", "million"]), 1000000)
15 | self.assertEqual(convert_quantity("hundreds of millions"), 550000000)
16 | self.assertEqual(convert_quantity("tens of thousands"), 55000)
17 | self.assertEqual(convert_quantity("tens of thousand"), 55000)
18 | self.assertEqual(convert_quantity("dozens of people"), 55)
19 |
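These are plain unittest cases; a sketch for running the whole test package programmatically (note that several of the other suites expect a test database reachable via the DB_HOST / DB_URL environment variables):

    import unittest

    suite = unittest.defaultTestLoader.discover('internal_displacement/tests')
    unittest.TextTestRunner(verbosity=2).run(suite)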
--------------------------------------------------------------------------------
/internal_displacement/tests/test_Interpreter.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from internal_displacement.interpreter import strip_words, Interpreter
3 | from internal_displacement.article import Article
4 | from internal_displacement.model.model import Category
5 | from langdetect import detect
6 | import pycountry
7 | import spacy
8 | import datetime
9 |
10 | nlp = spacy.load("en")
11 | person_reporting_terms = [
12 | 'displaced', 'evacuated', 'forced', 'flee', 'homeless', 'relief camp',
13 |     'sheltered', 'relocated', 'stranded', 'stuck', "killed", "dead", "died", "drown"
14 | ]
15 |
16 | structure_reporting_terms = [
17 | 'destroyed', 'damaged', 'swept', 'collapsed',
18 | 'flooded', 'washed', 'inundated', 'evacuate'
19 | ]
20 |
21 | person_reporting_units = ["families", "person", "people", "individuals", "locals", "villagers", "residents",
22 | "occupants", "citizens", "households"]
23 |
24 | structure_reporting_units = ["home", "house", "hut", "dwelling", "building", "shop", "business", "apartment",
25 | "flat", "residence"]
26 |
27 | relevant_article_terms = ['Rainstorm', 'hurricane',
28 | 'tornado', 'rain', 'storm', 'earthquake']
29 | relevant_article_lemmas = [t.lemma_ for t in nlp(
30 | " ".join(relevant_article_terms))]
31 |
32 |
33 | class TestInterpreter(TestCase):
34 |
35 | def setUp(self):
36 |
37 | self.interpreter = Interpreter(nlp, person_reporting_terms, structure_reporting_terms,
38 | person_reporting_units, structure_reporting_units, relevant_article_lemmas, 'data/')
39 | self.date = datetime.datetime.now()
40 |
41 | def tearDown(self):
42 | pass
43 |
44 | def test_check_language(self):
45 | test_article = Article("A decent amount of test content which will be used for extracting the language",
46 | self.date, "test_title", "test_content_type", [
47 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters")
48 | language = self.interpreter.check_language(test_article.content)
49 | self.assertEqual(language, "en")
50 |
51 | def test_strip_words(self):
52 | test_place_name = 'the province county district city'
53 | self.assertEqual(strip_words(test_place_name), '')
54 | test_place_name = 'the United States'
55 | self.assertEqual(strip_words(test_place_name), 'United States')
56 |
57 | def test_extract_countries(self):
58 | test_article = Article("The United Kingdom plus Afghanistan plus Sichuan Province, as well as Toronto, Cuba and Bosnia",
59 | self.date, "test_title", "test_content_type", [
60 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters")
61 | countries = self.interpreter.extract_countries(test_article.content)
62 | self.assertIsInstance(countries, list)
63 | self.assertEqual(len(countries), 6)
64 | self.assertIn('GBR', countries)
65 | self.assertIn('AFG', countries)
66 | self.assertIn('CHN', countries)
67 | self.assertIn('CAN', countries)
68 | self.assertIn('CUB', countries)
69 | self.assertIn('BIH', countries)
70 | test_article = Article("No countries mentioned",
71 | self.date, "test_title", "test_content_type", [
72 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters")
73 |         countries = self.interpreter.extract_countries(test_article.content)
74 | self.assertIsInstance(countries, list)
75 | self.assertEqual(len(countries), 0)
76 |
77 | def test_classify_category(self):
78 | disaster_article = Article("Afghanistan – Flash Floods in Faryab and Baghlan Leave 8 Dead", self.date, "test_title", "test_content_type", [
79 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters")
80 | conflict_article = Article("INSIGHT-India-Pakistan clashes escalate into a humanitarian tragedy", self.date, "test_title", "test_content_type", [
81 | "test_author_1", "test_author_2"], "www.butts.com", "www.butts.com/disasters")
82 | disaster = self.interpreter.classify_category(disaster_article)
83 | conflict = self.interpreter.classify_category(conflict_article)
84 | self.assertEqual(disaster, Category.DISASTER)
85 | self.assertEqual(conflict, Category.CONFLICT)
86 |
--------------------------------------------------------------------------------
/internal_displacement/tests/test_Pipeline.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from internal_displacement.interpreter import strip_words, Interpreter
3 | from internal_displacement.scraper import Scraper
4 | from sqlalchemy import create_engine
5 |
6 | from internal_displacement.model.model import Status, Session, Category, Article, Content, Country, CountryTerm, \
7 | Location, Report, ReportDateSpan, ArticleCategory, UnexpectedArticleStatusException
8 | from internal_displacement.pipeline import Pipeline
9 | import spacy
10 | import os
11 |
12 |
13 | nlp = spacy.load("en")
14 | person_reporting_terms = [
15 | 'displaced', 'evacuated', 'forced', 'flee', 'homeless', 'relief camp',
16 |     'sheltered', 'relocated', 'stranded', 'stuck', "killed", "dead", "died", "drown"
17 | ]
18 |
19 | structure_reporting_terms = [
20 | 'destroyed', 'damaged', 'swept', 'collapsed',
21 | 'flooded', 'washed', 'inundated', 'evacuate'
22 | ]
23 |
24 | person_reporting_units = ["families", "person", "people", "individuals", "locals", "villagers", "residents",
25 | "occupants", "citizens", "households"]
26 |
27 | structure_reporting_units = ["home", "house", "hut", "dwelling", "building", "shop", "business", "apartment",
28 | "flat", "residence"]
29 |
30 | relevant_article_terms = ['Rainstorm', 'hurricane',
31 | 'tornado', 'rain', 'storm', 'earthquake']
32 | relevant_article_lemmas = [t.lemma_ for t in nlp(
33 | " ".join(relevant_article_terms))]
34 |
35 | test_urls = [
36 | 'http://www.independent.co.uk/news/somefakenewsstory',
37 | 'http://www.eluniversal.com.mx/articulo/nacion/politica/2017/03/13/manifestantes-obligan-acortar-evento-de-amlo-en-ny',
38 | 'http://www.bbc.com/news/world-europe-39258436',
39 | 'http://www.independent.co.uk/news/world/asia/160-killed-and-hundreds-left-stranded-by-flooding-across-afghanistan-and-pakistan-8746566.html'
40 | ]
41 |
42 |
43 | class TestPipeline(TestCase):
44 |
45 | def setUp(self):
46 | db_host = os.environ.get('DB_HOST')
47 | db_url = 'postgresql://{user}:{password}@{db_host}/{db}'.format(
48 | user='tester', password='tester', db_host=db_host, db='id_test')
49 | engine = create_engine(db_url)
50 | Session.configure(bind=engine)
51 | session = Session()
52 | scraper = Scraper()
53 | interpreter = Interpreter(nlp, person_reporting_terms, structure_reporting_terms,
54 | person_reporting_units, structure_reporting_units, relevant_article_lemmas, 'data/')
55 | self.pipeline = Pipeline(session, scraper, interpreter)
56 | self.session = session
57 | # Add two countries
58 | for c in ['AFG', 'PAK']:
59 | country = Country(code=c)
60 | self.session.add(country)
61 | self.session.commit()
62 |
63 | def tearDown(self):
64 | self.session.rollback()
65 | for url in test_urls:
66 | self.session.query(Article).filter_by(url=url).delete()
67 | self.session.commit()
68 | for c in ['AFG', 'PAK']:
69 | self.session.query(Country).filter_by(code=c).delete()
70 | self.session.commit()
71 |
72 | def test_bad_url(self):
73 | url = test_urls[0]
74 | response = self.pipeline.process_url(url)
75 | self.assertEqual(response, 'fetching failed')
76 | article = self.session.query(Article).filter_by(url=url).first()
77 | self.assertIsNone(article.content)
78 |
79 | def test_non_english_url(self):
80 | url = test_urls[1]
81 | response = self.pipeline.process_url(url)
82 | self.assertEqual(response, 'Processed: Not in English')
83 | article = self.session.query(Article).filter_by(url=url).first()
84 | self.assertEqual(len(article.reports), 0)
85 | self.assertEqual(article.status, Status.PROCESSED)
86 |
87 | def test_irrelevant(self):
88 | url = test_urls[2]
89 | response = self.pipeline.process_url(url)
90 | self.assertEqual(response, 'Processed: Not relevant')
91 | article = self.session.query(Article).filter_by(url=url).first()
92 | self.assertEqual(len(article.reports), 0)
93 | self.assertEqual(article.status, Status.PROCESSED)
94 |
95 | def test_good_url(self):
96 | url = test_urls[3]
97 | response = self.pipeline.process_url(url)
98 | self.assertEqual(response, 'processed')
99 | article = self.session.query(Article).filter_by(url=url).first()
100 | self.assertEqual(len(article.reports), 9)
101 | self.assertEqual(article.status, Status.PROCESSED)
102 | country_codes = set([
103 | location.country.code for report in article.reports for location in report.locations])
104 | self.assertIn('AFG', country_codes)
105 | self.assertIn('PAK', country_codes)
106 | terms = [report.event_term for report in article.reports]
107 | self.assertIn('collapse', terms)
108 | self.assertIn('strand', terms)
109 | units = [report.subject_term for report in article.reports]
110 | self.assertIn('villager', units)
111 | self.assertIn('house', units)
112 |
113 | def test_existing_location(self):
114 | article = Article(url='test-url')
115 | self.session.add(article)
116 |         report = Report(article=article)  # resolve the FK at flush; article.id is still None here
117 | self.session.add(report)
118 | location = Location(code='AFG', description='somelocation')
119 | self.session.add(location)
120 | self.session.commit()
121 | original_id = location.id
122 | self.pipeline.process_location(report, 'somelocation')
123 | self.assertEqual(original_id, report.locations[0].id)
124 |
--------------------------------------------------------------------------------
/internal_displacement/tests/test_Scraper.py:
--------------------------------------------------------------------------------
1 | import os
2 | from unittest import TestCase
3 |
4 | from sqlalchemy import create_engine
5 |
6 | from internal_displacement.model.model import Session, Article
7 | from internal_displacement.scraper import Scraper, is_pdf_simple_tests, is_pdf_iframe_test, format_date
8 |
9 |
10 | class TestScraper(TestCase):
11 |
12 | def setUp(self):
13 | pass
14 |
15 | def tearDown(self):
16 | pass
17 |
18 | def test_is_pdf_simple_tests(self):
19 | url = "http://www.securitycouncilreport.org/atf/cf/%7B65BFCF9B-6D27-4E9C-8CD3-CF6E4FF96FF9%7D/S_2015_302.pdf"
20 | pdf_test = is_pdf_simple_tests(url)
21 | self.assertEqual(pdf_test, url)
22 | url = "http://www.independent.co.uk/news/world/asia/160-killed-and-hundreds-left-stranded-by-flooding-across-afghanistan-and-pakistan-8746566.html"
23 | self.assertFalse(is_pdf_simple_tests(url))
24 |
25 | def test_is_pdf_iframe_test(self):
26 | url = "http://erccportal.jrc.ec.europa.eu/getdailymap/docId/1125"
27 | pdf_test = is_pdf_iframe_test(url)
28 | self.assertEqual(
29 | pdf_test, "http://erccportal.jrc.ec.europa.eu/ERCmaps/ECDM_20150415_Natural_Disasters_Afghanistan_v02.pdf")
30 | url = "http://html.com/tags/iframe/"
31 | self.assertFalse(is_pdf_simple_tests(url))
32 |
33 | def test_format_date(self):
34 | date_string = 'Mon, 01 Jun 2015 16:25:25 GMT'
35 | formatted_date = format_date(date_string)
36 | self.assertEqual(formatted_date, '2015-06-01 16:25:25')
37 | date_string = '16:25:25 GMT'
38 | formatted_date = format_date(date_string)
39 |         self.assertIsNone(formatted_date)
40 | date_string = None
41 | formatted_date = format_date(date_string)
42 |         self.assertIsNone(formatted_date)
43 |
44 |
45 | class TestFetch(TestCase):
46 |
47 | def setUp(self):
48 |         DB_URL = os.environ.get('DB_URL')
49 |         if not DB_URL or not DB_URL.endswith('/id_test'):
50 |             raise RuntimeError('Refusing to run tests against non-test database')
51 | engine = create_engine(DB_URL)
52 | Session.configure(bind=engine)
53 | self.session = Session()
54 |
55 | def tearDown(self):
56 | # self.session.rollback()
57 | pass
58 |
59 |
60 |     def test_html(self):
61 |         url = ('http://www.independent.co.uk/news/world/asia/'
62 |                '160-killed-and-hundreds-left-stranded-by-flooding-across-afghanistan-and-pakistan-8746566.html')
63 |         old = self.session.query(Article).filter_by(url=url).one_or_none()
64 |         if old:
65 |             self.session.delete(old)
66 |             self.session.commit()
67 |         # html_article is a method on Scraper and returns the scraped field tuple
68 |         text, pub_date, title, content_type, authors, domain = Scraper().html_article(url)
69 |         self.assertEqual(domain, 'http://www.independent.co.uk')
70 |         self.assertEqual(content_type, 'text')
71 |         self.assertRegex(text, 'Flash flood')
72 |
73 |
--------------------------------------------------------------------------------
/internal_displacement/tests/test_coordinates_extraction.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from internal_displacement.pipeline import get_coordinates_mapzen
3 |
4 | class TestCoordinatesExtraction(TestCase):
5 |
6 | def test_get_coordinates_mapzen(self):
7 | res = get_coordinates_mapzen(country="Austria")
8 | self.assertEqual(res['coordinates'], "14.143702,47.522617")
9 |         res = get_coordinates_mapzen("Austria")  # free-text lookup: the best match is a city named Austria, not the country
10 | self.assertEqual(res['coordinates'], "-93.33167,16.43083")
11 | self.assertEqual(res['flag'], "multiple-results")
12 | res = get_coordinates_mapzen("Vienna")
13 | self.assertEqual(res['coordinates'], "16.37208,48.20849")
14 | self.assertEqual(res['flag'], "multiple-results")
15 | res = get_coordinates_mapzen(city="Vienna", country="Austria")
16 | self.assertEqual(res['coordinates'], "16.37208,48.20849")
17 | self.assertEqual(res['flag'], "single-result")
18 | res = get_coordinates_mapzen(city="Vienna", country="United States")
19 | self.assertEqual(res['coordinates'], "-77.260053,38.898599")
20 | res = get_coordinates_mapzen(city="Vienna", subdivision="Maryland", country="United States")
21 | self.assertEqual(res['coordinates'], "-75.833966,38.483475")
22 | self.assertEqual(res['flag'], "single-result")
23 | res = get_coordinates_mapzen("Vienna", hints=['Turkey','Indonesia', 'Austria', 'France'])
24 | self.assertEqual(res['coordinates'], "16.37208,48.20849")
25 | self.assertEqual(res['flag'], "multiple-results")
26 | res = get_coordinates_mapzen("Vienna", hints=['Turkey','Germany', 'Australia', 'United States'])
27 | self.assertEqual(res['coordinates'], "-77.260053,38.898599")
28 | self.assertEqual(res['flag'], "multiple-results")
29 | res = get_coordinates_mapzen("Vienna", hints=['Turkey','Germany', 'Australia', 'United States', 'Georgia'])
30 | self.assertEqual(res['coordinates'], "-83.79545,32.09156")
31 | self.assertEqual(res['flag'], "multiple-results")
--------------------------------------------------------------------------------
/internal_displacement/tests/test_model.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 | from unittest import TestCase
4 |
5 | from sqlalchemy import create_engine
6 |
7 | from internal_displacement.model.model import Status, Session, Category, Article, Content, Country, CountryTerm, \
8 | Location, Report, ReportDateSpan, ArticleCategory, UnexpectedArticleStatusException
9 |
10 |
11 | class TestModel(TestCase):
12 | def setUp(self):
13 | db_host = os.environ.get('DB_HOST')
14 | db_url = 'postgresql://{user}:{passwd}@{db_host}/{db}'.format(
15 | user='tester', passwd='tester', db_host=db_host, db='id_test')
16 | engine = create_engine(db_url)
17 | Session.configure(bind=engine)
18 | self.session = Session()
19 |
20 | def tearDown(self):
21 | self.session.rollback()
22 | self.session.query(Article).filter_by(domain='example.com').delete()
23 | self.session.commit()
24 |
25 | def test_article(self):
26 | article = Article(url='http://example.com',
27 | domain='example.com',
28 | status=Status.NEW)
29 | content = Content(article=article,
30 | retrieval_date=datetime.now(),
31 | content="La la la")
32 | ArticleCategory(article=article, category=Category.DISASTER)
33 | ArticleCategory(article=article, category=Category.OTHER)
34 | self.session.add(article)
35 |
36 | article2 = self.session.query(Article).filter_by(status=Status.NEW).one()
37 | self.assertEqual(article2.domain, 'example.com')
38 | self.assertEqual(article2.content.content, "La la la")
39 | self.assertCountEqual([c.category for c in article2.categories], ['disaster', 'other'])
40 |
41 | article3 = self.session.query(Article).filter_by(status=Status.NEW).one()
42 | self.assertEqual(article3.domain, 'example.com')
43 |
44 | def test_delete_article(self):
45 | article = None
46 | try:
47 | article = Article(url='http://example.com',
48 | domain='example.com',
49 | status=Status.NEW)
50 | content = Content(article=article,
51 | retrieval_date=datetime.now(),
52 | content="La la la")
53 | ArticleCategory(article=article, category=Category.DISASTER)
54 | ArticleCategory(article=article, category=Category.OTHER)
55 | self.session.add(article)
56 | self.session.commit()
57 | self.session.delete(article)
58 | finally:
59 | self.session.rollback()
60 | if article:
61 | self.session.delete(article)
62 | self.session.commit()
63 |
64 | def test_country_term(self):
65 | mmr = self.session.query(Country).filter_by(code="MMR").one_or_none() or Country(code="MMR")
66 | myanmar = CountryTerm(term="Myanmar", country=mmr)
67 | burma = CountryTerm(term="Burma", country=mmr)
68 | self.session.add(mmr)
69 |
70 | myanmar = self.session.query(Country).join(CountryTerm).filter_by(term='Myanmar').one()
71 | burma = self.session.query(Country).join(CountryTerm).filter_by(term='Burma').one()
72 | self.assertEqual(myanmar, burma)
73 |
74 | def test_location(self):
75 | mmr = self.session.query(Country).filter_by(code="MMR").one_or_none() or Country(code="MMR")
76 | naypyidaw = Location(description="Nay Pyi Taw", country=mmr, latlong='19°45′N 96°6′E')
77 | self.session.add(mmr)
78 | self.assertIn(naypyidaw, self.session.query(Location).filter_by(country=mmr))
79 |
80 | def test_report(self):
81 | article = None
82 | report = None
83 | mmr = self.session.query(Country).filter_by(code="MMR").one_or_none() or Country(code="MMR")
84 | bgd = self.session.query(Country).filter_by(code="BGD").one_or_none() or Country(code="BGD")
85 | try:
86 | article = Article(url='http://example.com',
87 | domain='example.com',
88 | status=Status.NEW)
89 | report = Report(article=article,
90 | accuracy=0.55,
91 | event_term='evacuation',
92 | subject_term='family',
93 | quantity='72')
94 | self.session.add(report)
95 | self.session.commit() # have to commit here to get the ID set
96 |
97 | naypyidaw = Location(description="Nay Pyi Taw", country=mmr, latlong='19°45′N 96°6′E')
98 | report.locations.append(naypyidaw)
99 | dhaka = Location(description="Dhaka", country=bgd)
100 | report.locations.append(dhaka)
101 | now = datetime.now()
102 | when = ReportDateSpan(report=report, start=datetime.today(), finish=now)
103 |
104 | article2 = self.session.query(Article).filter_by(domain='example.com').first()
105 | self.assertEqual(len(article2.reports), 1)
106 |
107 | article3 = self.session.query(Article).join(Report).filter(Report.locations.contains(dhaka)).first()
108 | self.assertEqual(len(article3.reports), 1)
109 | finally:
110 | self.session.rollback()
111 | if report:
112 | self.session.delete(report)
113 | if article:
114 | self.session.delete(article)
115 | self.session.commit()
116 |
117 | def test_report_delete(self):
118 | article = None
119 | report = None
120 | mmr = self.session.query(Country).filter_by(code="MMR").one_or_none() or Country(code="MMR")
121 | bgd = self.session.query(Country).filter_by(code="BGD").one_or_none() or Country(code="BGD")
122 | try:
123 | article = Article(url='http://example.com',
124 | domain='example.com',
125 | status=Status.NEW)
126 | report = Report(article=article,
127 | accuracy=0.55,
128 | event_term='evacuation',
129 | subject_term='family',
130 | quantity='72')
131 | self.session.add(report)
132 |
133 | naypyidaw = Location(description="Nay Pyi Taw", country=mmr, latlong='19°45′N 96°6′E')
134 | report.locations.append(naypyidaw)
135 | dhaka = Location(description="Dhaka", country=bgd)
136 | report.locations.append(dhaka)
137 | now = datetime.now()
138 | when = ReportDateSpan(report=report, start=datetime.today(), finish=now)
139 |
140 | self.session.commit()
141 | report_id = report.id
142 | self.session.query(Report).filter_by(article=article).delete()
143 | report = None
144 | self.session.commit()
145 | self.assertEqual(self.session.query(ReportDateSpan).filter_by(report_id=report_id).all(), [])
146 | finally:
147 | self.session.rollback()
148 | if report:
149 | self.session.delete(report)
150 | if article:
151 | self.session.delete(article)
152 | self.session.commit()
153 |
154 | def test_status_update(self):
155 | article = Article(url='http://example.com',
156 | domain='example.com',
157 | status=Status.NEW)
158 | self.session.add(article)
159 | self.session.commit()
160 |
161 | article.update_status(Status.FETCHING)
162 | self.session.commit()
163 | self.assertEqual(article.status, Status.FETCHING)
164 |
165 | # meanwhile, some other process changed the status of this...
166 | self.session.execute("UPDATE article SET status = :status WHERE id = :id",
167 | { 'status': Status.FETCHING_FAILED, 'id': article.id})
168 |
169 | with self.assertRaises(UnexpectedArticleStatusException):
170 | article.update_status(Status.FETCHED)
--------------------------------------------------------------------------------
/internal_displacement/tests/test_report.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from internal_displacement.report import Report
3 |
4 | class TestReport(TestCase):
5 |
6 |     def setUp(self):
7 |         pass
8 |
9 |     def tearDown(self):
10 |         pass
11 |
12 |     def test_equality(self):
13 |         test_report_1 = Report(["Some Place"], ["Yesterday"], "destroyed", "house", 12, "Yesterday 12 houses were destroyed.")
14 |         test_report_2 = Report(["Some Place"], ["Yesterday"], "destroyed", "house", 12, "Yesterday 12 houses were destroyed.")
15 |         test_report_3 = Report(["Some Place"], ["Yesterday"], "destroyed", "house", 13, "Yesterday 13 houses were destroyed.")
16 |         self.assertEqual(test_report_1, test_report_2)
17 |         self.assertNotEqual(test_report_1, test_report_3)
--------------------------------------------------------------------------------
/internal_displacement/textract_requirements.txt:
--------------------------------------------------------------------------------
1 | This file contains a list of packages required to get textract to run.
2 | Documentation:
3 | https://textract.readthedocs.io/en/stable/
4 | https://github.com/deanmalmgren/textract
5 | Requirements (from github page):
6 | # This file contains all python dependencies that are required by the
7 | # textract package in order for it to properly work
8 | argcomplete
9 | chardet
10 | python-pptx>=0.5.1
11 | #pdfminer.six <-- go back to this after the shebang fix is released (see https://github.com/goulu/pdfminer/issues/27)
12 | https://github.com/goulu/pdfminer/zipball/e6ad15af79a26c31f4e384d8427b375c93b03533#egg=pdfminer.six
13 | docx2txt
14 | beautifulsoup4
15 | xlrd
16 | EbookLib
17 | SpeechRecognition>=3.1.0
18 | https://github.com/mattgwwalker/msg-extractor/zipball/master
19 | six
20 |
21 | For what it's worth, following the recommended installation procedure should be
22 | almost all you need to do; the one exception I hit was pdfminer.six, which I
23 | had to install separately, and that did the trick.
24 |
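Once the dependencies are in place, extraction itself is a single call; this mirrors the usage in scraper.py:

    import textract

    # pdfminer backend, as used by Scraper.get_body_text
    raw = textract.process('file_to_convert.pdf', method='pdfminer')
    text = raw.decode('utf-8')
    print(text[:200])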
--------------------------------------------------------------------------------
/notebooks/.ipynb_checkpoints/newspaper-scrape-tests-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {
7 | "collapsed": false
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import newspaper\n",
12 | "import csv\n",
13 | "import pandas as pd"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 3,
19 | "metadata": {
20 | "collapsed": false
21 | },
22 | "outputs": [],
23 | "source": [
24 | "file = 'data/training_dataset.csv'"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 4,
30 | "metadata": {
31 | "collapsed": false
32 | },
33 | "outputs": [],
34 | "source": [
35 | "def urls_from_csv(csv_file, column=None):\n",
36 | " '''\n",
37 | " Takes csv directory and returns list of URLs\n",
38 | " '''\n",
39 | " with open(csv_file, 'r') as f:\n",
40 | " reader = csv.reader(f)\n",
41 | " contents = list(reader)\n",
42 | " \n",
43 | " urls = [line[1] for line in contents[1:]]\n",
44 | " return urls, contents"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 5,
50 | "metadata": {
51 | "collapsed": true
52 | },
53 | "outputs": [],
54 | "source": [
55 | "def urls_to_df(csv_file, column=None):\n",
56 | " '''\n",
57 | " Takes csv directory and returns list of URLs\n",
58 | " '''\n",
59 | " df = pd.read_csv(csv_file)\n",
60 | " df.columns = [x.lower() for x in df.columns]\n",
61 | " urls = list(df['url'])\n",
62 | " return urls, df"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 6,
68 | "metadata": {
69 | "collapsed": false
70 | },
71 | "outputs": [],
72 | "source": [
73 | "urls, contents = urls_from_csv(file)"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 7,
79 | "metadata": {
80 | "collapsed": true
81 | },
82 | "outputs": [],
83 | "source": [
84 | "def remove_newline(text):\n",
85 | " ''' Removes new line and   characters.\n",
86 | " '''\n",
87 | " text = text.replace('\\n', ' ')\n",
88 | " text = text.replace('\\xa0', ' ')\n",
89 | " return text"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 8,
95 | "metadata": {
96 | "collapsed": true
97 | },
98 | "outputs": [],
99 | "source": [
100 | "def html_report(link, nlp=False):\n",
101 | " report = {}\n",
102 | " a = newspaper.Article(link)\n",
103 | " a.download()\n",
104 | " a.parse()\n",
105 | " report['domain'] = a.source_url\n",
106 | " report['title'] = a.title\n",
107 | " report['authors'] = a.authors\n",
108 | " report['date_pub'] = a.publish_date\n",
109 | " report['text'] = remove_newline(a.text)\n",
110 | " # tag the type of article\n",
111 | " ## currently default to text but should be able to determine img/video etc\n",
112 | " report['type'] = 'text'\n",
113 | " return report"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 9,
119 | "metadata": {
120 | "collapsed": false
121 | },
122 | "outputs": [],
123 | "source": [
124 | "urls, df = urls_to_df(file)"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 10,
130 | "metadata": {
131 | "collapsed": true
132 | },
133 | "outputs": [],
134 | "source": [
135 | "def scrape_from_urls(urls):\n",
136 | " reports = []\n",
137 | " for url in urls:\n",
138 | " if url[-3:] == 'pdf':\n",
139 | " continue\n",
140 | " else:\n",
141 | " report = html_report(url)\n",
142 | " reports.append(report)\n",
143 | " \n",
144 | " return reports"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 11,
150 | "metadata": {
151 | "collapsed": false
152 | },
153 | "outputs": [],
154 | "source": [
155 | "url = urls[1]"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 12,
161 | "metadata": {
162 | "collapsed": false
163 | },
164 | "outputs": [],
165 | "source": [
166 | "a = newspaper.Article(url)"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 13,
172 | "metadata": {
173 | "collapsed": false
174 | },
175 | "outputs": [],
176 | "source": [
177 | "a.download()"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 14,
183 | "metadata": {
184 | "collapsed": false
185 | },
186 | "outputs": [],
187 | "source": [
188 | "a.parse()"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {
195 | "collapsed": false
196 | },
197 | "outputs": [],
198 | "source": []
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": 15,
203 | "metadata": {
204 | "collapsed": false
205 | },
206 | "outputs": [
207 | {
208 | "ename": "NameError",
209 | "evalue": "name 'report' is not defined",
210 | "output_type": "error",
211 | "traceback": [
212 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
213 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
214 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mkeys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreport\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdict_writer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcsv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDictWriter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfieldnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mdict_writer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwriteheader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdict_writer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwriterows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreport\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
215 | "\u001b[0;31mNameError\u001b[0m: name 'report' is not defined"
216 | ]
217 | }
218 | ],
219 | "source": [
220 | "keys = report[0].keys()\n",
221 | "with open('data.csv', 'w') as f:\n",
222 | " dict_writer = csv.DictWriter(f, fieldnames=keys)\n",
223 | " dict_writer.writeheader()\n",
224 | " dict_writer.writerows(report)"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": null,
230 | "metadata": {
231 | "collapsed": true
232 | },
233 | "outputs": [],
234 | "source": []
235 | }
236 | ],
237 | "metadata": {
238 | "anaconda-cloud": {},
239 | "kernelspec": {
240 | "display_name": "Python [d4d-internal-displacement]",
241 | "language": "python",
242 | "name": "Python [d4d-internal-displacement]"
243 | },
244 | "language_info": {
245 | "codemirror_mode": {
246 | "name": "ipython",
247 | "version": 3
248 | },
249 | "file_extension": ".py",
250 | "mimetype": "text/x-python",
251 | "name": "python",
252 | "nbconvert_exporter": "python",
253 | "pygments_lexer": "ipython3",
254 | "version": "3.6.0"
255 | }
256 | },
257 | "nbformat": 4,
258 | "nbformat_minor": 0
259 | }
260 |
--------------------------------------------------------------------------------
/notebooks/.ipynb_checkpoints/sql-concurrent-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 0
6 | }
7 |
--------------------------------------------------------------------------------
/notebooks/information_extraction/get_abs_date_test.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": false,
8 | "deletable": true,
9 | "editable": true
10 | },
11 | "outputs": [],
12 | "source": [
13 | "import datetime\n",
14 | "import parsedatetime\n",
15 | "from functools import reduce"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {
22 | "collapsed": true,
23 | "deletable": true,
24 | "editable": true
25 | },
26 | "outputs": [],
27 | "source": [
28 | "def get_absolute_date(relative_date_string, publication_date=None):\n",
29 | " \"\"\"\n",
30 | " Turn relative dates into absolute datetimes.\n",
31 | " Currently uses API of parsedatetime\n",
32 | " https://bear.im/code/parsedatetime/docs/index.html\n",
33 | "\n",
34 | " Parameters:\n",
35 | " -----------\n",
36 | " relative_date_string the relative date in an article (e.g. 'Last week'): String\n",
37 | " publication_date the publication_date of the article: datetime\n",
38 | " \n",
39 | " Returns:\n",
40 | " --------\n",
41 | " One of: \n",
42 | " - a datetime that represents the absolute date of the relative date based on \n",
43 | " the publication_date\n",
44 | " - None, if parse is not successful\n",
45 | " \"\"\"\n",
46 | "\n",
47 | " cal = parsedatetime.Calendar()\n",
48 | " parsed_result = cal.nlp(relative_date_string, publication_date)\n",
49 | " if parsed_result is not None:\n",
50 | " # Parse is successful\n",
51 | " parsed_absolute_date = parsed_result[0][0]\n",
52 | "\n",
53 | " # Assumption: input date string is in the past\n",
54 | " # If parsed date is in the future (relative to publication_date), \n",
55 | " # we roll it back to the past\n",
56 | " \n",
57 | " if publication_date and parsed_absolute_date > publication_date:\n",
58 | " # parsedatetime returns a date in the future\n",
59 | " # likely because year isn't specified or date_string is relative\n",
60 | " \n",
61 | " # Check a specific date is included\n",
62 | " # TODO: Smarter way or regex to check if relative_date_string \n",
63 | " # contains a month name?\n",
64 | " months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', \n",
65 | " 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']\n",
66 | " contains_month = reduce( \n",
67 | " lambda result, month: result or relative_date_string.lower().find(month) != -1, \n",
68 | " months, False)\n",
69 | " \n",
70 | " if contains_month:\n",
71 | " # TODO: Is it enough to just check for month names to determine if a \n",
72 | " # date_string specifies a particular date?\n",
73 | "\n",
74 | " # If date is specified explicity, and year is not\n",
75 | " # roll back 1 year\n",
76 | " return datetime.datetime(parsed_absolute_date.year-1, \n",
77 | " parsed_absolute_date.month, parsed_absolute_date.day)\n",
78 | " else:\n",
79 | " # Use the relative datetime delta and roll back\n",
80 | " delta = parsed_absolute_date - publication_date\n",
81 | " num_weeks = int(delta.days/7)\n",
82 | " and_num_days_after = 7 if delta.days%7 == 0 else delta.days%7\n",
83 | " return publication_date - datetime.timedelta(weeks=num_weeks) - \\\n",
84 | " datetime.timedelta(7-and_num_days_after)\n",
85 | " else:\n",
86 | " # Return if date is in the past already or no publication_date is provided\n",
87 | " return parsed_absolute_date\n",
88 | " else:\n",
89 | " # Parse unsucessful\n",
90 | " return None"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {
96 | "deletable": true,
97 | "editable": true
98 | },
99 | "source": [
100 | "## Year is not specified"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "metadata": {
107 | "collapsed": false,
108 | "deletable": true,
109 | "editable": true
110 | },
111 | "outputs": [],
112 | "source": [
113 | " publication_date = datetime.datetime(2016, 10, 30, 18, 0)"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {
120 | "collapsed": false,
121 | "deletable": true,
122 | "editable": true
123 | },
124 | "outputs": [],
125 | "source": [
126 | "# Before publication_date\n",
127 | "get_absolute_date('28th December', publication_date)"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {
134 | "collapsed": false,
135 | "deletable": true,
136 | "editable": true
137 | },
138 | "outputs": [],
139 | "source": [
140 | "# After publication date\n",
141 | "get_absolute_date('26th October', publication_date)"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {
148 | "collapsed": false,
149 | "deletable": true,
150 | "editable": true
151 | },
152 | "outputs": [],
153 | "source": [
154 | "get_absolute_date('1 January', publication_date)"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {
160 | "deletable": true,
161 | "editable": true
162 | },
163 | "source": [
164 | "## Relative date string"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": null,
170 | "metadata": {
171 | "collapsed": false,
172 | "deletable": true,
173 | "editable": true
174 | },
175 | "outputs": [],
176 | "source": [
177 | "get_absolute_date('2 weeks ago', publication_date)"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": null,
183 | "metadata": {
184 | "collapsed": false,
185 | "deletable": true,
186 | "editable": true
187 | },
188 | "outputs": [],
189 | "source": [
190 | "get_absolute_date('3:30pm', publication_date)"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {
196 | "deletable": true,
197 | "editable": true
198 | },
199 | "source": [
200 | "## Year is specified"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {
207 | "collapsed": false,
208 | "deletable": true,
209 | "editable": true
210 | },
211 | "outputs": [],
212 | "source": [
213 | "get_absolute_date('March 3 2014', publication_date)"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {
219 | "deletable": true,
220 | "editable": true
221 | },
222 | "source": [
223 | "This is considered **invalid** for now. Since we are assuming articles only contain dates in the past. (for future enhancement)"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {
230 | "collapsed": false,
231 | "deletable": true,
232 | "editable": true
233 | },
234 | "outputs": [],
235 | "source": [
236 | "get_absolute_date('March 3 2018', publication_date)"
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": null,
242 | "metadata": {
243 | "collapsed": true,
244 | "deletable": true,
245 | "editable": true
246 | },
247 | "outputs": [],
248 | "source": []
249 | }
250 | ],
251 | "metadata": {
252 | "kernelspec": {
253 | "display_name": "Python 3",
254 | "language": "python",
255 | "name": "python3"
256 | },
257 | "language_info": {
258 | "codemirror_mode": {
259 | "name": "ipython",
260 | "version": 3
261 | },
262 | "file_extension": ".py",
263 | "mimetype": "text/x-python",
264 | "name": "python",
265 | "nbconvert_exporter": "python",
266 | "pygments_lexer": "ipython3",
267 | "version": "3.4.4"
268 | }
269 | },
270 | "nbformat": 4,
271 | "nbformat_minor": 2
272 | }
273 |
--------------------------------------------------------------------------------
/notebooks/scraping_and_db/DatabaseExample.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Example of database usage\n",
8 | "## Create a session using the values in `docker.env`"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {
15 | "collapsed": true,
16 | "deletable": true,
17 | "editable": true
18 | },
19 | "outputs": [],
20 | "source": [
21 | "import os\n",
22 | "from sqlalchemy import create_engine\n",
23 | "from internal_displacement.model.model import Session\n",
24 | "\n",
25 | "db_url = 'postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}/{DB_NAME}'.format(**os.environ)\n",
26 | "engine = create_engine(db_url)\n",
27 | "Session.configure(bind=engine)\n",
28 | "session = Session()"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "## Query for all Articles in the DB"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {
42 | "collapsed": false,
43 | "deletable": true,
44 | "editable": true
45 | },
46 | "outputs": [],
47 | "source": [
48 | "from internal_displacement.model.model import Article, Status\n",
49 | "\n",
50 | "session.query(Article).all()"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "## Add an Article"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {
64 | "collapsed": true,
65 | "deletable": true,
66 | "editable": true
67 | },
68 | "outputs": [],
69 | "source": [
70 | "article = Article(url='http://example.com',\n",
71 | " domain='example.com',\n",
72 | " status=Status.NEW)\n",
73 | "session.add(article)\n",
74 | "session.commit()"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "## See that the article was persisted"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {
88 | "collapsed": false,
89 | "deletable": true,
90 | "editable": true
91 | },
92 | "outputs": [],
93 | "source": [
94 | "session.query(Article).all()"
95 | ]
96 | }
97 | ],
98 | "metadata": {
99 | "kernelspec": {
100 | "display_name": "Python 3",
101 | "language": "python",
102 | "name": "python3"
103 | },
104 | "language_info": {
105 | "codemirror_mode": {
106 | "name": "ipython",
107 | "version": 3
108 | },
109 | "file_extension": ".py",
110 | "mimetype": "text/x-python",
111 | "name": "python",
112 | "nbconvert_exporter": "python",
113 | "pygments_lexer": "ipython3",
114 | "version": "3.5.2"
115 | }
116 | },
117 | "nbformat": 4,
118 | "nbformat_minor": 2
119 | }
120 |
--------------------------------------------------------------------------------
/notebooks/scraping_and_db/TestDatabase.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "deletable": true,
7 | "editable": true
8 | },
9 | "source": [
10 | "# Run DB Unit tests"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "metadata": {
17 | "collapsed": false,
18 | "deletable": true,
19 | "editable": true
20 | },
21 | "outputs": [],
22 | "source": [
23 | "import unittest\n",
24 | "from internal_displacement.tests.test_model import TestModel\n",
25 | "suite = unittest.TestLoader().loadTestsFromTestCase(TestModel)\n",
26 | "unittest.TextTestRunner(verbosity=3).run(suite)"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {
33 | "collapsed": true,
34 | "deletable": true,
35 | "editable": true
36 | },
37 | "outputs": [],
38 | "source": []
39 | }
40 | ],
41 | "metadata": {
42 | "kernelspec": {
43 | "display_name": "Python 3",
44 | "language": "python",
45 | "name": "python3"
46 | },
47 | "language_info": {
48 | "codemirror_mode": {
49 | "name": "ipython",
50 | "version": 3
51 | },
52 | "file_extension": ".py",
53 | "mimetype": "text/x-python",
54 | "name": "python",
55 | "nbconvert_exporter": "python",
56 | "pygments_lexer": "ipython3",
57 | "version": "3.5.2"
58 | }
59 | },
60 | "nbformat": 4,
61 | "nbformat_minor": 0
62 | }
63 |
--------------------------------------------------------------------------------
/notebooks/scraping_and_db/sql-concurrent.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 27,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import csv\n",
12 | "import concurrent\n",
13 | "import sqlite3\n",
14 | "import pandas as pd"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 29,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "sql_connection = sqlite3.connect('test.sqlite', isolation_level=None)"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 30,
31 | "metadata": {
32 | "collapsed": true
33 | },
34 | "outputs": [],
35 | "source": [
36 | "sql_cursor = sql_connection.cursor()"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 31,
42 | "metadata": {
43 | "collapsed": false
44 | },
45 | "outputs": [
46 | {
47 | "data": {
48 | "text/plain": [
49 | ""
50 | ]
51 | },
52 | "execution_count": 31,
53 | "metadata": {},
54 | "output_type": "execute_result"
55 | }
56 | ],
57 | "source": [
58 | "sql_cursor.execute(\"CREATE TABLE IF NOT EXISTS Articles (title TEXT, url TEXT,author TEXT,datetime TEXT,domain TEXT, content TEXT, content_type TEXT)\")"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 32,
64 | "metadata": {
65 | "collapsed": false
66 | },
67 | "outputs": [
68 | {
69 | "data": {
70 | "text/plain": [
71 | ""
72 | ]
73 | },
74 | "execution_count": 32,
75 | "metadata": {},
76 | "output_type": "execute_result"
77 | }
78 | ],
79 | "source": [
80 | "sql_cursor.execute(\"CREATE TABLE IF NOT EXISTS Labels (url TEXT,category TEXT)\")"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 33,
86 | "metadata": {
87 | "collapsed": true
88 | },
89 | "outputs": [],
90 | "source": [
91 | "url = 'www.theguardian.co.uk'\n",
92 | "authors = 'GR'\n",
93 | "pub_date = 'Jan'\n",
94 | "domain = 'guardian'\n",
95 | "content = 'Some stuff happened'\n",
96 | "content_type = 'article'\n",
97 | "title = 'The stuff'"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 34,
103 | "metadata": {
104 | "collapsed": false
105 | },
106 | "outputs": [
107 | {
108 | "data": {
109 | "text/plain": [
110 | ""
111 | ]
112 | },
113 | "execution_count": 34,
114 | "metadata": {},
115 | "output_type": "execute_result"
116 | }
117 | ],
118 | "source": [
119 | "sql_cursor.execute(\"INSERT INTO Articles VALUES (?,?,?,?,?,?,?)\", (title, url, authors, pub_date, domain, content, content_type))"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 35,
125 | "metadata": {
126 | "collapsed": true
127 | },
128 | "outputs": [],
129 | "source": [
130 | "sql_connection.commit()"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 40,
136 | "metadata": {
137 | "collapsed": false
138 | },
139 | "outputs": [
140 | {
141 | "data": {
142 | "text/plain": [
143 | "['www.theguardian.co.uk']"
144 | ]
145 | },
146 | "execution_count": 40,
147 | "metadata": {},
148 | "output_type": "execute_result"
149 | }
150 | ],
151 | "source": [
152 | "[r[0] for r in sql_cursor.execute(\"SELECT url FROM Articles\")]"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 42,
158 | "metadata": {
159 | "collapsed": true
160 | },
161 | "outputs": [],
162 | "source": [
163 | "df = pd.read_sql_query(\"SELECT * from Articles\", sql_connection)"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 50,
169 | "metadata": {
170 | "collapsed": false
171 | },
172 | "outputs": [],
173 | "source": [
174 | "from urllib.parse import urlparse"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 57,
180 | "metadata": {
181 | "collapsed": false
182 | },
183 | "outputs": [],
184 | "source": [
185 | "dave = urlparse('http://www.google.com/some-stuff-about-things').hostname"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 58,
191 | "metadata": {
192 | "collapsed": false
193 | },
194 | "outputs": [
195 | {
196 | "data": {
197 | "text/plain": [
198 | "'www.google.com'"
199 | ]
200 | },
201 | "execution_count": 58,
202 | "metadata": {},
203 | "output_type": "execute_result"
204 | }
205 | ],
206 | "source": [
207 | "dave"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {
214 | "collapsed": true
215 | },
216 | "outputs": [],
217 | "source": []
218 | }
219 | ],
220 | "metadata": {
221 | "kernelspec": {
222 | "display_name": "Python [d4d-internal-displacement]",
223 | "language": "python",
224 | "name": "Python [d4d-internal-displacement]"
225 | },
226 | "language_info": {
227 | "codemirror_mode": {
228 | "name": "ipython",
229 | "version": 3
230 | },
231 | "file_extension": ".py",
232 | "mimetype": "text/x-python",
233 | "name": "python",
234 | "nbconvert_exporter": "python",
235 | "pygments_lexer": "ipython3",
236 | "version": "3.6.0"
237 | }
238 | },
239 | "nbformat": 4,
240 | "nbformat_minor": 0
241 | }
242 |
--------------------------------------------------------------------------------
/notebooks/test.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/notebooks/test.sqlite
--------------------------------------------------------------------------------
/notebooks/visualize_tagged_articles.html:
--------------------------------------------------------------------------------
1 | <!-- HTML head, styles and scripts stripped during extraction -->
83 | Article 1
85 | Flash flooding across Afghanistan and Pakistan has left more than 160 dead and dozens stranded in one of South Asia's worst natural disasters this year, say officials. The flooding, caused by unusually heavy rain, has left villagers stuck in remote areas without shelter, food or power. Mountainous Afghanistan was the worst hit, with 61 people killed and approximately 500 traditional mud-brick homes washed away in more than a dozen villages in Sarobi, a rural district less than an hour from Kabul, officials said. Floods left a village devastated in the remote eastern Afghan province of Nuristan. At least 60 homes were destroyed across three districts, said provincial spokesman Mohammad Yusufi. No one was killed. Authorities have been unable to deliver aid to some badly affected villages by land as roads in the area are controlled by the Taliban, Yusufi added. “We have asked the national government for help as have an overwhelming number of locals asking for assistance, but this is a Taliban-ridden area,” Yusufi said. At least 24 people also died in two other eastern border provinces, Khost and Nangarhar, according to local officials. More than fifty homes and shops were destroyed and thousands of acres of farmland flooded. In Pakistan monsoon rains claimed more than 80 lives, local media reported. Houses collapsing, drowning and electrocution all pushed up the death toll, said Sindh Information Minister Sharjeel Inam Memon. In Karachi, the commercial capital and a southern port city that is home to 18 million people, poor neighborhoods were submerged waist-deep in water and many precincts suffered long power outages. Deaths were also reported in the north and west of the country.
90 | Article 2
92 | Afghanistan state news agency, Bakhtar News Agency (BNA), report that at least 7 people have been killed in flash floods in Faryab Province in the north of the country. Flash floods in Baghlan Province have killed 1 person and injured around 10 others. Flash floods struck on 08 May 2015 in Faryab Province after a period of heavy rainfall. The districts of Garyzan, Pashtunkot and Belcheragh were worst affected. BNA report that at least 7 people were killed and over 1,500 homes damaged. The Faizabada-Takhar highway has been closed to traffic and wide areas of crops and orchards have suffered damage. Kuwaiti News Agency (KUNA) also report that flooding struck in the Baghlan-i-Markazi district of Baghlan province, where 1 person was killed and several injured early on Saturday 09 May 2015. “There was heavy rain in Baghlan-e-Markazi district Friday evening and the people left their houses to safer areas. It was early Saturday when a flash flood hit the area and washed away more than 500 houses,” district Governor Gohar Khan Babri told reporters in provincial capital Pul-e-Khumri, 160 km north of Kabul.
--------------------------------------------------------------------------------
/production-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | jupyter:
4 | image: aneel/internal-displacement-jupyter:spacy
5 | command: sh -c "jupyter notebook --no-browser --ip=0.0.0.0 --port 3323 /internal-displacement/notebooks"
6 | stdin_open: true
7 | tty: true
8 | ports:
9 | - "3323:3323"
10 | env_file: production.env
11 | nodejs:
12 | build: internal-displacement-web
13 | image: internal-displacement-web
14 | volumes:
15 | - ./internal-displacement-web/src:/internal-displacement-web/src
16 | - ./internal-displacement-web/package.json:/internal-displacement-web/package.json
17 | - ./internal-displacement-web/index.html:/internal-displacement-web/index.html
18 | ports:
19 | - "3322:3322"
20 | env_file: production.env
21 | nginx:
22 | build: docker/nginx
23 | image: nginx
24 | ports:
25 | - "80:80"
26 | depends_on:
27 | - nodejs
28 |
--------------------------------------------------------------------------------
/production.env:
--------------------------------------------------------------------------------
1 | DB_HOST=internal-displacement.cf1y5y4ffeey.us-west-2.rds.amazonaws.com
2 | DB_USER=d4d
3 | DB_PASS=
4 | DB_NAME=id
5 | PYTHONPATH=/internal-displacement
6 | NODE_ENV=production
7 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | appnope==0.1.0
2 | beautifulsoup4==4.5.3
3 | bleach==1.5.0
4 | cssselect==1.0.1
5 | decorator==4.0.11
6 | entrypoints==0.2.2
7 | feedfinder2==0.0.4
8 | feedparser==5.2.1
9 | html5lib==0.9999999
10 | idna==2.2
11 | ipykernel==4.5.2
12 | ipython==5.2.2
13 | ipython-genutils==0.1.0
14 | ipywidgets==5.2.2
15 | jieba3k==0.35.1
16 | Jinja2==2.9.5
17 | jsonschema==2.5.1
18 | jupyter==1.0.0
19 | jupyter-client==4.4.0
20 | jupyter-console==5.1.0
21 | jupyter-core==4.2.1
22 | langdetect==1.0.7
23 | lxml==3.7.2
24 | MarkupSafe==0.23
25 | mistune==0.7.3
26 | nbconvert==5.1.1
27 | nbformat==4.2.0
28 | newspaper3k==0.1.9
29 | nltk==3.2.2
30 | nose==1.3.7
31 | notebook==4.3.2
32 | numpy==1.12.1
33 | olefile==0.44
34 | pandas==0.19.2
35 | pandocfilters==1.4.1
36 | parsedatetime==2.3
37 | pexpect==4.2.1
38 | pickleshare==0.7.4
39 | Pillow==4.0.0
40 | prompt-toolkit==1.0.13
41 | psycopg2==2.6.2
42 | ptyprocess==0.5.1
43 | pycountry==17.1.8
44 | Pygments==2.2.0
45 | python-dateutil==2.6.0
46 | pytz==2016.10
47 | PyYAML==3.12
48 | pyzmq==16.0.2
49 | qtconsole==4.2.1
50 | records==0.5.0
51 | requests==2.13.0
52 | requests-file==1.4.1
53 | simplegeneric==0.8.1
54 | six==1.10.0
55 | spacy>=1.7.5
56 | SQLAlchemy==1.1.5
57 | terminado==0.6
58 | testpath==0.3
59 | textacy==0.3.3
60 | tldextract==2.0.2
61 | tornado==4.4.2
62 | traitlets==4.3.1
63 | wcwidth==0.1.7
64 | widgetsnbextension==1.2.6
65 |
--------------------------------------------------------------------------------
/sql_db.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Data4Democracy/internal-displacement/fb8fb880c6de4034a4ba1ef6e2fcaee921777c3c/sql_db.sqlite
--------------------------------------------------------------------------------
/workplan.md:
--------------------------------------------------------------------------------
1 |
2 | ### Raw Data
3 | We already have certain datasets containing URLs.
4 |
5 | __Activities:__
6 | - *Do we want to obtain more URLs for our own dev / testing?*
7 |
8 | ### Scraping
9 | To be moved to `info-sources` repo. Interested parties can contribute there.
10 |
11 | Also covers the following two points from Filtering:
12 | - Identify language of a document (English vs. not English)
13 | - Broken URLs
14 |
15 | __Activities:__
16 | - Maintain some sort of link to `info-sources` to understand how these tools can be used / fit into the `internal-displacement` pipeline,
17 | i.e., function arguments, return values and the general API surface.
18 |
19 | ### Pipeline
20 | Controls the input and output of data with respect to scraping and interpreting articles.
21 |
22 | __Activities__
23 | - Define how to export / store articles (discussion):
24 | + CSVs for now, but likely some sort of database will be necessary to facilitate the online / interactive tool for modeling and analysis
25 | - Take new data and extract only URLs, converting them into a list
26 | - Check and keep a log of all previously scraped URLs (see the sketch below)
27 | - Check whether a URL is from a domain on an RSS whitelist (future)
28 | - Manage output from the scraper and either append it to the original database or create a new one
29 |
30 | (please see [#86](https://github.com/Data4Democracy/internal-displacement/issues/86))
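
A minimal sketch of the URL intake and scrape-log steps, assuming a plain-text log file and a CSV input with a `URL` column (both names are illustrative, not fixed project choices):

```python
# Sketch only: extract URLs from a new input file and skip anything
# already scraped, using a simple on-disk log.
import csv
import os

SCRAPED_LOG = 'scraped_urls.txt'  # illustrative path

def load_scraped(path=SCRAPED_LOG):
    """Return the set of URLs we have already scraped."""
    if not os.path.exists(path):
        return set()
    with open(path) as f:
        return {line.strip() for line in f if line.strip()}

def mark_scraped(url, path=SCRAPED_LOG):
    """Append a freshly scraped URL to the log."""
    with open(path, 'a') as f:
        f.write(url + '\n')

def new_urls(input_csv, url_column='URL'):
    """Extract only unseen URLs from a new data file, as a list."""
    seen = load_scraped()
    with open(input_csv) as f:
        return [row[url_column] for row in csv.DictReader(f)
                if row[url_column] not in seen]
```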
31 |
32 | ### Filtering (Interpreter)
33 | The third filtering requirement is more specific to the `internal-displacement` domain:
34 |
35 | - Filtering out documents not reporting on human mobility (binary classification)
36 |
37 | __Activities:__
38 | - Implement filtering out of documents not reporting on human mobility (modeling)
39 |
40 | > @milanoleonardo: *'this can be done by looking at the dependency trees of the sentences in the text to make sure there is a link between a “reporting term” and a “reporting unit” (see challenge for details). This would definitely remove all documents reporting on “hip displacement” or sentences like “displaced the body of people” etc.'*
41 |
42 | *How to test this functionality? Build some hand-crafted examples of things that shouldn't be included?*
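
One way to seed such hand-crafted tests: a minimal sketch of the dependency-subtree idea, assuming spaCy (with the small English model installed) and two illustrative term lists that stand in for the challenge's full vocabularies:

```python
# Sketch only: keep a document when a "reporting term" verb governs a
# "reporting unit" somewhere in its dependency subtree. Term lists are
# illustrative stand-ins, not the challenge's official vocabularies.
import spacy

REPORTING_TERMS = {'displace', 'evacuate', 'flee', 'destroy'}
REPORTING_UNITS = {'person', 'people', 'family', 'household', 'home', 'house'}

nlp = spacy.load('en_core_web_sm')

def reports_human_mobility(text):
    """True if any reporting term is linked to a reporting unit."""
    doc = nlp(text)
    for token in doc:
        if token.lemma_.lower() in REPORTING_TERMS:
            # e.g. in "floods displaced 2,000 people", "people" sits
            # in the subtree of the verb "displaced"
            if any(t.lemma_.lower() in REPORTING_UNITS
                   for t in token.subtree):
                return True
    return False

print(reports_human_mobility('Floods displaced 2,000 people.'))            # True
print(reports_human_mobility('The surgery corrected a hip displacement.')) # False
```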
43 |
44 | ### Tagging (Interpreter)
45 |
46 | - Classification of documents into 3 classes (Disasters, Conflict & Violence, Other)
47 |
48 | __Activities:__
49 | - Select approach / model that will allow for online learning or re-training in the future with new datasets. (discussion)
50 | - Build and train a classifier for classifying documents into the 3 required classes. (modeling)
51 |
52 | > @milanoleonardo: *'the best would be to set a fixed threshold on the probability distribution and assign a tag based on the content of the document.'*
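
A minimal sketch of that thresholding idea, assuming scikit-learn (an assumption; it is not currently in `requirements.txt`) and a toy labelled sample in place of the real training datasets:

```python
# Sketch only: 3-class tagging with a fixed probability threshold.
# The texts/labels below are toy placeholders; the threshold is tunable.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

texts = ['Flooding displaced thousands of villagers.',
         'Armed clashes forced families to flee the city.',
         'The council approved a new budget.']
labels = ['disaster', 'conflict', 'other']

model = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=1000))
model.fit(texts, labels)

def tag(text, threshold=0.5):
    """Assign a tag only when the classifier is confident enough."""
    probs = model.predict_proba([text])[0]
    best = probs.argmax()
    # Below the threshold, fall back to the catch-all class.
    return model.classes_[best] if probs[best] >= threshold else 'other'
```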
53 |
54 | ### NLP
55 |
56 | "Fact extraction" from documents:
57 | - Publication date, location (ISO 3166 country codes), reporting term, reporting units, etc.
58 |
59 | __Activities:__
60 | - Select NLP tool or framework (discussion)
61 | - Build and test working tool for extracting facts (modeling)
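
For the location facts specifically, a minimal sketch using pycountry (already pinned in `requirements.txt`); a real extractor would lean on NER (e.g. spaCy or mordecai) rather than naive substring matching:

```python
# Sketch only: map country names found in text to ISO 3166 alpha-3
# codes. Substring matching misses demonyms, abbreviations, etc.
import pycountry

def extract_country_codes(text):
    """Return sorted ISO 3166 alpha-3 codes for country names in text."""
    found = {country.alpha_3 for country in pycountry.countries
             if country.name in text}
    return sorted(found)

print(extract_country_codes('Flash flooding across Afghanistan and '
                            'Pakistan has left more than 160 dead.'))
# ['AFG', 'PAK']
```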
62 |
63 | ### Article Class
64 |
65 | __Activities:__
66 | - Define the properties each Article needs to have and fill out code for instantiating new Articles (beginner friendly)
67 | - Create / fill out functions for updating article properties by calling and using the return values from Scraper and Interpreter functions (beginner friendly)
68 | - Fill out a function for saving articles along with their relevant properties (beginner friendly)
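
A minimal sketch of such an Article, with illustrative field names rather than a final schema:

```python
# Sketch only: a container for the properties the activities above
# describe. Field and method names are illustrative assumptions.
class Article(object):
    def __init__(self, url):
        self.url = url
        self.domain = None
        self.title = None
        self.content = None
        self.publication_date = None
        self.tags = []

    def update_from(self, values):
        """Copy recognised fields from a Scraper/Interpreter result dict."""
        for key, value in values.items():
            if hasattr(self, key):
                setattr(self, key, value)

    def to_row(self):
        """Flatten to a dict suitable for CSV or DB storage."""
        return {'url': self.url, 'domain': self.domain,
                'title': self.title, 'tags': ','.join(self.tags)}
```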
69 |
70 | ### Visualization
71 |
72 | Including, but not limited to, an interactive map and histograms.
73 |
74 | __Activities:__
75 | - Design of visualizations (data-viz)
76 | - Selection of tool for online visualizations (i.e. D3) (discussion)
77 | - Create visualization functions that take in data in standard format and produce desired and interactive visualizations (data-viz)
78 |
79 | ### Quantitative Analysis
80 |
81 | An online tool that allows analysts to interact directly with the data, choosing what they visualize and how.
82 |
83 | __Activities:__
84 | - Design / build front-end page(s) for analysts
85 | - Create back-end functionality for connecting to database and returning necessary data, facts etc.
86 |
87 |
88 | ### Data Engineering
89 |
90 | We will need to construct a data pipeline / workflow to manage the end-to-end process, both for batch processing of files and (potentially) real-time processing of individual URLs (a sketch follows the list):
91 |
92 | + Data collection from various sources, e.g. existing CSV files, new files provided by analysts, etc.
93 | + Data pre-processing - applying the filtering tools created to exclude broken, irrelevant and non-English articles
94 | + Article classification - applying the pre-trained classifier, or training a new classifier
95 | + Fact Extraction - using NLP tools for extracting the key facts from the articles
96 | + Data storage - saving the article along with relevant tags and extracted facts
97 | + API for enabling data analysts to interact with the data
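
A minimal sketch of the batch flow, wired through hypothetical scraper / interpreter / store interfaces (none of these names are fixed project APIs):

```python
# Sketch only: the end-to-end batch steps above, against placeholder
# component interfaces this repo would provide.
def process_batch(urls, scraper, interpreter, store):
    for url in urls:
        article = scraper.scrape(url)               # data collection
        if article is None:                         # broken URL
            continue
        if not interpreter.is_relevant(article):    # pre-processing filters
            continue
        article.tags = interpreter.classify(article)        # classification
        article.facts = interpreter.extract_facts(article)  # fact extraction
        store.save(article)                         # data storage
```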
98 |
99 |
100 | ### All Deliverables:
101 |
102 | - URL to working version of the tool
103 | - Source code repo
104 | - Analysis of the test dataset
105 | - User guide
106 | - Admin guide
107 |
108 | __Activities:__
109 | - Create, maintain and update user guide (documentation)
110 | - Create, maintain and update admin guide (documentation)
111 |
112 |
113 | ### Possible Libraries
114 |
115 | ___NLP:___
116 | - NLTK
117 | - TensorFlow
118 | - spaCy
119 |
120 | ___Text parsing and fact extraction:___
121 | - mordecai - Geoparsing (extracting relevant country)
122 | - newspaper module (Python 3)
123 | - goose-extractor - text + metadata extraction (Python 2 only)
124 |
125 |
126 |
--------------------------------------------------------------------------------