├── .env.example
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── agent
│   ├── Dockerfile
│   ├── config.toml
│   ├── docker-compose.yml
│   └── requirements.txt
├── crawl.docker-compose.yml
├── create-certs.yml
├── docker-compose.yml
├── flow
│   ├── Dockerfile
│   ├── config.toml
│   ├── requirements.txt
│   └── scripts
│       ├── UID_ISO_FIPS_LookUp_Table.csv
│       ├── crawl_google_news.py
│       ├── crawl_mapping.py
│       ├── crawl_tweets.py
│       ├── insert_france.py
│       ├── insert_france_virtests.py
│       ├── insert_owid.py
│       ├── mapping.py
│       └── parse_insert.py
├── illustrations
│   ├── france_live_status.png
│   ├── latest_news.png
│   ├── live_dashboard.png
│   ├── news_web_app.png
│   └── vaccination_map.png
├── insert.docker-compose.yml
├── instances
│   ├── pem.yml
│   └── pkcs_12.yml
├── kibana.yml
├── news_app
│   ├── app
│   │   ├── .gitignore
│   │   ├── Dockerfile
│   │   ├── app
│   │   │   ├── package-lock.json
│   │   │   ├── package.json
│   │   │   ├── public
│   │   │   │   ├── css
│   │   │   │   │   └── bootstrap.css
│   │   │   │   ├── index.html
│   │   │   │   ├── logo.png
│   │   │   │   ├── manifest.json
│   │   │   │   └── robots.txt
│   │   │   └── src
│   │   │       ├── About.js
│   │   │       ├── App.js
│   │   │       ├── App.test.js
│   │   │       ├── Home.js
│   │   │       ├── Layout.js
│   │   │       ├── NavigationBar.js
│   │   │       ├── SearchUI.js
│   │   │       ├── index.css
│   │   │       ├── index.js
│   │   │       └── serviceWorker.js
│   │   ├── entrypoint.sh
│   │   └── package-lock.json
│   └── docker-compose.yml
├── pandemic_knowledge.png
└── prefect
    ├── Dockerfile
    └── prefect.config
/.env.example:
--------------------------------------------------------------------------------
1 | PREFECT_UI_TAG="latest"
2 | PREFECT_SERVER_TAG="latest"
3 |
4 | POSTGRES_USER="prefect_user"
5 | # PLEASE CHANGE !
6 | POSTGRES_PASSWORD="prefect_password"
7 | POSTGRES_DB="prefect_db"
8 | # PLEASE CHANGE according to POSTGRES_PASSWORD !
9 | DB_CONNECTION_URL="postgresql://prefect_user:prefect_password@prefect_postgres:5432/prefect_db"
10 |
11 | PREFECT_SERVER_DB_CMD="prefect-server database upgrade -y"
12 | # PLEASE CHANGE !
13 | PREFECT_SERVER__HASURA__ADMIN_SECRET="hasura-secret-admin-secret"
14 | PREFECT_SERVER__TELEMETRY__ENABLED="false"
15 | PREFECT_SERVER__APOLLO_URL="http://localhost:4200/graphql"
16 |
17 | MINIO_SCHEME=http
18 | MINIO_ENDPOINT=172.17.0.1:9000
19 | MINIO_ACCESS_KEY=minio
20 | MINIO_SECRET_KEY=minio123
21 |
22 | ELASTIC_SCHEME=https
23 | ELASTIC_PORT=9200
24 | ELASTIC_ENDPOINT=172.17.0.1
25 | ELASTIC_USER=elastic
26 | ELASTICSEARCH_PWD=elastic
27 | MAX_ES_ROW_INJECT=1000
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/
2 | .env
3 | certs/
4 | node_modules/
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | services:
2 | - docker
3 | env:
4 | - DOCKER_COMPOSE_VERSION=1.23.2
5 |
6 | before_install:
7 | - sudo rm /usr/local/bin/docker-compose
8 | - curl -L https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-`uname -s`-`uname -m` > docker-compose
9 | - chmod +x docker-compose
10 | - sudo mv docker-compose /usr/local/bin
11 |
12 | script:
13 | - cp .env.example .env
14 | - docker-compose build
15 | - docker-compose -f crawl.docker-compose.yml build
16 | - docker-compose -f insert.docker-compose.yml build
17 | - docker-compose -f agent/docker-compose.yml build
18 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Flavien Berwick
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pandemic-Knowledge
2 |
3 | 
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | A fully-featured multi-source data pipeline for continuously extracting knowledge from COVID-19 data.
14 |
15 | - Contamination figures
16 | - Vaccination figures
17 | - Death figures
18 | - COVID-19-related news (Google News, Twitter)
19 |
20 | ## What you can achieve
21 |
22 | | Live contaminations map + Latest news | Last 7 days news |
23 | | :---------------------------------------------------------------------------------: | :---------------------------------------------------: |
24 | | ![Live contaminations map and latest news](./illustrations/live_dashboard.png) | ![Last 7 days news](./illustrations/latest_news.png) |
25 |
26 | | France 3-weeks live map (Kibana Canvas) | Live vaccinations map |
27 | | :-----------------------------------------------------------: | :-----------------------------------------------------------: |
28 | | ![France live status](./illustrations/france_live_status.png) | ![Live vaccinations map](./illustrations/vaccination_map.png) |
29 |
30 | ## Context
31 |
32 | This project was carried out over 4 days as part of an MSc hackathon at [ETNA](https://etna.io), a French computer science school.
33 |
34 | The goals were both to prototype a big data pipeline and to contribute to an open-source project.
35 |
36 | ## Install
37 |
38 | Below, you'll find the procedure to process COVID-related files and news into the Pandemic Knowledge database (Elasticsearch).
39 |
40 | The process is **scheduled** to run every 24 hours so you can update the files and obtain the latest news.
41 |
42 | - [Pandemic-Knowledge](#pandemic-knowledge)
43 | - [What you can achieve](#what-you-can-achieve)
44 | - [Context](#context)
45 | - [Install](#install)
46 | - [Env file](#env-file)
47 | - [Initialize elasticsearch](#initialize-elasticsearch)
48 | - [Initialize Prefect](#initialize-prefect)
49 | - [Run Prefect workers](#run-prefect-workers)
50 | - [COVID-19 data](#covid-19-data)
51 | - [News data](#news-data)
52 | - [News web app](#news-web-app)
53 |
54 | ### Env file
55 |
56 | Running this project on your local computer? Just copy the `.env.example` file:
57 |
58 | ```bash
59 | cp .env.example .env
60 | ```
61 |
62 | Open this `.env` file and edit password-related variables.
63 |
64 | ### Initialize elasticsearch
65 |
66 | Raise your host's `vm.max_map_count` limit for Elasticsearch to handle high I/O:
67 |
68 | ```bash
69 | sudo sysctl -w vm.max_map_count=500000
70 | ```
71 |
72 | Then:
73 |
74 | ```bash
75 | docker-compose -f create-certs.yml run --rm create_certs
76 | docker-compose up -d es01 es02 es03 kibana
77 | ```
78 |
79 | ### Initialize Prefect
80 |
81 | Create a `~/.prefect/config.toml` file with the following content:
82 |
83 | ```toml
84 | # debug mode
85 | debug = true
86 |
87 | # base configuration directory (typically you won't change this!)
88 | home_dir = "~/.prefect"
89 |
90 | backend = "server"
91 |
92 | [server]
93 | host = "http://172.17.0.1"
94 | port = "4200"
95 | host_port = "4200"
96 | endpoint = "${server.host}:${server.port}"
97 | ```
98 |
99 | Run Prefect:
100 |
101 | ```bash
102 | docker-compose up -d prefect_postgres prefect_hasura prefect_graphql prefect_towel prefect_apollo prefect_ui
103 | ```
104 |
105 | We need to create a _tenant_. Execute on your host:
106 |
107 | ```bash
108 | pip3 install prefect
109 | prefect backend server
110 | prefect server create-tenant --name default --slug default
111 | ```
112 |
113 | Access the web UI at [localhost:8081](http://localhost:8081)
114 |
115 | ### Run Prefect workers
116 |
117 | Agents are services that run your scheduled flows.
118 |
119 | 1. Open and optionally edit the [`agent/config.toml`](./agent/config.toml) file.
120 |
121 | 2. Let's instantiate 3 workers:
122 |
123 | ```bash
124 | docker-compose -f agent/docker-compose.yml up -d --build --scale agent=3 agent
125 | ```
126 |
127 | > :information_source: You can run the agent on a machine other than the one hosting the Prefect server. Edit the [`agent/config.toml`](./agent/config.toml) file accordingly.
128 |
129 | ### COVID-19 data
130 |
131 | Injection scripts are scheduled in Prefect so they automatically re-inject the latest data (delete + inject).
132 |
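Each injection or crawl script in `flow/scripts/` registers its flow with a 24-hour `IntervalSchedule`, which is what drives this daily delete-and-reinject cycle. Simplified, the pattern looks like this (the flow name below is illustrative):

```python
from datetime import datetime, timedelta

from prefect import Flow
from prefect.schedules import IntervalSchedule

# Same pattern as flow/scripts/*.py: first run almost immediately, then every 24 hours.
schedule = IntervalSchedule(
    start_date=datetime.utcnow() + timedelta(seconds=1),
    interval=timedelta(hours=24),
)

with Flow("pandemic-knowledge-example", schedule=schedule) as flow:
    ...  # tasks recreate the index mapping (delete + create), then bulk-insert fresh rows
```
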
133 | There are several data sources supported by Pandemic Knowledge:
134 |
135 | - [Our World In Data](https://ourworldindata.org/coronavirus-data); used by Google
136 | - docker-compose slug : `insert_owid`
137 | - MinIO bucket : `contamination-owid`
138 | - Format : CSV
139 | - [OpenCovid19-Fr](https://github.com/opencovid19-fr/data)
140 | - docker-compose slug : `insert_france`
141 | - Format : CSV (download from Internet)
142 | - [Public Health France - Virological test results](https://www.data.gouv.fr/en/datasets/donnees-relatives-aux-resultats-des-tests-virologiques-covid-19/) (official source)
143 | - docker-compose slug : `insert_france_virtests`
144 | - Format : CSV (download from Internet)
145 |
146 | 1. Start MinIO and import your files into the buckets mentioned above.
147 |
148 | For _Our World In Data_, create the `contamination-owid` bucket and import the CSV file inside.
149 |
150 | ```bash
151 | docker-compose up -d minio
152 | ```
153 |
154 | > MinIO is available at `localhost:9000`
155 |
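If you prefer a scripted setup, here is a minimal sketch using the `minio` client pinned in `flow/requirements.txt`. The endpoint and credentials are the defaults from `.env.example`, and the local filename `owid-covid-data.csv` is only an example of the CSV downloaded from Our World In Data:

```python
from minio import Minio  # minio==7.0.3 (see flow/requirements.txt)

# Defaults from .env.example; adjust if you changed your .env.
client = Minio(
    "localhost:9000",
    access_key="minio",
    secret_key="minio123",
    secure=False,  # MINIO_SCHEME=http
)

# Create the bucket expected by the insert_owid flow, then upload the CSV.
if not client.bucket_exists("contamination-owid"):
    client.make_bucket("contamination-owid")
client.fput_object("contamination-owid", "owid-covid-data.csv", "./owid-covid-data.csv")
```
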
156 | 2. Download dependencies and start the injection service of your choice. For instance:
157 |
158 | ```bash
159 | pip3 install -r ./flow/requirements.txt
160 | docker-compose -f insert.docker-compose.yml up --build insert_owid
161 | ```
162 |
163 | 3. In [Kibana](https://localhost:5601), create an index pattern `contamination_owid_*`
164 |
165 | 4. Once injected, we recommend adjusting the number of replicas [in the Dev Tools console](https://localhost:5601/app/dev_tools#/console):
166 |
167 | ```json
168 | PUT /contamination_owid_*/_settings
169 | {
170 | "index" : {
171 | "number_of_replicas" : "2"
172 | }
173 | }
174 | ```
175 |
176 | 5. Start making your dashboards in [Kibana](https://localhost:5601)!
177 |
178 | ### News data
179 |
180 | There are two sources for news:
181 |
182 | - Google News (elasticsearch index: `news_googlenews`)
183 | - Twitter (elasticsearch index: `news_tweets`)
184 |
185 | 1. Run the Google News crawler:
186 |
187 | ```bash
188 | docker-compose -f crawl.docker-compose.yml up --build crawl_google_news # and/or crawl_tweets
189 | ```
190 |
191 | 2. In Kibana, create a `news_*` index pattern
192 |
193 | 3. **Edit** the index pattern fields:
194 |
195 | | Name | Type | Format |
196 | | ---- | ----------------------------------------------------- | ------- |
197 | | img  | string **with Type: Image** with empty _URL template_ | **Url** |
198 | | link | string                                                 | **Url** |
199 |
200 | 4. Create your visualisation
201 |
202 | ### News web app
203 |
204 | Browse through the news with our web application.
205 |
206 | ![News web app](./illustrations/news_web_app.png)
207 |
208 | 1. Make sure you've accepted the self-signed certificate of Elasticsearch at [`https://localhost:9200`](https://localhost:9200)
209 |
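Accepting the certificate in your browser is what the web app needs; in addition, you can quickly check from Python that the cluster answers with your `.env` credentials. This is a minimal sketch mirroring `get_es_instance()` from the flow scripts (certificate verification is skipped, as in the flows; credentials shown are the `.env.example` defaults):

```python
from elasticsearch import Elasticsearch  # elasticsearch==7.12.0

# Same connection settings as flow/scripts/*: ELASTIC_USER / ELASTICSEARCH_PWD defaults.
es = Elasticsearch(
    ["localhost"],
    http_auth=("elastic", "elastic"),
    scheme="https",
    port=9200,
    verify_certs=False,  # self-signed certificates generated by create-certs.yml
)

print(es.info())  # prints cluster name and version when everything is up
```
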
210 | 2. Start up the app:
211 |
212 | ```bash
213 | docker-compose -f news_app/docker-compose.yml up --build -d
214 | ```
215 |
216 | 3. Discover the app at [`localhost:8080`](http://localhost:8080)
217 |
218 | ---
219 |
220 |
221 | **TODOs**
222 |
223 | Possible improvements:
224 |
225 | - [ ] [Use Dask to parallelize](https://docs.prefect.io/core/idioms/parallel.html) the processing of CSV lines in batches of 1000 (see the sketch below)
226 | - [ ] Remove indices only when the source process succeeds (add the new index, then remove the old one)
227 | - [ ] Remove indices only when crawling succeeds (add the new index, then remove the old one)
228 |
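For the Dask item, a possible starting point is attaching Prefect 0.14's `DaskExecutor` to a flow before registering it. This is only a sketch (not wired into the scripts yet), where `flow` and `project_name` are the ones already defined in each script:

```python
from prefect.executors import DaskExecutor

# A local Dask cluster is spawned by default; mapped tasks (e.g. 1000-row batches) run in parallel.
flow.executor = DaskExecutor()
flow.register(project_name=project_name, labels=["development"], add_default_labels=False)
```
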
229 |
230 |
231 |
232 | **Useful commands**
233 |
234 | To stop everything:
235 |
236 | ```bash
237 | docker-compose down
238 | docker-compose -f agent/docker-compose.yml down
239 | docker-compose -f insert.docker-compose.yml down
240 | docker-compose -f crawl.docker-compose.yml down
241 | ```
242 |
243 | To start each service, step by step:
244 |
245 | ```bash
246 | docker-compose up -d es01 es02 es03 kibana
247 | docker-compose up -d minio
248 | docker-compose up -d prefect_postgres prefect_hasura prefect_graphql prefect_towel prefect_apollo prefect_ui
249 | docker-compose -f agent/docker-compose.yml up -d --build --scale agent=3 agent
250 | ```
251 |
252 |
253 |
--------------------------------------------------------------------------------
/agent/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.7
2 |
3 | RUN apt update && apt install uuid -y
4 | RUN pip install --upgrade pip
5 |
6 | COPY ./requirements.txt /requirements.txt
7 |
8 | RUN pip install -r /requirements.txt
--------------------------------------------------------------------------------
/agent/config.toml:
--------------------------------------------------------------------------------
1 | # debug mode
2 | debug = true
3 |
4 | # base configuration directory (typically you won't change this!)
5 | home_dir = "~/.prefect"
6 |
7 | backend = "server"
8 |
9 | [server]
10 | host = "http://172.17.0.1"
11 | port = "4200"
12 | host_port = "4200"
13 | endpoint = "${server.host}:${server.port}"
--------------------------------------------------------------------------------
/agent/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.7"
2 |
3 | services:
4 |
5 | agent:
6 | restart: always
7 | build: .
8 | dns: 8.8.8.8
9 | command: bash -c "prefect agent local start --name $$(uuid) --no-hostname-label --label development"
10 | volumes:
11 | - /srv/docker/prefect/flows:/root/.prefect/flows
12 | - type: bind
13 | source: ./config.toml
14 | target: /root/.prefect/config.toml
15 | read_only: true
16 |
--------------------------------------------------------------------------------
/agent/requirements.txt:
--------------------------------------------------------------------------------
1 | prefect==0.14.16
2 | minio==7.0.3
3 | clevercsv==0.6.7
4 | tqdm==4.60.0
5 | elasticsearch==7.12.0
6 | geopy==2.1.0
7 | iso3166==1.0.1
8 | dateparser==1.0.0
9 | GoogleNews==1.5.7
10 | snscrape==0.3.4
11 | pandas==1.2.4
12 |
--------------------------------------------------------------------------------
/crawl.docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.7"
2 |
3 | services:
4 |
5 | crawl_google_news:
6 | build: ./flow
7 | command: python3 /usr/app/crawl_google_news.py
8 | volumes:
9 | - /srv/docker/prefect/flows:/root/.prefect/flows
10 | - "./flow/scripts:/usr/app:ro"
11 | - type: bind
12 | source: ./flow/config.toml
13 | target: /root/.prefect/config.toml
14 | read_only: true
15 | env_file:
16 | - .env
17 | environment:
18 | MAX_ES_ROW_INJECT: ${MAX_ES_ROW_INJECT}
19 | ELASTIC_SCHEME: ${ELASTIC_SCHEME}
20 | ELASTIC_PORT: ${ELASTIC_PORT}
21 | ELASTIC_ENDPOINT: ${ELASTIC_ENDPOINT}
22 | ELASTIC_USER: ${ELASTIC_USER}
23 | ELASTIC_PWD: ${ELASTICSEARCH_PWD}
24 |
25 | crawl_tweets:
26 | build: ./flow
27 | command: python3 /usr/app/crawl_tweets.py
28 | volumes:
29 | - /srv/docker/prefect/flows:/root/.prefect/flows
30 | - "./flow/scripts:/usr/app:ro"
31 | - type: bind
32 | source: ./flow/config.toml
33 | target: /root/.prefect/config.toml
34 | read_only: true
35 | env_file:
36 | - .env
37 | environment:
38 | MAX_ES_ROW_INJECT: ${MAX_ES_ROW_INJECT}
39 | ELASTIC_SCHEME: ${ELASTIC_SCHEME}
40 | ELASTIC_PORT: ${ELASTIC_PORT}
41 | ELASTIC_ENDPOINT: ${ELASTIC_ENDPOINT}
42 | ELASTIC_USER: ${ELASTIC_USER}
43 | ELASTIC_PWD: ${ELASTICSEARCH_PWD}
44 |
--------------------------------------------------------------------------------
/create-certs.yml:
--------------------------------------------------------------------------------
1 | version: '3.2'
2 |
3 | services:
4 |
5 | create_certs:
6 | container_name: create_certs
7 | image: docker.elastic.co/elasticsearch/elasticsearch:7.10.0
8 | command: >
9 | bash -c '
10 | if [[ ! -f /certs/ca.zip ]]; then
11 | # Generating CA certificate
12 | bin/elasticsearch-certutil ca --silent --pem -out /certs/ca.zip;
13 | unzip /certs/ca.zip -d /certs;
14 | fi;
15 | if [[ ! -f /certs/pem.zip ]]; then
16 | # Generating PEM certificates (ElasticSearch nodes and Kibana)
17 | bin/elasticsearch-certutil cert --silent --pem --ca-cert "/certs/ca/ca.crt" --ca-key "/certs/ca/ca.key" --in config/certificates/pem.yml -out /certs/pem.zip;
18 | unzip /certs/pem.zip -d /certs;
19 | fi;
20 | if [[ ! -f /certs/pkcs_12.zip ]]; then
21 | # Generating PKCS#12 certificates (Enterprise Search)
22 | bin/elasticsearch-certutil cert --silent --pass "changeme" --ca-cert "/certs/ca/ca.crt" --ca-key "/certs/ca/ca.key" --in config/certificates/pkcs_12.yml -out /certs/pkcs_12.zip;
23 | unzip /certs/pkcs_12.zip -d /certs;
24 | fi;
25 | '
26 | working_dir: /usr/share/elasticsearch
27 | volumes: ['./certs:/certs', './instances:/usr/share/elasticsearch/config/certificates']
28 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.7"
2 |
3 | services:
4 | prefect_setup:
5 | build: ./prefect
6 | networks:
7 | - prefect-server
8 |
9 | prefect_postgres:
10 | restart: "always"
11 | image: "postgres:11"
12 | environment:
13 | POSTGRES_USER: ${POSTGRES_USER}
14 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
15 | POSTGRES_DB: ${POSTGRES_DB}
16 | volumes:
17 | - prefect_postgres:/var/lib/postgresql/data
18 | networks:
19 | - prefect-server
20 | healthcheck:
21 | test: pg_isready -q -d $${POSTGRES_DB} -U $${POSTGRES_USER} || exit 1
22 | interval: 10s
23 | timeout: 2s
24 | retries: 60
25 | start_period: 2s
26 | command:
27 | - "postgres"
28 | # explicitly set max connections
29 | - "-c"
30 | - "max_connections=150"
31 |
32 | prefect_hasura:
33 | restart: "always"
34 | image: "hasura/graphql-engine:v1.3.3"
35 | ports:
36 | - "3000:3000"
37 | command: "graphql-engine serve"
38 | environment:
39 | HASURA_GRAPHQL_DATABASE_URL: ${DB_CONNECTION_URL}
40 | HASURA_GRAPHQL_ENABLE_CONSOLE: "true"
41 | HASURA_GRAPHQL_SERVER_PORT: "3000"
42 | HASURA_GRAPHQL_QUERY_PLAN_CACHE_SIZE: 100
43 | HASURA_GRAPHQL_LOG_LEVEL: "warn"
44 | networks:
45 | - prefect-server
46 | healthcheck:
47 | test: wget -O - http://hasura:3000/healthz &>/dev/null || exit 1
48 | interval: 10s
49 | timeout: 2s
50 | retries: 60
51 | start_period: 1s
52 | depends_on:
53 | - prefect_postgres
54 |
55 | prefect_graphql:
56 | restart: "always"
57 | image: "prefecthq/server:latest"
58 | ports:
59 | - "4201:4201"
60 | command: bash -c "${PREFECT_SERVER_DB_CMD} && python src/prefect_server/services/graphql/server.py"
61 | environment:
62 | PREFECT_SERVER_DB_CMD: ${PREFECT_SERVER_DB_CMD:-"echo 'DATABASE MIGRATIONS SKIPPED'"}
63 | PREFECT_SERVER__DATABASE__CONNECTION_URL: ${DB_CONNECTION_URL}
64 | PREFECT_SERVER__HASURA__ADMIN_SECRET: ${PREFECT_SERVER__HASURA__ADMIN_SECRET:-hasura-secret-admin-secret}
65 | PREFECT_SERVER__HASURA__HOST: prefect_hasura
66 | networks:
67 | - prefect-server
68 | healthcheck:
69 | test: curl --fail --silent "http://prefect_graphql:4201/health" &> /dev/null || exit 1
70 | interval: 20s
71 | timeout: 2s
72 | retries: 60
73 | start_period: 1s
74 | depends_on:
75 | - prefect_hasura
76 |
77 | prefect_towel:
78 | restart: "always"
79 | image: "prefecthq/server:latest"
80 | command: "python src/prefect_server/services/towel/__main__.py"
81 | environment:
82 | PREFECT_SERVER__HASURA__ADMIN_SECRET: ${PREFECT_SERVER__HASURA__ADMIN_SECRET:-hasura-secret-admin-secret}
83 | PREFECT_SERVER__HASURA__HOST: prefect_hasura
84 | networks:
85 | - prefect-server
86 | depends_on:
87 | - prefect_graphql
88 |
89 | prefect_apollo:
90 | restart: "always"
91 | image: "prefecthq/apollo:latest"
92 | command: bash -c "./post-start.sh && npm run serve"
93 | ports:
94 | - 4200:4200
95 | environment:
96 | HASURA_API_URL: http://prefect_hasura:3000/v1alpha1/graphql
97 | PREFECT_API_URL: http://prefect_graphql:4201/graphql/
98 | PREFECT_API_HEALTH_URL: http://prefect_graphql:4201/health
99 | PREFECT_SERVER__TELEMETRY__ENABLED: "false"
100 | GRAPHQL_SERVICE_HOST: http://prefect_graphql
101 | GRAPHQL_SERVICE_PORT: 4201
102 | networks:
103 | - prefect-server
104 | healthcheck:
105 | test: curl --fail --silent "http://prefect_apollo:4200/.well-known/apollo/server-health" &> /dev/null || exit 1
106 | interval: 10s
107 | timeout: 2s
108 | retries: 60
109 | start_period: 1s
110 | depends_on:
111 | - prefect_graphql
112 |
113 | prefect_ui:
114 | restart: "always"
115 | image: "prefecthq/ui:2021-02-23"
116 | ports:
117 | - "8081:8080"
118 | command: "/intercept.sh"
119 | environment:
120 | PREFECT_SERVER__APOLLO_URL: http://localhost:4200/graphql
121 | PREFECT_BACKEND: server
122 | networks:
123 | - prefect-server
124 | healthcheck:
125 | test: curl --fail --silent --head "http://prefect_ui:8080/" &> /dev/null || exit 1
126 | interval: 30s
127 | timeout: 5s
128 | retries: 3
129 | depends_on:
130 | - prefect_apollo
131 |
132 | es01:
133 | restart: always
134 | image: docker.elastic.co/elasticsearch/elasticsearch:7.12.0
135 | volumes:
136 | - "es01:/usr/share/elasticsearch/data"
137 | - "./certs:/usr/share/elasticsearch/config/certificates:ro"
138 | ports:
139 | - "9200:9200"
140 | environment:
141 | ES_JAVA_OPTS: "-Xmx512m -Xms512m"
142 | ELASTIC_PASSWORD: ${ELASTICSEARCH_PWD}
143 | node.name: es01
144 | cluster.name: es-docker-cluster
145 | discovery.seed_hosts: es02,es03
146 | cluster.initial_master_nodes: es01,es02,es03
147 | network.host: 0.0.0.0
148 | xpack.license.self_generated.type: basic
149 | xpack.monitoring.collection.enabled: "true"
150 | xpack.security.enabled: "true"
151 | xpack.security.http.ssl.enabled: "true"
152 | xpack.security.http.ssl.key: /usr/share/elasticsearch/config/certificates/es01/es01.key
153 | xpack.security.http.ssl.certificate_authorities: /usr/share/elasticsearch/config/certificates/ca/ca.crt
154 | xpack.security.http.ssl.certificate: /usr/share/elasticsearch/config/certificates/es01/es01.crt
155 | xpack.security.transport.ssl.enabled: "true"
156 | xpack.security.transport.ssl.verification_mode: certificate
157 | xpack.security.transport.ssl.certificate_authorities: /usr/share/elasticsearch/config/certificates/ca/ca.crt
158 | xpack.security.transport.ssl.certificate: /usr/share/elasticsearch/config/certificates/es01/es01.crt
159 | xpack.security.transport.ssl.key: /usr/share/elasticsearch/config/certificates/es01/es01.key
160 | cluster.routing.allocation.disk.threshold_enabled: "true"
161 | cluster.routing.allocation.disk.watermark.low: 93%
162 | cluster.routing.allocation.disk.watermark.high: 95%
163 | http.cors.enabled : "true"
164 | http.cors.allow-origin : "*"
165 | http.cors.allow-methods : OPTIONS, HEAD, GET, POST, PUT, DELETE
166 | http.cors.allow-headers : Authorization,X-Requested-With,X-Auth-Token,Content-Type, Content-Length
167 | depends_on:
168 | - es02
169 | - es03
170 | ulimits:
171 | memlock:
172 | soft: 262144
173 | hard: 500000
174 |
175 | es02:
176 | restart: always
177 | image: docker.elastic.co/elasticsearch/elasticsearch:7.12.0
178 | volumes:
179 | - "es02:/usr/share/elasticsearch/data"
180 | - "./certs:/usr/share/elasticsearch/config/certificates:ro"
181 | environment:
182 | ES_JAVA_OPTS: "-Xmx512m -Xms512m"
183 | ELASTIC_PASSWORD: ${ELASTICSEARCH_PWD}
184 | node.name: es02
185 | cluster.name: es-docker-cluster
186 | discovery.seed_hosts: es01,es03
187 | cluster.initial_master_nodes: es01,es02,es03
188 | xpack.license.self_generated.type: basic
189 | xpack.monitoring.collection.enabled: "true"
190 | xpack.security.enabled: "true"
191 | xpack.security.http.ssl.enabled: "true"
192 | xpack.security.http.ssl.key: /usr/share/elasticsearch/config/certificates/es02/es02.key
193 | xpack.security.http.ssl.certificate_authorities: /usr/share/elasticsearch/config/certificates/ca/ca.crt
194 | xpack.security.http.ssl.certificate: /usr/share/elasticsearch/config/certificates/es02/es02.crt
195 | xpack.security.transport.ssl.enabled: "true"
196 | xpack.security.transport.ssl.verification_mode: certificate
197 | xpack.security.transport.ssl.certificate_authorities: /usr/share/elasticsearch/config/certificates/ca/ca.crt
198 | xpack.security.transport.ssl.certificate: /usr/share/elasticsearch/config/certificates/es02/es02.crt
199 | xpack.security.transport.ssl.key: /usr/share/elasticsearch/config/certificates/es02/es02.key
200 | cluster.routing.allocation.disk.threshold_enabled: "true"
201 | cluster.routing.allocation.disk.watermark.low: 93%
202 | cluster.routing.allocation.disk.watermark.high: 95%
203 | ulimits:
204 | memlock:
205 | soft: 262144
206 | hard: 500000
207 |
208 | es03:
209 | restart: always
210 | image: docker.elastic.co/elasticsearch/elasticsearch:7.12.0
211 | volumes:
212 | - "es03:/usr/share/elasticsearch/data"
213 | - "./certs:/usr/share/elasticsearch/config/certificates:ro"
214 | environment:
215 | ES_JAVA_OPTS: "-Xmx512m -Xms512m"
216 | ELASTIC_PASSWORD: ${ELASTICSEARCH_PWD}
217 | node.name: es03
218 | cluster.name: es-docker-cluster
219 | discovery.seed_hosts: es01,es02
220 | cluster.initial_master_nodes: es01,es02,es03
221 | xpack.license.self_generated.type: basic
222 | xpack.monitoring.collection.enabled: "true"
223 | xpack.security.enabled: "true"
224 | xpack.security.http.ssl.enabled: "true"
225 | xpack.security.http.ssl.key: /usr/share/elasticsearch/config/certificates/es03/es03.key
226 | xpack.security.http.ssl.certificate_authorities: /usr/share/elasticsearch/config/certificates/ca/ca.crt
227 | xpack.security.http.ssl.certificate: /usr/share/elasticsearch/config/certificates/es03/es03.crt
228 | xpack.security.transport.ssl.enabled: "true"
229 | xpack.security.transport.ssl.verification_mode: certificate
230 | xpack.security.transport.ssl.certificate_authorities: /usr/share/elasticsearch/config/certificates/ca/ca.crt
231 | xpack.security.transport.ssl.certificate: /usr/share/elasticsearch/config/certificates/es03/es03.crt
232 | xpack.security.transport.ssl.key: /usr/share/elasticsearch/config/certificates/es03/es03.key
233 | cluster.routing.allocation.disk.threshold_enabled: "true"
234 | cluster.routing.allocation.disk.watermark.low: 93%
235 | cluster.routing.allocation.disk.watermark.high: 95%
236 | ulimits:
237 | memlock:
238 | soft: 262144
239 | hard: 500000
240 |
241 | kibana:
242 | image: docker.elastic.co/kibana/kibana:7.12.0
243 | restart: always
244 | volumes:
245 | - type: bind
246 | source: ./kibana.yml
247 | target: /usr/share/kibana/config/kibana.yml
248 | read_only: true
249 | - "./certs:/usr/share/elasticsearch/config/certificates:ro"
250 | ports:
251 | - "5601:5601"
252 | depends_on:
253 | - es01
254 |
255 | # source : https://docs.min.io/docs/deploy-minio-on-docker-compose.html
256 | minio:
257 | restart: always
258 | image: minio/minio:RELEASE.2021-04-06T23-11-00Z-24-g409125240
259 | command: server /data
260 | ports:
261 | - 9000:9000
262 | volumes:
263 | - minio:/data
264 | environment:
265 | MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY}
266 | MINIO_SECRET_KEY: ${MINIO_SECRET_KEY}
267 |
268 |
269 | volumes:
270 | es01:
271 | es02:
272 | es03:
273 | minio:
274 | prefect_postgres:
275 |
276 | networks:
277 | prefect-server:
278 | name: prefect-server
279 |
--------------------------------------------------------------------------------
/flow/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.7
2 |
3 | RUN apt update
4 | RUN pip install --upgrade pip
5 |
6 | COPY ./requirements.txt /requirements.txt
7 | RUN pip install -r /requirements.txt
--------------------------------------------------------------------------------
/flow/config.toml:
--------------------------------------------------------------------------------
1 | # debug mode
2 | debug = true
3 |
4 | # base configuration directory (typically you won't change this!)
5 | home_dir = "~/.prefect"
6 |
7 | backend = "server"
8 |
9 | [server]
10 | host = "http://172.17.0.1"
11 | port = "4200"
12 | host_port = "4200"
13 | endpoint = "${server.host}:${server.port}"
14 |
15 | [s3]
16 | endpoint = "172.17.0.1:9000"
17 | key = "minio"
18 | secret = "minio123"
--------------------------------------------------------------------------------
/flow/requirements.txt:
--------------------------------------------------------------------------------
1 | prefect==0.14.16
2 | minio==7.0.3
3 | clevercsv==0.6.7
4 | tqdm==4.60.0
5 | elasticsearch==7.12.0
6 | geopy==2.1.0
7 | iso3166==1.0.1
8 | dateparser==1.0.0
9 | GoogleNews==1.5.7
10 | snscrape==0.3.4
11 | pandas==1.2.4
12 |
--------------------------------------------------------------------------------
/flow/scripts/crawl_google_news.py:
--------------------------------------------------------------------------------
1 | # python3
2 | import os
3 | from typing import Iterable
4 | import uuid
5 | import prefect
6 | from elasticsearch import Elasticsearch, helpers
7 | from prefect import Flow, Task, Client
8 | from datetime import timedelta, datetime
9 |
10 | from prefect.schedules import IntervalSchedule
11 | from GoogleNews import GoogleNews
12 |
13 | from crawl_mapping import mapping
14 |
15 |
16 | project_name = "pandemic-knowledge-crawl-googlenews"
17 | index_name = "news_googlenews"
18 |
19 | MAX_ES_ROW_INJECT = int(os.environ.get("MAX_ES_ROW_INJECT", 1000))
20 | ELASTIC_SCHEME = os.environ.get("ELASTIC_SCHEME")
21 | ELASTIC_PORT = os.environ.get("ELASTIC_PORT")
22 | ELASTIC_USER = os.environ.get("ELASTIC_USER")
23 | ELASTIC_PWD = os.environ.get("ELASTIC_PWD")
24 | ELASTIC_ENDPOINT = os.environ.get("ELASTIC_ENDPOINT")
25 |
26 | logger = prefect.context.get("logger")
27 |
28 | schedule = IntervalSchedule(
29 | start_date=datetime.utcnow() + timedelta(seconds=1), interval=timedelta(hours=24)
30 | )
31 |
32 |
33 | def get_es_instance():
34 | es_inst = Elasticsearch(
35 | [ELASTIC_ENDPOINT],
36 | http_auth=(ELASTIC_USER, ELASTIC_PWD),
37 | scheme=ELASTIC_SCHEME,
38 | port=ELASTIC_PORT,
39 | verify_certs=False,
40 | )
41 | return es_inst
42 |
43 |
44 | def inject_rows_to_es(rows, index_name):
45 | es_inst = get_es_instance()
46 |
47 | logger.info("Injecting {} rows in Elasticsearch".format(len(rows)))
48 |
49 | actions = [
50 | {"_index": index_name, "_id": uuid.uuid4(), "_source": row} for row in rows
51 | ]
52 |
53 | helpers.bulk(es_inst, actions)
54 |
55 |
56 | def format_new(new: dict, lang: str) -> dict:
57 | """Formatting a single Google News new for elasticsearch injection"""
58 | if len(new):
59 | return {
60 | "title": str(new["title"]),
61 | "desc": str(new["desc"]),
62 | "img": str(new["img"]),
63 | "link": "https://" + str(new["link"]),
64 | "source.crawler": "Google News",
65 | "source.website": str(new["site"]),
66 | "source.url": str(new["link"]),
67 | "date": new["datetime"],
68 | "lang": lang,
69 | }
70 | return None
71 |
72 |
73 | def get_news(googlenews: GoogleNews, lang: str, search_tag: str) -> Iterable:
74 | googlenews.get_news(search_tag)
75 | news = googlenews.results(sort=True)
76 | if news:
77 | for new in news:
78 | fmt_new = format_new(new, lang)
79 | if fmt_new:
80 | yield fmt_new
81 | return []
82 |
83 |
84 | class GetNews(Task):
85 | def run(self, index_name):
86 | googlenews = GoogleNews(
87 | period="24h", # TODO(): Improve using googlenews.set_time_range('02/01/2020','02/28/2020')
88 | encode="utf-8",
89 | )
90 | news_to_inject = []
91 | langs = ["fr", "en"]
92 | search_tags = ["COVID", "CORONA"]
93 | for lang in langs:
94 | for search_tag in search_tags:
95 | logger.info(
96 | f"Crawling GoogleNews for '{lang}' lang and {search_tag} search tag..."
97 | )
98 | googlenews.set_lang(lang)
99 | try:
100 | news = list(get_news(googlenews, lang, search_tag))
101 | news_to_inject += news if len(news) else []
102 | logger.info(f"Found {len(news)} news.")
103 | except Exception as e:
104 | logger.error(e)
105 | googlenews.clear()
106 | if len(news_to_inject) > 0:
107 | inject_rows_to_es(news_to_inject, index_name)
108 | news_to_inject = []
109 |
110 |
111 | class GenerateEsMapping(Task):
112 | def __init__(self, index_name, **kwargs):
113 | self.index_name = index_name
114 | super().__init__(**kwargs)
115 |
116 | def run(self):
117 | index_name = self.index_name
118 | es_inst = get_es_instance()
119 |
120 | logger.info("Generating mapping for index {}".format(index_name))
121 |
122 | es_inst.indices.delete(index=index_name, ignore=[400, 404])
123 |
124 | response = es_inst.indices.create(index=index_name, body=mapping, ignore=400)
125 |
126 | if "acknowledged" in response:
127 | if response["acknowledged"] == True:
128 | logger.info(
129 | "INDEX MAPPING SUCCESS FOR INDEX: {}".format(response["index"])
130 | )
131 | elif "error" in response:
132 | logger.error(response["error"]["root_cause"])
133 | logger.error("Error type: {}".format(response["error"]["type"]))
134 | raise Exception("Unable to create index mapping")
135 |
136 |
137 | with Flow("Crawl news and insert", schedule=schedule) as flow:
138 | flow.set_dependencies(
139 | upstream_tasks=[GenerateEsMapping(index_name)],
140 | task=GetNews(),
141 | keyword_tasks=dict(index_name=index_name),
142 | )
143 |
144 | if __name__ == "__main__":
145 | try:
146 | client = Client()
147 | client.create_project(project_name=project_name)
148 | except prefect.utilities.exceptions.ClientError as e:
149 | logger.info("Project already exists")
150 |
151 | flow.register(
152 | project_name=project_name,
153 | labels=["development"],
154 | add_default_labels=False,
155 | )
156 |
--------------------------------------------------------------------------------
/flow/scripts/crawl_mapping.py:
--------------------------------------------------------------------------------
1 | mapping = {
2 | "mappings": {
3 | "properties": {
4 | "title": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
5 | "desc": {"type": "text"},
6 | "date": {
7 | "type": "date",
8 | "format": "strict_date_optional_time||epoch_millis",
9 | },
10 | "link": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
11 | "img": {"type": "text"},
12 | "source": {
13 | "properties": {
14 | "crawler": {"type": "text"},
15 | "website": {"type": "text"},
16 | "author": {"type": "text"},
17 | "url": {"type": "text"},
18 | "tweet": {"properties": {"id": {"type": "text"}}},
19 | }
20 | },
21 | "lang": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
22 | }
23 | }
24 | }
--------------------------------------------------------------------------------
/flow/scripts/crawl_tweets.py:
--------------------------------------------------------------------------------
1 | # python3
2 | import os
3 | import uuid
4 | import prefect
5 | from elasticsearch import Elasticsearch, helpers
6 | from prefect import Flow, Task, Client
7 | from datetime import datetime
8 | from datetime import timedelta
9 |
10 | from prefect.schedules import IntervalSchedule
11 | import snscrape.modules.twitter as sntwitter
12 |
13 | from crawl_mapping import mapping
14 |
15 | project_name = "pandemic-knowledge-crawl-tweets"
16 | index_name = "news_tweets"
17 |
18 | lang = "en"
19 | tweet_limit = 1000
20 |
21 | MAX_ES_ROW_INJECT = int(os.environ.get("MAX_ES_ROW_INJECT", 1000))
22 | ELASTIC_SCHEME = os.environ.get("ELASTIC_SCHEME")
23 | ELASTIC_PORT = os.environ.get("ELASTIC_PORT")
24 | ELASTIC_USER = os.environ.get("ELASTIC_USER")
25 | ELASTIC_PWD = os.environ.get("ELASTIC_PWD")
26 | ELASTIC_ENDPOINT = os.environ.get("ELASTIC_ENDPOINT")
27 |
28 | logger = prefect.context.get("logger")
29 |
30 | schedule = IntervalSchedule(
31 | start_date=datetime.utcnow() + timedelta(seconds=1), interval=timedelta(hours=24)
32 | )
33 |
34 |
35 | def get_es_instance():
36 | es_inst = Elasticsearch(
37 | [ELASTIC_ENDPOINT],
38 | http_auth=(ELASTIC_USER, ELASTIC_PWD),
39 | scheme=ELASTIC_SCHEME,
40 | port=ELASTIC_PORT,
41 | verify_certs=False,
42 | )
43 | return es_inst
44 |
45 |
46 | def inject_rows_to_es(rows, index_name):
47 | es_inst = get_es_instance()
48 |
49 | logger.info("Injecting {} rows in Elasticsearch".format(len(rows)))
50 |
51 | actions = [
52 | {"_index": index_name, "_id": uuid.uuid4(), "_source": row} for row in rows
53 | ]
54 |
55 | helpers.bulk(es_inst, actions)
56 |
57 |
58 | class GetTweets(Task):
59 | def run(self, index_name):
60 | tweets_from = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
61 | to_inject = []
62 | tweets = sntwitter.TwitterSearchScraper(
63 | f"covid since:{tweets_from} lang:{lang}"
64 | ).get_items()
65 | for i, tweet in enumerate(tweets):
66 | if i > tweet_limit:
67 | break
68 | if i % 100 == 0:
69 | inject_rows_to_es(to_inject, index_name)
70 | to_inject = []
71 | to_inject.append(
72 | {
73 | "title": f"Tweet from {tweet.username} the {tweet.date}",
74 | "desc": tweet.content,
75 | "date": tweet.date,
76 | "link": tweet.url,
77 | "source.crawler": "twitter",
78 | "source.website": "https://twitter.com",
79 | "source.author": tweet.username,
80 | "source.url": tweet.url,
81 | "source.tweet.id": tweet.id,
82 | "lang": lang
83 | }
84 | )
85 | if len(to_inject):
86 | inject_rows_to_es(to_inject, index_name)
87 |
88 |
89 | class GenerateEsMapping(Task):
90 | def __init__(self, index_name, **kwargs):
91 | self.index_name = index_name
92 | super().__init__(**kwargs)
93 |
94 | def run(self):
95 | index_name = self.index_name
96 | es_inst = get_es_instance()
97 |
98 | logger.info("Generating mapping for index {}".format(index_name))
99 |
100 | es_inst.indices.delete(index=index_name, ignore=[400, 404])
101 |
102 | response = es_inst.indices.create(index=index_name, body=mapping, ignore=400)
103 |
104 | if "acknowledged" in response:
105 | if response["acknowledged"] == True:
106 | logger.info(
107 | "INDEX MAPPING SUCCESS FOR INDEX: {}".format(response["index"])
108 | )
109 | elif "error" in response:
110 | logger.error(response["error"]["root_cause"])
111 | logger.error("Error type: {}".format(response["error"]["type"]))
112 | raise Exception("Unable to create index mapping")
113 |
114 |
115 | with Flow("Crawl tweets and insert", schedule=schedule) as flow:
116 | flow.set_dependencies(
117 | upstream_tasks=[GenerateEsMapping(index_name)],
118 | task=GetTweets(),
119 | keyword_tasks=dict(index_name=index_name),
120 | )
121 |
122 | if __name__ == "__main__":
123 | try:
124 | client = Client()
125 | client.create_project(project_name=project_name)
126 | except prefect.utilities.exceptions.ClientError as e:
127 | logger.info("Project already exists")
128 |
129 | flow.register(
130 | project_name=project_name,
131 | labels=["development"],
132 | add_default_labels=False,
133 | )
134 |
--------------------------------------------------------------------------------
/flow/scripts/insert_france.py:
--------------------------------------------------------------------------------
1 | import os
2 | import dateparser
3 | import uuid
4 | import requests
5 | import prefect
6 | import clevercsv
7 | import traceback
8 | from tqdm import tqdm
9 | from prefect import Flow, Task, Client, task
10 | from datetime import timedelta, datetime
11 | from prefect.schedules import IntervalSchedule
12 | from elasticsearch import Elasticsearch, helpers
13 | from geopy.geocoders import Nominatim
14 | from requests.adapters import HTTPAdapter
15 | from requests.packages.urllib3.util.retry import Retry
16 |
17 | from mapping import mapping
18 |
19 | MINIO_SCHEME = os.environ.get("MINIO_SCHEME")
20 | MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT")
21 | MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY")
22 | MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY")
23 | MAX_ES_ROW_INJECT = int(os.environ.get("MAX_ES_ROW_INJECT", 1000))
24 | ELASTIC_SCHEME = os.environ.get("ELASTIC_SCHEME")
25 | ELASTIC_PORT = os.environ.get("ELASTIC_PORT")
26 | ELASTIC_USER = os.environ.get("ELASTIC_USER")
27 | ELASTIC_PWD = os.environ.get("ELASTIC_PWD")
28 | ELASTIC_ENDPOINT = os.environ.get("ELASTIC_ENDPOINT")
29 |
30 | csv_endpoint = "https://raw.githubusercontent.com/opencovid19-fr/data/master/dist/chiffres-cles.csv"
31 | index_name = "contamination_opencovid19_fr"
32 | project_name = f"pandemic-knowledge-opencovid19-fr"
33 | flow_name = project_name
34 |
35 | logger = prefect.context.get("logger")
36 |
37 | columns_allowed = {
38 | "date": ["date"],
39 | "location": ["maille_nom"],
40 | "location_name": ["maille_nom"],
41 | "confirmed": ["cas_confirmes"],
42 | "deaths": ["deces"],
43 | "recovered": ["gueris"],
44 | "vaccinated": [],
45 | "tested": ["depistes"],
46 | }
47 |
48 | extra_locations = {"EL": "GR"}
49 |
50 | locations_cache = {"World": None}
51 |
52 |
53 | def get_es_instance():
54 | es_inst = Elasticsearch(
55 | [ELASTIC_ENDPOINT],
56 | http_auth=(ELASTIC_USER, ELASTIC_PWD),
57 | scheme=ELASTIC_SCHEME,
58 | port=ELASTIC_PORT,
59 | verify_certs=False,
60 | )
61 | return es_inst
62 |
63 |
64 | def format_date(date):
65 | if not date:
66 | return None
67 | try:
68 | return dateparser.parse(date)
69 | except Exception as e:
70 | logger.error(e)
71 | return None
72 |
73 |
74 | def format_location(lookup_table, location_name):
75 | if not location_name:
76 | return None
77 | if location_name in locations_cache:
78 | return locations_cache[location_name]
79 | if location_name in lookup_table:
80 | return lookup_table[location_name]
81 | return None
82 |
83 |
84 | def pick_one_of_elements(haystack: list, needles: list):
85 | for needle in needles:
86 | if needle in haystack:
87 | return needle
88 | return None
89 |
90 |
91 | def pick_nonempty_cell(row, headers, potential_keys):
92 | for potential_key in potential_keys:
93 | if potential_key in headers and row[headers[potential_key]]:
94 | return row[headers[potential_key]]
95 | return None
96 |
97 |
98 | def format_row(lookup_table, row, headers, filename):
99 | date_start = date_end = format_date(
100 | pick_nonempty_cell(row, headers, columns_allowed["date"])
101 | )
102 | location = format_location(
103 | lookup_table, pick_nonempty_cell(row, headers, columns_allowed["location"])
104 | )
105 | location_name = pick_nonempty_cell(row, headers, columns_allowed["location_name"])
106 | nb_confirmed = pick_nonempty_cell(row, headers, columns_allowed["confirmed"])
107 | nb_deaths = pick_nonempty_cell(row, headers, columns_allowed["deaths"])
108 | nb_recovered = pick_nonempty_cell(row, headers, columns_allowed["recovered"])
109 | nb_vaccinated = pick_nonempty_cell(row, headers, columns_allowed["vaccinated"])
110 | nb_tested = pick_nonempty_cell(row, headers, columns_allowed["tested"])
111 | if date_start != None:
112 | return {
113 | "date_start": date_start,
114 | "date_end": date_end,
115 | "location": location[0] if location else None,
116 | "location_name": location_name,
117 | "confirmed": int(float(nb_confirmed)) if nb_confirmed else 0,
118 | "deaths": int(float(nb_deaths)) if nb_deaths else 0,
119 | "recovered": int(float(nb_recovered)) if nb_recovered else 0,
120 | "vaccinated": int(float(nb_vaccinated)) if nb_vaccinated else 0,
121 | "tested": int(float(nb_tested)) if nb_tested else 0,
122 | "filename": filename,
123 | "iso_code2": location[1] if location else None,
124 | "iso_region2": str(row[2]).replace("DEP", "FR"),
125 | }
126 | logger.warning(f"format_row(): Invalid row : {row}")
127 | return None
128 |
129 |
130 | def inject_rows_to_es(rows, index_name):
131 | es_inst = get_es_instance()
132 |
133 | logger.info("Injecting {} rows in Elasticsearch".format(len(rows)))
134 |
135 | actions = [
136 | {"_index": index_name, "_id": uuid.uuid4(), "_source": row} for row in rows
137 | ]
138 | helpers.bulk(es_inst, actions)
139 |
140 |
141 | def parse_file(lookup_table, file_path):
142 | with open(file_path, "r", newline="") as fp:
143 | char_read = 10000 if os.path.getsize(file_path) > 10000 else None
144 |
145 | try:
146 | dialect = clevercsv.Sniffer().sniff(fp.read(char_read), verbose=True)
147 | except Exception as e:
148 | logger.error(e)
149 | return []
150 |
151 | fp.seek(0)
152 | reader = clevercsv.reader(fp, dialect)
153 | headers_list = next(reader)
154 | headers = {}
155 | for i, header in enumerate(headers_list):
156 | headers[header] = i
157 | for row in tqdm(reader, unit="entry"):
158 | if row[1] != "departement": # multiple granularities
159 | continue
160 | yield format_row(lookup_table, row, headers, file_path)
161 | return []
162 |
163 |
164 | def process_file(lookup_table, index_name, file_path):
165 | to_inject = []
166 | logger.info(f"process_file(): Processing {file_path}...")
167 | for row in parse_file(lookup_table, file_path):
168 | if row is not None:
169 | to_inject.append(row)
170 | if len(to_inject) >= MAX_ES_ROW_INJECT:
171 | inject_rows_to_es(to_inject, index_name)
172 | to_inject = []
173 | else:
174 | logger.warning("process_file(): Invalid row")
175 | if len(to_inject) > 0:
176 | inject_rows_to_es(to_inject, index_name)
177 |
178 |
179 | class ParseFiles(Task):
180 | def run(self, lookup_table, index_name, http_csv_uris: list):
181 | for file_uri in tqdm(http_csv_uris):
182 | logger.info(f"Processing file {file_uri}...")
183 | file_path = f"/tmp/{uuid.uuid4()}"
184 | session = requests.Session()
185 | retry = Retry(connect=3, backoff_factor=0.5)
186 | adapter = HTTPAdapter(max_retries=retry)
187 | session.mount("http://", adapter)
188 | session.mount("https://", adapter)
189 | r = session.get(file_uri, allow_redirects=True)
190 | with open(file_path, "wb") as f:
191 | f.write(r.content)
192 | process_file(lookup_table, index_name, file_path)
193 |
194 |
195 | class GenerateEsMapping(Task):
196 | def run(self, index_name) -> str:
197 | """
198 | Returns:
199 | str: index_name
200 | """
201 | es_inst = get_es_instance()
202 | logger.info("Generating mapping for index {}".format(index_name))
203 | es_inst.indices.delete(index=index_name, ignore=[400, 404])
204 | response = es_inst.indices.create(
205 | index=index_name, body=mapping, ignore=400 # ignore 400 already exists code
206 | )
207 | if "acknowledged" in response:
208 | if response["acknowledged"] == True:
209 | logger.info(
210 | "INDEX MAPPING SUCCESS FOR INDEX: {}".format(response["index"])
211 | )
212 | elif "error" in response:
213 | logger.error(response["error"]["root_cause"])
214 | logger.error("Error type: {}".format(response["error"]["type"]))
215 | raise Exception("Unable to create index mapping")
216 | return index_name
217 |
218 |
219 | def read_lookup_table(lookup_file_path: str):
220 | logger.info("Loading lookup table...")
221 | lookup = {}
222 | with open(lookup_file_path, "r", newline="") as fp:
223 | char_read = 10000 if os.path.getsize(lookup_file_path) > 10000 else None
224 | dialect = clevercsv.Sniffer().sniff(fp.read(char_read), verbose=True)
225 | fp.seek(0)
226 | reader = clevercsv.reader(fp, dialect)
227 | next(reader)
228 | for row in tqdm(reader, unit="entry"):
229 | for location in [
230 | row[6], # Province_State
231 | row[7], # Country_Region
232 | row[10], # Combined_Key
233 | ]:
234 | if location and location not in lookup:
235 | if row[8] and row[9]: # Lat, Long
236 | lookup[location] = (
237 | {"lat": float(row[8]), "lon": float(row[9])},
238 | row[1],
239 | )
240 | logger.info(f"Found {len(lookup)} locations.")
241 | return lookup
242 |
243 |
244 | lookup_table = read_lookup_table("/usr/app/UID_ISO_FIPS_LookUp_Table.csv")
245 |
246 | schedule = IntervalSchedule(
247 | start_date=datetime.utcnow() + timedelta(seconds=1), interval=timedelta(hours=24)
248 | )
249 | with Flow(flow_name, schedule=schedule) as flow:
250 | es_mapping_task = GenerateEsMapping()
251 | index_name = es_mapping_task(index_name)
252 |
253 | parse_files_task = ParseFiles()
254 | parse_files_task(
255 | lookup_table=lookup_table,
256 | index_name=index_name,
257 | http_csv_uris=[csv_endpoint],
258 | )
259 |
260 | if __name__ == "__main__":
261 |
262 | try:
263 | client = Client()
264 | client.create_project(project_name=project_name)
265 | except prefect.utilities.exceptions.ClientError as e:
266 | logger.info("Project already exists")
267 |
268 | flow.register(
269 | project_name=project_name, labels=["development"], add_default_labels=False
270 | )
271 |
--------------------------------------------------------------------------------
/flow/scripts/insert_france_virtests.py:
--------------------------------------------------------------------------------
1 | import os
2 | import dateparser
3 | import uuid
4 | import requests
5 | import prefect
6 | import clevercsv
7 | from tqdm import tqdm
8 | from prefect import Flow, Task, Client, task
9 | from datetime import timedelta, datetime
10 | from prefect.schedules import IntervalSchedule
11 | from elasticsearch import Elasticsearch, helpers
12 | from requests.adapters import HTTPAdapter
13 | from requests.packages.urllib3.util.retry import Retry
14 |
15 | from mapping import mapping
16 |
17 | MINIO_SCHEME = os.environ.get("MINIO_SCHEME")
18 | MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT")
19 | MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY")
20 | MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY")
21 | MAX_ES_ROW_INJECT = int(os.environ.get("MAX_ES_ROW_INJECT", 1000))
22 | ELASTIC_SCHEME = os.environ.get("ELASTIC_SCHEME")
23 | ELASTIC_PORT = os.environ.get("ELASTIC_PORT")
24 | ELASTIC_USER = os.environ.get("ELASTIC_USER")
25 | ELASTIC_PWD = os.environ.get("ELASTIC_PWD")
26 | ELASTIC_ENDPOINT = os.environ.get("ELASTIC_ENDPOINT")
27 |
28 | csv_endpoint = "https://www.data.gouv.fr/en/datasets/r/406c6a23-e283-4300-9484-54e78c8ae675"
29 | project_name = f"pandemic-knowledge-santepublic-tests"
30 | index_name = "contamination_santepublique_vir_tests_fr"
31 | flow_name = project_name
32 |
33 | logger = prefect.context.get("logger")
34 |
35 | columns_allowed = {
36 | "date": ["jour"],
37 | "location": ["dep"],
38 | "location_name": ["dep"],
39 | "confirmed": ["P"],
40 | "deaths": [],
41 | "recovered": [],
42 | "vaccinated": [],
43 | "tested": ["T"],
44 | }
45 |
46 | extra_locations = {"EL": "GR"}
47 |
48 | locations_cache = {"World": None}
49 |
50 |
51 | def get_es_instance():
52 | es_inst = Elasticsearch(
53 | [ELASTIC_ENDPOINT],
54 | http_auth=(ELASTIC_USER, ELASTIC_PWD),
55 | scheme=ELASTIC_SCHEME,
56 | port=ELASTIC_PORT,
57 | verify_certs=False,
58 | )
59 | return es_inst
60 |
61 |
62 | def format_date(date):
63 | if not date:
64 | return None
65 | try:
66 | return dateparser.parse(date)
67 | except Exception as e:
68 | logger.error(e)
69 | return None
70 |
71 |
72 | def format_location(lookup_table, location_name):
73 | if not location_name:
74 | return None
75 | if location_name in locations_cache:
76 | return locations_cache[location_name]
77 | if location_name in lookup_table:
78 | return lookup_table[location_name]
79 | return None
80 |
81 |
82 | def pick_one_of_elements(haystack: list, needles: list):
83 | for needle in needles:
84 | if needle in haystack:
85 | return needle
86 | return None
87 |
88 |
89 | def pick_nonempty_cell(row, headers, potential_keys):
90 | for potential_key in potential_keys:
91 | if potential_key in headers and row[headers[potential_key]]:
92 | return row[headers[potential_key]]
93 | return None
94 |
95 |
96 | def format_row(lookup_table, row, headers, filename):
97 | date_start = date_end = format_date(
98 | pick_nonempty_cell(row, headers, columns_allowed["date"])
99 | )
100 | location = format_location(
101 | lookup_table, pick_nonempty_cell(row, headers, columns_allowed["location"])
102 | )
103 | location_name = pick_nonempty_cell(row, headers, columns_allowed["location_name"])
104 | nb_confirmed = pick_nonempty_cell(row, headers, columns_allowed["confirmed"])
105 | nb_deaths = pick_nonempty_cell(row, headers, columns_allowed["deaths"])
106 | nb_recovered = pick_nonempty_cell(row, headers, columns_allowed["recovered"])
107 | nb_vaccinated = pick_nonempty_cell(row, headers, columns_allowed["vaccinated"])
108 | nb_tested = pick_nonempty_cell(row, headers, columns_allowed["tested"])
109 | if date_start != None:
110 | return {
111 | "date_start": date_start,
112 | "date_end": date_end,
113 | "location": location[0] if location else None,
114 | "location_name": location_name,
115 | "confirmed": int(float(nb_confirmed)) if nb_confirmed else 0,
116 | "deaths": int(float(nb_deaths)) if nb_deaths else 0,
117 | "recovered": int(float(nb_recovered)) if nb_recovered else 0,
118 | "vaccinated": int(float(nb_vaccinated)) if nb_vaccinated else 0,
119 | "tested": int(float(nb_tested)) if nb_tested else 0,
120 | "filename": filename,
121 | "iso_code2": location[1] if location else None,
122 | "iso_region2": f"FR-{location_name}",
123 | }
124 | logger.warning(f"format_row(): Invalid row : {row}")
125 | return None
126 |
127 |
128 | def inject_rows_to_es(rows, index_name):
129 | es_inst = get_es_instance()
130 |
131 | logger.info("Injecting {} rows in Elasticsearch".format(len(rows)))
132 |
133 | actions = [
134 | {"_index": index_name, "_id": uuid.uuid4(), "_source": row} for row in rows
135 | ]
136 | helpers.bulk(es_inst, actions)
137 |
138 |
139 | def parse_file(lookup_table, file_path):
140 | with open(file_path, "r", newline="") as fp:
141 | char_read = 10000 if os.path.getsize(file_path) > 10000 else None
142 |
143 | try:
144 | dialect = clevercsv.Sniffer().sniff(fp.read(char_read), verbose=True)
145 | except Exception as e:
146 | logger.error(e)
147 | return []
148 |
149 | fp.seek(0)
150 | reader = clevercsv.reader(fp, dialect)
151 | headers_list = next(reader)
152 | headers = {}
153 | for i, header in enumerate(headers_list):
154 | headers[header] = i
155 | for row in tqdm(reader, unit="entry"):
156 | yield format_row(lookup_table, row, headers, file_path)
157 | return []
158 |
159 |
160 | def process_file(lookup_table, index_name, file_path):
161 | to_inject = []
162 | logger.info(f"process_file(): Processing {file_path}...")
163 | for row in parse_file(lookup_table, file_path):
164 | if row is not None:
165 | to_inject.append(row)
166 | if len(to_inject) >= MAX_ES_ROW_INJECT:
167 | inject_rows_to_es(to_inject, index_name)
168 | to_inject = []
169 | else:
170 | logger.warning("process_file(): Invalid row")
171 | if len(to_inject) > 0:
172 | inject_rows_to_es(to_inject, index_name)
173 |
174 |
175 | class ParseFiles(Task):
176 | def run(self, lookup_table, index_name, http_csv_uris: list):
177 | for file_uri in tqdm(http_csv_uris):
178 | logger.info(f"Processing file {file_uri}...")
179 | file_path = f"/tmp/{uuid.uuid4()}"
180 | session = requests.Session()
181 | retry = Retry(connect=3, backoff_factor=0.5)
182 | adapter = HTTPAdapter(max_retries=retry)
183 | session.mount("http://", adapter)
184 | session.mount("https://", adapter)
185 | r = session.get(file_uri, allow_redirects=True)
186 | with open(file_path, "wb") as f:
187 | f.write(r.content)
188 | process_file(lookup_table, index_name, file_path)
189 |
190 |
191 | class GenerateEsMapping(Task):
192 | def run(self, index_name) -> str:
193 | """
194 | Returns:
195 | str: index_name
196 | """
197 | es_inst = get_es_instance()
198 | logger.info("Generating mapping for index {}".format(index_name))
199 | es_inst.indices.delete(index=index_name, ignore=[400, 404])
200 | response = es_inst.indices.create(
201 | index=index_name, body=mapping, ignore=400 # ignore 400 already exists code
202 | )
203 | if "acknowledged" in response:
204 | if response["acknowledged"] == True:
205 | logger.info(
206 | "INDEX MAPPING SUCCESS FOR INDEX: {}".format(response["index"])
207 | )
208 | elif "error" in response:
209 | logger.error(response["error"]["root_cause"])
210 | logger.error("Error type: {}".format(response["error"]["type"]))
211 | raise Exception("Unable to create index mapping")
212 | return index_name
213 |
214 |
215 | def read_lookup_table(lookup_file_path: str):
216 | logger.info("Loading lookup table...")
217 | lookup = {}
218 | with open(lookup_file_path, "r", newline="") as fp:
219 | char_read = 10000 if os.path.getsize(lookup_file_path) > 10000 else None
220 | dialect = clevercsv.Sniffer().sniff(fp.read(char_read), verbose=True)
221 | fp.seek(0)
222 | reader = clevercsv.reader(fp, dialect)
223 | next(reader)
224 | for row in tqdm(reader, unit="entry"):
225 | for location in [
226 | row[6], # Province_State
227 | row[7], # Country_Region
228 | row[10], # Combined_Key
229 | ]:
230 | if location and location not in lookup:
231 | if row[8] and row[9]: # Lat, Long
232 | lookup[location] = (
233 | {"lat": float(row[8]), "lon": float(row[9])},
234 | row[1],
235 | )
236 | logger.info(f"Found {len(lookup)} locations.")
237 | return lookup
238 |
239 |
240 | lookup_table = read_lookup_table("/usr/app/UID_ISO_FIPS_LookUp_Table.csv")
241 |
242 | schedule = IntervalSchedule(
243 | start_date=datetime.utcnow() + timedelta(seconds=1), interval=timedelta(hours=24)
244 | )
245 | with Flow(flow_name, schedule=schedule) as flow:
246 | es_mapping_task = GenerateEsMapping()
247 | index_name = es_mapping_task(index_name)
248 |
249 | parse_files_task = ParseFiles()
250 | parse_files_task(
251 | lookup_table=lookup_table,
252 | index_name=index_name,
253 | http_csv_uris=[csv_endpoint],
254 | )
255 |
256 | if __name__ == "__main__":
257 |
258 | try:
259 | client = Client()
260 | client.create_project(project_name=project_name)
261 | except prefect.utilities.exceptions.ClientError as e:
262 | logger.info("Project already exists")
263 |
264 | flow.register(
265 | project_name=project_name, labels=["development"], add_default_labels=False
266 | )
267 |
--------------------------------------------------------------------------------
/flow/scripts/insert_owid.py:
--------------------------------------------------------------------------------
1 | import os
2 | import dateparser
3 | import uuid
4 | import prefect
5 | import clevercsv
6 | import traceback
7 | from tqdm import tqdm
8 | from prefect import Flow, Task, Client, task
9 | from datetime import timedelta, datetime
10 | from prefect.schedules import IntervalSchedule
11 | from minio import Minio
12 | from elasticsearch import Elasticsearch, helpers
13 | from geopy.geocoders import Nominatim
14 |
15 | from mapping import mapping
16 |
17 | MINIO_SCHEME = os.environ.get("MINIO_SCHEME")
18 | MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT")
19 | MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY")
20 | MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY")
21 | MAX_ES_ROW_INJECT = int(os.environ.get("MAX_ES_ROW_INJECT", 1000))
22 | ELASTIC_SCHEME = os.environ.get("ELASTIC_SCHEME")
23 | ELASTIC_PORT = os.environ.get("ELASTIC_PORT")
24 | ELASTIC_USER = os.environ.get("ELASTIC_USER")
25 | ELASTIC_PWD = os.environ.get("ELASTIC_PWD")
26 | ELASTIC_ENDPOINT = os.environ.get("ELASTIC_ENDPOINT")
27 |
28 | bucket_name = "contamination-owid"
29 | project_name = f"pandemic-knowledge-{bucket_name}"
30 | index_name = f"{bucket_name.replace('-', '_')}"
31 | flow_name = project_name
32 |
33 | logger = prefect.context.get("logger")
34 |
35 | columns_allowed = {
36 | "date": ["date"],
37 | "location": ["location"],
38 | "location_name": ["location"],
39 | "confirmed": ["new_cases"],
40 | "deaths": ["new_deaths"],
41 | "recovered": [],
42 | "vaccinated": ["new_vaccinations"],
43 | "tested": ["new_tests"],
44 | }
45 |
46 | extra_locations = {"EL": "GR"}
47 |
48 | locations_cache = {"World": None}
49 |
50 |
51 | def get_es_instance():
52 | es_inst = Elasticsearch(
53 | [ELASTIC_ENDPOINT],
54 | http_auth=(ELASTIC_USER, ELASTIC_PWD),
55 | scheme=ELASTIC_SCHEME,
56 | port=ELASTIC_PORT,
57 | verify_certs=False,
58 | )
59 | return es_inst
60 |
61 |
62 | def format_date(date):
63 | if not date:
64 | return None
65 | try:
66 | return dateparser.parse(date)
67 | except Exception as e:
68 | logger.error(e)
69 | return None
70 |
71 |
72 | def format_location(lookup_table, location_name):
73 | if not location_name:
74 | return None
75 | if location_name in locations_cache:
76 | return locations_cache[location_name]
77 | if location_name in lookup_table:
78 | return lookup_table[location_name]
79 |
80 | logger.info(f"Guessing geolocation for {location_name}")
81 | geolocator = Nominatim(user_agent="pandemic-knowledge")
82 | location = geolocator.geocode(
83 | extra_locations[location_name]
84 | if location_name in extra_locations
85 | else location_name,
86 | addressdetails=True,
87 | )
88 |
89 | if location and location.raw:
90 | logger.info(f"Found {location.latitude}, {location.longitude}")
91 | if "address" in location.raw and "country_code" in location.raw["address"]:
92 | locations_cache[location_name] = (
93 | {"lat": location.latitude, "lon": location.longitude},
94 | location.raw["address"]["country_code"].upper(),
95 | )
96 | return locations_cache[location_name]
97 | locations_cache[location_name] = None
98 | logger.error(
99 | f"Failed to locate (no country code and/or coordinates) for {location}"
100 | )
101 | return None
102 |
103 |
104 | def pick_one_of_elements(haystack: list, needles: list):
105 | for needle in needles:
106 | if needle in haystack:
107 | return needle
108 | return None
109 |
110 |
111 | def pick_nonempty_cell(row, headers, potential_keys):
112 | for potential_key in potential_keys:
113 | if potential_key in headers and row[headers[potential_key]]:
114 | return row[headers[potential_key]]
115 | return None
116 |
117 |
118 | def format_row(lookup_table, row, headers, filename):
119 | date_start = date_end = format_date(
120 | pick_nonempty_cell(row, headers, columns_allowed["date"])
121 | )
122 | location = format_location(
123 | lookup_table, pick_nonempty_cell(row, headers, columns_allowed["location"])
124 | )
125 | location_name = pick_nonempty_cell(row, headers, columns_allowed["location_name"])
126 | nb_confirmed = pick_nonempty_cell(row, headers, columns_allowed["confirmed"])
127 | nb_deaths = pick_nonempty_cell(row, headers, columns_allowed["deaths"])
128 | nb_recovered = pick_nonempty_cell(row, headers, columns_allowed["recovered"])
129 | nb_vaccinated = pick_nonempty_cell(row, headers, columns_allowed["vaccinated"])
130 | nb_tested = pick_nonempty_cell(row, headers, columns_allowed["tested"])
131 | if location is not None and date_start is not None and nb_confirmed is not None:
132 | return {
133 | "date_start": date_start,
134 | "date_end": date_end,
135 | "location": location[0],
136 | "location_name": location_name,
137 | "confirmed": int(float(nb_confirmed)) if nb_confirmed else 0,
138 | "deaths": int(float(nb_deaths)) if nb_deaths else 0,
139 | "recovered": int(float(nb_recovered)) if nb_recovered else 0,
140 | "vaccinated": int(float(nb_vaccinated)) if nb_vaccinated else 0,
141 | "tested": int(float(nb_tested)) if nb_tested else 0,
142 | "filename": filename,
143 | "iso_code2": location[1] if len(location) else None,
144 | }
145 | return None
146 |
147 |
148 | def inject_rows_to_es(rows, index_name):
149 | es_inst = get_es_instance()
150 |
151 | logger.info("Injecting {} rows in Elasticsearch".format(len(rows)))
152 |
153 | actions = [
154 | {"_index": index_name, "_id": uuid.uuid4(), "_source": row} for row in rows
155 | ]
156 | helpers.bulk(es_inst, actions)
157 |
158 |
159 | def parse_file(lookup_table, minio_client, bucket_name, object_name):
160 | csv_file_path = "/tmp/" + str(uuid.uuid4())
161 | minio_client.fget_object(bucket_name, object_name, csv_file_path)
162 | with open(csv_file_path, "r", newline="") as fp:
163 | char_read = 10000 if os.path.getsize(csv_file_path) > 10000 else None
164 |
165 | try:
166 | dialect = clevercsv.Sniffer().sniff(fp.read(char_read), verbose=True)
167 | except Exception as e:
168 | logger.error(e)
169 | return []
170 |
171 | fp.seek(0)
172 | reader = clevercsv.reader(fp, dialect)
173 | headers_list = next(reader)
174 | headers = {}
175 | for i, header in enumerate(headers_list):
176 | headers[header] = i
177 | for row in tqdm(reader, unit="entry"):
178 | yield format_row(lookup_table, row, headers, object_name)
179 | return []
180 |
181 |
182 | def process_file(lookup_table, index_name, bucket_name, object_name):
183 | minio_client = Minio(
184 | MINIO_ENDPOINT,
185 | access_key=MINIO_ACCESS_KEY,
186 | secret_key=MINIO_SECRET_KEY,
187 | secure=MINIO_SCHEME == "https",
188 | )
189 | to_inject = []
190 | logger.info(f"Processing {object_name}...")
191 | for row in parse_file(lookup_table, minio_client, bucket_name, object_name):
192 | if row is not None:
193 | to_inject.append(row)
194 | if len(to_inject) >= MAX_ES_ROW_INJECT:
195 | inject_rows_to_es(to_inject, index_name)
196 | to_inject = []
197 | else:
198 | logger.info("Invalid row")
199 | if len(to_inject) > 0:
200 | inject_rows_to_es(to_inject, index_name)
201 |
202 |
203 | def get_files(bucket_name):
204 | minio_client = Minio(
205 | MINIO_ENDPOINT,
206 | access_key=MINIO_ACCESS_KEY,
207 | secret_key=MINIO_SECRET_KEY,
208 | secure=MINIO_SCHEME == "https",
209 | )
210 | logger.info("Parse file for bucket {}".format(bucket_name))
211 | if not minio_client.bucket_exists(bucket_name):
212 | logger.error("Bucket {} does not exist".format(bucket_name))
213 | return
214 | return list(minio_client.list_objects(bucket_name))
215 |
216 |
217 | class ParseFiles(Task):
218 | def run(self, lookup_table, index_name):
219 | logger.info(f"Lookup table contains {len(lookup_table)} locations")
220 | for file in tqdm(get_files(bucket_name=bucket_name)):
221 | object_name = file.object_name
222 | try:
223 | logger.info(f"Processing file {object_name}...")
224 | process_file(lookup_table, index_name, bucket_name, object_name)
225 | except Exception as e:
226 | logger.error(traceback.format_exc())
227 | logger.error(e)
228 | logger.error(f"Can't process file {object_name}")
229 |
230 |
231 | class GenerateEsMapping(Task):
232 | def run(self, index_name) -> str:
233 | """
234 | Returns:
235 | str: index_name
236 | """
237 | es_inst = get_es_instance()
238 | logger.info("Generating mapping for index {}".format(index_name))
239 | es_inst.indices.delete(index=index_name, ignore=[400, 404])
240 | response = es_inst.indices.create(
241 | index=index_name, body=mapping, ignore=400 # ignore 400 already exists code
242 | )
243 | if "acknowledged" in response:
244 | if response["acknowledged"]:
245 | logger.info(
246 | "INDEX MAPPING SUCCESS FOR INDEX: {}".format(response["index"])
247 | )
248 | elif "error" in response:
249 | logger.error(response["error"]["root_cause"])
250 | logger.error("Error type: {}".format(response["error"]["type"]))
251 | raise Exception("Unable to create index mapping")
252 | return index_name
253 |
254 |
255 | def read_lookup_table(lookup_file_path: str):
256 | logger.info("Loading lookup table...")
257 | lookup = {}
258 | with open(lookup_file_path, "r", newline="") as fp:
259 | char_read = 10000 if os.path.getsize(lookup_file_path) > 10000 else None
260 | dialect = clevercsv.Sniffer().sniff(fp.read(char_read), verbose=True)
261 | fp.seek(0)
262 | reader = clevercsv.reader(fp, dialect)
263 | next(reader)
264 | for row in tqdm(reader, unit="entry"):
265 | for location in [
266 | row[6], # Province_State
267 | row[7], # Country_Region
268 | row[10], # Combined_Key
269 | ]:
270 | if location and location not in lookup:
271 | if row[8] and row[9]: # Lat, Long
272 | lookup[location] = (
273 | {"lat": float(row[8]), "lon": float(row[9])},
274 | row[1],
275 | )
276 | logger.info(f"Found {len(lookup)} locations.")
277 | return lookup
278 |
279 |
280 | lookup_table = read_lookup_table("/usr/app/UID_ISO_FIPS_LookUp_Table.csv")
281 |
282 | schedule = IntervalSchedule(
283 | start_date=datetime.utcnow() + timedelta(seconds=1), interval=timedelta(hours=24)
284 | )
285 | with Flow(flow_name, schedule=schedule) as flow:
286 | es_mapping_task = GenerateEsMapping()
287 | index_name = es_mapping_task(index_name)
288 |
289 | parse_files_task = ParseFiles()
290 | parse_files_task(lookup_table=lookup_table, index_name=index_name)
291 |
292 | if __name__ == "__main__":
293 |
294 | try:
295 | client = Client()
296 | client.create_project(project_name=project_name)
297 | except prefect.utilities.exceptions.ClientError as e:
298 | logger.info("Project already exists")
299 |
300 | flow.register(
301 | project_name=project_name, labels=["development"], add_default_labels=False
302 | )
303 |
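304 | # Note: this script is started by the insert_owid service in insert.docker-compose.yml
305 | # (python3 /usr/app/insert_owid.py); the registered flow then runs on a Prefect agent
306 | # carrying the "development" label.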
--------------------------------------------------------------------------------
/flow/scripts/mapping.py:
--------------------------------------------------------------------------------
1 | mapping = {
2 | "mappings": {
3 | "properties": {
4 | "date_start": {
5 | "type": "date",
6 | "format": "strict_date_optional_time||epoch_millis",
7 | },
8 | "date_end": {
9 | "type": "date",
10 | "format": "strict_date_optional_time||epoch_millis",
11 | },
12 | "location": {"type": "geo_point"},
13 | "location_name": {
14 | "type": "text",
15 | "fields": {"keyword": {"type": "keyword"}},
16 | },
17 | "confirmed": {"type": "long"},
18 | "deaths": {"type": "long"},
19 | "vaccinated": {"type": "long"},
20 | "tested": {"type": "long"},
21 | "recovered": {"type": "long"},
22 | "filename": {"type": "text"},
23 | "iso_code2": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
24 | "iso_region2": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
25 | "max_population": {"type": "long"},
26 | "percentage": {"type": "float"},
27 | }
28 | }
29 | }
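30 |
31 | # This mapping is shared by the insert_* and parse_insert flows: their GenerateEsMapping
32 | # tasks delete and recreate the target index with this body before each scheduled run.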
--------------------------------------------------------------------------------
/flow/scripts/parse_insert.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import uuid
4 | import prefect
5 | import clevercsv
6 | from tqdm import tqdm
7 | from datetime import datetime, timedelta
8 | from prefect import Flow, Task, Client
9 | from minio import Minio
10 | from elasticsearch import Elasticsearch, helpers
11 | from ssl import create_default_context
12 | from geopy.geocoders import Nominatim
13 | from iso3166 import countries
14 | from prefect.schedules import IntervalSchedule
15 |
16 | from mapping import mapping
17 |
18 | MINIO_SCHEME = os.environ.get("MINIO_SCHEME")
19 | MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT")
20 | MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY")
21 | MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY")
22 | MAX_ES_ROW_INJECT = int(os.environ.get("MAX_ES_ROW_INJECT", 1000))
23 | ELASTIC_SCHEME = os.environ.get("ELASTIC_SCHEME")
24 | ELASTIC_PORT = os.environ.get("ELASTIC_PORT")
25 | ELASTIC_USER = os.environ.get("ELASTIC_USER")
26 | ELASTIC_PWD = os.environ.get("ELASTIC_PWD")
27 | ELASTIC_ENDPOINT = os.environ.get("ELASTIC_ENDPOINT")
28 |
29 | columns_allowed = {
30 | "date": ["YearWeekISO", "dateRep", "date"],
31 | "location": ["ReportingCountry", "location", "countriesAndTerritories"],
32 | "cases": ["NumberDosesReceived", "new_vaccinations", "cases", "new_cases"],
33 | "population": ["population"],
34 | }
35 |
36 | logger = prefect.context.get("logger")
37 |
38 | extra_locations = {"EL": "GR"}
39 |
40 | locations_cache = {"World": None}
41 |
42 |
43 | def get_es_instance():
44 | es_inst = Elasticsearch(
45 | [ELASTIC_ENDPOINT],
46 | http_auth=(ELASTIC_USER, ELASTIC_PWD),
47 | scheme=ELASTIC_SCHEME,
48 | port=ELASTIC_PORT,
49 | verify_certs=False,
50 | )
51 | return es_inst
52 |
53 |
54 | def format_date(date):
55 | date = date.replace("/", "-")
56 | p = re.compile("(\\d{4})-W(\\d{2})")
57 | weekMatches = p.match(date)
58 | if weekMatches is not None:
59 | groups = weekMatches.groups()
60 | date_start = datetime.strptime(
61 | f"{groups[0]}-W{int(groups[1]) - 1}-1", "%Y-W%W-%w"
62 | ).date()
63 | date_end = date_start + timedelta(days=6)  # end of the ISO week (start + 6 days)
64 | return date_start.strftime("%Y-%m-%d"), date_end.strftime("%Y-%m-%d")
65 | p = re.compile("(\\d{2})-(\\d{2})-(\\d{4})")
66 | frDateMatches = p.match(date)
67 | if frDateMatches is not None:
68 | groups = frDateMatches.groups()
69 | date = f"{groups[2]}-{groups[1]}-{groups[0]}"
70 | return date, date
71 | p = re.compile("(\\d{4})-(\\d{2})-(\\d{2})")
72 | dateMatches = p.match(date)
73 | if dateMatches is not None:
74 | return date, date
75 | return None, None
76 |
77 |
78 | def format_location(location_name):
79 | if location_name in locations_cache:
80 | return locations_cache[location_name]
81 | geolocator = Nominatim(user_agent="pandemic-knowledge")
82 | location = geolocator.geocode(
83 | extra_locations[location_name]
84 | if location_name in extra_locations
85 | else location_name,
86 | addressdetails=True,
87 | )
88 |
89 | if location is None or "address" not in location.raw or "country_code" not in location.raw["address"]:
90 | logger.info(f"Could not geolocate {location_name}")
91 | locations_cache[location_name] = None
92 | return None
93 |
94 | iso2 = location.raw["address"]["country_code"].upper()
95 |
96 | iso3 = countries.get(iso2).alpha3
97 |
98 | locations_cache[location_name] = (
99 | {"lat": location.latitude, "lon": location.longitude},
100 | iso2,
101 | )
102 |
103 | return locations_cache[location_name]
104 |
105 |
106 | def format_row(row, columns_indexes, filename, bucket_name):
107 | date_start, date_end = format_date(row[columns_indexes["date"]])
108 | location = format_location(row[columns_indexes["location"]])
109 | if location is None:
110 | return None
111 | max_population = (
112 | int(float(row[columns_indexes["population"]]))
113 | if row[columns_indexes["population"]] != ""
114 | else 0
115 | )
116 | cases = (
117 | int(float(row[columns_indexes["cases"]]))
118 | if row[columns_indexes["cases"]] != ""
119 | else 0
120 | )
121 | percentage = (
122 | float(cases) / float(max_population) * 100 if max_population != 0 else None
123 | )
124 |
125 | formatted = {
126 | "date_start": date_start,
127 | "date_end": date_end,
128 | "location": location[0],
129 | "filename": filename,
130 | "iso_code2": location[1],
131 | "max_population": max_population,
132 | "percentage": percentage,
133 | }
134 |
135 | formatted["vaccinated" if bucket_name == "vaccination" else "confirmed"] = cases
136 |
137 | return formatted
138 |
139 |
140 | def inject_rows_to_es(rows, bucket_name):
141 | es_inst = get_es_instance()
142 |
143 | logger.info("Injecting {} rows in Elasticsearch".format(len(rows)))
144 |
145 | actions = [
146 | {"_index": bucket_name, "_id": uuid.uuid4(), "_source": row} for row in rows
147 | ]
148 |
149 | helpers.bulk(es_inst, actions)
150 |
151 |
152 | def parse_file(minio_client, obj):
153 | csv_file_path = "/tmp/" + str(uuid.uuid4())
154 | minio_client.fget_object(obj.bucket_name, obj.object_name, csv_file_path)
155 | with open(csv_file_path, "r", newline="") as fp:
156 | char_read = 100000 if os.path.getsize(csv_file_path) > 100000 else None
157 |
158 | try:
159 | dialect = clevercsv.Sniffer().sniff(fp.read(char_read), verbose=True)
160 | except Exception as e:
161 | logger.error(e)
162 | return []
163 |
164 | fp.seek(0)
165 | reader = clevercsv.reader(fp, dialect)
166 | headers = next(reader)
167 | columns_indexes = {}
168 | malformed_csv = False
169 | for name in columns_allowed:
170 | for header in headers:
171 | index = (
172 | headers.index(header) if header in columns_allowed[name] else None
173 | )
174 | if index is None:
175 | continue
176 | columns_indexes[name] = index
177 | break
178 | if name not in columns_indexes:
179 | logger.error(
180 | "Header {} cannot be found in csv {}".format(name, obj.object_name)
181 | )
182 | malformed_csv = True
183 | continue
184 | if malformed_csv is True:
185 | return []
186 | for row in tqdm(reader, unit="entry"):
187 | row = format_row(row, columns_indexes, obj.object_name, obj.bucket_name)
188 | if row is not None:
189 | yield row
190 | return []
191 |
192 |
193 | class ParseFiles(Task):
194 | def run(self, bucket_name):
195 | minio_client = Minio(
196 | MINIO_ENDPOINT,
197 | access_key=MINIO_ACCESS_KEY,
198 | secret_key=MINIO_SECRET_KEY,
199 | secure=MINIO_SCHEME == "https",
200 | )
201 | logger.info("Parsing files for bucket {}".format(bucket_name))
202 | if not minio_client.bucket_exists(bucket_name):
203 | logger.error("Bucket {} does not exist".format(bucket_name))
204 | return
205 | objects = minio_client.list_objects(bucket_name)
206 | for obj in objects:
207 | to_inject = []
208 | for row in parse_file(minio_client, obj):
209 | to_inject.append(row)
210 | if len(to_inject) >= MAX_ES_ROW_INJECT:
211 | inject_rows_to_es(to_inject, bucket_name)
212 | to_inject = []
213 | if len(to_inject) > 0:
214 | inject_rows_to_es(to_inject, bucket_name)
215 |
216 |
217 | class GenerateEsMapping(Task):
218 | def __init__(self, index_name, **kwargs):
219 | self.index_name = index_name
220 | super().__init__(**kwargs)
221 |
222 | def run(self):
223 | index_name = self.index_name
224 | es_inst = get_es_instance()
225 |
226 | logger.info("Generating mapping for index {}".format(index_name))
227 |
228 | es_inst.indices.delete(index=index_name, ignore=[400, 404])
229 |
230 | response = es_inst.indices.create(
231 | index=index_name, body=mapping, ignore=400 # ignore 400 already exists code
232 | )
233 |
234 | if "acknowledged" in response:
235 | if response["acknowledged"]:
236 | logger.info(
237 | "INDEX MAPPING SUCCESS FOR INDEX: {}".format(response["index"])
238 | )
239 | elif "error" in response:
240 | logger.error(response["error"]["root_cause"])
241 | logger.error("Error type: {}".format(response["error"]["type"]))
242 | raise Exception("Unable to create index mapping")
243 |
244 |
245 | schedule = IntervalSchedule(
246 | interval=timedelta(hours=24), start_date=datetime.utcnow() + timedelta(seconds=1)
247 | )
248 |
249 | with Flow("Parse and insert csv files", schedule) as flow:
250 | for bucket in ["vaccination", "contamination"]:
251 | flow.set_dependencies(
252 | task=ParseFiles(),
253 | upstream_tasks=[GenerateEsMapping(bucket)],
254 | keyword_tasks=dict(bucket_name=bucket),
255 | )
256 |
257 | try:
258 | client = Client()
259 | client.create_project(project_name="pandemic-knowledge")
260 | except prefect.utilities.exceptions.ClientError as e:
261 | logger.info("Project already exists")
262 |
263 | flow.register(project_name="pandemic-knowledge", labels=["development"])
--------------------------------------------------------------------------------
/illustrations/france_live_status.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flavienbwk/Pandemic-Knowledge/e9082be16d743aac49c514034626721a608ede08/illustrations/france_live_status.png
--------------------------------------------------------------------------------
/illustrations/latest_news.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flavienbwk/Pandemic-Knowledge/e9082be16d743aac49c514034626721a608ede08/illustrations/latest_news.png
--------------------------------------------------------------------------------
/illustrations/live_dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flavienbwk/Pandemic-Knowledge/e9082be16d743aac49c514034626721a608ede08/illustrations/live_dashboard.png
--------------------------------------------------------------------------------
/illustrations/news_web_app.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flavienbwk/Pandemic-Knowledge/e9082be16d743aac49c514034626721a608ede08/illustrations/news_web_app.png
--------------------------------------------------------------------------------
/illustrations/vaccination_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flavienbwk/Pandemic-Knowledge/e9082be16d743aac49c514034626721a608ede08/illustrations/vaccination_map.png
--------------------------------------------------------------------------------
/insert.docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.7"
2 |
3 | services:
4 |
5 | insert_france_virtests:
6 | build: ./flow
7 | command: python3 /usr/app/insert_france_virtests.py
8 | volumes:
9 | - /srv/docker/prefect/flows:/root/.prefect/flows
10 | - "./flow/scripts:/usr/app:ro"
11 | - type: bind
12 | source: ./flow/config.toml
13 | target: /root/.prefect/config.toml
14 | read_only: true
15 | env_file:
16 | - .env
17 | environment:
18 | MINIO_SCHEME: ${MINIO_SCHEME}
19 | MINIO_ENDPOINT: ${MINIO_ENDPOINT}
20 | MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY}
21 | MINIO_SECRET_KEY: ${MINIO_SECRET_KEY}
22 | MAX_ES_ROW_INJECT: ${MAX_ES_ROW_INJECT}
23 | ELASTIC_SCHEME: ${ELASTIC_SCHEME}
24 | ELASTIC_PORT: ${ELASTIC_PORT}
25 | ELASTIC_ENDPOINT: ${ELASTIC_ENDPOINT}
26 | ELASTIC_USER: ${ELASTIC_USER}
27 | ELASTIC_PWD: ${ELASTICSEARCH_PWD}
28 |
29 | insert_france:
30 | build: ./flow
31 | command: python3 /usr/app/insert_france.py
32 | volumes:
33 | - /srv/docker/prefect/flows:/root/.prefect/flows
34 | - "./flow/scripts:/usr/app:ro"
35 | - type: bind
36 | source: ./flow/config.toml
37 | target: /root/.prefect/config.toml
38 | read_only: true
39 | env_file:
40 | - .env
41 | environment:
42 | MINIO_SCHEME: ${MINIO_SCHEME}
43 | MINIO_ENDPOINT: ${MINIO_ENDPOINT}
44 | MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY}
45 | MINIO_SECRET_KEY: ${MINIO_SECRET_KEY}
46 | MAX_ES_ROW_INJECT: ${MAX_ES_ROW_INJECT}
47 | ELASTIC_SCHEME: ${ELASTIC_SCHEME}
48 | ELASTIC_PORT: ${ELASTIC_PORT}
49 | ELASTIC_ENDPOINT: ${ELASTIC_ENDPOINT}
50 | ELASTIC_USER: ${ELASTIC_USER}
51 | ELASTIC_PWD: ${ELASTICSEARCH_PWD}
52 |
53 | insert_owid:
54 | build: ./flow
55 | command: python3 /usr/app/insert_owid.py
56 | volumes:
57 | - /srv/docker/prefect/flows:/root/.prefect/flows
58 | - "./flow/scripts:/usr/app:ro"
59 | - type: bind
60 | source: ./flow/config.toml
61 | target: /root/.prefect/config.toml
62 | read_only: true
63 | env_file:
64 | - .env
65 | environment:
66 | MINIO_SCHEME: ${MINIO_SCHEME}
67 | MINIO_ENDPOINT: ${MINIO_ENDPOINT}
68 | MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY}
69 | MINIO_SECRET_KEY: ${MINIO_SECRET_KEY}
70 | MAX_ES_ROW_INJECT: ${MAX_ES_ROW_INJECT}
71 | ELASTIC_SCHEME: ${ELASTIC_SCHEME}
72 | ELASTIC_PORT: ${ELASTIC_PORT}
73 | ELASTIC_ENDPOINT: ${ELASTIC_ENDPOINT}
74 | ELASTIC_USER: ${ELASTIC_USER}
75 | ELASTIC_PWD: ${ELASTICSEARCH_PWD}
76 |
77 | parse_insert:
78 | build: ./flow
79 | command: python3 /usr/app/parse_insert.py
80 | volumes:
81 | - /srv/docker/prefect/flows:/root/.prefect/flows
82 | - "./flow/scripts:/usr/app:ro"
83 | - type: bind
84 | source: ./flow/config.toml
85 | target: /root/.prefect/config.toml
86 | read_only: true
87 | env_file:
88 | - .env
89 | environment:
90 | MINIO_SCHEME: ${MINIO_SCHEME}
91 | MINIO_ENDPOINT: ${MINIO_ENDPOINT}
92 | MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY}
93 | MINIO_SECRET_KEY: ${MINIO_SECRET_KEY}
94 | MAX_ES_ROW_INJECT: ${MAX_ES_ROW_INJECT}
95 | ELASTIC_SCHEME: ${ELASTIC_SCHEME}
96 | ELASTIC_PORT: ${ELASTIC_PORT}
97 | ELASTIC_ENDPOINT: ${ELASTIC_ENDPOINT}
98 | ELASTIC_USER: ${ELASTIC_USER}
99 | ELASTIC_PWD: ${ELASTICSEARCH_PWD}
100 |
--------------------------------------------------------------------------------
/instances/pem.yml:
--------------------------------------------------------------------------------
1 | instances:
2 | - name: es01
3 | dns:
4 | - es01
5 | - localhost
6 | ip:
7 | - 127.0.0.1
8 |
9 | - name: es02
10 | dns:
11 | - es02
12 | - localhost
13 | ip:
14 | - 127.0.0.1
15 |
16 | - name: es03
17 | dns:
18 | - es03
19 | - localhost
20 | ip:
21 | - 127.0.0.1
22 |
23 | - name: kibana
24 | dns:
25 | - kibana
26 | - localhost
27 | ip:
28 | - 127.0.0.1
29 |
--------------------------------------------------------------------------------
/instances/pkcs_12.yml:
--------------------------------------------------------------------------------
1 | # For the moment, Enterprise Search only accepts a PKCS#12 keystore,
2 | # so we are forced to create a dedicated certutil file for it.
3 |
4 | instances:
5 |
6 | - name: enterprise_search
7 | dns:
8 | - enterprise_search
9 | - localhost
10 | ip:
11 | - 127.0.0.1
12 |
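13 | # Rough sketch of how a certutil instances file like this one is typically consumed
14 | # (exact paths and flags depend on the certificate-generation setup of this repo):
15 | #   bin/elasticsearch-certutil cert --silent --in instances/pkcs_12.yml \
16 | #     --out /certs/bundle.zip --ca-cert ca/ca.crt --ca-key ca/ca.key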
--------------------------------------------------------------------------------
/kibana.yml:
--------------------------------------------------------------------------------
1 | server.name: kibana
2 | server.host: "0.0.0.0"
3 | elasticsearch.hosts: [ "https://es01:9200" ]
4 | telemetry.enabled: true
5 |
6 | xpack.monitoring.ui.container.elasticsearch.enabled: "true"
7 | elasticsearch.username: elastic
8 | elasticsearch.password: elastic
9 |
10 | # Encrypt traffic between the browser and Kibana
11 | server.ssl.enabled: "true"
12 | server.ssl.certificate: "/usr/share/elasticsearch/config/certificates/kibana/kibana.crt"
13 | server.ssl.key: "/usr/share/elasticsearch/config/certificates/kibana/kibana.key"
14 |
15 | # Encrypt traffic between Kibana and Elasticsearch
16 | elasticsearch.ssl.certificateAuthorities: ["/usr/share/elasticsearch/config/certificates/ca/ca.crt"]
17 |
18 | # Enterprise Search
19 | enterpriseSearch.host: 'http://enterprise_search:3002'
20 |
--------------------------------------------------------------------------------
/news_app/app/.gitignore:
--------------------------------------------------------------------------------
1 | app/node_modules/
--------------------------------------------------------------------------------
/news_app/app/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:12-alpine
2 |
3 | WORKDIR '/app'
4 |
5 | COPY entrypoint.sh /entrypoint.sh
6 | ENTRYPOINT [ "/entrypoint.sh" ]
--------------------------------------------------------------------------------
/news_app/app/app/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "pandemic-knowledge",
3 | "version": "1.0.0",
4 | "homepage": ".",
5 | "dependencies": {
6 | "@trendmicro/react-sidenav": "0.5.0",
7 | "bootstrap": "^4.3.1",
8 | "json-loader": "^0.5.7",
9 | "react": "^16.11.0",
10 | "react-bootstrap": "^1.0.0-beta.14",
11 | "react-bootstrap-icons": "1.0.1-alpha3",
12 | "react-click-outsider": "^1.1.1",
13 | "react-cookie": "4.0.3",
14 | "react-dom": "^16.11.0",
15 | "react-highlight-words": "^0.17.0",
16 | "react-loader-spinner": "3.1.14",
17 | "react-notifications": "1.6.0",
18 | "react-router-dom": "^5.1.2",
19 | "react-scripts": "3.2.0",
20 | "searchkit": "^2.4.4",
21 | "styled-components": "^4.4.0"
22 | },
23 | "scripts": {
24 | "start": "/app/node_modules/react-scripts/bin/react-scripts.js start",
25 | "build": "/app/node_modules/react-scripts/bin/react-scripts.js build",
26 | "test": "/app/node_modules/react-scripts/bin/react-scripts.js test",
27 | "eject": "/app/node_modules/react-scripts/bin/react-scripts.js eject"
28 | },
29 | "eslintConfig": {
30 | "extends": "react-app"
31 | },
32 | "browserslist": {
33 | "production": [
34 | ">0.2%",
35 | "not dead",
36 | "not op_mini all"
37 | ],
38 | "development": [
39 | "last 1 chrome version",
40 | "last 1 firefox version",
41 | "last 1 safari version"
42 | ]
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/news_app/app/app/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | Pandemic Knowledge
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/news_app/app/app/public/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flavienbwk/Pandemic-Knowledge/e9082be16d743aac49c514034626721a608ede08/news_app/app/app/public/logo.png
--------------------------------------------------------------------------------
/news_app/app/app/public/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "short_name": "PK",
3 | "name": "Pandemic Knowledge",
4 | "icons": [
5 | {
6 | "src": "logo.png",
7 | "sizes": "64x64 32x32 24x24 16x16",
8 | "type": "image/x-icon"
9 | }
10 | ],
11 | "start_url": ".",
12 | "display": "standalone",
13 | "theme_color": "#000000",
14 | "background_color": "#ffffff"
15 | }
16 |
--------------------------------------------------------------------------------
/news_app/app/app/public/robots.txt:
--------------------------------------------------------------------------------
1 | # https://www.robotstxt.org/robotstxt.html
2 | User-agent: *
3 |
--------------------------------------------------------------------------------
/news_app/app/app/src/About.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from 'react'
2 | import packageJson from '../package.json';
3 |
4 | export class About extends Component {
5 |
6 | render() {
7 | return (
8 |
9 |
About
10 |
A fully-featured multi-source data pipeline for continuously extracting knowledge from COVID-19 data.
11 |
If you find an issue or have a suggestion, please open an issue on Github.
12 |
13 |
Version {packageJson["version"]}
14 |
15 | )
16 | }
17 |
18 | componentDidMount() {
19 | document.title = "About - Pandemic Knowledge";
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/news_app/app/app/src/App.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from 'react'
2 | import { HashRouter as Router, Route, Switch } from 'react-router-dom'
3 | import { NotificationContainer } from 'react-notifications'
4 | import { NavigationBar } from './NavigationBar'
5 | import { Layout } from './Layout'
6 | import Home from './Home'
7 | import { About } from './About'
8 | import packageJson from '../package.json'
9 |
10 | export class App extends Component {
11 |
12 | /**
13 | * Child components may trigger this parent event to
14 | * inform other routes ( for example),
15 | * that authentication information have been updated.
16 | *
17 | * This allows to show the "Login" or "Logout" button
18 | * depending on user's authentication status.
19 | */
20 | onAuthUpdate = () => {}
21 |
22 | render() {
23 | return (
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 | )
37 | }
38 | }
39 |
40 | export default App
41 |
--------------------------------------------------------------------------------
/news_app/app/app/src/App.test.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import App from './App';
4 |
5 | it('renders without crashing', () => {
6 | const div = document.createElement('div');
7 | ReactDOM.render(<App />, div);
8 | ReactDOM.unmountComponentAtNode(div);
9 | });
10 |
11 |
--------------------------------------------------------------------------------
/news_app/app/app/src/Home.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from 'react';
2 | import { Container, Row, Col } from 'react-bootstrap';
3 | import styled from 'styled-components';
4 | import SearchUI from './SearchUI';
5 |
6 | const Styles = styled.div`
7 | .paddind-bottom {
8 | padding-bottom: 16px;
9 | }
10 | `;
11 |
12 | class Home extends Component {
13 | render() {
14 | return (
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | );
25 | }
26 |
27 | componentDidMount() {
28 | document.title = 'Search - Pandemic Knowledge';
29 | }
30 | }
31 |
32 | export default Home;
33 |
--------------------------------------------------------------------------------
/news_app/app/app/src/Layout.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import Container from 'react-bootstrap/Container';
3 |
4 | export const Layout = (props) => (
5 | <Container>
6 | {props.children}
7 | </Container>
8 | )
--------------------------------------------------------------------------------
/news_app/app/app/src/NavigationBar.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from 'react'
2 | import { Nav, Navbar } from 'react-bootstrap'
3 | import styled from 'styled-components'
4 | import { Link } from 'react-router-dom'
5 | import packageJson from '../package.json'
6 |
7 | const Styles = styled.div`
8 | .navbar {
9 | background-color: #222;
10 | }
11 |
12 | .navbar-brand, .navbar-nav .nav-link {
13 | color: #bbb;
14 |
15 | &:hover {
16 | color: white;
17 | }
18 | }
19 |
20 | .brand-image {
21 | max-width: 64px;
22 | height: 30px;
23 | padding-right: 16px;
24 | }
25 | `;
26 |
27 | export class NavigationBar extends Component {
28 |
29 | render() {
30 | return (
31 |
32 |
33 |
34 |
39 | {'Pandemic Knowledge'}
40 |
41 |
42 |
43 |
46 |
47 |
48 |
49 | )
50 | }
51 |
52 | }
53 |
54 | export default NavigationBar;
--------------------------------------------------------------------------------
/news_app/app/app/src/SearchUI.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from 'react';
2 | import { Row, Col, Card } from 'react-bootstrap';
3 | import { SearchkitManager, SearchkitProvider, SearchBox, Hits } from 'searchkit';
4 | import Highlighter from 'react-highlight-words';
5 |
6 | const search_kit = new SearchkitManager('https://172.17.0.1:9200/news_*/', {
7 | basicAuth: 'elastic:elastic'
8 | });
9 |
10 | export class SearchUI extends Component {
11 | state = {
12 | queryValue: ''
13 | };
14 |
15 | queryBuilder = (queryString) => {
16 | this.setState({ queryValue: queryString });
17 | return {
18 | bool: {
19 | must: [],
20 | filter: [
21 | {
22 | multi_match: {
23 | type: 'best_fields',
24 | query: queryString,
25 | lenient: true
26 | }
27 | }
28 | ],
29 | should: [],
30 | must_not: []
31 | }
32 | };
33 | };
34 |
35 | render() {
36 | return (
37 |
38 |
39 |
40 |
41 |
48 |
49 |
50 |
51 | } />
52 |
53 |
54 |
55 | );
56 | }
57 | }
58 |
59 | class News extends Component {
60 | render() {
61 | return (
62 | {
64 | window.open(this.props.result._source.link)
65 | }}
66 | style={{
67 | cursor: "pointer"
68 | }}
69 | title={this.props.result._source.link}
70 | >
71 | {
}
72 |
73 |
74 |
75 |
76 |
81 |
82 | {
83 | (this.props.result._source.date)
84 | ?
85 |
86 | {new Date(this.props.result._source.date).toLocaleDateString('fr-FR')}
87 |
88 | : <>>
89 | }
90 |
91 |
92 |
97 |
98 |
99 |
100 |
101 |
102 | );
103 | }
104 | }
105 |
106 | export default SearchUI;
107 |
--------------------------------------------------------------------------------
/news_app/app/app/src/index.css:
--------------------------------------------------------------------------------
1 | body {
2 | margin: 0;
3 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif;
4 | -webkit-font-smoothing: antialiased;
5 | -moz-osx-font-smoothing: grayscale;
6 | }
7 |
8 | code {
9 | font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New", monospace;
10 | }
--------------------------------------------------------------------------------
/news_app/app/app/src/index.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom';
3 | import './index.css';
4 | import App from './App';
5 | import * as serviceWorker from './serviceWorker';
6 |
7 | ReactDOM.render(<App />, document.getElementById('root'));
8 |
9 | // If you want your app to work offline and load faster, you can change
10 | // unregister() to register() below. Note this comes with some pitfalls.
11 | // Learn more about service workers: https://bit.ly/CRA-PWA
12 | serviceWorker.unregister();
13 |
--------------------------------------------------------------------------------
/news_app/app/app/src/serviceWorker.js:
--------------------------------------------------------------------------------
1 | // This optional code is used to register a service worker.
2 | // register() is not called by default.
3 |
4 | // This lets the app load faster on subsequent visits in production, and gives
5 | // it offline capabilities. However, it also means that developers (and users)
6 | // will only see deployed updates on subsequent visits to a page, after all the
7 | // existing tabs open on the page have been closed, since previously cached
8 | // resources are updated in the background.
9 |
10 | // To learn more about the benefits of this model and instructions on how to
11 | // opt-in, read https://bit.ly/CRA-PWA
12 |
13 | const isLocalhost = Boolean(
14 | window.location.hostname === 'localhost' ||
15 | // [::1] is the IPv6 localhost address.
16 | window.location.hostname === '[::1]' ||
17 | // 127.0.0.1/8 is considered localhost for IPv4.
18 | window.location.hostname.match(
19 | /^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/
20 | )
21 | );
22 |
23 | export function register(config) {
24 | if (process.env.NODE_ENV === 'production' && 'serviceWorker' in navigator) {
25 | // The URL constructor is available in all browsers that support SW.
26 | const publicUrl = new URL(process.env.PUBLIC_URL, window.location.href);
27 | if (publicUrl.origin !== window.location.origin) {
28 | // Our service worker won't work if PUBLIC_URL is on a different origin
29 | // from what our page is served on. This might happen if a CDN is used to
30 | // serve assets; see https://github.com/facebook/create-react-app/issues/2374
31 | return;
32 | }
33 |
34 | window.addEventListener('load', () => {
35 | const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`;
36 |
37 | if (isLocalhost) {
38 | // This is running on localhost. Let's check if a service worker still exists or not.
39 | checkValidServiceWorker(swUrl, config);
40 |
41 | // Add some additional logging to localhost, pointing developers to the
42 | // service worker/PWA documentation.
43 | navigator.serviceWorker.ready.then(() => {
44 | console.log(
45 | 'This web app is being served cache-first by a service ' +
46 | 'worker. To learn more, visit https://bit.ly/CRA-PWA'
47 | );
48 | });
49 | } else {
50 | // Is not localhost. Just register service worker
51 | registerValidSW(swUrl, config);
52 | }
53 | });
54 | }
55 | }
56 |
57 | function registerValidSW(swUrl, config) {
58 | navigator.serviceWorker
59 | .register(swUrl)
60 | .then(registration => {
61 | registration.onupdatefound = () => {
62 | const installingWorker = registration.installing;
63 | if (installingWorker == null) {
64 | return;
65 | }
66 | installingWorker.onstatechange = () => {
67 | if (installingWorker.state === 'installed') {
68 | if (navigator.serviceWorker.controller) {
69 | // At this point, the updated precached content has been fetched,
70 | // but the previous service worker will still serve the older
71 | // content until all client tabs are closed.
72 | console.log(
73 | 'New content is available and will be used when all ' +
74 | 'tabs for this page are closed. See https://bit.ly/CRA-PWA.'
75 | );
76 |
77 | // Execute callback
78 | if (config && config.onUpdate) {
79 | config.onUpdate(registration);
80 | }
81 | } else {
82 | // At this point, everything has been precached.
83 | // It's the perfect time to display a
84 | // "Content is cached for offline use." message.
85 | console.log('Content is cached for offline use.');
86 |
87 | // Execute callback
88 | if (config && config.onSuccess) {
89 | config.onSuccess(registration);
90 | }
91 | }
92 | }
93 | };
94 | };
95 | })
96 | .catch(error => {
97 | console.error('Error during service worker registration:', error);
98 | });
99 | }
100 |
101 | function checkValidServiceWorker(swUrl, config) {
102 | // Check if the service worker can be found. If it can't reload the page.
103 | fetch(swUrl)
104 | .then(response => {
105 | // Ensure service worker exists, and that we really are getting a JS file.
106 | const contentType = response.headers.get('content-type');
107 | if (
108 | response.status === 404 ||
109 | (contentType != null && contentType.indexOf('javascript') === -1)
110 | ) {
111 | // No service worker found. Probably a different app. Reload the page.
112 | navigator.serviceWorker.ready.then(registration => {
113 | registration.unregister().then(() => {
114 | window.location.reload();
115 | });
116 | });
117 | } else {
118 | // Service worker found. Proceed as normal.
119 | registerValidSW(swUrl, config);
120 | }
121 | })
122 | .catch(() => {
123 | console.log(
124 | 'No internet connection found. App is running in offline mode.'
125 | );
126 | });
127 | }
128 |
129 | export function unregister() {
130 | if ('serviceWorker' in navigator) {
131 | navigator.serviceWorker.ready.then(registration => {
132 | registration.unregister();
133 | });
134 | }
135 | }
136 |
--------------------------------------------------------------------------------
/news_app/app/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | npm install
4 | npm run start
5 |
--------------------------------------------------------------------------------
/news_app/app/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 | "requires": true,
3 | "lockfileVersion": 1,
4 | "dependencies": {
5 | "@babel/runtime": {
6 | "version": "7.13.17",
7 | "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.13.17.tgz",
8 | "integrity": "sha512-NCdgJEelPTSh+FEFylhnP1ylq848l1z9t9N0j1Lfbcw0+KXGjsTvUmkxy+voLLXB5SOKMbLLx4jxYliGrYQseA==",
9 | "requires": {
10 | "regenerator-runtime": "^0.13.4"
11 | }
12 | },
13 | "@elastic/search-ui": {
14 | "version": "1.5.1",
15 | "resolved": "https://registry.npmjs.org/@elastic/search-ui/-/search-ui-1.5.1.tgz",
16 | "integrity": "sha512-ssfvX1q76X1UwqYASWtBni4PZ+3SYk1PvHmOjpVf9BYai1OqZLGVaj8Sw+cE1ia56zl5In7viCfciC+CP31ovA==",
17 | "requires": {
18 | "date-fns": "^1.30.1",
19 | "deep-equal": "^1.0.1",
20 | "history": "^4.9.0",
21 | "qs": "^6.7.0"
22 | }
23 | },
24 | "call-bind": {
25 | "version": "1.0.2",
26 | "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz",
27 | "integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==",
28 | "requires": {
29 | "function-bind": "^1.1.1",
30 | "get-intrinsic": "^1.0.2"
31 | }
32 | },
33 | "date-fns": {
34 | "version": "1.30.1",
35 | "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-1.30.1.tgz",
36 | "integrity": "sha512-hBSVCvSmWC+QypYObzwGOd9wqdDpOt+0wl0KbU+R+uuZBS1jN8VsD1ss3irQDknRj5NvxiTF6oj/nDRnN/UQNw=="
37 | },
38 | "deep-equal": {
39 | "version": "1.1.1",
40 | "resolved": "https://registry.npmjs.org/deep-equal/-/deep-equal-1.1.1.tgz",
41 | "integrity": "sha512-yd9c5AdiqVcR+JjcwUQb9DkhJc8ngNr0MahEBGvDiJw8puWab2yZlh+nkasOnZP+EGTAP6rRp2JzJhJZzvNF8g==",
42 | "requires": {
43 | "is-arguments": "^1.0.4",
44 | "is-date-object": "^1.0.1",
45 | "is-regex": "^1.0.4",
46 | "object-is": "^1.0.1",
47 | "object-keys": "^1.1.1",
48 | "regexp.prototype.flags": "^1.2.0"
49 | }
50 | },
51 | "define-properties": {
52 | "version": "1.1.3",
53 | "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz",
54 | "integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==",
55 | "requires": {
56 | "object-keys": "^1.0.12"
57 | }
58 | },
59 | "function-bind": {
60 | "version": "1.1.1",
61 | "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz",
62 | "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A=="
63 | },
64 | "get-intrinsic": {
65 | "version": "1.1.1",
66 | "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.1.1.tgz",
67 | "integrity": "sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q==",
68 | "requires": {
69 | "function-bind": "^1.1.1",
70 | "has": "^1.0.3",
71 | "has-symbols": "^1.0.1"
72 | }
73 | },
74 | "has": {
75 | "version": "1.0.3",
76 | "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz",
77 | "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==",
78 | "requires": {
79 | "function-bind": "^1.1.1"
80 | }
81 | },
82 | "has-symbols": {
83 | "version": "1.0.2",
84 | "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.2.tgz",
85 | "integrity": "sha512-chXa79rL/UC2KlX17jo3vRGz0azaWEx5tGqZg5pO3NUyEJVB17dMruQlzCCOfUvElghKcm5194+BCRvi2Rv/Gw=="
86 | },
87 | "history": {
88 | "version": "4.10.1",
89 | "resolved": "https://registry.npmjs.org/history/-/history-4.10.1.tgz",
90 | "integrity": "sha512-36nwAD620w12kuzPAsyINPWJqlNbij+hpK1k9XRloDtym8mxzGYl2c17LnV6IAGB2Dmg4tEa7G7DlawS0+qjew==",
91 | "requires": {
92 | "@babel/runtime": "^7.1.2",
93 | "loose-envify": "^1.2.0",
94 | "resolve-pathname": "^3.0.0",
95 | "tiny-invariant": "^1.0.2",
96 | "tiny-warning": "^1.0.0",
97 | "value-equal": "^1.0.1"
98 | }
99 | },
100 | "is-arguments": {
101 | "version": "1.1.0",
102 | "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.1.0.tgz",
103 | "integrity": "sha512-1Ij4lOMPl/xB5kBDn7I+b2ttPMKa8szhEIrXDuXQD/oe3HJLTLhqhgGspwgyGd6MOywBUqVvYicF72lkgDnIHg==",
104 | "requires": {
105 | "call-bind": "^1.0.0"
106 | }
107 | },
108 | "is-date-object": {
109 | "version": "1.0.2",
110 | "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.2.tgz",
111 | "integrity": "sha512-USlDT524woQ08aoZFzh3/Z6ch9Y/EWXEHQ/AaRN0SkKq4t2Jw2R2339tSXmwuVoY7LLlBCbOIlx2myP/L5zk0g=="
112 | },
113 | "is-regex": {
114 | "version": "1.1.2",
115 | "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.2.tgz",
116 | "integrity": "sha512-axvdhb5pdhEVThqJzYXwMlVuZwC+FF2DpcOhTS+y/8jVq4trxyPgfcwIxIKiyeuLlSQYKkmUaPQJ8ZE4yNKXDg==",
117 | "requires": {
118 | "call-bind": "^1.0.2",
119 | "has-symbols": "^1.0.1"
120 | }
121 | },
122 | "js-tokens": {
123 | "version": "4.0.0",
124 | "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
125 | "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="
126 | },
127 | "loose-envify": {
128 | "version": "1.4.0",
129 | "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
130 | "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
131 | "requires": {
132 | "js-tokens": "^3.0.0 || ^4.0.0"
133 | }
134 | },
135 | "object-inspect": {
136 | "version": "1.10.2",
137 | "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.10.2.tgz",
138 | "integrity": "sha512-gz58rdPpadwztRrPjZE9DZLOABUpTGdcANUgOwBFO1C+HZZhePoP83M65WGDmbpwFYJSWqavbl4SgDn4k8RYTA=="
139 | },
140 | "object-is": {
141 | "version": "1.1.5",
142 | "resolved": "https://registry.npmjs.org/object-is/-/object-is-1.1.5.tgz",
143 | "integrity": "sha512-3cyDsyHgtmi7I7DfSSI2LDp6SK2lwvtbg0p0R1e0RvTqF5ceGx+K2dfSjm1bKDMVCFEDAQvy+o8c6a7VujOddw==",
144 | "requires": {
145 | "call-bind": "^1.0.2",
146 | "define-properties": "^1.1.3"
147 | }
148 | },
149 | "object-keys": {
150 | "version": "1.1.1",
151 | "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
152 | "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA=="
153 | },
154 | "qs": {
155 | "version": "6.10.1",
156 | "resolved": "https://registry.npmjs.org/qs/-/qs-6.10.1.tgz",
157 | "integrity": "sha512-M528Hph6wsSVOBiYUnGf+K/7w0hNshs/duGsNXPUCLH5XAqjEtiPGwNONLV0tBH8NoGb0mvD5JubnUTrujKDTg==",
158 | "requires": {
159 | "side-channel": "^1.0.4"
160 | }
161 | },
162 | "regenerator-runtime": {
163 | "version": "0.13.7",
164 | "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.7.tgz",
165 | "integrity": "sha512-a54FxoJDIr27pgf7IgeQGxmqUNYrcV338lf/6gH456HZ/PhX+5BcwHXG9ajESmwe6WRO0tAzRUrRmNONWgkrew=="
166 | },
167 | "regexp.prototype.flags": {
168 | "version": "1.3.1",
169 | "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.3.1.tgz",
170 | "integrity": "sha512-JiBdRBq91WlY7uRJ0ds7R+dU02i6LKi8r3BuQhNXn+kmeLN+EfHhfjqMRis1zJxnlu88hq/4dx0P2OP3APRTOA==",
171 | "requires": {
172 | "call-bind": "^1.0.2",
173 | "define-properties": "^1.1.3"
174 | }
175 | },
176 | "resolve-pathname": {
177 | "version": "3.0.0",
178 | "resolved": "https://registry.npmjs.org/resolve-pathname/-/resolve-pathname-3.0.0.tgz",
179 | "integrity": "sha512-C7rARubxI8bXFNB/hqcp/4iUeIXJhJZvFPFPiSPRnhU5UPxzMFIl+2E6yY6c4k9giDJAhtV+enfA+G89N6Csng=="
180 | },
181 | "side-channel": {
182 | "version": "1.0.4",
183 | "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz",
184 | "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==",
185 | "requires": {
186 | "call-bind": "^1.0.0",
187 | "get-intrinsic": "^1.0.2",
188 | "object-inspect": "^1.9.0"
189 | }
190 | },
191 | "tiny-invariant": {
192 | "version": "1.1.0",
193 | "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.1.0.tgz",
194 | "integrity": "sha512-ytxQvrb1cPc9WBEI/HSeYYoGD0kWnGEOR8RY6KomWLBVhqz0RgTwVO9dLrGz7dC+nN9llyI7OKAgRq8Vq4ZBSw=="
195 | },
196 | "tiny-warning": {
197 | "version": "1.0.3",
198 | "resolved": "https://registry.npmjs.org/tiny-warning/-/tiny-warning-1.0.3.tgz",
199 | "integrity": "sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA=="
200 | },
201 | "value-equal": {
202 | "version": "1.0.1",
203 | "resolved": "https://registry.npmjs.org/value-equal/-/value-equal-1.0.1.tgz",
204 | "integrity": "sha512-NOJ6JZCAWr0zlxZt+xqCHNTEKOsrks2HQd4MqhP1qy4z1SkbEP467eNx6TgDKXMvUOb+OENfJCZwM+16n7fRfw=="
205 | }
206 | }
207 | }
208 |
--------------------------------------------------------------------------------
/news_app/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | services:
4 |
5 | app:
6 | build: ./app
7 | restart: always
8 | ports:
9 | - "8080:3000"
10 | volumes:
11 | - ./app/app:/app
12 | environment:
13 | NODE_ENV: "development"
14 | CHOKIDAR_USEPOLLING: "true"
15 |
--------------------------------------------------------------------------------
/pandemic_knowledge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flavienbwk/Pandemic-Knowledge/e9082be16d743aac49c514034626721a608ede08/pandemic_knowledge.png
--------------------------------------------------------------------------------
/prefect/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8
2 |
3 | RUN apt-get update && apt-get install -y gcc
4 |
5 | RUN python3 -m pip install prefect
6 | COPY prefect.config /root/.prefect/config.toml
7 |
8 | ENTRYPOINT \
9 | prefect backend server && \
10 | prefect server create-tenant --name default --slug default
11 |
--------------------------------------------------------------------------------
/prefect/prefect.config:
--------------------------------------------------------------------------------
1 | # debug mode
2 | debug = true
3 |
4 | # base configuration directory (typically you won't change this!)
5 | home_dir = "~/.prefect"
6 |
7 | backend = "server"
8 |
9 | [server]
10 | host = "http://prefect_apollo"
11 | port = "4200"
12 | host_port = "4200"
13 | endpoint = "${server.host}:${server.port}"
14 |
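15 | # The endpoint above interpolates to "http://prefect_apollo:4200", i.e. the Apollo
16 | # GraphQL API of the Prefect Server stack that flows and agents talk to.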
--------------------------------------------------------------------------------